diff options
| author | 2023-12-30 04:37:25 +0100 | |
|---|---|---|
| committer | 2024-01-18 21:12:30 -0500 | |
| commit | 303cd311621b25fbb8d55e0ed2cc4c3248de44ad (patch) | |
| tree | f52d147d935449c21e8a8edf8ad28272859d94c5 /src | |
| parent | GPU-SMMU: Estimate game leak and preallocate device region. (diff) | |
| download | yuzu-303cd311621b25fbb8d55e0ed2cc4c3248de44ad.tar.gz yuzu-303cd311621b25fbb8d55e0ed2cc4c3248de44ad.tar.xz yuzu-303cd311621b25fbb8d55e0ed2cc4c3248de44ad.zip | |
SMMU: Add Android compatibility
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/device_memory_manager.inc | 7 | ||||
| -rw-r--r-- | src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 6 | ||||
| -rw-r--r-- | src/core/memory.cpp | 62 | ||||
| -rw-r--r-- | src/core/memory.h | 4 | ||||
| -rw-r--r-- | src/video_core/host1x/gpu_device_memory_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/host1x/host1x.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/host1x/host1x.h | 1 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 2 | ||||
| -rw-r--r-- | src/video_core/query_cache/query_cache.h | 6 |
9 files changed, 42 insertions, 50 deletions
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 138eb5017..4f883cece 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc | |||
| @@ -217,9 +217,6 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo | |||
| 217 | cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { | 217 | cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { |
| 218 | impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); | 218 | impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); |
| 219 | cached_pages = std::make_unique<CachedPages>(); | 219 | cached_pages = std::make_unique<CachedPages>(); |
| 220 | for (size_t i = 0; i < 1ULL << (33 - 12); i++) { | ||
| 221 | compressed_device_addr[i] = 0; | ||
| 222 | } | ||
| 223 | } | 220 | } |
| 224 | 221 | ||
| 225 | template <typename Traits> | 222 | template <typename Traits> |
| @@ -517,7 +514,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
| 517 | u64 cache_begin = 0; | 514 | u64 cache_begin = 0; |
| 518 | u64 uncache_bytes = 0; | 515 | u64 uncache_bytes = 0; |
| 519 | u64 cache_bytes = 0; | 516 | u64 cache_bytes = 0; |
| 520 | const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching; | 517 | const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching; |
| 521 | 518 | ||
| 522 | std::atomic_thread_fence(std::memory_order_acquire); | 519 | std::atomic_thread_fence(std::memory_order_acquire); |
| 523 | const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); | 520 | const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); |
| @@ -577,4 +574,4 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | |||
| 577 | } | 574 | } |
| 578 | } | 575 | } |
| 579 | 576 | ||
| 580 | } // namespace Core \ No newline at end of file | 577 | } // namespace Core |
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 0b6aa9993..a50577c75 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/hle/kernel/k_process.h" | ||
| 11 | #include "core/hle/service/nvdrv/core/container.h" | 12 | #include "core/hle/service/nvdrv/core/container.h" |
| 12 | #include "core/hle/service/nvdrv/core/nvmap.h" | 13 | #include "core/hle/service/nvdrv/core/nvmap.h" |
| 13 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" | 14 | #include "core/hle/service/nvdrv/core/syncpoint_manager.h" |
| @@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De | |||
| 109 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); | 110 | ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); |
| 110 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); | 111 | Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); |
| 111 | session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), | 112 | session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), |
| 112 | cmdlist.size() * sizeof(u32)); | 113 | cmdlist.size() * sizeof(u32)); |
| 113 | gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); | 114 | gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); |
| 114 | } | 115 | } |
| 115 | // Some games expect command_buffers to be written back | 116 | // Some games expect command_buffers to be written back |
| @@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { | |||
| 135 | return NvResult::Success; | 136 | return NvResult::Success; |
| 136 | } | 137 | } |
| 137 | 138 | ||
| 138 | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { | 139 | NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, |
| 140 | DeviceFD fd) { | ||
| 139 | const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); | 141 | const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); |
| 140 | for (size_t i = 0; i < num_entries; i++) { | 142 | for (size_t i = 0; i < num_entries; i++) { |
| 141 | DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); | 143 | DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f126840cb..1c218566f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA | |||
| 44 | // from outside classes. This also allows modification to the internals of the memory | 44 | // from outside classes. This also allows modification to the internals of the memory |
| 45 | // subsystem without needing to rebuild all files that make use of the memory interface. | 45 | // subsystem without needing to rebuild all files that make use of the memory interface. |
| 46 | struct Memory::Impl { | 46 | struct Memory::Impl { |
| 47 | explicit Impl(Core::System& system_) | 47 | explicit Impl(Core::System& system_) : system{system_} {} |
| 48 | : system{system_} {} | ||
| 49 | 48 | ||
| 50 | void SetCurrentPageTable(Kernel::KProcess& process) { | 49 | void SetCurrentPageTable(Kernel::KProcess& process) { |
| 51 | current_page_table = &process.GetPageTable().GetImpl(); | 50 | current_page_table = &process.GetPageTable().GetImpl(); |
| @@ -640,18 +639,6 @@ struct Memory::Impl { | |||
| 640 | LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), | 639 | LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), |
| 641 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); | 640 | base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); |
| 642 | 641 | ||
| 643 | // During boot, current_page_table might not be set yet, in which case we need not flush | ||
| 644 | /*if (system.IsPoweredOn()) { | ||
| 645 | auto& gpu = system.GPU(); | ||
| 646 | for (u64 i = 0; i < size; i++) { | ||
| 647 | const auto page = base + i; | ||
| 648 | if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) { | ||
| 649 | |||
| 650 | gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE); | ||
| 651 | } | ||
| 652 | } | ||
| 653 | }*/ | ||
| 654 | |||
| 655 | const auto end = base + size; | 642 | const auto end = base + size; |
| 656 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | 643 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| 657 | base + page_table.pointers.size()); | 644 | base + page_table.pointers.size()); |
| @@ -823,8 +810,7 @@ struct Memory::Impl { | |||
| 823 | } | 810 | } |
| 824 | const size_t core = system.GetCurrentHostThreadID(); | 811 | const size_t core = system.GetCurrentHostThreadID(); |
| 825 | auto& current_area = rasterizer_read_areas[core]; | 812 | auto& current_area = rasterizer_read_areas[core]; |
| 826 | gpu_device_memory->ApplyOpOnPointer( | 813 | gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { |
| 827 | p, scratch_buffers[core], [&](DAddr address) { | ||
| 828 | const DAddr end_address = address + size; | 814 | const DAddr end_address = address + size; |
| 829 | if (current_area.start_address <= address && end_address <= current_area.end_address) | 815 | if (current_area.start_address <= address && end_address <= current_area.end_address) |
| 830 | [[likely]] { | 816 | [[likely]] { |
| @@ -852,8 +838,7 @@ struct Memory::Impl { | |||
| 852 | sys_core_guard.unlock(); | 838 | sys_core_guard.unlock(); |
| 853 | } | 839 | } |
| 854 | }); | 840 | }); |
| 855 | gpu_device_memory->ApplyOpOnPointer( | 841 | gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) { |
| 856 | p, scratch_buffers[core], [&](DAddr address) { | ||
| 857 | auto& current_area = rasterizer_write_areas[core]; | 842 | auto& current_area = rasterizer_write_areas[core]; |
| 858 | PAddr subaddress = address >> YUZU_PAGEBITS; | 843 | PAddr subaddress = address >> YUZU_PAGEBITS; |
| 859 | bool do_collection = current_area.last_address == subaddress; | 844 | bool do_collection = current_area.last_address == subaddress; |
| @@ -872,12 +857,25 @@ struct Memory::Impl { | |||
| 872 | PAddr last_address; | 857 | PAddr last_address; |
| 873 | }; | 858 | }; |
| 874 | 859 | ||
| 875 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | 860 | void InvalidateGPUMemory(u8* p, size_t size) { |
| 876 | system.GPU().InvalidateRegion(GetInteger(dest_addr), size); | 861 | constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1; |
| 877 | } | 862 | const size_t core = std::min(system.GetCurrentHostThreadID(), |
| 878 | 863 | sys_core); // any other calls threads go to syscore. | |
| 879 | void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | 864 | if (!gpu_device_memory) [[unlikely]] { |
| 880 | system.GPU().FlushRegion(GetInteger(dest_addr), size); | 865 | gpu_device_memory = &system.Host1x().MemoryManager(); |
| 866 | } | ||
| 867 | // Guard on sys_core; | ||
| 868 | if (core == sys_core) [[unlikely]] { | ||
| 869 | sys_core_guard.lock(); | ||
| 870 | } | ||
| 871 | SCOPE_EXIT({ | ||
| 872 | if (core == sys_core) [[unlikely]] { | ||
| 873 | sys_core_guard.unlock(); | ||
| 874 | } | ||
| 875 | }); | ||
| 876 | auto& gpu = system.GPU(); | ||
| 877 | gpu_device_memory->ApplyOpOnPointer( | ||
| 878 | p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); }); | ||
| 881 | } | 879 | } |
| 882 | 880 | ||
| 883 | Core::System& system; | 881 | Core::System& system; |
| @@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) | |||
| 1081 | impl->MarkRegionDebug(GetInteger(vaddr), size, debug); | 1079 | impl->MarkRegionDebug(GetInteger(vaddr), size, debug); |
| 1082 | } | 1080 | } |
| 1083 | 1081 | ||
| 1084 | void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { | ||
| 1085 | impl->InvalidateRegion(dest_addr, size); | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { | ||
| 1089 | impl->FlushRegion(dest_addr, size); | ||
| 1090 | } | ||
| 1091 | |||
| 1092 | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | 1082 | bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { |
| 1093 | [[maybe_unused]] bool mapped = true; | 1083 | [[maybe_unused]] bool mapped = true; |
| 1094 | [[maybe_unused]] bool rasterizer = false; | 1084 | [[maybe_unused]] bool rasterizer = false; |
| @@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { | |||
| 1100 | GetInteger(vaddr)); | 1090 | GetInteger(vaddr)); |
| 1101 | mapped = false; | 1091 | mapped = false; |
| 1102 | }, | 1092 | }, |
| 1103 | [&] { | 1093 | [&] { rasterizer = true; }); |
| 1104 | impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); | 1094 | if (rasterizer) { |
| 1105 | rasterizer = true; | 1095 | impl->InvalidateGPUMemory(ptr, size); |
| 1106 | }); | 1096 | } |
| 1107 | 1097 | ||
| 1108 | #ifdef __linux__ | 1098 | #ifdef __linux__ |
| 1109 | if (!rasterizer && mapped) { | 1099 | if (!rasterizer && mapped) { |
diff --git a/src/core/memory.h b/src/core/memory.h index 47ca6a35a..9d29cfd3f 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -486,10 +486,10 @@ public: | |||
| 486 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | 486 | void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |
| 487 | 487 | ||
| 488 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | 488 | void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); |
| 489 | void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); | 489 | |
| 490 | bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); | 490 | bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); |
| 491 | |||
| 491 | bool InvalidateSeparateHeap(void* fault_address); | 492 | bool InvalidateSeparateHeap(void* fault_address); |
| 492 | void FlushRegion(Common::ProcessAddress dest_addr, size_t size); | ||
| 493 | 493 | ||
| 494 | private: | 494 | private: |
| 495 | Core::System& system; | 495 | Core::System& system; |
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h index 6c7858848..9ccd84b9a 100644 --- a/src/video_core/host1x/gpu_device_memory_manager.h +++ b/src/video_core/host1x/gpu_device_memory_manager.h | |||
| @@ -17,7 +17,7 @@ struct MaxwellDeviceTraits { | |||
| 17 | static constexpr bool supports_pinning = false; | 17 | static constexpr bool supports_pinning = false; |
| 18 | static constexpr size_t device_virtual_bits = 34; | 18 | static constexpr size_t device_virtual_bits = 34; |
| 19 | using DeviceInterface = typename VideoCore::RasterizerInterface; | 19 | using DeviceInterface = typename VideoCore::RasterizerInterface; |
| 20 | using DeviceMethods = typename MaxwellDeviceMethods; | 20 | using DeviceMethods = MaxwellDeviceMethods; |
| 21 | }; | 21 | }; |
| 22 | 22 | ||
| 23 | using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; | 23 | using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; |
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index b7f9a08cf..c4c7a5883 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp | |||
| @@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_) | |||
| 13 | memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, | 13 | memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, |
| 14 | allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} | 14 | allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} |
| 15 | 15 | ||
| 16 | Host1x::~Host1x() = default; | ||
| 17 | |||
| 16 | } // namespace Host1x | 18 | } // namespace Host1x |
| 17 | 19 | ||
| 18 | } // namespace Tegra | 20 | } // namespace Tegra |
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 13c37e6b4..d72d97b7b 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h | |||
| @@ -21,6 +21,7 @@ namespace Host1x { | |||
| 21 | class Host1x { | 21 | class Host1x { |
| 22 | public: | 22 | public: |
| 23 | explicit Host1x(Core::System& system); | 23 | explicit Host1x(Core::System& system); |
| 24 | ~Host1x(); | ||
| 24 | 25 | ||
| 25 | SyncpointManager& GetSyncpointManager() { | 26 | SyncpointManager& GetSyncpointManager() { |
| 26 | return syncpoint_manager; | 27 | return syncpoint_manager; |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 6b2cd7efb..00d64dcce 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -68,7 +68,7 @@ public: | |||
| 68 | if (!address) { | 68 | if (!address) { |
| 69 | return {}; | 69 | return {}; |
| 70 | } | 70 | } |
| 71 | return memory.GetPointer(*address); | 71 | return memory.GetPointer<T>(*address); |
| 72 | } | 72 | } |
| 73 | 73 | ||
| 74 | template <typename T> | 74 | template <typename T> |
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index 508afb10a..b5e90cf8c 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h | |||
| @@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type | |||
| 256 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, | 256 | return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, |
| 257 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); | 257 | static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); |
| 258 | }; | 258 | }; |
| 259 | u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr); | 259 | u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr); |
| 260 | u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8); | 260 | u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8); |
| 261 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; | 261 | bool is_synced = !Settings::IsGPULevelHigh() && is_fence; |
| 262 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, | 262 | std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, |
| 263 | pointer, pointer_timestamp] { | 263 | pointer, pointer_timestamp] { |
| @@ -561,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo | |||
| 561 | } | 561 | } |
| 562 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && | 562 | if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && |
| 563 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { | 563 | False(query_base->flags & QueryFlagBits::IsGuestSynced)) { |
| 564 | auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address); | 564 | auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address); |
| 565 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { | 565 | if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { |
| 566 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); | 566 | std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); |
| 567 | return false; | 567 | return false; |