summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2023-12-30 04:37:25 +0100
committer: Liam, 2024-01-18 21:12:30 -0500
commit: 303cd311621b25fbb8d55e0ed2cc4c3248de44ad (patch)
tree: f52d147d935449c21e8a8edf8ad28272859d94c5 /src
parent: GPU-SMMU: Estimate game leak and preallocate device region. (diff)
download: yuzu-303cd311621b25fbb8d55e0ed2cc4c3248de44ad.tar.gz
yuzu-303cd311621b25fbb8d55e0ed2cc4c3248de44ad.tar.xz
yuzu-303cd311621b25fbb8d55e0ed2cc4c3248de44ad.zip
SMMU: Add Android compatibility
Diffstat (limited to 'src')
-rw-r--r-- src/core/device_memory_manager.inc | 7
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp | 6
-rw-r--r-- src/core/memory.cpp | 62
-rw-r--r-- src/core/memory.h | 4
-rw-r--r-- src/video_core/host1x/gpu_device_memory_manager.h | 2
-rw-r--r-- src/video_core/host1x/host1x.cpp | 2
-rw-r--r-- src/video_core/host1x/host1x.h | 1
-rw-r--r-- src/video_core/memory_manager.h | 2
-rw-r--r-- src/video_core/query_cache/query_cache.h | 6
9 files changed, 42 insertions, 50 deletions
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 138eb5017..4f883cece 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -217,9 +217,6 @@ DeviceMemoryManager<Traits>::DeviceMemoryManager(const DeviceMemory& device_memo
217 cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) { 217 cpu_backing_address(device_as_size >> Memory::YUZU_PAGEBITS) {
218 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>(); 218 impl = std::make_unique<DeviceMemoryManagerAllocator<Traits>>();
219 cached_pages = std::make_unique<CachedPages>(); 219 cached_pages = std::make_unique<CachedPages>();
220 for (size_t i = 0; i < 1ULL << (33 - 12); i++) {
221 compressed_device_addr[i] = 0;
222 }
223} 220}
224 221
225template <typename Traits> 222template <typename Traits>
@@ -517,7 +514,7 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
517 u64 cache_begin = 0; 514 u64 cache_begin = 0;
518 u64 uncache_bytes = 0; 515 u64 uncache_bytes = 0;
519 u64 cache_bytes = 0; 516 u64 cache_bytes = 0;
520 const auto* MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching; 517 const auto MarkRegionCaching = &DeviceMemoryManager<Traits>::DeviceMethods::MarkRegionCaching;
521 518
522 std::atomic_thread_fence(std::memory_order_acquire); 519 std::atomic_thread_fence(std::memory_order_acquire);
523 const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); 520 const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE);
@@ -577,4 +574,4 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size
577 } 574 }
578} 575}
579 576
580} // namespace Core \ No newline at end of file 577} // namespace Core
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 0b6aa9993..a50577c75 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -8,6 +8,7 @@
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/kernel/k_process.h"
11#include "core/hle/service/nvdrv/core/container.h" 12#include "core/hle/service/nvdrv/core/container.h"
12#include "core/hle/service/nvdrv/core/nvmap.h" 13#include "core/hle/service/nvdrv/core/nvmap.h"
13#include "core/hle/service/nvdrv/core/syncpoint_manager.h" 14#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
@@ -109,7 +110,7 @@ NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span<u8> data, De
109 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;); 110 ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
110 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); 111 Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count);
111 session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(), 112 session->process->GetMemory().ReadBlock(object->address + cmd_buffer.offset, cmdlist.data(),
112 cmdlist.size() * sizeof(u32)); 113 cmdlist.size() * sizeof(u32));
113 gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); 114 gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist);
114 } 115 }
115 // Some games expect command_buffers to be written back 116 // Some games expect command_buffers to be written back
@@ -135,7 +136,8 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) {
135 return NvResult::Success; 136 return NvResult::Success;
136} 137}
137 138
138NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries, DeviceFD fd) { 139NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span<MapBufferEntry> entries,
140 DeviceFD fd) {
139 const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size())); 141 const size_t num_entries = std::min(params.num_entries, static_cast<u32>(entries.size()));
140 for (size_t i = 0; i < num_entries; i++) { 142 for (size_t i = 0; i < num_entries; i++) {
141 DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true); 143 DAddr pin_address = nvmap.PinHandle(entries[i].map_handle, sessions[fd], true);
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f126840cb..1c218566f 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -44,8 +44,7 @@ bool AddressSpaceContains(const Common::PageTable& table, const Common::ProcessA
44// from outside classes. This also allows modification to the internals of the memory 44// from outside classes. This also allows modification to the internals of the memory
45// subsystem without needing to rebuild all files that make use of the memory interface. 45// subsystem without needing to rebuild all files that make use of the memory interface.
46struct Memory::Impl { 46struct Memory::Impl {
47 explicit Impl(Core::System& system_) 47 explicit Impl(Core::System& system_) : system{system_} {}
48 : system{system_} {}
49 48
50 void SetCurrentPageTable(Kernel::KProcess& process) { 49 void SetCurrentPageTable(Kernel::KProcess& process) {
51 current_page_table = &process.GetPageTable().GetImpl(); 50 current_page_table = &process.GetPageTable().GetImpl();
@@ -640,18 +639,6 @@ struct Memory::Impl {
640 LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target), 639 LOG_DEBUG(HW_Memory, "Mapping {:016X} onto {:016X}-{:016X}", GetInteger(target),
641 base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE); 640 base * YUZU_PAGESIZE, (base + size) * YUZU_PAGESIZE);
642 641
643 // During boot, current_page_table might not be set yet, in which case we need not flush
644 /*if (system.IsPoweredOn()) {
645 auto& gpu = system.GPU();
646 for (u64 i = 0; i < size; i++) {
647 const auto page = base + i;
648 if (page_table.pointers[page].Type() == Common::PageType::RasterizerCachedMemory) {
649
650 gpu.FlushAndInvalidateRegion(page << YUZU_PAGEBITS, YUZU_PAGESIZE);
651 }
652 }
653 }*/
654
655 const auto end = base + size; 642 const auto end = base + size;
656 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", 643 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
657 base + page_table.pointers.size()); 644 base + page_table.pointers.size());
@@ -823,8 +810,7 @@ struct Memory::Impl {
823 } 810 }
824 const size_t core = system.GetCurrentHostThreadID(); 811 const size_t core = system.GetCurrentHostThreadID();
825 auto& current_area = rasterizer_read_areas[core]; 812 auto& current_area = rasterizer_read_areas[core];
826 gpu_device_memory->ApplyOpOnPointer( 813 gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
827 p, scratch_buffers[core], [&](DAddr address) {
828 const DAddr end_address = address + size; 814 const DAddr end_address = address + size;
829 if (current_area.start_address <= address && end_address <= current_area.end_address) 815 if (current_area.start_address <= address && end_address <= current_area.end_address)
830 [[likely]] { 816 [[likely]] {
@@ -852,8 +838,7 @@ struct Memory::Impl {
852 sys_core_guard.unlock(); 838 sys_core_guard.unlock();
853 } 839 }
854 }); 840 });
855 gpu_device_memory->ApplyOpOnPointer( 841 gpu_device_memory->ApplyOpOnPointer(p, scratch_buffers[core], [&](DAddr address) {
856 p, scratch_buffers[core], [&](DAddr address) {
857 auto& current_area = rasterizer_write_areas[core]; 842 auto& current_area = rasterizer_write_areas[core];
858 PAddr subaddress = address >> YUZU_PAGEBITS; 843 PAddr subaddress = address >> YUZU_PAGEBITS;
859 bool do_collection = current_area.last_address == subaddress; 844 bool do_collection = current_area.last_address == subaddress;
@@ -872,12 +857,25 @@ struct Memory::Impl {
872 PAddr last_address; 857 PAddr last_address;
873 }; 858 };
874 859
875 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { 860 void InvalidateGPUMemory(u8* p, size_t size) {
876 system.GPU().InvalidateRegion(GetInteger(dest_addr), size); 861 constexpr size_t sys_core = Core::Hardware::NUM_CPU_CORES - 1;
877 } 862 const size_t core = std::min(system.GetCurrentHostThreadID(),
878 863 sys_core); // any other calls threads go to syscore.
879 void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { 864 if (!gpu_device_memory) [[unlikely]] {
880 system.GPU().FlushRegion(GetInteger(dest_addr), size); 865 gpu_device_memory = &system.Host1x().MemoryManager();
866 }
867 // Guard on sys_core;
868 if (core == sys_core) [[unlikely]] {
869 sys_core_guard.lock();
870 }
871 SCOPE_EXIT({
872 if (core == sys_core) [[unlikely]] {
873 sys_core_guard.unlock();
874 }
875 });
876 auto& gpu = system.GPU();
877 gpu_device_memory->ApplyOpOnPointer(
878 p, scratch_buffers[core], [&](DAddr address) { gpu.InvalidateRegion(address, size); });
881 } 879 }
882 880
883 Core::System& system; 881 Core::System& system;
@@ -1081,14 +1079,6 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
1081 impl->MarkRegionDebug(GetInteger(vaddr), size, debug); 1079 impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
1082} 1080}
1083 1081
1084void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
1085 impl->InvalidateRegion(dest_addr, size);
1086}
1087
1088void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
1089 impl->FlushRegion(dest_addr, size);
1090}
1091
1092bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) { 1082bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
1093 [[maybe_unused]] bool mapped = true; 1083 [[maybe_unused]] bool mapped = true;
1094 [[maybe_unused]] bool rasterizer = false; 1084 [[maybe_unused]] bool rasterizer = false;
@@ -1100,10 +1090,10 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
1100 GetInteger(vaddr)); 1090 GetInteger(vaddr));
1101 mapped = false; 1091 mapped = false;
1102 }, 1092 },
1103 [&] { 1093 [&] { rasterizer = true; });
1104 impl->system.GPU().InvalidateRegion(GetInteger(vaddr), size); 1094 if (rasterizer) {
1105 rasterizer = true; 1095 impl->InvalidateGPUMemory(ptr, size);
1106 }); 1096 }
1107 1097
1108#ifdef __linux__ 1098#ifdef __linux__
1109 if (!rasterizer && mapped) { 1099 if (!rasterizer && mapped) {
diff --git a/src/core/memory.h b/src/core/memory.h
index 47ca6a35a..9d29cfd3f 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -486,10 +486,10 @@ public:
486 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); 486 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
487 487
488 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); 488 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
489 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); 489
490 bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size); 490 bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);
491
491 bool InvalidateSeparateHeap(void* fault_address); 492 bool InvalidateSeparateHeap(void* fault_address);
492 void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
493 493
494private: 494private:
495 Core::System& system; 495 Core::System& system;
diff --git a/src/video_core/host1x/gpu_device_memory_manager.h b/src/video_core/host1x/gpu_device_memory_manager.h
index 6c7858848..9ccd84b9a 100644
--- a/src/video_core/host1x/gpu_device_memory_manager.h
+++ b/src/video_core/host1x/gpu_device_memory_manager.h
@@ -17,7 +17,7 @@ struct MaxwellDeviceTraits {
17 static constexpr bool supports_pinning = false; 17 static constexpr bool supports_pinning = false;
18 static constexpr size_t device_virtual_bits = 34; 18 static constexpr size_t device_virtual_bits = 34;
19 using DeviceInterface = typename VideoCore::RasterizerInterface; 19 using DeviceInterface = typename VideoCore::RasterizerInterface;
20 using DeviceMethods = typename MaxwellDeviceMethods; 20 using DeviceMethods = MaxwellDeviceMethods;
21}; 21};
22 22
23using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>; 23using MaxwellDeviceMemoryManager = Core::DeviceMemoryManager<MaxwellDeviceTraits>;
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
index b7f9a08cf..c4c7a5883 100644
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -13,6 +13,8 @@ Host1x::Host1x(Core::System& system_)
13 memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12}, 13 memory_manager(system.DeviceMemory()), gmmu_manager{system, memory_manager, 32, 12},
14 allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {} 14 allocator{std::make_unique<Common::FlatAllocator<u32, 0, 32>>(1 << 12)} {}
15 15
16Host1x::~Host1x() = default;
17
16} // namespace Host1x 18} // namespace Host1x
17 19
18} // namespace Tegra 20} // namespace Tegra
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
index 13c37e6b4..d72d97b7b 100644
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@@ -21,6 +21,7 @@ namespace Host1x {
21class Host1x { 21class Host1x {
22public: 22public:
23 explicit Host1x(Core::System& system); 23 explicit Host1x(Core::System& system);
24 ~Host1x();
24 25
25 SyncpointManager& GetSyncpointManager() { 26 SyncpointManager& GetSyncpointManager() {
26 return syncpoint_manager; 27 return syncpoint_manager;
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 6b2cd7efb..00d64dcce 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -68,7 +68,7 @@ public:
68 if (!address) { 68 if (!address) {
69 return {}; 69 return {};
70 } 70 }
71 return memory.GetPointer(*address); 71 return memory.GetPointer<T>(*address);
72 } 72 }
73 73
74 template <typename T> 74 template <typename T>
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 508afb10a..b5e90cf8c 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
256 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS, 256 return std::make_pair<u64, u32>(cur_addr >> Core::Memory::YUZU_PAGEBITS,
257 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK)); 257 static_cast<u32>(cur_addr & Core::Memory::YUZU_PAGEMASK));
258 }; 258 };
259 u8* pointer = impl->device_memory.GetPointer<u8>(cpu_addr); 259 u8* pointer = impl->device_memory.template GetPointer<u8>(cpu_addr);
260 u8* pointer_timestamp = impl->device_memory.GetPointer<u8>(cpu_addr + 8); 260 u8* pointer_timestamp = impl->device_memory.template GetPointer<u8>(cpu_addr + 8);
261 bool is_synced = !Settings::IsGPULevelHigh() && is_fence; 261 bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
262 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location, 262 std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
263 pointer, pointer_timestamp] { 263 pointer, pointer_timestamp] {
@@ -561,7 +561,7 @@ bool QueryCacheBase<Traits>::SemiFlushQueryDirty(QueryCacheBase<Traits>::QueryLo
561 } 561 }
562 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) && 562 if (True(query_base->flags & QueryFlagBits::IsFinalValueSynced) &&
563 False(query_base->flags & QueryFlagBits::IsGuestSynced)) { 563 False(query_base->flags & QueryFlagBits::IsGuestSynced)) {
564 auto* ptr = impl->device_memory.GetPointer<u8>(query_base->guest_address); 564 auto* ptr = impl->device_memory.template GetPointer<u8>(query_base->guest_address);
565 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { 565 if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
566 std::memcpy(ptr, &query_base->value, sizeof(query_base->value)); 566 std::memcpy(ptr, &query_base->value, sizeof(query_base->value));
567 return false; 567 return false;