diff options
| author | 2022-11-17 16:36:53 +0100 | |
|---|---|---|
| committer | 2023-01-01 16:43:57 -0500 | |
| commit | 18637766efd1ff9a0c22967553983cfda69c96ca (patch) | |
| tree | 142d4cab2de27ce250d246f0355fec97748a7bcf /src/video_core | |
| parent | MacroHLE: Add HLE replacement for base vertex and base instance. (diff) | |
| download | yuzu-18637766efd1ff9a0c22967553983cfda69c96ca.tar.gz yuzu-18637766efd1ff9a0c22967553983cfda69c96ca.tar.xz yuzu-18637766efd1ff9a0c22967553983cfda69c96ca.zip | |
MacroHLE: Reduce massive calculations on sizing estimation.
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 2 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 91 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 11 |
5 files changed, 27 insertions, 95 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 5ad40abaa..7a82355da 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -75,7 +75,8 @@ bool DmaPusher::Step() { | |||
| 75 | 75 | ||
| 76 | // Push buffer non-empty, read a word | 76 | // Push buffer non-empty, read a word |
| 77 | command_headers.resize_destructive(command_list_header.size); | 77 | command_headers.resize_destructive(command_list_header.size); |
| 78 | if (Settings::IsGPULevelExtreme()) { | 78 | constexpr u32 MacroRegistersStart = 0xE00; |
| 79 | if (dma_state.method < MacroRegistersStart) { | ||
| 79 | memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), | 80 | memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), |
| 80 | command_list_header.size * sizeof(u32)); | 81 | command_list_header.size * sizeof(u32)); |
| 81 | } else { | 82 | } else { |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 50d8a94b1..a9fd6d960 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -157,6 +157,21 @@ void Maxwell3D::RefreshParameters() { | |||
| 157 | } | 157 | } |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | bool Maxwell3D::AnyParametersDirty() { | ||
| 161 | size_t current_index = 0; | ||
| 162 | for (auto& segment : macro_segments) { | ||
| 163 | if (segment.first == 0) { | ||
| 164 | current_index += segment.second; | ||
| 165 | continue; | ||
| 166 | } | ||
| 167 | if (memory_manager.IsMemoryDirty(segment.first, sizeof(u32) * segment.second)) { | ||
| 168 | return true; | ||
| 169 | } | ||
| 170 | current_index += segment.second; | ||
| 171 | } | ||
| 172 | return false; | ||
| 173 | } | ||
| 174 | |||
| 160 | u32 Maxwell3D::GetMaxCurrentVertices() { | 175 | u32 Maxwell3D::GetMaxCurrentVertices() { |
| 161 | u32 num_vertices = 0; | 176 | u32 num_vertices = 0; |
| 162 | for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { | 177 | for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 397e88f67..cd996413c 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -3092,6 +3092,8 @@ public: | |||
| 3092 | 3092 | ||
| 3093 | void RefreshParameters(); | 3093 | void RefreshParameters(); |
| 3094 | 3094 | ||
| 3095 | bool AnyParametersDirty(); | ||
| 3096 | |||
| 3095 | u32 GetMaxCurrentVertices(); | 3097 | u32 GetMaxCurrentVertices(); |
| 3096 | 3098 | ||
| 3097 | size_t EstimateIndexBufferSize(); | 3099 | size_t EstimateIndexBufferSize(); |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 11e7d225e..4fcae9909 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -25,7 +25,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 25 | address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, | 25 | address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_}, |
| 26 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | 26 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |
| 27 | page_bits != big_page_bits ? page_bits : 0}, | 27 | page_bits != big_page_bits ? page_bits : 0}, |
| 28 | unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} { | 28 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |
| 29 | 1, std::memory_order_acq_rel)} { | ||
| 29 | address_space_size = 1ULL << address_space_bits; | 30 | address_space_size = 1ULL << address_space_bits; |
| 30 | page_size = 1ULL << page_bits; | 31 | page_size = 1ULL << page_bits; |
| 31 | page_mask = page_size - 1ULL; | 32 | page_mask = page_size - 1ULL; |
| @@ -41,11 +42,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 41 | big_entries.resize(big_page_table_size / 32, 0); | 42 | big_entries.resize(big_page_table_size / 32, 0); |
| 42 | big_page_table_cpu.resize(big_page_table_size); | 43 | big_page_table_cpu.resize(big_page_table_size); |
| 43 | big_page_continous.resize(big_page_table_size / continous_bits, 0); | 44 | big_page_continous.resize(big_page_table_size / continous_bits, 0); |
| 44 | std::array<PTEKind, 32> kind_valus; | ||
| 45 | kind_valus.fill(PTEKind::INVALID); | ||
| 46 | big_kinds.resize(big_page_table_size / 32, kind_valus); | ||
| 47 | entries.resize(page_table_size / 32, 0); | 45 | entries.resize(page_table_size / 32, 0); |
| 48 | kinds.resize(page_table_size / 32, kind_valus); | ||
| 49 | } | 46 | } |
| 50 | 47 | ||
| 51 | MemoryManager::~MemoryManager() = default; | 48 | MemoryManager::~MemoryManager() = default; |
| @@ -83,38 +80,7 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) { | |||
| 83 | } | 80 | } |
| 84 | 81 | ||
| 85 | PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { | 82 | PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const { |
| 86 | auto entry = GetEntry<true>(gpu_addr); | 83 | return kind_map.GetValueAt(gpu_addr); |
| 87 | if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] { | ||
| 88 | return GetKind<true>(gpu_addr); | ||
| 89 | } else { | ||
| 90 | return GetKind<false>(gpu_addr); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | |||
| 94 | template <bool is_big_page> | ||
| 95 | PTEKind MemoryManager::GetKind(size_t position) const { | ||
| 96 | if constexpr (is_big_page) { | ||
| 97 | position = position >> big_page_bits; | ||
| 98 | const size_t sub_index = position % 32; | ||
| 99 | return big_kinds[position / 32][sub_index]; | ||
| 100 | } else { | ||
| 101 | position = position >> page_bits; | ||
| 102 | const size_t sub_index = position % 32; | ||
| 103 | return kinds[position / 32][sub_index]; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 107 | template <bool is_big_page> | ||
| 108 | void MemoryManager::SetKind(size_t position, PTEKind kind) { | ||
| 109 | if constexpr (is_big_page) { | ||
| 110 | position = position >> big_page_bits; | ||
| 111 | const size_t sub_index = position % 32; | ||
| 112 | big_kinds[position / 32][sub_index] = kind; | ||
| 113 | } else { | ||
| 114 | position = position >> page_bits; | ||
| 115 | const size_t sub_index = position % 32; | ||
| 116 | kinds[position / 32][sub_index] = kind; | ||
| 117 | } | ||
| 118 | } | 84 | } |
| 119 | 85 | ||
| 120 | inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const { | 86 | inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const { |
| @@ -141,7 +107,6 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 141 | const GPUVAddr current_gpu_addr = gpu_addr + offset; | 107 | const GPUVAddr current_gpu_addr = gpu_addr + offset; |
| 142 | [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); | 108 | [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); |
| 143 | SetEntry<false>(current_gpu_addr, entry_type); | 109 | SetEntry<false>(current_gpu_addr, entry_type); |
| 144 | SetKind<false>(current_gpu_addr, kind); | ||
| 145 | if (current_entry_type != entry_type) { | 110 | if (current_entry_type != entry_type) { |
| 146 | rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); | 111 | rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); |
| 147 | } | 112 | } |
| @@ -153,6 +118,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 153 | } | 118 | } |
| 154 | remaining_size -= page_size; | 119 | remaining_size -= page_size; |
| 155 | } | 120 | } |
| 121 | kind_map.Map(gpu_addr, gpu_addr + size, kind); | ||
| 156 | return gpu_addr; | 122 | return gpu_addr; |
| 157 | } | 123 | } |
| 158 | 124 | ||
| @@ -164,7 +130,6 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
| 164 | const GPUVAddr current_gpu_addr = gpu_addr + offset; | 130 | const GPUVAddr current_gpu_addr = gpu_addr + offset; |
| 165 | [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); | 131 | [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); |
| 166 | SetEntry<true>(current_gpu_addr, entry_type); | 132 | SetEntry<true>(current_gpu_addr, entry_type); |
| 167 | SetKind<true>(current_gpu_addr, kind); | ||
| 168 | if (current_entry_type != entry_type) { | 133 | if (current_entry_type != entry_type) { |
| 169 | rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); | 134 | rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); |
| 170 | } | 135 | } |
| @@ -193,6 +158,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
| 193 | } | 158 | } |
| 194 | remaining_size -= big_page_size; | 159 | remaining_size -= big_page_size; |
| 195 | } | 160 | } |
| 161 | kind_map.Map(gpu_addr, gpu_addr + size, kind); | ||
| 196 | return gpu_addr; | 162 | return gpu_addr; |
| 197 | } | 163 | } |
| 198 | 164 | ||
| @@ -578,52 +544,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { | |||
| 578 | } | 544 | } |
| 579 | 545 | ||
| 580 | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { | 546 | size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { |
| 581 | PTEKind base_kind = GetPageKind(gpu_addr); | 547 | return kind_map.GetContinousSizeFrom(gpu_addr); |
| 582 | if (base_kind == PTEKind::INVALID) { | ||
| 583 | return 0; | ||
| 584 | } | ||
| 585 | size_t range_so_far = 0; | ||
| 586 | bool result{false}; | ||
| 587 | auto fail = [&]([[maybe_unused]] std::size_t page_index, [[maybe_unused]] std::size_t offset, | ||
| 588 | std::size_t copy_amount) { | ||
| 589 | result = true; | ||
| 590 | return true; | ||
| 591 | }; | ||
| 592 | auto short_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 593 | PTEKind base_kind_other = GetKind<false>((page_index << page_bits) + offset); | ||
| 594 | if (base_kind != base_kind_other) { | ||
| 595 | result = true; | ||
| 596 | return true; | ||
| 597 | } | ||
| 598 | range_so_far += copy_amount; | ||
| 599 | if (range_so_far >= max_size) { | ||
| 600 | result = true; | ||
| 601 | return true; | ||
| 602 | } | ||
| 603 | return false; | ||
| 604 | }; | ||
| 605 | auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||
| 606 | PTEKind base_kind_other = GetKind<true>((page_index << big_page_bits) + offset); | ||
| 607 | if (base_kind != base_kind_other) { | ||
| 608 | result = true; | ||
| 609 | return true; | ||
| 610 | } | ||
| 611 | range_so_far += copy_amount; | ||
| 612 | if (range_so_far >= max_size) { | ||
| 613 | result = true; | ||
| 614 | return true; | ||
| 615 | } | ||
| 616 | return false; | ||
| 617 | }; | ||
| 618 | auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||
| 619 | std::size_t copy_amount) { | ||
| 620 | GPUVAddr base = (page_index << big_page_bits) + offset; | ||
| 621 | MemoryOperation<false>(base, copy_amount, short_check, fail, fail); | ||
| 622 | return result; | ||
| 623 | }; | ||
| 624 | MemoryOperation<true>(gpu_addr, address_space_size - gpu_addr, big_check, fail, | ||
| 625 | check_short_pages); | ||
| 626 | return range_so_far; | ||
| 627 | } | 548 | } |
| 628 | 549 | ||
| 629 | void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { | 550 | void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const { |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index ca22520d7..50043a8ae 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/multi_level_page_table.h" | 12 | #include "common/multi_level_page_table.h" |
| 13 | #include "common/range_map.h" | ||
| 13 | #include "common/virtual_buffer.h" | 14 | #include "common/virtual_buffer.h" |
| 14 | #include "video_core/pte_kind.h" | 15 | #include "video_core/pte_kind.h" |
| 15 | 16 | ||
| @@ -186,16 +187,8 @@ private: | |||
| 186 | template <bool is_big_page> | 187 | template <bool is_big_page> |
| 187 | inline void SetEntry(size_t position, EntryType entry); | 188 | inline void SetEntry(size_t position, EntryType entry); |
| 188 | 189 | ||
| 189 | std::vector<std::array<PTEKind, 32>> kinds; | ||
| 190 | std::vector<std::array<PTEKind, 32>> big_kinds; | ||
| 191 | |||
| 192 | template <bool is_big_page> | ||
| 193 | inline PTEKind GetKind(size_t position) const; | ||
| 194 | |||
| 195 | template <bool is_big_page> | ||
| 196 | inline void SetKind(size_t position, PTEKind kind); | ||
| 197 | |||
| 198 | Common::MultiLevelPageTable<u32> page_table; | 190 | Common::MultiLevelPageTable<u32> page_table; |
| 191 | Common::RangeMap<GPUVAddr, PTEKind> kind_map; | ||
| 199 | Common::VirtualBuffer<u32> big_page_table_cpu; | 192 | Common::VirtualBuffer<u32> big_page_table_cpu; |
| 200 | 193 | ||
| 201 | std::vector<u64> big_page_continous; | 194 | std::vector<u64> big_page_continous; |