-rw-r--r--  src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp   12
-rw-r--r--  src/video_core/CMakeLists.txt                            1
-rw-r--r--  src/video_core/engines/maxwell_dma.cpp                 127
-rw-r--r--  src/video_core/engines/maxwell_dma.h                     2
-rw-r--r--  src/video_core/memory_manager.cpp                       61
-rw-r--r--  src/video_core/memory_manager.h                         21
-rw-r--r--  src/video_core/pte_kind.h                              264
7 files changed, 415 insertions(+), 73 deletions(-)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 6411dbf43..b635e6ed1 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -311,7 +311,8 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
                 handle->address +
                 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};
 
-            gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
+            gmmu->Map(virtual_address, cpu_address, size, static_cast<Tegra::PTEKind>(entry.kind),
+                      use_big_pages);
         }
     }
 
@@ -350,7 +351,8 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
         u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
         VAddr cpu_address{mapping->ptr + params.buffer_offset};
 
-        gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);
+        gmmu->Map(gpu_address, cpu_address, params.mapping_size,
+                  static_cast<Tegra::PTEKind>(params.kind), mapping->big_page);
 
         return NvResult::Success;
     } catch (const std::out_of_range&) {
@@ -389,7 +391,8 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
         }
 
         const bool use_big_pages = alloc->second.big_pages && big_page;
-        gmmu->Map(params.offset, cpu_address, size, use_big_pages);
+        gmmu->Map(params.offset, cpu_address, size, static_cast<Tegra::PTEKind>(params.kind),
+                  use_big_pages);
 
         auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
                                                use_big_pages, alloc->second.sparse)};
@@ -409,7 +412,8 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             return NvResult::InsufficientMemory;
         }
 
-        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);
+        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size),
+                  static_cast<Tegra::PTEKind>(params.kind), big_page);
 
         auto mapping{
             std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
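
All four gmmu->Map() call sites above forward the raw kind value carried by the
nvdrv ioctl structures (entry.kind, params.kind) into the GMMU by narrowing it
to the Tegra::PTEKind enum. A minimal standalone sketch of that cast pattern;
MapParams and its field width are illustrative stand-ins, not the real ioctl
layout:

    #include <cstdint>

    // Abridged: the full enum lives in src/video_core/pte_kind.h (added below).
    enum class PTEKind : std::uint8_t { INVALID = 0xff, PITCH = 0x00 };

    // Hypothetical stand-in for an nvdrv parameter block carrying a raw kind field.
    struct MapParams {
        std::uint64_t kind;
    };

    PTEKind KindFromParams(const MapParams& params) {
        // Narrowing to the u8 underlying type truncates out-of-range values.
        return static_cast<PTEKind>(params.kind);
    }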
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 40e6d1ec4..cb8b46edf 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -82,6 +82,7 @@ add_library(video_core STATIC
     gpu_thread.h
     memory_manager.cpp
     memory_manager.h
+    pte_kind.h
     query_cache.h
     rasterizer_accelerated.cpp
     rasterizer_accelerated.h
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 3909d36c1..4eb7a100d 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -56,66 +56,85 @@ void MaxwellDMA::Launch() {
     ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE);
     ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED);
 
-    const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-    const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
-
-    if (!is_src_pitch && !is_dst_pitch) {
-        // If both the source and the destination are in block layout, assert.
-        UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
-        return;
-    }
-
-    if (is_src_pitch && is_dst_pitch) {
-        CopyPitchToPitch();
-    } else {
-        ASSERT(launch.multi_line_enable == 1);
-
-        if (!is_src_pitch && is_dst_pitch) {
-            CopyBlockLinearToPitch();
-        } else {
-            CopyPitchToBlockLinear();
-        }
-    }
-    ReleaseSemaphore();
-}
-
-void MaxwellDMA::CopyPitchToPitch() {
-    // When `multi_line_enable` bit is enabled we copy a 2D image of dimensions
-    // (line_length_in, line_count).
-    // Otherwise the copy is performed as if we were copying a 1D buffer of length line_length_in.
-    const bool remap_enabled = regs.launch_dma.remap_enable != 0;
-    if (regs.launch_dma.multi_line_enable) {
-        UNIMPLEMENTED_IF(remap_enabled);
-
-        // Perform a line-by-line copy.
-        // We're going to take a subrect of size (line_length_in, line_count) from the source
-        // rectangle. There is no need to manually flush/invalidate the regions because CopyBlock
-        // does that for us.
-        for (u32 line = 0; line < regs.line_count; ++line) {
-            const GPUVAddr source_line = regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
-            const GPUVAddr dest_line = regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
-            memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
-        }
-        return;
-    }
-    // TODO: allow multisized components.
-    auto& accelerate = rasterizer->AccessAccelerateDMA();
-    const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
-    const bool is_buffer_clear = remap_enabled && is_const_a_dst;
-    if (is_buffer_clear) {
-        ASSERT(regs.remap_const.component_size_minus_one == 3);
-        accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
-        std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
-        memory_manager.WriteBlockUnsafe(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()),
-                                        regs.line_length_in * sizeof(u32));
-        return;
-    }
-    UNIMPLEMENTED_IF(remap_enabled);
-    if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
-        std::vector<u8> tmp_buffer(regs.line_length_in);
-        memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in);
-        memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in);
-    }
-}
+    if (launch.multi_line_enable) {
+        const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+        const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
+
+        if (!is_src_pitch && !is_dst_pitch) {
+            // If both the source and the destination are in block layout, assert.
+            UNIMPLEMENTED_MSG("Tiled->Tiled DMA transfers are not yet implemented");
+            return;
+        }
+
+        if (is_src_pitch && is_dst_pitch) {
+            for (u32 line = 0; line < regs.line_count; ++line) {
+                const GPUVAddr source_line =
+                    regs.offset_in + static_cast<size_t>(line) * regs.pitch_in;
+                const GPUVAddr dest_line =
+                    regs.offset_out + static_cast<size_t>(line) * regs.pitch_out;
+                memory_manager.CopyBlock(dest_line, source_line, regs.line_length_in);
+            }
+        } else {
+            if (!is_src_pitch && is_dst_pitch) {
+                CopyBlockLinearToPitch();
+            } else {
+                CopyPitchToBlockLinear();
+            }
+        }
+    } else {
+        // TODO: allow multisized components.
+        auto& accelerate = rasterizer->AccessAccelerateDMA();
+        const bool is_const_a_dst = regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A;
+        if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
+            ASSERT(regs.remap_const.component_size_minus_one == 3);
+            accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
+            std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+            memory_manager.WriteBlockUnsafe(regs.offset_out,
+                                            reinterpret_cast<u8*>(tmp_buffer.data()),
+                                            regs.line_length_in * sizeof(u32));
+        } else {
+            auto convert_linear_2_blocklinear_addr = [](u64 address) {
+                return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
+                       ((address & 0x180) >> 1) | ((address & 0x20) << 3);
+            };
+            auto src_kind = memory_manager.GetPageKind(regs.offset_in);
+            auto dst_kind = memory_manager.GetPageKind(regs.offset_out);
+            const bool is_src_pitch = IsPitchKind(static_cast<PTEKind>(src_kind));
+            const bool is_dst_pitch = IsPitchKind(static_cast<PTEKind>(dst_kind));
+            if (!is_src_pitch && is_dst_pitch) {
+                std::vector<u8> tmp_buffer(regs.line_length_in);
+                std::vector<u8> dst_buffer(regs.line_length_in);
+                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                               regs.line_length_in);
+                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+                    dst_buffer[offset] =
+                        tmp_buffer[convert_linear_2_blocklinear_addr(regs.offset_in + offset) -
+                                   regs.offset_in];
+                }
+                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+            } else if (is_src_pitch && !is_dst_pitch) {
+                std::vector<u8> tmp_buffer(regs.line_length_in);
+                std::vector<u8> dst_buffer(regs.line_length_in);
+                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                               regs.line_length_in);
+                for (u32 offset = 0; offset < regs.line_length_in; ++offset) {
+                    dst_buffer[convert_linear_2_blocklinear_addr(regs.offset_out + offset) -
+                               regs.offset_out] = tmp_buffer[offset];
+                }
+                memory_manager.WriteBlock(regs.offset_out, dst_buffer.data(), regs.line_length_in);
+            } else {
+                if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
+                    std::vector<u8> tmp_buffer(regs.line_length_in);
+                    memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
+                                                   regs.line_length_in);
+                    memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(),
+                                              regs.line_length_in);
+                }
+            }
+        }
+    }
+
+    ReleaseSemaphore();
+}
 
 void MaxwellDMA::CopyBlockLinearToPitch() {
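
The convert_linear_2_blocklinear_addr lambda added above only touches address
bits 4 through 8: it permutes the 16-byte sectors inside each aligned 512-byte
window (one GOB, if the usual Maxwell tiling granularity is assumed) and passes
every other bit through. A standalone sketch that verifies the transform is a
pure per-window permutation:

    #include <cassert>
    #include <cstdint>

    // Same bit manipulation as the lambda in MaxwellDMA::Launch() above.
    constexpr std::uint64_t ConvertLinearToBlockLinear(std::uint64_t address) {
        return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
               ((address & 0x180) >> 1) | ((address & 0x20) << 3);
    }

    int main() {
        // Each of the 32 sectors in a 512-byte window must land on a distinct
        // 16-byte-aligned sector of the same window.
        bool taken[32] = {};
        for (std::uint64_t sector = 0; sector < 32; ++sector) {
            const std::uint64_t target = ConvertLinearToBlockLinear(sector * 16);
            assert(target < 512 && target % 16 == 0);
            assert(!taken[target / 16]);
            taken[target / 16] = true;
        }
        return 0;
    }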
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index bc48320ce..953e34adc 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -219,8 +219,6 @@ private:
     /// registers.
     void Launch();
 
-    void CopyPitchToPitch();
-
     void CopyBlockLinearToPitch();
 
     void CopyPitchToBlockLinear();
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index cca401c74..d07b21bd6 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -41,7 +41,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
     big_entries.resize(big_page_table_size / 32, 0);
     big_page_table_cpu.resize(big_page_table_size);
     big_page_continous.resize(big_page_table_size / continous_bits, 0);
+    std::array<PTEKind, 32> kind_valus;
+    kind_valus.fill(PTEKind::INVALID);
+    big_kinds.resize(big_page_table_size / 32, kind_valus);
     entries.resize(page_table_size / 32, 0);
+    kinds.resize(big_page_table_size / 32, kind_valus);
 }
 
 MemoryManager::~MemoryManager() = default;
@@ -78,6 +82,41 @@ void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
     }
 }
 
+PTEKind MemoryManager::GetPageKind(GPUVAddr gpu_addr) const {
+    auto entry = GetEntry<true>(gpu_addr);
+    if (entry == EntryType::Mapped || entry == EntryType::Reserved) [[likely]] {
+        return GetKind<true>(gpu_addr);
+    } else {
+        return GetKind<false>(gpu_addr);
+    }
+}
+
+template <bool is_big_page>
+PTEKind MemoryManager::GetKind(size_t position) const {
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const size_t sub_index = position % 32;
+        return big_kinds[position / 32][sub_index];
+    } else {
+        position = position >> page_bits;
+        const size_t sub_index = position % 32;
+        return kinds[position / 32][sub_index];
+    }
+}
+
+template <bool is_big_page>
+void MemoryManager::SetKind(size_t position, PTEKind kind) {
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const size_t sub_index = position % 32;
+        big_kinds[position / 32][sub_index] = kind;
+    } else {
+        position = position >> page_bits;
+        const size_t sub_index = position % 32;
+        kinds[position / 32][sub_index] = kind;
+    }
+}
+
 inline bool MemoryManager::IsBigPageContinous(size_t big_page_index) const {
     const u64 entry_mask = big_page_continous[big_page_index / continous_bits];
     const size_t sub_index = big_page_index % continous_bits;
@@ -92,8 +131,8 @@ inline void MemoryManager::SetBigPageContinous(size_t big_page_index, bool value
 }
 
 template <MemoryManager::EntryType entry_type>
-GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
-                                    size_t size) {
+GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+                                    PTEKind kind) {
     u64 remaining_size{size};
     if constexpr (entry_type == EntryType::Mapped) {
         page_table.ReserveRange(gpu_addr, size);
@@ -102,6 +141,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
         [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
         SetEntry<false>(current_gpu_addr, entry_type);
+        SetKind<false>(current_gpu_addr, kind);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
         }
@@ -118,12 +158,13 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
 
 template <MemoryManager::EntryType entry_type>
 GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
-                                       size_t size) {
+                                       size_t size, PTEKind kind) {
     u64 remaining_size{size};
     for (u64 offset{}; offset < size; offset += big_page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
         [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
         SetEntry<true>(current_gpu_addr, entry_type);
+        SetKind<true>(current_gpu_addr, kind);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
         }
@@ -159,19 +200,19 @@ void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_)
     rasterizer = rasterizer_;
 }
 
-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, PTEKind kind,
                             bool is_big_pages) {
     if (is_big_pages) [[likely]] {
-        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
     }
-    return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+    return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size, kind);
 }
 
 GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
     if (is_big_pages) [[likely]] {
-        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
     }
-    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size, PTEKind::INVALID);
 }
 
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
@@ -188,8 +229,8 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
         rasterizer->UnmapMemory(*cpu_addr, map_size);
     }
 
-    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
-    PageTableOp<EntryType::Free>(gpu_addr, 0, size);
+    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
+    PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
 }
 
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
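
The new GetKind/SetKind helpers mirror the existing entry bookkeeping: the byte
address is shifted down by page_bits or big_page_bits to a page index, which
then selects a 32-slot std::array cell (page_index / 32) and a slot within it
(page_index % 32). Note that the constructor sizes both kinds and big_kinds
from big_page_table_size. A self-contained sketch of the indexing, assuming
yuzu's default 4 KiB small pages; the table length here is illustrative:

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    enum class PTEKind : std::uint8_t { INVALID = 0xff, PITCH = 0x00 };

    constexpr std::size_t page_bits = 12; // 4 KiB pages (assumed default)

    std::array<PTEKind, 32> InitialCell() {
        std::array<PTEKind, 32> cell;
        cell.fill(PTEKind::INVALID); // unmapped pages report INVALID
        return cell;
    }

    // 32 kinds per vector cell, as in the MemoryManager members added above.
    std::vector<std::array<PTEKind, 32>> kinds(1024, InitialCell());

    void SetKind(std::size_t position, PTEKind kind) {
        const std::size_t page = position >> page_bits; // byte address -> page index
        kinds[page / 32][page % 32] = kind;
    }

    PTEKind GetKind(std::size_t position) {
        const std::size_t page = position >> page_bits;
        return kinds[page / 32][page % 32];
    }

    int main() {
        SetKind(0x4000, PTEKind::PITCH); // page index 4 -> cell 0, slot 4
        return GetKind(0x4000) == PTEKind::PITCH ? 0 : 1;
    }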
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index f992e29f3..ab4bc9ec6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -11,6 +11,7 @@
 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
 #include "common/virtual_buffer.h"
+#include "video_core/pte_kind.h"
 
 namespace VideoCore {
 class RasterizerInterface;
@@ -98,7 +99,8 @@ public:
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
                                                                     std::size_t size) const;
 
-    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+                 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
     GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);
 
@@ -114,6 +116,8 @@ public:
         return gpu_addr < address_space_size;
     }
 
+    PTEKind GetPageKind(GPUVAddr gpu_addr) const;
+
 private:
     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
     inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
@@ -166,10 +170,12 @@ private:
     std::vector<u64> big_entries;
 
     template <EntryType entry_type>
-    GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+    GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+                         PTEKind kind);
 
     template <EntryType entry_type>
-    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);
+    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size,
+                            PTEKind kind);
 
     template <bool is_big_page>
     inline EntryType GetEntry(size_t position) const;
@@ -177,6 +183,15 @@ private:
     template <bool is_big_page>
     inline void SetEntry(size_t position, EntryType entry);
 
+    std::vector<std::array<PTEKind, 32>> kinds;
+    std::vector<std::array<PTEKind, 32>> big_kinds;
+
+    template <bool is_big_page>
+    inline PTEKind GetKind(size_t position) const;
+
+    template <bool is_big_page>
+    inline void SetKind(size_t position, PTEKind kind);
+
     Common::MultiLevelPageTable<u32> page_table;
     Common::VirtualBuffer<u32> big_page_table_cpu;
 
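
Defaulting the new parameter to PTEKind::INVALID keeps every pre-existing
Map() caller source-compatible; only call sites that actually know a kind (the
nvhost_as_gpu changes above) need to pass one. A compilable sketch of that
signature evolution, with a stub body:

    #include <cstddef>
    #include <cstdint>

    enum class PTEKind : std::uint8_t { INVALID = 0xff, PITCH = 0x00 };
    using GPUVAddr = std::uint64_t;
    using VAddr = std::uint64_t;

    // Same parameter order as the updated MemoryManager::Map declaration above.
    GPUVAddr Map(GPUVAddr gpu_addr, VAddr /*cpu_addr*/, std::size_t /*size*/,
                 PTEKind /*kind*/ = PTEKind::INVALID, bool /*is_big_pages*/ = true) {
        return gpu_addr; // stub
    }

    int main() {
        Map(0x10000, 0x20000, 0x1000);                 // old call shape still compiles
        Map(0x10000, 0x20000, 0x1000, PTEKind::PITCH); // new call shape, explicit kind
        return 0;
    }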
diff --git a/src/video_core/pte_kind.h b/src/video_core/pte_kind.h
new file mode 100644
index 000000000..591d7214b
--- /dev/null
+++ b/src/video_core/pte_kind.h
@@ -0,0 +1,264 @@
+// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+// https://github.com/NVIDIA/open-gpu-doc/blob/master/manuals/volta/gv100/dev_mmu.ref.txt
+enum class PTEKind : u8 {
+    INVALID = 0xff,
+    PITCH = 0x00,
+    Z16 = 0x01,
+    Z16_2C = 0x02,
+    Z16_MS2_2C = 0x03,
+    Z16_MS4_2C = 0x04,
+    Z16_MS8_2C = 0x05,
+    Z16_MS16_2C = 0x06,
+    Z16_2Z = 0x07,
+    Z16_MS2_2Z = 0x08,
+    Z16_MS4_2Z = 0x09,
+    Z16_MS8_2Z = 0x0a,
+    Z16_MS16_2Z = 0x0b,
+    Z16_2CZ = 0x36,
+    Z16_MS2_2CZ = 0x37,
+    Z16_MS4_2CZ = 0x38,
+    Z16_MS8_2CZ = 0x39,
+    Z16_MS16_2CZ = 0x5f,
+    Z16_4CZ = 0x0c,
+    Z16_MS2_4CZ = 0x0d,
+    Z16_MS4_4CZ = 0x0e,
+    Z16_MS8_4CZ = 0x0f,
+    Z16_MS16_4CZ = 0x10,
+    S8Z24 = 0x11,
+    S8Z24_1Z = 0x12,
+    S8Z24_MS2_1Z = 0x13,
+    S8Z24_MS4_1Z = 0x14,
+    S8Z24_MS8_1Z = 0x15,
+    S8Z24_MS16_1Z = 0x16,
+    S8Z24_2CZ = 0x17,
+    S8Z24_MS2_2CZ = 0x18,
+    S8Z24_MS4_2CZ = 0x19,
+    S8Z24_MS8_2CZ = 0x1a,
+    S8Z24_MS16_2CZ = 0x1b,
+    S8Z24_2CS = 0x1c,
+    S8Z24_MS2_2CS = 0x1d,
+    S8Z24_MS4_2CS = 0x1e,
+    S8Z24_MS8_2CS = 0x1f,
+    S8Z24_MS16_2CS = 0x20,
+    S8Z24_4CSZV = 0x21,
+    S8Z24_MS2_4CSZV = 0x22,
+    S8Z24_MS4_4CSZV = 0x23,
+    S8Z24_MS8_4CSZV = 0x24,
+    S8Z24_MS16_4CSZV = 0x25,
+    V8Z24_MS4_VC12 = 0x26,
+    V8Z24_MS4_VC4 = 0x27,
+    V8Z24_MS8_VC8 = 0x28,
+    V8Z24_MS8_VC24 = 0x29,
+    V8Z24_MS4_VC12_1ZV = 0x2e,
+    V8Z24_MS4_VC4_1ZV = 0x2f,
+    V8Z24_MS8_VC8_1ZV = 0x30,
+    V8Z24_MS8_VC24_1ZV = 0x31,
+    V8Z24_MS4_VC12_2CS = 0x32,
+    V8Z24_MS4_VC4_2CS = 0x33,
+    V8Z24_MS8_VC8_2CS = 0x34,
+    V8Z24_MS8_VC24_2CS = 0x35,
+    V8Z24_MS4_VC12_2CZV = 0x3a,
+    V8Z24_MS4_VC4_2CZV = 0x3b,
+    V8Z24_MS8_VC8_2CZV = 0x3c,
+    V8Z24_MS8_VC24_2CZV = 0x3d,
+    V8Z24_MS4_VC12_2ZV = 0x3e,
+    V8Z24_MS4_VC4_2ZV = 0x3f,
+    V8Z24_MS8_VC8_2ZV = 0x40,
+    V8Z24_MS8_VC24_2ZV = 0x41,
+    V8Z24_MS4_VC12_4CSZV = 0x42,
+    V8Z24_MS4_VC4_4CSZV = 0x43,
+    V8Z24_MS8_VC8_4CSZV = 0x44,
+    V8Z24_MS8_VC24_4CSZV = 0x45,
+    Z24S8 = 0x46,
+    Z24S8_1Z = 0x47,
+    Z24S8_MS2_1Z = 0x48,
+    Z24S8_MS4_1Z = 0x49,
+    Z24S8_MS8_1Z = 0x4a,
+    Z24S8_MS16_1Z = 0x4b,
+    Z24S8_2CS = 0x4c,
+    Z24S8_MS2_2CS = 0x4d,
+    Z24S8_MS4_2CS = 0x4e,
+    Z24S8_MS8_2CS = 0x4f,
+    Z24S8_MS16_2CS = 0x50,
+    Z24S8_2CZ = 0x51,
+    Z24S8_MS2_2CZ = 0x52,
+    Z24S8_MS4_2CZ = 0x53,
+    Z24S8_MS8_2CZ = 0x54,
+    Z24S8_MS16_2CZ = 0x55,
+    Z24S8_4CSZV = 0x56,
+    Z24S8_MS2_4CSZV = 0x57,
+    Z24S8_MS4_4CSZV = 0x58,
+    Z24S8_MS8_4CSZV = 0x59,
+    Z24S8_MS16_4CSZV = 0x5a,
+    Z24V8_MS4_VC12 = 0x5b,
+    Z24V8_MS4_VC4 = 0x5c,
+    Z24V8_MS8_VC8 = 0x5d,
+    Z24V8_MS8_VC24 = 0x5e,
+    YUV_B8C1_2Y = 0x60,
+    YUV_B8C2_2Y = 0x61,
+    YUV_B10C1_2Y = 0x62,
+    YUV_B10C2_2Y = 0x6b,
+    YUV_B12C1_2Y = 0x6c,
+    YUV_B12C2_2Y = 0x6d,
+    Z24V8_MS4_VC12_1ZV = 0x63,
+    Z24V8_MS4_VC4_1ZV = 0x64,
+    Z24V8_MS8_VC8_1ZV = 0x65,
+    Z24V8_MS8_VC24_1ZV = 0x66,
+    Z24V8_MS4_VC12_2CS = 0x67,
+    Z24V8_MS4_VC4_2CS = 0x68,
+    Z24V8_MS8_VC8_2CS = 0x69,
+    Z24V8_MS8_VC24_2CS = 0x6a,
+    Z24V8_MS4_VC12_2CZV = 0x6f,
+    Z24V8_MS4_VC4_2CZV = 0x70,
+    Z24V8_MS8_VC8_2CZV = 0x71,
+    Z24V8_MS8_VC24_2CZV = 0x72,
+    Z24V8_MS4_VC12_2ZV = 0x73,
+    Z24V8_MS4_VC4_2ZV = 0x74,
+    Z24V8_MS8_VC8_2ZV = 0x75,
+    Z24V8_MS8_VC24_2ZV = 0x76,
+    Z24V8_MS4_VC12_4CSZV = 0x77,
+    Z24V8_MS4_VC4_4CSZV = 0x78,
+    Z24V8_MS8_VC8_4CSZV = 0x79,
+    Z24V8_MS8_VC24_4CSZV = 0x7a,
+    ZF32 = 0x7b,
+    ZF32_1Z = 0x7c,
+    ZF32_MS2_1Z = 0x7d,
+    ZF32_MS4_1Z = 0x7e,
+    ZF32_MS8_1Z = 0x7f,
+    ZF32_MS16_1Z = 0x80,
+    ZF32_2CS = 0x81,
+    ZF32_MS2_2CS = 0x82,
+    ZF32_MS4_2CS = 0x83,
+    ZF32_MS8_2CS = 0x84,
+    ZF32_MS16_2CS = 0x85,
+    ZF32_2CZ = 0x86,
+    ZF32_MS2_2CZ = 0x87,
+    ZF32_MS4_2CZ = 0x88,
+    ZF32_MS8_2CZ = 0x89,
+    ZF32_MS16_2CZ = 0x8a,
+    X8Z24_X16V8S8_MS4_VC12 = 0x8b,
+    X8Z24_X16V8S8_MS4_VC4 = 0x8c,
+    X8Z24_X16V8S8_MS8_VC8 = 0x8d,
+    X8Z24_X16V8S8_MS8_VC24 = 0x8e,
+    X8Z24_X16V8S8_MS4_VC12_1CS = 0x8f,
+    X8Z24_X16V8S8_MS4_VC4_1CS = 0x90,
+    X8Z24_X16V8S8_MS8_VC8_1CS = 0x91,
+    X8Z24_X16V8S8_MS8_VC24_1CS = 0x92,
+    X8Z24_X16V8S8_MS4_VC12_1ZV = 0x97,
+    X8Z24_X16V8S8_MS4_VC4_1ZV = 0x98,
+    X8Z24_X16V8S8_MS8_VC8_1ZV = 0x99,
+    X8Z24_X16V8S8_MS8_VC24_1ZV = 0x9a,
+    X8Z24_X16V8S8_MS4_VC12_1CZV = 0x9b,
+    X8Z24_X16V8S8_MS4_VC4_1CZV = 0x9c,
+    X8Z24_X16V8S8_MS8_VC8_1CZV = 0x9d,
+    X8Z24_X16V8S8_MS8_VC24_1CZV = 0x9e,
+    X8Z24_X16V8S8_MS4_VC12_2CS = 0x9f,
+    X8Z24_X16V8S8_MS4_VC4_2CS = 0xa0,
+    X8Z24_X16V8S8_MS8_VC8_2CS = 0xa1,
+    X8Z24_X16V8S8_MS8_VC24_2CS = 0xa2,
+    X8Z24_X16V8S8_MS4_VC12_2CSZV = 0xa3,
+    X8Z24_X16V8S8_MS4_VC4_2CSZV = 0xa4,
+    X8Z24_X16V8S8_MS8_VC8_2CSZV = 0xa5,
+    X8Z24_X16V8S8_MS8_VC24_2CSZV = 0xa6,
+    ZF32_X16V8S8_MS4_VC12 = 0xa7,
+    ZF32_X16V8S8_MS4_VC4 = 0xa8,
+    ZF32_X16V8S8_MS8_VC8 = 0xa9,
+    ZF32_X16V8S8_MS8_VC24 = 0xaa,
+    ZF32_X16V8S8_MS4_VC12_1CS = 0xab,
+    ZF32_X16V8S8_MS4_VC4_1CS = 0xac,
+    ZF32_X16V8S8_MS8_VC8_1CS = 0xad,
+    ZF32_X16V8S8_MS8_VC24_1CS = 0xae,
+    ZF32_X16V8S8_MS4_VC12_1ZV = 0xb3,
+    ZF32_X16V8S8_MS4_VC4_1ZV = 0xb4,
+    ZF32_X16V8S8_MS8_VC8_1ZV = 0xb5,
+    ZF32_X16V8S8_MS8_VC24_1ZV = 0xb6,
+    ZF32_X16V8S8_MS4_VC12_1CZV = 0xb7,
+    ZF32_X16V8S8_MS4_VC4_1CZV = 0xb8,
+    ZF32_X16V8S8_MS8_VC8_1CZV = 0xb9,
+    ZF32_X16V8S8_MS8_VC24_1CZV = 0xba,
+    ZF32_X16V8S8_MS4_VC12_2CS = 0xbb,
+    ZF32_X16V8S8_MS4_VC4_2CS = 0xbc,
+    ZF32_X16V8S8_MS8_VC8_2CS = 0xbd,
+    ZF32_X16V8S8_MS8_VC24_2CS = 0xbe,
+    ZF32_X16V8S8_MS4_VC12_2CSZV = 0xbf,
+    ZF32_X16V8S8_MS4_VC4_2CSZV = 0xc0,
+    ZF32_X16V8S8_MS8_VC8_2CSZV = 0xc1,
+    ZF32_X16V8S8_MS8_VC24_2CSZV = 0xc2,
+    ZF32_X24S8 = 0xc3,
+    ZF32_X24S8_1CS = 0xc4,
+    ZF32_X24S8_MS2_1CS = 0xc5,
+    ZF32_X24S8_MS4_1CS = 0xc6,
+    ZF32_X24S8_MS8_1CS = 0xc7,
+    ZF32_X24S8_MS16_1CS = 0xc8,
+    ZF32_X24S8_2CSZV = 0xce,
+    ZF32_X24S8_MS2_2CSZV = 0xcf,
+    ZF32_X24S8_MS4_2CSZV = 0xd0,
+    ZF32_X24S8_MS8_2CSZV = 0xd1,
+    ZF32_X24S8_MS16_2CSZV = 0xd2,
+    ZF32_X24S8_2CS = 0xd3,
+    ZF32_X24S8_MS2_2CS = 0xd4,
+    ZF32_X24S8_MS4_2CS = 0xd5,
+    ZF32_X24S8_MS8_2CS = 0xd6,
+    ZF32_X24S8_MS16_2CS = 0xd7,
+    S8 = 0x2a,
+    S8_2S = 0x2b,
+    GENERIC_16BX2 = 0xfe,
+    C32_2C = 0xd8,
+    C32_2CBR = 0xd9,
+    C32_2CBA = 0xda,
+    C32_2CRA = 0xdb,
+    C32_2BRA = 0xdc,
+    C32_MS2_2C = 0xdd,
+    C32_MS2_2CBR = 0xde,
+    C32_MS2_4CBRA = 0xcc,
+    C32_MS4_2C = 0xdf,
+    C32_MS4_2CBR = 0xe0,
+    C32_MS4_2CBA = 0xe1,
+    C32_MS4_2CRA = 0xe2,
+    C32_MS4_2BRA = 0xe3,
+    C32_MS4_4CBRA = 0x2c,
+    C32_MS8_MS16_2C = 0xe4,
+    C32_MS8_MS16_2CRA = 0xe5,
+    C64_2C = 0xe6,
+    C64_2CBR = 0xe7,
+    C64_2CBA = 0xe8,
+    C64_2CRA = 0xe9,
+    C64_2BRA = 0xea,
+    C64_MS2_2C = 0xeb,
+    C64_MS2_2CBR = 0xec,
+    C64_MS2_4CBRA = 0xcd,
+    C64_MS4_2C = 0xed,
+    C64_MS4_2CBR = 0xee,
+    C64_MS4_2CBA = 0xef,
+    C64_MS4_2CRA = 0xf0,
+    C64_MS4_2BRA = 0xf1,
+    C64_MS4_4CBRA = 0x2d,
+    C64_MS8_MS16_2C = 0xf2,
+    C64_MS8_MS16_2CRA = 0xf3,
+    C128_2C = 0xf4,
+    C128_2CR = 0xf5,
+    C128_MS2_2C = 0xf6,
+    C128_MS2_2CR = 0xf7,
+    C128_MS4_2C = 0xf8,
+    C128_MS4_2CR = 0xf9,
+    C128_MS8_MS16_2C = 0xfa,
+    C128_MS8_MS16_2CR = 0xfb,
+    X8C24 = 0xfc,
+    PITCH_NO_SWIZZLE = 0xfd,
+    SMSKED_MESSAGE = 0xca,
+    SMHOST_MESSAGE = 0xcb,
+};
+
+constexpr bool IsPitchKind(PTEKind kind) {
+    return kind == PTEKind::PITCH || kind == PTEKind::PITCH_NO_SWIZZLE;
+}
+
+} // namespace Tegra
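
IsPitchKind is the predicate MaxwellDMA::Launch() uses to decide whether either
end of a single-line copy needs the block-linear swizzle: per the definition
above, only PITCH and PITCH_NO_SWIZZLE count as linear, and every other value
(including INVALID, i.e. pages mapped without an explicit kind) is treated as
non-pitch. A standalone sketch over an abridged subset of the enum:

    #include <cstdint>

    // Abridged subset of Tegra::PTEKind from pte_kind.h above.
    enum class PTEKind : std::uint8_t {
        INVALID = 0xff,
        PITCH = 0x00,
        Z16 = 0x01,
        PITCH_NO_SWIZZLE = 0xfd,
    };

    constexpr bool IsPitchKind(PTEKind kind) {
        return kind == PTEKind::PITCH || kind == PTEKind::PITCH_NO_SWIZZLE;
    }

    static_assert(IsPitchKind(PTEKind::PITCH));
    static_assert(IsPitchKind(PTEKind::PITCH_NO_SWIZZLE));
    static_assert(!IsPitchKind(PTEKind::Z16));
    static_assert(!IsPitchKind(PTEKind::INVALID));

    int main() {
        return 0;
    }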