author    bunnei 2018-04-24 23:22:24 -0400
committer GitHub 2018-04-24 23:22:24 -0400
commit    ea3151f475e170eaaec3ded306a0fe5c1e5944db (patch)
tree      6f7e127c4f58de6071d9a7dbd2af464dbbd14b9b
parent    Merge pull request #393 from lioncash/loader (diff)
parent    renderer_opengl: Use correct byte order for framebuffer pixel format ABGR8. (diff)
Merge pull request #388 from bunnei/refactor-rasterizer-cache
Refactor rasterizer cache
 src/core/memory.cpp                                    |  64
 src/core/memory.h                                      |   3
 src/video_core/command_processor.cpp                   |   8
 src/video_core/engines/maxwell_3d.cpp                  |  22
 src/video_core/memory_manager.cpp                      |  94
 src/video_core/memory_manager.h                        |  30
 src/video_core/rasterizer_interface.h                  |   7
 src/video_core/renderer_opengl/gl_rasterizer.cpp       |  28
 src/video_core/renderer_opengl/gl_rasterizer.h         |   7
 src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 147
 src/video_core/renderer_opengl/gl_rasterizer_cache.h   |  84
 src/video_core/renderer_opengl/renderer_opengl.cpp     |   6
 src/video_core/textures/decoders.cpp                   |   1
 src/yuzu/debugger/graphics/graphics_surface.cpp        |   8
 14 files changed, 334 insertions(+), 175 deletions(-)
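
The unifying idea across the changes below is that the rasterizer cache is now keyed by GPU virtual
addresses, with an explicit, fallible GPU-to-CPU translation step. As a rough standalone sketch of
that model (not yuzu's actual code: it substitutes std::optional and a flat std::unordered_map for
boost::optional and MemoryManager's block-based page table):

#include <cstdint>
#include <optional>
#include <unordered_map>

using VAddr = std::uint64_t;    // CPU virtual address
using GPUVAddr = std::uint64_t; // GPU virtual address

constexpr std::uint64_t PAGE_BITS = 16; // 64 KiB GPU pages, as in Tegra::MemoryManager
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;
constexpr std::uint64_t PAGE_MASK = PAGE_SIZE - 1;

class SimpleGpuMmu {
public:
    // Map `size` bytes of CPU memory at `cpu_addr` into the GPU address space at `gpu_addr`.
    void Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::uint64_t size) {
        for (std::uint64_t offset = 0; offset < size; offset += PAGE_SIZE) {
            pages[(gpu_addr + offset) >> PAGE_BITS] = cpu_addr + offset;
        }
    }

    // Fallible translation: a GPU address may have no CPU backing, so the result is
    // optional and every caller must handle the empty case.
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) const {
        const auto it = pages.find(gpu_addr >> PAGE_BITS);
        if (it == pages.end()) {
            return std::nullopt;
        }
        return it->second + (gpu_addr & PAGE_MASK);
    }

private:
    std::unordered_map<std::uint64_t, VAddr> pages; // GPU page index -> CPU base address
};

int main() {
    SimpleGpuMmu mmu;
    mmu.Map(/*gpu_addr=*/0x100000, /*cpu_addr=*/0x8000000, /*size=*/2 * PAGE_SIZE);
    // A mapped page translates; an unmapped one yields an empty optional.
    return mmu.GpuToCpuAddress(0x100004).value_or(0) == 0x8000004 &&
                   !mmu.GpuToCpuAddress(0x900000)
               ? 0
               : 1;
}

The essential point is the optional return value: a GPU page can exist (or be merely allocated)
without CPU backing, so every caller in the diffs below has to handle a failed translation.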
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 291bf066f..ff0420c56 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -325,15 +325,29 @@ u8* GetPhysicalPointer(PAddr address) {
     return target_pointer;
 }
 
-void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) {
-    if (start == 0) {
+void RasterizerMarkRegionCached(Tegra::GPUVAddr gpu_addr, u64 size, bool cached) {
+    if (gpu_addr == 0) {
         return;
     }
 
-    u64 num_pages = ((start + size - 1) >> PAGE_BITS) - (start >> PAGE_BITS) + 1;
-    VAddr vaddr = start;
+    // Iterate over a contiguous CPU address space, which corresponds to the specified GPU address
+    // space, marking the region as un/cached. The region is marked un/cached at a granularity of
+    // CPU pages, hence why we iterate on a CPU page basis (note: GPU page size is different). This
+    // assumes the specified GPU address region is contiguous as well.
+
+    u64 num_pages = ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1;
+    for (unsigned i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) {
+        boost::optional<VAddr> maybe_vaddr =
+            Core::System::GetInstance().GPU().memory_manager->GpuToCpuAddress(gpu_addr);
+        // The GPU <-> CPU virtual memory mapping is not 1:1
+        if (!maybe_vaddr) {
+            LOG_ERROR(HW_Memory,
+                      "Trying to flush a cached region to an invalid physical address %08X",
+                      gpu_addr);
+            continue;
+        }
+        VAddr vaddr = *maybe_vaddr;
 
-    for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
         PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
 
         if (cached) {
@@ -347,6 +361,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) {
             page_type = PageType::RasterizerCachedMemory;
             current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
             break;
+        case PageType::RasterizerCachedMemory:
+            // There can be more than one GPU region mapped per CPU region, so it's common that
+            // this area is already marked as cached.
+            break;
         default:
             UNREACHABLE();
         }
@@ -357,6 +375,10 @@ void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached) {
             // It is not necessary for a process to have this region mapped into its address
             // space, for example, a system module need not have a VRAM mapping.
             break;
+        case PageType::Memory:
+            // There can be more than one GPU region mapped per CPU region, so it's common that
+            // this area is already unmarked as cached.
+            break;
         case PageType::RasterizerCachedMemory: {
             u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
             if (pointer == nullptr) {
@@ -394,19 +416,29 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
 
         VAddr overlap_start = std::max(start, region_start);
         VAddr overlap_end = std::min(end, region_end);
+
+        std::vector<Tegra::GPUVAddr> gpu_addresses =
+            Core::System::GetInstance().GPU().memory_manager->CpuToGpuAddress(overlap_start);
+
+        if (gpu_addresses.empty()) {
+            return;
+        }
+
         u64 overlap_size = overlap_end - overlap_start;
 
-        auto* rasterizer = VideoCore::g_renderer->Rasterizer();
-        switch (mode) {
-        case FlushMode::Flush:
-            rasterizer->FlushRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::Invalidate:
-            rasterizer->InvalidateRegion(overlap_start, overlap_size);
-            break;
-        case FlushMode::FlushAndInvalidate:
-            rasterizer->FlushAndInvalidateRegion(overlap_start, overlap_size);
-            break;
-        }
+        for (const auto& gpu_address : gpu_addresses) {
+            auto* rasterizer = VideoCore::g_renderer->Rasterizer();
+            switch (mode) {
+            case FlushMode::Flush:
+                rasterizer->FlushRegion(gpu_address, overlap_size);
+                break;
+            case FlushMode::Invalidate:
+                rasterizer->InvalidateRegion(gpu_address, overlap_size);
+                break;
+            case FlushMode::FlushAndInvalidate:
+                rasterizer->FlushAndInvalidateRegion(gpu_address, overlap_size);
+                break;
+            }
+        }
     };
 
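
The reworked RasterizerMarkRegionCached above walks the GPU region one CPU page at a time and
skips pages whose translation fails. A self-contained sketch of that loop shape, with a
caller-supplied translate callback standing in for MemoryManager::GpuToCpuAddress (the printf
calls are illustrative only):

#include <cstdint>
#include <cstdio>
#include <functional>
#include <optional>

using VAddr = std::uint64_t;
using GPUVAddr = std::uint64_t;
constexpr std::uint64_t PAGE_BITS = 16;
constexpr std::uint64_t PAGE_SIZE = 1ULL << PAGE_BITS;

void MarkRegionCached(GPUVAddr gpu_addr, std::uint64_t size, bool cached,
                      const std::function<std::optional<VAddr>(GPUVAddr)>& translate) {
    // Same page-count arithmetic as the diff above: round the region out to whole CPU pages.
    const std::uint64_t num_pages =
        ((gpu_addr + size - 1) >> PAGE_BITS) - (gpu_addr >> PAGE_BITS) + 1;
    for (std::uint64_t i = 0; i < num_pages; ++i, gpu_addr += PAGE_SIZE) {
        const std::optional<VAddr> vaddr = translate(gpu_addr);
        if (!vaddr) {
            // The GPU <-> CPU mapping is not 1:1, so an unmapped page is skipped
            // rather than aborting the whole region.
            std::printf("unmapped GPU page at %llx, skipping\n",
                        static_cast<unsigned long long>(gpu_addr));
            continue;
        }
        // ... update the CPU page-table attribute for the page containing *vaddr ...
        std::printf("page %llu: %smark CPU address %llx\n", static_cast<unsigned long long>(i),
                    cached ? "" : "un", static_cast<unsigned long long>(*vaddr));
    }
}

int main() {
    // Toy translation: only the first two pages of the region are mapped.
    MarkRegionCached(0x20000, 3 * PAGE_SIZE, true, [](GPUVAddr addr) -> std::optional<VAddr> {
        return addr < 0x40000 ? std::optional<VAddr>{0x1000000 + addr} : std::nullopt;
    });
    return 0;
}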
diff --git a/src/core/memory.h b/src/core/memory.h
index e9b8ca873..3f56a2c6a 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -14,6 +14,7 @@
 #include <boost/optional.hpp>
 #include "common/common_types.h"
 #include "core/memory_hook.h"
+#include "video_core/memory_manager.h"
 
 namespace Kernel {
 class Process;
@@ -258,7 +259,7 @@ enum class FlushMode {
 /**
  * Mark each page touching the region as cached.
  */
-void RasterizerMarkRegionCached(VAddr start, u64 size, bool cached);
+void RasterizerMarkRegionCached(Tegra::GPUVAddr start, u64 size, bool cached);
 
 /**
  * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d4cdb4ab2..2c04daba3 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -90,11 +90,9 @@ void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params)
 }
 
 void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
-    // TODO(Subv): PhysicalToVirtualAddress is a misnomer, it converts a GPU VAddr into an
-    // application VAddr.
-    const VAddr head_address = memory_manager->PhysicalToVirtualAddress(address);
-    VAddr current_addr = head_address;
-    while (current_addr < head_address + size * sizeof(CommandHeader)) {
+    const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+    VAddr current_addr = *head_address;
+    while (current_addr < *head_address + size * sizeof(CommandHeader)) {
         const CommandHeader header = {Memory::Read32(current_addr)};
         current_addr += sizeof(u32);
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 35773a695..4e9aed380 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -145,7 +145,7 @@ void Maxwell3D::ProcessQueryGet() {
     GPUVAddr sequence_address = regs.query.QueryAddress();
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
     // VAddr before writing.
-    VAddr address = memory_manager.PhysicalToVirtualAddress(sequence_address);
+    boost::optional<VAddr> address = memory_manager.GpuToCpuAddress(sequence_address);
 
     // TODO(Subv): Support the other query units.
     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -153,7 +153,7 @@ void Maxwell3D::ProcessQueryGet() {
     ASSERT_MSG(regs.query.query_get.short_query,
                "Writing the entire query result structure is unimplemented");
 
-    u32 value = Memory::Read32(address);
+    u32 value = Memory::Read32(*address);
     u32 result = 0;
 
     // TODO(Subv): Support the other query variables
@@ -173,7 +173,7 @@ void Maxwell3D::ProcessQueryGet() {
     case Regs::QueryMode::Write2: {
         // Write the current query sequence to the sequence address.
         u32 sequence = regs.query.query_sequence;
-        Memory::Write32(address, sequence);
+        Memory::Write32(*address, sequence);
 
         // TODO(Subv): Write the proper query response structure to the address when not using short
         // mode.
@@ -225,10 +225,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
     // Don't allow writing past the end of the buffer.
     ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
 
-    VAddr address =
-        memory_manager.PhysicalToVirtualAddress(buffer_address + regs.const_buffer.cb_pos);
+    boost::optional<VAddr> address =
+        memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
 
-    Memory::Write32(address, value);
+    Memory::Write32(*address, value);
 
     // Increment the current buffer position.
     regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
@@ -238,10 +238,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     GPUVAddr tic_base_address = regs.tic.TICAddress();
 
     GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
-    VAddr tic_address_cpu = memory_manager.PhysicalToVirtualAddress(tic_address_gpu);
+    boost::optional<VAddr> tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
 
     Texture::TICEntry tic_entry;
-    Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
+    Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
 
     ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -268,10 +268,10 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
     GPUVAddr tsc_base_address = regs.tsc.TSCAddress();
 
     GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
-    VAddr tsc_address_cpu = memory_manager.PhysicalToVirtualAddress(tsc_address_gpu);
+    boost::optional<VAddr> tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
 
     Texture::TSCEntry tsc_entry;
-    Memory::ReadBlock(tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
+    Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry));
     return tsc_entry;
 }
 
@@ -293,7 +293,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
          current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
 
         Texture::TextureHandle tex_handle{
-            Memory::Read32(memory_manager.PhysicalToVirtualAddress(current_texture))};
+            Memory::Read32(*memory_manager.GpuToCpuAddress(current_texture))};
 
         Texture::FullTextureInfo tex_info{};
         // TODO(Subv): Use the shader to determine which textures are actually accessed.
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 2e1edee03..25984439d 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -8,90 +8,112 @@
 
 namespace Tegra {
 
-PAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
-    boost::optional<PAddr> paddr = FindFreeBlock(size, align);
-    ASSERT(paddr);
+GPUVAddr MemoryManager::AllocateSpace(u64 size, u64 align) {
+    boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, align);
+    ASSERT(gpu_addr);
 
     for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        ASSERT(PageSlot(*paddr + offset) == static_cast<u64>(PageStatus::Unmapped));
-        PageSlot(*paddr + offset) = static_cast<u64>(PageStatus::Allocated);
+        ASSERT(PageSlot(*gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped));
+        PageSlot(*gpu_addr + offset) = static_cast<u64>(PageStatus::Allocated);
     }
 
-    return *paddr;
+    return *gpu_addr;
 }
 
-PAddr MemoryManager::AllocateSpace(PAddr paddr, u64 size, u64 align) {
+GPUVAddr MemoryManager::AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align) {
     for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        ASSERT(PageSlot(paddr + offset) == static_cast<u64>(PageStatus::Unmapped));
-        PageSlot(paddr + offset) = static_cast<u64>(PageStatus::Allocated);
+        ASSERT(PageSlot(gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped));
+        PageSlot(gpu_addr + offset) = static_cast<u64>(PageStatus::Allocated);
     }
 
-    return paddr;
+    return gpu_addr;
 }
 
-PAddr MemoryManager::MapBufferEx(VAddr vaddr, u64 size) {
-    boost::optional<PAddr> paddr = FindFreeBlock(size, PAGE_SIZE);
-    ASSERT(paddr);
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) {
+    boost::optional<GPUVAddr> gpu_addr = FindFreeBlock(size, PAGE_SIZE);
+    ASSERT(gpu_addr);
 
     for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        ASSERT(PageSlot(*paddr + offset) == static_cast<u64>(PageStatus::Unmapped));
-        PageSlot(*paddr + offset) = vaddr + offset;
+        ASSERT(PageSlot(*gpu_addr + offset) == static_cast<u64>(PageStatus::Unmapped));
+        PageSlot(*gpu_addr + offset) = cpu_addr + offset;
     }
 
-    return *paddr;
+    MappedRegion region{cpu_addr, *gpu_addr, size};
+    mapped_regions.push_back(region);
+
+    return *gpu_addr;
 }
 
-PAddr MemoryManager::MapBufferEx(VAddr vaddr, PAddr paddr, u64 size) {
-    ASSERT((paddr & PAGE_MASK) == 0);
+GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) {
+    ASSERT((gpu_addr & PAGE_MASK) == 0);
 
     for (u64 offset = 0; offset < size; offset += PAGE_SIZE) {
-        ASSERT(PageSlot(paddr + offset) == static_cast<u64>(PageStatus::Allocated));
-        PageSlot(paddr + offset) = vaddr + offset;
+        ASSERT(PageSlot(gpu_addr + offset) == static_cast<u64>(PageStatus::Allocated));
+        PageSlot(gpu_addr + offset) = cpu_addr + offset;
     }
 
-    return paddr;
+    MappedRegion region{cpu_addr, gpu_addr, size};
+    mapped_regions.push_back(region);
+
+    return gpu_addr;
 }
 
-boost::optional<PAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
-    PAddr paddr = 0;
+boost::optional<GPUVAddr> MemoryManager::FindFreeBlock(u64 size, u64 align) {
+    GPUVAddr gpu_addr = 0;
     u64 free_space = 0;
     align = (align + PAGE_MASK) & ~PAGE_MASK;
 
-    while (paddr + free_space < MAX_ADDRESS) {
-        if (!IsPageMapped(paddr + free_space)) {
+    while (gpu_addr + free_space < MAX_ADDRESS) {
+        if (!IsPageMapped(gpu_addr + free_space)) {
             free_space += PAGE_SIZE;
             if (free_space >= size) {
-                return paddr;
+                return gpu_addr;
             }
         } else {
-            paddr += free_space + PAGE_SIZE;
+            gpu_addr += free_space + PAGE_SIZE;
             free_space = 0;
-            paddr = Common::AlignUp(paddr, align);
+            gpu_addr = Common::AlignUp(gpu_addr, align);
         }
     }
 
     return {};
 }
 
-VAddr MemoryManager::PhysicalToVirtualAddress(PAddr paddr) {
-    VAddr base_addr = PageSlot(paddr);
+boost::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
+    VAddr base_addr = PageSlot(gpu_addr);
     ASSERT(base_addr != static_cast<u64>(PageStatus::Unmapped));
-    return base_addr + (paddr & PAGE_MASK);
+
+    if (base_addr == static_cast<u64>(PageStatus::Allocated)) {
+        return {};
+    }
+
+    return base_addr + (gpu_addr & PAGE_MASK);
+}
+
+std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const {
+    std::vector<GPUVAddr> results;
+    for (const auto& region : mapped_regions) {
+        if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) {
+            u64 offset = cpu_addr - region.cpu_addr;
+            results.push_back(region.gpu_addr + offset);
+        }
+    }
+    return results;
 }
 
-bool MemoryManager::IsPageMapped(PAddr paddr) {
-    return PageSlot(paddr) != static_cast<u64>(PageStatus::Unmapped);
+bool MemoryManager::IsPageMapped(GPUVAddr gpu_addr) {
+    return PageSlot(gpu_addr) != static_cast<u64>(PageStatus::Unmapped);
 }
 
-VAddr& MemoryManager::PageSlot(PAddr paddr) {
-    auto& block = page_table[(paddr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
+VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
+    auto& block = page_table[(gpu_addr >> (PAGE_BITS + PAGE_TABLE_BITS)) & PAGE_TABLE_MASK];
     if (!block) {
         block = std::make_unique<PageBlock>();
         for (unsigned index = 0; index < PAGE_BLOCK_SIZE; index++) {
             (*block)[index] = static_cast<u64>(PageStatus::Unmapped);
         }
     }
-    return (*block)[(paddr >> PAGE_BITS) & PAGE_BLOCK_MASK];
+    return (*block)[(gpu_addr >> PAGE_BITS) & PAGE_BLOCK_MASK];
 }
 
 } // namespace Tegra
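
MapBufferEx now records every successful mapping in mapped_regions, which is what makes the new
reverse lookup possible: CpuToGpuAddress is a linear scan that may legitimately return several GPU
addresses for one CPU address. A simplified, self-contained sketch of that lookup (the types and
the main() driver are stand-ins, not yuzu code):

#include <cstdint>
#include <vector>

using VAddr = std::uint64_t;
using GPUVAddr = std::uint64_t;

// Each successful MapBufferEx records one of these.
struct MappedRegion {
    VAddr cpu_addr;
    GPUVAddr gpu_addr;
    std::uint64_t size;
};

// One CPU address may be mapped at several GPU addresses, so the reverse lookup
// returns all matches rather than a single optional value.
std::vector<GPUVAddr> CpuToGpuAddress(const std::vector<MappedRegion>& regions, VAddr cpu_addr) {
    std::vector<GPUVAddr> results;
    for (const auto& region : regions) {
        if (cpu_addr >= region.cpu_addr && cpu_addr < region.cpu_addr + region.size) {
            results.push_back(region.gpu_addr + (cpu_addr - region.cpu_addr));
        }
    }
    return results;
}

int main() {
    // The same CPU buffer mapped twice into the GPU address space.
    const std::vector<MappedRegion> regions{
        {0x8000000, 0x100000, 0x10000},
        {0x8000000, 0x500000, 0x10000},
    };
    return CpuToGpuAddress(regions, 0x8000004).size() == 2 ? 0 : 1;
}

This is why Memory::RasterizerFlushVirtualRegion above loops over a vector of GPU addresses
instead of flushing a single region.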
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b73e283f8..08140c83a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -6,8 +6,11 @@
 
 #include <array>
 #include <memory>
+#include <vector>
+
+#include <boost/optional.hpp>
+
 #include "common/common_types.h"
-#include "core/memory.h"
 
 namespace Tegra {
 
@@ -18,20 +21,21 @@ class MemoryManager final {
 public:
     MemoryManager() = default;
 
-    PAddr AllocateSpace(u64 size, u64 align);
-    PAddr AllocateSpace(PAddr paddr, u64 size, u64 align);
-    PAddr MapBufferEx(VAddr vaddr, u64 size);
-    PAddr MapBufferEx(VAddr vaddr, PAddr paddr, u64 size);
-    VAddr PhysicalToVirtualAddress(PAddr paddr);
+    GPUVAddr AllocateSpace(u64 size, u64 align);
+    GPUVAddr AllocateSpace(GPUVAddr gpu_addr, u64 size, u64 align);
+    GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size);
+    GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size);
+    boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
+    std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
 
     static constexpr u64 PAGE_BITS = 16;
     static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
     static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
 
 private:
-    boost::optional<PAddr> FindFreeBlock(u64 size, u64 align = 1);
-    bool IsPageMapped(PAddr paddr);
-    VAddr& PageSlot(PAddr paddr);
+    boost::optional<GPUVAddr> FindFreeBlock(u64 size, u64 align = 1);
+    bool IsPageMapped(GPUVAddr gpu_addr);
+    VAddr& PageSlot(GPUVAddr gpu_addr);
 
     enum class PageStatus : u64 {
         Unmapped = 0xFFFFFFFFFFFFFFFFULL,
@@ -48,6 +52,14 @@ private:
 
     using PageBlock = std::array<VAddr, PAGE_BLOCK_SIZE>;
     std::array<std::unique_ptr<PageBlock>, PAGE_TABLE_SIZE> page_table{};
+
+    struct MappedRegion {
+        VAddr cpu_addr;
+        GPUVAddr gpu_addr;
+        u64 size;
+    };
+
+    std::vector<MappedRegion> mapped_regions;
 };
 
 } // namespace Tegra
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 36629dd11..f0e48a802 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
 
 #include "common/common_types.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 
 struct ScreenInfo;
 
@@ -25,14 +26,14 @@ public:
     virtual void FlushAll() = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushRegion(Tegra::GPUVAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
-    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) = 0;
 
     /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0
     virtual bool AccelerateDisplayTransfer(const void* config) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 82001e7b4..b457b1fbe 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -150,9 +150,8 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
     u64 size = end - start + 1;
 
     // Copy vertex array data
-    const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)};
-    res_cache.FlushRegion(data_addr, size, nullptr);
-    Memory::ReadBlock(data_addr, array_ptr, size);
+    res_cache.FlushRegion(start, size, nullptr);
+    Memory::ReadBlock(*memory_manager->GpuToCpuAddress(start), array_ptr, size);
 
     // Bind the vertex array to the buffer at the current offset.
     glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride);
@@ -233,8 +232,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
     // Fetch program code from memory
     GLShader::ProgramCode program_code;
     const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
-    const VAddr cpu_address{gpu.memory_manager.PhysicalToVirtualAddress(gpu_address)};
-    Memory::ReadBlock(cpu_address, program_code.data(), program_code.size() * sizeof(u64));
+    const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
+    Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
     GLShader::ShaderSetup setup{std::move(program_code)};
 
     GLShader::ShaderEntries shader_resources;
@@ -394,9 +393,9 @@ void RasterizerOpenGL::DrawArrays() {
     GLintptr index_buffer_offset = 0;
     if (is_indexed) {
         const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
-        const VAddr index_data_addr{
-            memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
-        Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size);
+        const boost::optional<VAddr> index_data_addr{
+            memory_manager->GpuToCpuAddress(regs.index_array.StartAddress())};
+        Memory::ReadBlock(*index_data_addr, offseted_buffer, index_buffer_size);
 
         index_buffer_offset = buffer_offset;
         offseted_buffer += index_buffer_size;
@@ -519,17 +518,17 @@ void RasterizerOpenGL::FlushAll() {
     res_cache.FlushAll();
 }
 
-void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     res_cache.FlushRegion(addr, size);
 }
 
-void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     res_cache.InvalidateRegion(addr, size, nullptr);
 }
 
-void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
+void RasterizerOpenGL::FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     res_cache.FlushRegion(addr, size);
     res_cache.InvalidateRegion(addr, size, nullptr);
@@ -560,7 +559,8 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
 
     SurfaceParams src_params;
-    src_params.addr = framebuffer_addr;
+    src_params.cpu_addr = framebuffer_addr;
+    src_params.addr = res_cache.TryFindFramebufferGpuAddress(framebuffer_addr).get_value_or(0);
     src_params.width = std::min(framebuffer.width, pixel_stride);
     src_params.height = framebuffer.height;
     src_params.stride = pixel_stride;
@@ -659,9 +659,9 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
         buffer_draw_state.enabled = true;
         buffer_draw_state.bindpoint = current_bindpoint + bindpoint;
 
-        VAddr addr = gpu.memory_manager->PhysicalToVirtualAddress(buffer.address);
+        boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
         std::vector<u8> data(used_buffer.GetSize() * sizeof(float));
-        Memory::ReadBlock(addr, data.data(), data.size());
+        Memory::ReadBlock(*addr, data.data(), data.size());
 
         glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo);
         glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 544714b95..9709e595e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -11,6 +11,7 @@
 #include <glad/glad.h>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -29,9 +30,9 @@ public:
     void DrawArrays() override;
     void NotifyMaxwellRegisterChanged(u32 method) override;
     void FlushAll() override;
-    void FlushRegion(VAddr addr, u64 size) override;
-    void InvalidateRegion(VAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
+    void FlushRegion(Tegra::GPUVAddr addr, u64 size) override;
+    void InvalidateRegion(Tegra::GPUVAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(Tegra::GPUVAddr addr, u64 size) override;
     bool AccelerateDisplayTransfer(const void* config) override;
     bool AccelerateTextureCopy(const void* config) override;
     bool AccelerateFill(const void* config) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 7410471cc..501d15e98 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -41,18 +41,15 @@ struct FormatTuple {
     GLenum format;
     GLenum type;
     bool compressed;
-    // How many pixels in the original texture are equivalent to one pixel in the compressed
-    // texture.
-    u32 compression_factor;
 };
 
 static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1},                     // ABGR8
-    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1},                        // B5G6R5
-    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false, 1},               // A2B10G10R10
-    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16},   // DXT1
-    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23
-    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                    // ABGR8
+    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                       // B5G6R5
+    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},              // A2B10G10R10
+    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // DXT1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45
 }};
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -83,26 +80,30 @@ static u16 GetResolutionScaleFactor() {
 }
 
 template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
-                VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, Tegra::GPUVAddr base,
+                Tegra::GPUVAddr start, Tegra::GPUVAddr end) {
+    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
+    const auto& gpu = Core::System::GetInstance().GPU();
 
     if (morton_to_gl) {
         auto data = Tegra::Texture::UnswizzleTexture(
-            base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
-            block_height);
+            *gpu.memory_manager->GpuToCpuAddress(base),
+            SurfaceParams::TextureFormatFromPixelFormat(format), stride, height, block_height);
         std::memcpy(gl_buffer, data.data(), data.size());
     } else {
         // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
         // the configuration for this and perform more generic un/swizzle
-        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-                                       Memory::GetPointer(base), gl_buffer, morton_to_gl);
+        NGLOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+        VideoCore::MortonCopyPixels128(
+            stride, height, bytes_per_pixel, gl_bytes_per_pixel,
+            Memory::GetPointer(*gpu.memory_manager->GpuToCpuAddress(base)), gl_buffer,
+            morton_to_gl);
     }
 }
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
+                                     Tegra::GPUVAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
         MortonCopy<true, PixelFormat::ABGR8>,  MortonCopy<true, PixelFormat::B5G6R5>,
@@ -110,7 +111,8 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
         MortonCopy<true, PixelFormat::DXT23>, MortonCopy<true, PixelFormat::DXT45>,
 };
 
-static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr, Tegra::GPUVAddr,
+                                     Tegra::GPUVAddr),
                             SurfaceParams::MaxPixelFormat>
     gl_to_morton_fns = {
         MortonCopy<false, PixelFormat::ABGR8>,
@@ -219,9 +221,9 @@ SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const {
     SurfaceParams params = *this;
     const u32 tiled_size = is_tiled ? 8 : 1;
     const u64 stride_tiled_bytes = BytesInPixels(stride * tiled_size);
-    VAddr aligned_start =
+    Tegra::GPUVAddr aligned_start =
         addr + Common::AlignDown(boost::icl::first(interval) - addr, stride_tiled_bytes);
-    VAddr aligned_end =
+    Tegra::GPUVAddr aligned_end =
         addr + Common::AlignUp(boost::icl::last_next(interval) - addr, stride_tiled_bytes);
 
     if (aligned_end - aligned_start > stride_tiled_bytes) {
@@ -342,6 +344,13 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
     return FromInterval(texcopy_params.GetInterval()).GetInterval() == texcopy_params.GetInterval();
 }
 
+VAddr SurfaceParams::GetCpuAddr() const {
+    // When this function is used, only cpu_addr or (GPU) addr should be set, not both
+    ASSERT(!(cpu_addr && addr));
+    const auto& gpu = Core::System::GetInstance().GPU();
+    return cpu_addr.get_value_or(*gpu.memory_manager->GpuToCpuAddress(addr));
+}
+
 bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
                             SurfaceInterval fill_interval) const {
     if (type == SurfaceType::Fill && IsRegionValid(fill_interval) &&
@@ -349,9 +358,9 @@ bool CachedSurface::CanFill(const SurfaceParams& dest_surface,
         boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range
         dest_surface.FromInterval(fill_interval).GetInterval() ==
             fill_interval) { // make sure interval is a rectangle in dest surface
-        if (fill_size * 8 != dest_surface.GetFormatBpp()) {
+        if (fill_size * CHAR_BIT != dest_surface.GetFormatBpp()) {
             // Check if bits repeat for our fill_size
-            const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u);
+            const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / CHAR_BIT, 1u);
             std::vector<u8> fill_test(fill_size * dest_bytes_per_pixel);
 
             for (u32 i = 0; i < dest_bytes_per_pixel; ++i)
@@ -456,15 +465,15 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
-void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
+void CachedSurface::LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end) {
     ASSERT(type != SurfaceType::Fill);
 
-    u8* const texture_src_data = Memory::GetPointer(addr);
+    u8* const texture_src_data = Memory::GetPointer(GetCpuAddr());
     if (texture_src_data == nullptr)
         return;
 
     if (gl_buffer == nullptr) {
-        gl_buffer_size = width * height * GetGLBytesPerPixel(pixel_format);
+        gl_buffer_size = GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format);
         gl_buffer.reset(new u8[gl_buffer_size]);
     }
 
@@ -479,14 +488,15 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
         std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
                     bytes_per_pixel * width * height);
     } else {
-        morton_to_gl_fns[static_cast<size_t>(pixel_format)](
-            stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
+        morton_to_gl_fns[static_cast<size_t>(pixel_format)](GetActualWidth(), block_height,
+                                                            GetActualHeight(), &gl_buffer[0], addr,
+                                                            load_start, load_end);
     }
 }
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
-void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
-    u8* const dst_buffer = Memory::GetPointer(addr);
+void CachedSurface::FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end) {
+    u8* const dst_buffer = Memory::GetPointer(GetCpuAddr());
     if (dst_buffer == nullptr)
         return;
 
@@ -536,7 +546,8 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
 
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
-    ASSERT(gl_buffer_size == width * height * GetGLBytesPerPixel(pixel_format));
+    ASSERT(gl_buffer_size ==
+           GetActualWidth() * GetActualHeight() * GetGLBytesPerPixel(pixel_format));
 
     // Load data from memory to the surface
     GLint x0 = static_cast<GLint>(rect.left);
@@ -571,11 +582,9 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
     glActiveTexture(GL_TEXTURE0);
     if (tuple.compressed) {
         glCompressedTexImage2D(GL_TEXTURE_2D, 0, tuple.internal_format,
-                               static_cast<GLsizei>(rect.GetWidth()),
-                               static_cast<GLsizei>(rect.GetHeight()), 0,
-                               rect.GetWidth() * rect.GetHeight() *
-                                   GetGLBytesPerPixel(pixel_format) / tuple.compression_factor,
-                               &gl_buffer[buffer_offset]);
+                               static_cast<GLsizei>(rect.GetWidth() * GetCompresssionFactor()),
+                               static_cast<GLsizei>(rect.GetHeight() * GetCompresssionFactor()), 0,
+                               size, &gl_buffer[buffer_offset]);
     } else {
         glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
                         static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
@@ -945,6 +954,33 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, ScaleMatc
     return surface;
 }
 
+boost::optional<Tegra::GPUVAddr> RasterizerCacheOpenGL::TryFindFramebufferGpuAddress(
+    VAddr cpu_addr) const {
+    // Tries to find the GPU address of a framebuffer based on the CPU address. This is because
+    // final output framebuffers are specified by CPU address, but internally our GPU cache uses GPU
+    // addresses. We iterate through all cached framebuffers, and compare their starting CPU address
+    // to the one provided. This is obviously not great, and won't work if the framebuffer overlaps
+    // surfaces.
+
+    std::vector<Tegra::GPUVAddr> gpu_addresses;
+    for (const auto& pair : surface_cache) {
+        for (const auto& surface : pair.second) {
+            const VAddr surface_cpu_addr = surface->GetCpuAddr();
+            if (cpu_addr >= surface_cpu_addr && cpu_addr < (surface_cpu_addr + surface->size)) {
+                ASSERT_MSG(cpu_addr == surface_cpu_addr, "overlapping surfaces are unsupported");
+                gpu_addresses.push_back(surface->addr);
+            }
+        }
+    }
+
+    if (gpu_addresses.empty()) {
+        return {};
+    }
+
+    ASSERT_MSG(gpu_addresses.size() == 1, ">1 surface is unsupported");
+    return gpu_addresses[0];
+}
+
 SurfaceRect_Tuple RasterizerCacheOpenGL::GetSurfaceSubRect(const SurfaceParams& params,
                                                            ScaleMatch match_res_scale,
                                                            bool load_if_create) {
@@ -1028,11 +1064,11 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
     auto& gpu = Core::System::GetInstance().GPU();
 
     SurfaceParams params;
-    params.addr = gpu.memory_manager->PhysicalToVirtualAddress(config.tic.Address());
-    params.width = config.tic.Width();
-    params.height = config.tic.Height();
+    params.addr = config.tic.Address();
     params.is_tiled = config.tic.IsTiled();
     params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
+    params.width = config.tic.Width() / params.GetCompresssionFactor();
+    params.height = config.tic.Height() / params.GetCompresssionFactor();
 
     // TODO(Subv): Different types per component are not supported.
     ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
@@ -1045,7 +1081,7 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
         params.block_height = config.tic.BlockHeight();
     } else {
         // Use the texture-provided stride value if the texture isn't tiled.
-        params.stride = params.PixelsInBytes(config.tic.Pitch());
+        params.stride = static_cast<u32>(params.PixelsInBytes(config.tic.Pitch()));
     }
 
     params.UpdateParams();
@@ -1073,11 +1109,10 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
 SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
     bool using_color_fb, bool using_depth_fb, const MathUtil::Rectangle<s32>& viewport) {
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-    const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
     const auto& config = regs.rt[0];
 
     // TODO(bunnei): This is hard corded to use just the first render buffer
-    LOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
+    NGLOG_WARNING(Render_OpenGL, "hard-coded for render target 0!");
 
     // update resolution_scale_factor and reset cache if changed
     // TODO (bunnei): This code was ported as-is from Citra, and is technically not thread-safe. We
@@ -1106,7 +1141,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
     color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
     SurfaceParams depth_params = color_params;
 
-    color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
+    color_params.addr = config.Address();
     color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
     color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
     color_params.UpdateParams();
@@ -1122,8 +1157,8 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
     // Make sure that framebuffers don't overlap if both color and depth are being used
     if (using_color_fb && using_depth_fb &&
         boost::icl::length(color_vp_interval & depth_vp_interval)) {
-        LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
-                                    "overlapping framebuffers not supported!");
+        NGLOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
+                                      "overlapping framebuffers not supported!");
         using_depth_fb = false;
     }
 
@@ -1222,7 +1257,8 @@ void RasterizerCacheOpenGL::DuplicateSurface(const Surface& src_surface,
     }
 }
 
-void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr, u64 size) {
+void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr,
+                                            u64 size) {
     if (size == 0)
         return;
 
@@ -1261,7 +1297,7 @@ void RasterizerCacheOpenGL::ValidateSurface(const Surface& surface, VAddr addr,
     }
 }
 
-void RasterizerCacheOpenGL::FlushRegion(VAddr addr, u64 size, Surface flush_surface) {
+void RasterizerCacheOpenGL::FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface) {
     if (size == 0)
         return;
 
@@ -1297,7 +1333,8 @@ void RasterizerCacheOpenGL::FlushAll() {
     FlushRegion(0, Kernel::VMManager::MAX_ADDRESS);
 }
 
-void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner) {
+void RasterizerCacheOpenGL::InvalidateRegion(Tegra::GPUVAddr addr, u64 size,
+                                             const Surface& region_owner) {
     if (size == 0)
         return;
 
@@ -1390,10 +1427,10 @@ void RasterizerCacheOpenGL::UnregisterSurface(const Surface& surface) {
     surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
 }
 
-void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
-    const u64 num_pages =
-        ((addr + size - 1) >> Memory::PAGE_BITS) - (addr >> Memory::PAGE_BITS) + 1;
-    const u64 page_start = addr >> Memory::PAGE_BITS;
+void RasterizerCacheOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {
+    const u64 num_pages = ((addr + size - 1) >> Tegra::MemoryManager::PAGE_BITS) -
+                          (addr >> Tegra::MemoryManager::PAGE_BITS) + 1;
+    const u64 page_start = addr >> Tegra::MemoryManager::PAGE_BITS;
     const u64 page_end = page_start + num_pages;
 
     // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
@@ -1406,8 +1443,10 @@ void RasterizerCacheOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int del
         const auto interval = pair.first & pages_interval;
         const int count = pair.second;
 
-        const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
-        const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
+        const Tegra::GPUVAddr interval_start_addr = boost::icl::first(interval)
+                                                    << Tegra::MemoryManager::PAGE_BITS;
+        const Tegra::GPUVAddr interval_end_addr = boost::icl::last_next(interval)
+                                                  << Tegra::MemoryManager::PAGE_BITS;
         const u64 interval_size = interval_end_addr - interval_start_addr;
 
         if (delta > 0 && count == delta)
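
TryFindFramebufferGpuAddress above covers the one place where the pipeline still receives a CPU
address (the final display framebuffer) and has to get back into the GPU-keyed cache. A simplified
sketch of that scan, using a stand-in Surface struct and matching only exact surface start
addresses (the real code also asserts when an address falls inside, but not at the start of, a
surface):

#include <cstdint>
#include <optional>
#include <vector>

using VAddr = std::uint64_t;
using GPUVAddr = std::uint64_t;

// Stand-in for yuzu's CachedSurface: just the fields the lookup needs.
struct Surface {
    GPUVAddr addr;  // GPU address the cache interval map is keyed by
    VAddr cpu_addr; // CPU address the surface data was loaded from
    std::uint64_t size;
};

// Linear scan comparing the framebuffer's CPU address against the starting CPU
// address of every cached surface; an empty optional means no surface matched.
std::optional<GPUVAddr> TryFindFramebufferGpuAddress(const std::vector<Surface>& surfaces,
                                                     VAddr cpu_addr) {
    for (const auto& surface : surfaces) {
        if (cpu_addr == surface.cpu_addr) {
            return surface.addr;
        }
    }
    return std::nullopt;
}

int main() {
    const std::vector<Surface> surfaces{{0x100000, 0x8000000, 0x10000}};
    return TryFindFramebufferGpuAddress(surfaces, 0x8000000).value_or(0) == 0x100000 ? 0 : 1;
}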
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index bf0fabb29..55f1bdee8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -17,12 +17,14 @@
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
+#include <boost/optional.hpp>
 #include <glad/glad.h>
 #include "common/assert.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
 #include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/textures/texture.h"
 
@@ -30,9 +32,9 @@ struct CachedSurface;
30using Surface = std::shared_ptr<CachedSurface>; 32using Surface = std::shared_ptr<CachedSurface>;
31using SurfaceSet = std::set<Surface>; 33using SurfaceSet = std::set<Surface>;
32 34
33using SurfaceRegions = boost::icl::interval_set<VAddr>; 35using SurfaceRegions = boost::icl::interval_set<Tegra::GPUVAddr>;
34using SurfaceMap = boost::icl::interval_map<VAddr, Surface>; 36using SurfaceMap = boost::icl::interval_map<Tegra::GPUVAddr, Surface>;
35using SurfaceCache = boost::icl::interval_map<VAddr, SurfaceSet>; 37using SurfaceCache = boost::icl::interval_map<Tegra::GPUVAddr, SurfaceSet>;
36 38
37using SurfaceInterval = SurfaceCache::interval_type; 39using SurfaceInterval = SurfaceCache::interval_type;
38static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() && 40static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval_type>() &&
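These aliases retarget the cache's boost::icl containers from CPU virtual addresses to Tegra::GPUVAddr; the aggregate-on-overlap semantics the cache relies on are unchanged. As a self-contained sketch of those semantics (plain integers stand in for the yuzu types), an interval_map splits overlapping insertions and combines their values, which is what lets the cache keep a per-page surface count:

    #include <boost/icl/interval_map.hpp>
    #include <cstdint>
    #include <iostream>

    int main() {
        using PageMap = boost::icl::interval_map<std::uint64_t, int>;
        using Interval = boost::icl::interval<std::uint64_t>;

        PageMap cached_pages;
        cached_pages += std::make_pair(Interval::right_open(0, 4), 1); // surface A: pages [0, 4)
        cached_pages += std::make_pair(Interval::right_open(2, 6), 1); // surface B: pages [2, 6)

        // Prints [0,2) -> 1, [2,4) -> 2, [4,6) -> 1: the overlap was split
        // and its counts summed.
        for (const auto& pair : cached_pages) {
            std::cout << boost::icl::first(pair.first) << ".."
                      << boost::icl::last_next(pair.first) << " -> " << pair.second << '\n';
        }
    }

Subtracting {interval, 1} later reverses this, and icl erases any segment whose count drops to zero, which is the behavior the comment in UpdatePagesCachedCount above alludes to.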
@@ -82,23 +84,49 @@ struct SurfaceParams {
82 Invalid = 4, 84 Invalid = 4,
83 }; 85 };
84 86
85 static constexpr unsigned int GetFormatBpp(PixelFormat format) { 87 /**
88 * Gets the compression factor for the specified PixelFormat. This applies to just the
89 * "compressed width" and "compressed height", not the overall compression factor of a
90 * compressed image. This is used for maintaining proper surface sizes for compressed texture
91 * formats.
92 */
 93 static constexpr u32 GetCompressionFactor(PixelFormat format) {
86 if (format == PixelFormat::Invalid) 94 if (format == PixelFormat::Invalid)
87 return 0; 95 return 0;
88 96
89 constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = { 97 constexpr std::array<u32, MaxPixelFormat> compression_factor_table = {{
98 1, // ABGR8
99 1, // B5G6R5
100 1, // A2B10G10R10
101 4, // DXT1
102 4, // DXT23
103 4, // DXT45
104 }};
105
106 ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
107 return compression_factor_table[static_cast<size_t>(format)];
108 }
109 u32 GetCompressionFactor() const {
110 return GetCompressionFactor(pixel_format);
111 }
112
113 static constexpr u32 GetFormatBpp(PixelFormat format) {
114 if (format == PixelFormat::Invalid)
115 return 0;
116
117 constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
90 32, // ABGR8 118 32, // ABGR8
91 16, // B5G6R5 119 16, // B5G6R5
92 32, // A2B10G10R10 120 32, // A2B10G10R10
93 64, // DXT1 121 64, // DXT1
94 128, // DXT23 122 128, // DXT23
95 128, // DXT45 123 128, // DXT45
96 }; 124 }};
97 125
98 ASSERT(static_cast<size_t>(format) < bpp_table.size()); 126 ASSERT(static_cast<size_t>(format) < bpp_table.size());
99 return bpp_table[static_cast<size_t>(format)]; 127 return bpp_table[static_cast<size_t>(format)];
100 } 128 }
101 unsigned int GetFormatBpp() const { 129 u32 GetFormatBpp() const {
102 return GetFormatBpp(pixel_format); 130 return GetFormatBpp(pixel_format);
103 } 131 }
104 132
@@ -253,6 +281,24 @@ struct SurfaceParams {
253 // Returns the region of the biggest valid rectangle within the interval 281 // Returns the region of the biggest valid rectangle within the interval
254 SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; 282 SurfaceInterval GetCopyableInterval(const Surface& src_surface) const;
255 283
284 /**
285 * Gets the actual width (in pixels) of the surface. This is provided because `width` is used
286 * for tracking the surface region in memory, which may be compressed for certain formats. In
287 * this scenario, `width` is actually the compressed width.
288 */
289 u32 GetActualWidth() const {
290 return width * GetCompressionFactor();
291 }
292
293 /**
294 * Gets the actual height (in pixels) of the surface. This is provided because `height` is used
295 * for tracking the surface region in memory, which may be compressed for certain formats. In
296 * this scenario, `height` is actually the compressed height.
297 */
298 u32 GetActualHeight() const {
299 return height * GetCompressionFactor();
300 }
301
256 u32 GetScaledWidth() const { 302 u32 GetScaledWidth() const {
257 return width * res_scale; 303 return width * res_scale;
258 } 304 }
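A worked example ties the new helpers together (the numbers follow the tables above; this is plain arithmetic, not yuzu code). A 512x512 DXT1 texture is tracked with width = height = 128, because DXT1's compression factor is 4 and the surface region is measured in 4x4 blocks; the bpp table's 64 bits per tracked pixel is then exactly one 8-byte DXT1 block:

    #include <cassert>
    #include <climits>
    #include <cstdint>

    int main() {
        // Hypothetical 512x512 DXT1 surface, per the tables above.
        const std::uint32_t compression_factor = 4; // DXT1 entry
        const std::uint32_t bpp = 64;               // bits per tracked pixel (one 4x4 block)

        const std::uint32_t width = 512 / compression_factor;  // tracked width  = 128
        const std::uint32_t height = 512 / compression_factor; // tracked height = 128

        const std::uint64_t size_in_bytes = std::uint64_t{width} * height * bpp / CHAR_BIT;

        // 128 * 128 blocks * 8 bytes per block = 131072 bytes, an 8:1 ratio
        // versus the 1 MiB an uncompressed ABGR8 surface would need.
        assert(size_in_bytes == 131072);
        assert(width * compression_factor == 512); // GetActualWidth() analogue
    }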
@@ -277,6 +323,8 @@ struct SurfaceParams {
277 return pixels * GetFormatBpp(pixel_format) / CHAR_BIT; 323 return pixels * GetFormatBpp(pixel_format) / CHAR_BIT;
278 } 324 }
279 325
326 VAddr GetCpuAddr() const;
327
280 bool ExactMatch(const SurfaceParams& other_surface) const; 328 bool ExactMatch(const SurfaceParams& other_surface) const;
281 bool CanSubRect(const SurfaceParams& sub_surface) const; 329 bool CanSubRect(const SurfaceParams& sub_surface) const;
282 bool CanExpand(const SurfaceParams& expanded_surface) const; 330 bool CanExpand(const SurfaceParams& expanded_surface) const;
@@ -285,8 +333,9 @@ struct SurfaceParams {
285 MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const; 333 MathUtil::Rectangle<u32> GetSubRect(const SurfaceParams& sub_surface) const;
286 MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const; 334 MathUtil::Rectangle<u32> GetScaledSubRect(const SurfaceParams& sub_surface) const;
287 335
288 VAddr addr = 0; 336 Tegra::GPUVAddr addr = 0;
289 VAddr end = 0; 337 Tegra::GPUVAddr end = 0;
338 boost::optional<VAddr> cpu_addr;
290 u64 size = 0; 339 u64 size = 0;
291 340
292 u32 width = 0; 341 u32 width = 0;
@@ -325,15 +374,15 @@ struct CachedSurface : SurfaceParams {
325 if (format == PixelFormat::Invalid) 374 if (format == PixelFormat::Invalid)
326 return 0; 375 return 0;
327 376
328 return SurfaceParams::GetFormatBpp(format) / 8; 377 return SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
329 } 378 }
330 379
331 std::unique_ptr<u8[]> gl_buffer; 380 std::unique_ptr<u8[]> gl_buffer;
332 size_t gl_buffer_size = 0; 381 size_t gl_buffer_size = 0;
333 382
334 // Read/Write data in Switch memory to/from gl_buffer 383 // Read/Write data in Switch memory to/from gl_buffer
335 void LoadGLBuffer(VAddr load_start, VAddr load_end); 384 void LoadGLBuffer(Tegra::GPUVAddr load_start, Tegra::GPUVAddr load_end);
336 void FlushGLBuffer(VAddr flush_start, VAddr flush_end); 385 void FlushGLBuffer(Tegra::GPUVAddr flush_start, Tegra::GPUVAddr flush_end);
337 386
338 // Upload/Download data in gl_buffer in/to this surface's texture 387 // Upload/Download data in gl_buffer in/to this surface's texture
339 void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle, 388 void UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint read_fb_handle,
@@ -362,6 +411,9 @@ public:
362 Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, 411 Surface GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale,
363 bool load_if_create); 412 bool load_if_create);
364 413
414 /// Tries to find a framebuffer GPU address based on the provided CPU address
415 boost::optional<Tegra::GPUVAddr> TryFindFramebufferGpuAddress(VAddr cpu_addr) const;
416
365 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from 417 /// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
366 /// Switch memory to OpenGL and caches it (if not already cached) 418 /// Switch memory to OpenGL and caches it (if not already cached)
367 SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, 419 SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
@@ -381,10 +433,10 @@ public:
381 SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params); 433 SurfaceRect_Tuple GetTexCopySurface(const SurfaceParams& params);
382 434
383 /// Write any cached resources overlapping the region back to memory (if dirty) 435 /// Write any cached resources overlapping the region back to memory (if dirty)
384 void FlushRegion(VAddr addr, u64 size, Surface flush_surface = nullptr); 436 void FlushRegion(Tegra::GPUVAddr addr, u64 size, Surface flush_surface = nullptr);
385 437
386 /// Mark region as being invalidated by region_owner (nullptr if Switch memory) 438 /// Mark region as being invalidated by region_owner (nullptr if Switch memory)
387 void InvalidateRegion(VAddr addr, u64 size, const Surface& region_owner); 439 void InvalidateRegion(Tegra::GPUVAddr addr, u64 size, const Surface& region_owner);
388 440
389 /// Flush all cached resources tracked by this cache manager 441 /// Flush all cached resources tracked by this cache manager
390 void FlushAll(); 442 void FlushAll();
@@ -393,7 +445,7 @@ private:
393 void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); 445 void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface);
394 446
395 /// Update surface's texture for given region when necessary 447 /// Update surface's texture for given region when necessary
396 void ValidateSurface(const Surface& surface, VAddr addr, u64 size); 448 void ValidateSurface(const Surface& surface, Tegra::GPUVAddr addr, u64 size);
397 449
398 /// Create a new surface 450 /// Create a new surface
399 Surface CreateSurface(const SurfaceParams& params); 451 Surface CreateSurface(const SurfaceParams& params);
@@ -405,7 +457,7 @@ private:
405 void UnregisterSurface(const Surface& surface); 457 void UnregisterSurface(const Surface& surface);
406 458
407 /// Increase/decrease the count of surfaces in pages touching the specified region 459 /// Increase/decrease the count of surfaces in pages touching the specified region
408 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta); 460 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta);
409 461
410 SurfaceCache surface_cache; 462 SurfaceCache surface_cache;
411 PageMap cached_pages; 463 PageMap cached_pages;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index baff2c7af..5ca9821b7 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -152,7 +152,8 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
152 screen_info.display_texture = screen_info.texture.resource.handle; 152 screen_info.display_texture = screen_info.texture.resource.handle;
153 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f); 153 screen_info.display_texcoords = MathUtil::Rectangle<float>(0.f, 0.f, 1.f, 1.f);
154 154
155 Rasterizer()->FlushRegion(framebuffer_addr, size_in_bytes); 155 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes,
156 Memory::FlushMode::Flush);
156 157
157 VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4, 158 VideoCore::MortonCopyPixels128(framebuffer.width, framebuffer.height, bytes_per_pixel, 4,
158 Memory::GetPointer(framebuffer_addr), 159 Memory::GetPointer(framebuffer_addr),
@@ -269,10 +270,9 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
269 GLint internal_format; 270 GLint internal_format;
270 switch (framebuffer.pixel_format) { 271 switch (framebuffer.pixel_format) {
271 case Tegra::FramebufferConfig::PixelFormat::ABGR8: 272 case Tegra::FramebufferConfig::PixelFormat::ABGR8:
272 // Use RGBA8 and swap in the fragment shader
273 internal_format = GL_RGBA; 273 internal_format = GL_RGBA;
274 texture.gl_format = GL_RGBA; 274 texture.gl_format = GL_RGBA;
275 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8; 275 texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
276 gl_framebuffer_data.resize(texture.width * texture.height * 4); 276 gl_framebuffer_data.resize(texture.width * texture.height * 4);
277 break; 277 break;
278 default: 278 default:
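The GL_UNSIGNED_INT_8_8_8_8 to GL_UNSIGNED_INT_8_8_8_8_REV change is what makes the deleted "swap in the fragment shader" comment obsolete: with the _REV packed type and format GL_RGBA, the R component occupies the least significant byte of each 32-bit value, so on a little-endian host the bytes land in memory as R, G, B, A. A small host-side sketch of that packing; the little-endian assumption is explicit in the asserts:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
        // GL_UNSIGNED_INT_8_8_8_8_REV with GL_RGBA: R occupies bits 0-7,
        // G bits 8-15, B bits 16-23, A bits 24-31 of each packed value.
        const std::uint32_t packed = 0xAABBCCDDu; // A=AA, B=BB, G=CC, R=DD

        std::uint8_t bytes[4];
        std::memcpy(bytes, &packed, sizeof(packed));

        // Little-endian host: the low byte has the lowest address, so the
        // in-memory order is R, G, B, A and no component swap is needed.
        assert(bytes[0] == 0xDD); // R
        assert(bytes[1] == 0xCC); // G
        assert(bytes[2] == 0xBB); // B
        assert(bytes[3] == 0xAA); // A
    }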
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index e0509f0ce..9c3ae875c 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -4,6 +4,7 @@
4 4
5#include <cstring> 5#include <cstring>
6#include "common/assert.h" 6#include "common/assert.h"
7#include "core/memory.h"
7#include "video_core/textures/decoders.h" 8#include "video_core/textures/decoders.h"
8#include "video_core/textures/texture.h" 9#include "video_core/textures/texture.h"
9 10
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 5fada74be..1fbca8ad0 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -378,10 +378,10 @@ void GraphicsSurfaceWidget::OnUpdate() {
378 // TODO: Implement a good way to visualize alpha components! 378 // TODO: Implement a good way to visualize alpha components!
379 379
380 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); 380 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
381 VAddr address = gpu.memory_manager->PhysicalToVirtualAddress(surface_address); 381 boost::optional<VAddr> address = gpu.memory_manager->GpuToCpuAddress(surface_address);
382 382
383 auto unswizzled_data = 383 auto unswizzled_data =
384 Tegra::Texture::UnswizzleTexture(address, surface_format, surface_width, surface_height); 384 Tegra::Texture::UnswizzleTexture(*address, surface_format, surface_width, surface_height);
385 385
386 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, 386 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
387 surface_width, surface_height); 387 surface_width, surface_height);
@@ -437,9 +437,9 @@ void GraphicsSurfaceWidget::SaveSurface() {
437 pixmap->save(&file, "PNG"); 437 pixmap->save(&file, "PNG");
438 } else if (selectedFilter == bin_filter) { 438 } else if (selectedFilter == bin_filter) {
439 auto& gpu = Core::System::GetInstance().GPU(); 439 auto& gpu = Core::System::GetInstance().GPU();
440 VAddr address = gpu.memory_manager->PhysicalToVirtualAddress(surface_address); 440 boost::optional<VAddr> address = gpu.memory_manager->GpuToCpuAddress(surface_address);
441 441
442 const u8* buffer = Memory::GetPointer(address); 442 const u8* buffer = Memory::GetPointer(*address);
443 ASSERT_MSG(buffer != nullptr, "Memory not accessible"); 443 ASSERT_MSG(buffer != nullptr, "Memory not accessible");
444 444
445 QFile file(filename); 445 QFile file(filename);
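One caveat on the two debugger hunks above: both dereference the boost::optional<VAddr> result immediately, and GpuToCpuAddress is a partial mapping, so an unmapped surface_address would make *address undefined behavior. A standalone sketch of the checked pattern, using a made-up stand-in for the real Tegra::MemoryManager lookup:

    #include <boost/optional.hpp>
    #include <cstdint>
    #include <iostream>

    using GPUVAddr = std::uint64_t;
    using VAddr = std::uint64_t;

    // Stand-in for Tegra::MemoryManager::GpuToCpuAddress; the GPU -> CPU
    // mapping is partial, hence the optional result. The window is made up.
    boost::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
        if (gpu_addr < 0x10000)
            return VAddr{0x8000000} + gpu_addr;
        return boost::none;
    }

    int main() {
        // Check before dereferencing instead of writing *address unconditionally.
        if (const boost::optional<VAddr> addr = GpuToCpuAddress(0x4000)) {
            std::cout << "mapped at 0x" << std::hex << *addr << '\n';
        } else {
            std::cout << "surface address is not mapped\n";
        }
    }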