summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2019-02-18 20:58:32 -0500
committer: bunnei, 2019-03-14 22:34:42 -0400
commit: 2eaf6c41a4686028c0abc84d1be6fd48a67cf49f (patch)
tree: 6ad0848c848aea68e637386cad5068e13c831b92 /src
parent: Merge pull request #2233 from ReinUsesLisp/morton-cleanup (diff)
download: yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.tar.gz
download: yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.tar.xz
download: yuzu-2eaf6c41a4686028c0abc84d1be6fd48a67cf49f.zip
gpu: Use host address for caching instead of guest address.
Diffstat (limited to 'src')
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 3
-rw-r--r-- src/core/memory.cpp | 13
-rw-r--r-- src/video_core/engines/kepler_memory.cpp | 4
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp | 5
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp | 7
-rw-r--r-- src/video_core/gpu.h | 11
-rw-r--r-- src/video_core/gpu_asynch.cpp | 6
-rw-r--r-- src/video_core/gpu_asynch.h | 6
-rw-r--r-- src/video_core/gpu_synch.cpp | 6
-rw-r--r-- src/video_core/gpu_synch.h | 6
-rw-r--r-- src/video_core/gpu_thread.cpp | 136
-rw-r--r-- src/video_core/gpu_thread.h | 132
-rw-r--r-- src/video_core/rasterizer_cache.h | 70
-rw-r--r-- src/video_core/rasterizer_interface.h | 8
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 24
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h | 31
-rw-r--r-- src/video_core/renderer_opengl/gl_global_cache.cpp | 18
-rw-r--r-- src/video_core/renderer_opengl/gl_global_cache.h | 13
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 10
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h | 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 36
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer_cache.h | 15
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp | 43
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h | 19
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 27
-rw-r--r-- src/video_core/renderer_vulkan/vk_buffer_cache.h | 31
26 files changed, 394 insertions, 294 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index a34b9e753..b031ebc66 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,6 +10,7 @@
10#include "core/core.h" 10#include "core/core.h"
11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" 11#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
12#include "core/hle/service/nvdrv/devices/nvmap.h" 12#include "core/hle/service/nvdrv/devices/nvmap.h"
13#include "core/memory.h"
13#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
14#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
15#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
@@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou
178 auto& gpu = system_instance.GPU(); 179 auto& gpu = system_instance.GPU();
179 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); 180 auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset);
180 ASSERT(cpu_addr); 181 ASSERT(cpu_addr);
181 gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); 182 gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size);
182 183
183 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); 184 params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size);
184 185
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 6591c45d2..4fde53033 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -67,8 +67,11 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
68 (base + size) * PAGE_SIZE); 68 (base + size) * PAGE_SIZE);
69 69
70 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, 70 // During boot, current_page_table might not be set yet, in which case we need not flush
71 FlushMode::FlushAndInvalidate); 71 if (current_page_table) {
72 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE,
73 FlushMode::FlushAndInvalidate);
74 }
72 75
73 VAddr end = base + size; 76 VAddr end = base + size;
74 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", 77 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
@@ -359,13 +362,13 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
359 auto& gpu = system_instance.GPU(); 362 auto& gpu = system_instance.GPU();
360 switch (mode) { 363 switch (mode) {
361 case FlushMode::Flush: 364 case FlushMode::Flush:
362 gpu.FlushRegion(overlap_start, overlap_size); 365 gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
363 break; 366 break;
364 case FlushMode::Invalidate: 367 case FlushMode::Invalidate:
365 gpu.InvalidateRegion(overlap_start, overlap_size); 368 gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
366 break; 369 break;
367 case FlushMode::FlushAndInvalidate: 370 case FlushMode::FlushAndInvalidate:
368 gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); 371 gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
369 break; 372 break;
370 } 373 }
371 }; 374 };
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index aae2a4019..daefa43a6 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -9,6 +9,7 @@
9#include "video_core/engines/kepler_memory.h" 9#include "video_core/engines/kepler_memory.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
12 13
13namespace Tegra::Engines { 14namespace Tegra::Engines {
14 15
@@ -48,7 +49,8 @@ void KeplerMemory::ProcessData(u32 data) {
48 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 49 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
49 // We do this before actually writing the new data because the destination address might contain 50 // We do this before actually writing the new data because the destination address might contain
50 // a dirty surface that will have to be written back to memory. 51 // a dirty surface that will have to be written back to memory.
51 Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); 52 system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)),
53 sizeof(u32));
52 54
53 Memory::Write32(*dest_address, data); 55 Memory::Write32(*dest_address, data);
54 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 56 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 144e7fa82..49979694e 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -396,7 +396,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos);
397 ASSERT_MSG(address, "Invalid GPU address"); 397 ASSERT_MSG(address, "Invalid GPU address");
398 398
399 Memory::Write32(*address, value); 399 u8* ptr{Memory::GetPointer(*address)};
400 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
401 std::memcpy(ptr, &value, sizeof(u32));
402
400 dirty_flags.OnMemoryWrite(); 403 dirty_flags.OnMemoryWrite();
401 404
402 // Increment the current buffer position. 405 // Increment the current buffer position.
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 9dfea5999..415a6319a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -9,6 +9,7 @@
9#include "video_core/engines/maxwell_3d.h" 9#include "video_core/engines/maxwell_3d.h"
10#include "video_core/engines/maxwell_dma.h" 10#include "video_core/engines/maxwell_dma.h"
11#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
12#include "video_core/renderer_base.h"
12#include "video_core/textures/decoders.h" 13#include "video_core/textures/decoders.h"
13 14
14namespace Tegra::Engines { 15namespace Tegra::Engines {
@@ -92,12 +93,14 @@ void MaxwellDMA::HandleCopy() {
92 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 93 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
93 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 94 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
94 // copying. 95 // copying.
95 Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); 96 Core::System::GetInstance().Renderer().Rasterizer().FlushRegion(
97 ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
96 98
97 // We have to invalidate the destination region to evict any outdated surfaces from the 99 // We have to invalidate the destination region to evict any outdated surfaces from the
98 // cache. We do this before actually writing the new data because the destination address 100 // cache. We do this before actually writing the new data because the destination address
99 // might contain a dirty surface that will have to be written back to memory. 101 // might contain a dirty surface that will have to be written back to memory.
100 Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); 102 Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion(
103 ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
101 }; 104 };
102 105
103 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 106 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 56a203275..a14b95c30 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -11,6 +11,11 @@
11#include "video_core/dma_pusher.h" 11#include "video_core/dma_pusher.h"
12#include "video_core/memory_manager.h" 12#include "video_core/memory_manager.h"
13 13
14using CacheAddr = std::uintptr_t;
15inline CacheAddr ToCacheAddr(const void* host_ptr) {
16 return reinterpret_cast<CacheAddr>(host_ptr);
17}
18
14namespace Core { 19namespace Core {
15class System; 20class System;
16} 21}
@@ -209,13 +214,13 @@ public:
209 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; 214 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
210 215
211 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 216 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
212 virtual void FlushRegion(VAddr addr, u64 size) = 0; 217 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
213 218
214 /// Notify rasterizer that any caches of the specified region should be invalidated 219 /// Notify rasterizer that any caches of the specified region should be invalidated
215 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 220 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
216 221
217 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 222 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
218 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 223 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
219 224
220private: 225private:
221 void ProcessBindMethod(const MethodCall& method_call); 226 void ProcessBindMethod(const MethodCall& method_call);
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index ad0a747e3..8b355cf7b 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers(
22 gpu_thread.SwapBuffers(std::move(framebuffer)); 22 gpu_thread.SwapBuffers(std::move(framebuffer));
23} 23}
24 24
25void GPUAsynch::FlushRegion(VAddr addr, u64 size) { 25void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
26 gpu_thread.FlushRegion(addr, size); 26 gpu_thread.FlushRegion(addr, size);
27} 27}
28 28
29void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { 29void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 gpu_thread.InvalidateRegion(addr, size); 30 gpu_thread.InvalidateRegion(addr, size);
31} 31}
32 32
33void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { 33void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 gpu_thread.FlushAndInvalidateRegion(addr, size); 34 gpu_thread.FlushAndInvalidateRegion(addr, size);
35} 35}
36 36
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index e6a807aba..1dcc61a6c 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -26,9 +26,9 @@ public:
26 void PushGPUEntries(Tegra::CommandList&& entries) override; 26 void PushGPUEntries(Tegra::CommandList&& entries) override;
27 void SwapBuffers( 27 void SwapBuffers(
28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; 28 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
29 void FlushRegion(VAddr addr, u64 size) override; 29 void FlushRegion(CacheAddr addr, u64 size) override;
30 void InvalidateRegion(VAddr addr, u64 size) override; 30 void InvalidateRegion(CacheAddr addr, u64 size) override;
31 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 31 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
32 32
33private: 33private:
34 GPUThread::ThreadManager gpu_thread; 34 GPUThread::ThreadManager gpu_thread;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 4c00b96c7..2cfc900ed 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers(
22 renderer.SwapBuffers(std::move(framebuffer)); 22 renderer.SwapBuffers(std::move(framebuffer));
23} 23}
24 24
25void GPUSynch::FlushRegion(VAddr addr, u64 size) { 25void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
26 renderer.Rasterizer().FlushRegion(addr, size); 26 renderer.Rasterizer().FlushRegion(addr, size);
27} 27}
28 28
29void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { 29void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
30 renderer.Rasterizer().InvalidateRegion(addr, size); 30 renderer.Rasterizer().InvalidateRegion(addr, size);
31} 31}
32 32
33void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { 33void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); 34 renderer.Rasterizer().FlushAndInvalidateRegion(addr, size);
35} 35}
36 36
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 7d5a241ff..766b5631c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -21,9 +21,9 @@ public:
21 void PushGPUEntries(Tegra::CommandList&& entries) override; 21 void PushGPUEntries(Tegra::CommandList&& entries) override;
22 void SwapBuffers( 22 void SwapBuffers(
23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; 23 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
24 void FlushRegion(VAddr addr, u64 size) override; 24 void FlushRegion(CacheAddr addr, u64 size) override;
25 void InvalidateRegion(VAddr addr, u64 size) override; 25 void InvalidateRegion(CacheAddr addr, u64 size) override;
26 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 26 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
27}; 27};
28 28
29} // namespace VideoCommon 29} // namespace VideoCommon
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c5bdd2a17..086b2f625 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,7 +5,6 @@
5#include "common/assert.h" 5#include "common/assert.h"
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/frontend/scope_acquire_window_context.h" 7#include "core/frontend/scope_acquire_window_context.h"
8#include "core/settings.h"
9#include "video_core/dma_pusher.h" 8#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h" 9#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h" 10#include "video_core/gpu_thread.h"
@@ -13,38 +12,13 @@
13 12
14namespace VideoCommon::GPUThread { 13namespace VideoCommon::GPUThread {
15 14
16/// Executes a single GPU thread command
17static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer,
18 Tegra::DmaPusher& dma_pusher) {
19 if (const auto submit_list = std::get_if<SubmitListCommand>(command)) {
20 dma_pusher.Push(std::move(submit_list->entries));
21 dma_pusher.DispatchCalls();
22 } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) {
23 renderer.SwapBuffers(data->framebuffer);
24 } else if (const auto data = std::get_if<FlushRegionCommand>(command)) {
25 renderer.Rasterizer().FlushRegion(data->addr, data->size);
26 } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) {
27 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
28 } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) {
29 renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size);
30 } else {
31 UNREACHABLE();
32 }
33}
34
35/// Runs the GPU thread 15/// Runs the GPU thread
36static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, 16static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher,
37 SynchState& state) { 17 SynchState& state) {
38
39 MicroProfileOnThreadCreate("GpuThread"); 18 MicroProfileOnThreadCreate("GpuThread");
40 19
41 auto WaitForWakeup = [&]() {
42 std::unique_lock<std::mutex> lock{state.signal_mutex};
43 state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; });
44 };
45
46 // Wait for first GPU command before acquiring the window context 20 // Wait for first GPU command before acquiring the window context
47 WaitForWakeup(); 21 state.WaitForCommands();
48 22
49 // If emulation was stopped during disk shader loading, abort before trying to acquire context 23 // If emulation was stopped during disk shader loading, abort before trying to acquire context
50 if (!state.is_running) { 24 if (!state.is_running) {
@@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
53 27
54 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; 28 Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()};
55 29
30 CommandDataContainer next;
56 while (state.is_running) { 31 while (state.is_running) {
57 if (!state.is_running) { 32 state.WaitForCommands();
58 return; 33 while (!state.queue.Empty()) {
59 } 34 state.queue.Pop(next);
60 35 if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) {
61 { 36 dma_pusher.Push(std::move(submit_list->entries));
62 // Thread has been woken up, so make the previous write queue the next read queue 37 dma_pusher.DispatchCalls();
63 std::lock_guard<std::mutex> lock{state.signal_mutex}; 38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
64 std::swap(state.push_queue, state.pop_queue); 39 state.DecrementFramesCounter();
65 } 40 renderer.SwapBuffers(std::move(data->framebuffer));
66 41 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
67 // Execute all of the GPU commands 42 renderer.Rasterizer().FlushRegion(data->addr, data->size);
68 while (!state.pop_queue->empty()) { 43 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
69 ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); 44 renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
70 state.pop_queue->pop(); 45 } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) {
46 return;
47 } else {
48 UNREACHABLE();
49 }
71 } 50 }
72
73 state.UpdateIdleState();
74
75 // Signal that the GPU thread has finished processing commands
76 if (state.is_idle) {
77 state.idle_condition.notify_one();
78 }
79
80 // Wait for CPU thread to send more GPU commands
81 WaitForWakeup();
82 } 51 }
83} 52}
84 53
85ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) 54ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher)
86 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), 55 : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer),
87 std::ref(dma_pusher), std::ref(state)}, 56 std::ref(dma_pusher), std::ref(state)} {}
88 thread_id{thread.get_id()} {}
89 57
90ThreadManager::~ThreadManager() { 58ThreadManager::~ThreadManager() {
91 { 59 // Notify GPU thread that a shutdown is pending
92 // Notify GPU thread that a shutdown is pending 60 PushCommand(EndProcessingCommand());
93 std::lock_guard<std::mutex> lock{state.signal_mutex};
94 state.is_running = false;
95 }
96
97 state.signal_condition.notify_one();
98 thread.join(); 61 thread.join();
99} 62}
100 63
101void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 64void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
102 if (entries.empty()) { 65 PushCommand(SubmitListCommand(std::move(entries)));
103 return;
104 }
105
106 PushCommand(SubmitListCommand(std::move(entries)), false, false);
107} 66}
108 67
109void ThreadManager::SwapBuffers( 68void ThreadManager::SwapBuffers(
110 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { 69 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
111 PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); 70 state.IncrementFramesCounter();
71 PushCommand(SwapBuffersCommand(std::move(framebuffer)));
72 state.WaitForFrames();
112} 73}
113 74
114void ThreadManager::FlushRegion(VAddr addr, u64 size) { 75void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
115 // Block the CPU when using accurate emulation 76 PushCommand(FlushRegionCommand(addr, size));
116 PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false);
117} 77}
118 78
119void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 79void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
120 PushCommand(InvalidateRegionCommand(addr, size), true, true); 80 if (state.queue.Empty()) {
81 // It's quicker to invalidate a single region on the CPU if the queue is already empty
82 renderer.Rasterizer().InvalidateRegion(addr, size);
83 } else {
84 PushCommand(InvalidateRegionCommand(addr, size));
85 }
121} 86}
122 87
123void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 88void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
124 InvalidateRegion(addr, size); 90 InvalidateRegion(addr, size);
125} 91}
126 92
127void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { 93void ThreadManager::PushCommand(CommandData&& command_data) {
128 { 94 state.queue.Push(CommandDataContainer(std::move(command_data)));
129 std::lock_guard<std::mutex> lock{state.signal_mutex}; 95 state.SignalCommands();
130
131 if ((allow_on_cpu && state.is_idle) || IsGpuThread()) {
132 // Execute the command synchronously on the current thread
133 ExecuteCommand(&command_data, renderer, dma_pusher);
134 return;
135 }
136
137 // Push the command to the GPU thread
138 state.UpdateIdleState();
139 state.push_queue->emplace(command_data);
140 }
141
142 // Signal the GPU thread that commands are pending
143 state.signal_condition.notify_one();
144
145 if (wait_for_idle) {
146 // Wait for the GPU to be idle (all commands to be executed)
147 std::unique_lock<std::mutex> lock{state.idle_mutex};
148 state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); });
149 }
150} 96}
151 97
152} // namespace VideoCommon::GPUThread 98} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..8cd7db1c6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -13,6 +13,9 @@
13#include <thread> 13#include <thread>
14#include <variant> 14#include <variant>
15 15
16#include "common/threadsafe_queue.h"
17#include "video_core/gpu.h"
18
16namespace Tegra { 19namespace Tegra {
17struct FramebufferConfig; 20struct FramebufferConfig;
18class DmaPusher; 21class DmaPusher;
@@ -24,6 +27,9 @@ class RendererBase;
24 27
25namespace VideoCommon::GPUThread { 28namespace VideoCommon::GPUThread {
26 29
30/// Command to signal to the GPU thread that processing has ended
31struct EndProcessingCommand final {};
32
27/// Command to signal to the GPU thread that a command list is ready for processing 33/// Command to signal to the GPU thread that a command list is ready for processing
28struct SubmitListCommand final { 34struct SubmitListCommand final {
29 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} 35 explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,59 +42,110 @@ struct SwapBuffersCommand final {
36 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) 42 explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
37 : framebuffer{std::move(framebuffer)} {} 43 : framebuffer{std::move(framebuffer)} {}
38 44
39 std::optional<const Tegra::FramebufferConfig> framebuffer; 45 std::optional<Tegra::FramebufferConfig> framebuffer;
40}; 46};
41 47
42/// Command to signal to the GPU thread to flush a region 48/// Command to signal to the GPU thread to flush a region
43struct FlushRegionCommand final { 49struct FlushRegionCommand final {
44 explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} 50 explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
45 51
46 const VAddr addr; 52 CacheAddr addr;
47 const u64 size; 53 u64 size;
48}; 54};
49 55
50/// Command to signal to the GPU thread to invalidate a region 56/// Command to signal to the GPU thread to invalidate a region
51struct InvalidateRegionCommand final { 57struct InvalidateRegionCommand final {
52 explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} 58 explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
53 59
54 const VAddr addr; 60 CacheAddr addr;
55 const u64 size; 61 u64 size;
56}; 62};
57 63
58/// Command to signal to the GPU thread to flush and invalidate a region 64/// Command to signal to the GPU thread to flush and invalidate a region
59struct FlushAndInvalidateRegionCommand final { 65struct FlushAndInvalidateRegionCommand final {
60 explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) 66 explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
61 : addr{addr}, size{size} {} 67 : addr{addr}, size{size} {}
62 68
63 const VAddr addr; 69 CacheAddr addr;
64 const u64 size; 70 u64 size;
65}; 71};
66 72
67using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, 73using CommandData =
68 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; 74 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
75 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
76
77struct CommandDataContainer {
78 CommandDataContainer() = default;
79
80 CommandDataContainer(CommandData&& data) : data{std::move(data)} {}
81
82 CommandDataContainer& operator=(const CommandDataContainer& t) {
83 data = std::move(t.data);
84 return *this;
85 }
86
87 CommandData data;
88};
69 89
70/// Struct used to synchronize the GPU thread 90/// Struct used to synchronize the GPU thread
71struct SynchState final { 91struct SynchState final {
72 std::atomic<bool> is_running{true}; 92 std::atomic_bool is_running{true};
73 std::atomic<bool> is_idle{true}; 93 std::atomic_int queued_frame_count{};
74 std::condition_variable signal_condition; 94 std::mutex frames_mutex;
75 std::mutex signal_mutex; 95 std::mutex commands_mutex;
76 std::condition_variable idle_condition; 96 std::condition_variable commands_condition;
77 std::mutex idle_mutex; 97 std::condition_variable frames_condition;
78 98
79 // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and 99 void IncrementFramesCounter() {
80 // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes 100 std::lock_guard<std::mutex> lock{frames_mutex};
81 // empty. This allows for efficient thread-safe access, as it does not require any copies. 101 ++queued_frame_count;
82 102 }
83 using CommandQueue = std::queue<CommandData>; 103
84 std::array<CommandQueue, 2> command_queues; 104 void DecrementFramesCounter() {
85 CommandQueue* push_queue{&command_queues[0]}; 105 {
86 CommandQueue* pop_queue{&command_queues[1]}; 106 std::lock_guard<std::mutex> lock{frames_mutex};
87 107 --queued_frame_count;
88 void UpdateIdleState() { 108
89 std::lock_guard<std::mutex> lock{idle_mutex}; 109 if (queued_frame_count) {
90 is_idle = command_queues[0].empty() && command_queues[1].empty(); 110 return;
111 }
112 }
113 frames_condition.notify_one();
91 } 114 }
115
116 void WaitForFrames() {
117 {
118 std::lock_guard<std::mutex> lock{frames_mutex};
119 if (!queued_frame_count) {
120 return;
121 }
122 }
123
124 // Wait for the GPU to be idle (all commands to be executed)
125 {
126 std::unique_lock<std::mutex> lock{frames_mutex};
127 frames_condition.wait(lock, [this] { return !queued_frame_count; });
128 }
129 }
130
131 void SignalCommands() {
132 {
133 std::unique_lock<std::mutex> lock{commands_mutex};
134 if (queue.Empty()) {
135 return;
136 }
137 }
138
139 commands_condition.notify_one();
140 }
141
142 void WaitForCommands() {
143 std::unique_lock<std::mutex> lock{commands_mutex};
144 commands_condition.wait(lock, [this] { return !queue.Empty(); });
145 }
146
147 using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
148 CommandQueue queue;
92}; 149};
93 150
94/// Class used to manage the GPU thread 151/// Class used to manage the GPU thread
@@ -105,22 +162,17 @@ public:
105 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); 162 std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
106 163
107 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 164 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
108 void FlushRegion(VAddr addr, u64 size); 165 void FlushRegion(CacheAddr addr, u64 size);
109 166
110 /// Notify rasterizer that any caches of the specified region should be invalidated 167 /// Notify rasterizer that any caches of the specified region should be invalidated
111 void InvalidateRegion(VAddr addr, u64 size); 168 void InvalidateRegion(CacheAddr addr, u64 size);
112 169
113 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated 170 /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
114 void FlushAndInvalidateRegion(VAddr addr, u64 size); 171 void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
115 172
116private: 173private:
117 /// Pushes a command to be executed by the GPU thread 174 /// Pushes a command to be executed by the GPU thread
118 void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); 175 void PushCommand(CommandData&& command_data);
119
120 /// Returns true if this is called by the GPU thread
121 bool IsGpuThread() const {
122 return std::this_thread::get_id() == thread_id;
123 }
124 176
125private: 177private:
126 SynchState state; 178 SynchState state;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
index a7bcf26fb..ecd9986a0 100644
--- a/src/video_core/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache.h
@@ -4,6 +4,7 @@
4 4
5#pragma once 5#pragma once
6 6
7#include <mutex>
7#include <set> 8#include <set>
8#include <unordered_map> 9#include <unordered_map>
9 10
@@ -12,14 +13,26 @@
12 13
13#include "common/common_types.h" 14#include "common/common_types.h"
14#include "core/settings.h" 15#include "core/settings.h"
16#include "video_core/gpu.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16 18
17class RasterizerCacheObject { 19class RasterizerCacheObject {
18public: 20public:
21 explicit RasterizerCacheObject(const u8* host_ptr)
22 : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {}
23
19 virtual ~RasterizerCacheObject(); 24 virtual ~RasterizerCacheObject();
20 25
26 CacheAddr GetCacheAddr() const {
27 return cache_addr;
28 }
29
30 const u8* GetHostPtr() const {
31 return host_ptr;
32 }
33
21 /// Gets the address of the shader in guest memory, required for cache management 34 /// Gets the address of the shader in guest memory, required for cache management
22 virtual VAddr GetAddr() const = 0; 35 virtual VAddr GetCpuAddr() const = 0;
23 36
24 /// Gets the size of the shader in guest memory, required for cache management 37 /// Gets the size of the shader in guest memory, required for cache management
25 virtual std::size_t GetSizeInBytes() const = 0; 38 virtual std::size_t GetSizeInBytes() const = 0;
@@ -58,6 +71,8 @@ private:
58 bool is_registered{}; ///< Whether the object is currently registered with the cache 71 bool is_registered{}; ///< Whether the object is currently registered with the cache
59 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) 72 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
60 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing 73 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
74 CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space
75 const u8* host_ptr{}; ///< Pointer to the memory backing this cached region
61}; 76};
62 77
63template <class T> 78template <class T>
@@ -68,7 +83,9 @@ public:
68 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} 83 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
69 84
70 /// Write any cached resources overlapping the specified region back to memory 85 /// Write any cached resources overlapping the specified region back to memory
71 void FlushRegion(Tegra::GPUVAddr addr, size_t size) { 86 void FlushRegion(CacheAddr addr, std::size_t size) {
87 std::lock_guard<std::recursive_mutex> lock{mutex};
88
72 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 89 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
73 for (auto& object : objects) { 90 for (auto& object : objects) {
74 FlushObject(object); 91 FlushObject(object);
@@ -76,7 +93,9 @@ public:
76 } 93 }
77 94
78 /// Mark the specified region as being invalidated 95 /// Mark the specified region as being invalidated
79 void InvalidateRegion(VAddr addr, u64 size) { 96 void InvalidateRegion(CacheAddr addr, u64 size) {
97 std::lock_guard<std::recursive_mutex> lock{mutex};
98
80 const auto& objects{GetSortedObjectsFromRegion(addr, size)}; 99 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
81 for (auto& object : objects) { 100 for (auto& object : objects) {
82 if (!object->IsRegistered()) { 101 if (!object->IsRegistered()) {
@@ -89,48 +108,60 @@ public:
89 108
90 /// Invalidates everything in the cache 109 /// Invalidates everything in the cache
91 void InvalidateAll() { 110 void InvalidateAll() {
111 std::lock_guard<std::recursive_mutex> lock{mutex};
112
92 while (interval_cache.begin() != interval_cache.end()) { 113 while (interval_cache.begin() != interval_cache.end()) {
93 Unregister(*interval_cache.begin()->second.begin()); 114 Unregister(*interval_cache.begin()->second.begin());
94 } 115 }
95 } 116 }
96 117
97protected: 118protected:
98 /// Tries to get an object from the cache with the specified address 119 /// Tries to get an object from the cache with the specified cache address
99 T TryGet(VAddr addr) const { 120 T TryGet(CacheAddr addr) const {
100 const auto iter = map_cache.find(addr); 121 const auto iter = map_cache.find(addr);
101 if (iter != map_cache.end()) 122 if (iter != map_cache.end())
102 return iter->second; 123 return iter->second;
103 return nullptr; 124 return nullptr;
104 } 125 }
105 126
127 T TryGet(const void* addr) const {
128 const auto iter = map_cache.find(ToCacheAddr(addr));
129 if (iter != map_cache.end())
130 return iter->second;
131 return nullptr;
132 }
133
106 /// Register an object into the cache 134 /// Register an object into the cache
107 void Register(const T& object) { 135 void Register(const T& object) {
136 std::lock_guard<std::recursive_mutex> lock{mutex};
137
108 object->SetIsRegistered(true); 138 object->SetIsRegistered(true);
109 interval_cache.add({GetInterval(object), ObjectSet{object}}); 139 interval_cache.add({GetInterval(object), ObjectSet{object}});
110 map_cache.insert({object->GetAddr(), object}); 140 map_cache.insert({object->GetCacheAddr(), object});
111 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); 141 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
112 } 142 }
113 143
114 /// Unregisters an object from the cache 144 /// Unregisters an object from the cache
115 void Unregister(const T& object) { 145 void Unregister(const T& object) {
116 object->SetIsRegistered(false); 146 std::lock_guard<std::recursive_mutex> lock{mutex};
117 rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1);
118 // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
119 if (Settings::values.use_accurate_gpu_emulation) {
120 FlushObject(object);
121 }
122 147
148 object->SetIsRegistered(false);
149 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
123 interval_cache.subtract({GetInterval(object), ObjectSet{object}}); 150 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
124 map_cache.erase(object->GetAddr()); 151 map_cache.erase(object->GetCacheAddr());
125 } 152 }
126 153
127 /// Returns a ticks counter used for tracking when cached objects were last modified 154 /// Returns a ticks counter used for tracking when cached objects were last modified
128 u64 GetModifiedTicks() { 155 u64 GetModifiedTicks() {
156 std::lock_guard<std::recursive_mutex> lock{mutex};
157
129 return ++modified_ticks; 158 return ++modified_ticks;
130 } 159 }
131 160
132 /// Flushes the specified object, updating appropriate cache state as needed 161 /// Flushes the specified object, updating appropriate cache state as needed
133 void FlushObject(const T& object) { 162 void FlushObject(const T& object) {
163 std::lock_guard<std::recursive_mutex> lock{mutex};
164
134 if (!object->IsDirty()) { 165 if (!object->IsDirty()) {
135 return; 166 return;
136 } 167 }
@@ -140,7 +171,7 @@ protected:
140 171
141private: 172private:
142 /// Returns a list of cached objects from the specified memory region, ordered by access time 173 /// Returns a list of cached objects from the specified memory region, ordered by access time
143 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { 174 std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) {
144 if (size == 0) { 175 if (size == 0) {
145 return {}; 176 return {};
146 } 177 }
@@ -164,17 +195,18 @@ private:
164 } 195 }
165 196
166 using ObjectSet = std::set<T>; 197 using ObjectSet = std::set<T>;
167 using ObjectCache = std::unordered_map<VAddr, T>; 198 using ObjectCache = std::unordered_map<CacheAddr, T>;
168 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; 199 using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>;
169 using ObjectInterval = typename IntervalCache::interval_type; 200 using ObjectInterval = typename IntervalCache::interval_type;
170 201
171 static auto GetInterval(const T& object) { 202 static auto GetInterval(const T& object) {
172 return ObjectInterval::right_open(object->GetAddr(), 203 return ObjectInterval::right_open(object->GetCacheAddr(),
173 object->GetAddr() + object->GetSizeInBytes()); 204 object->GetCacheAddr() + object->GetSizeInBytes());
174 } 205 }
175 206
176 ObjectCache map_cache; 207 ObjectCache map_cache;
177 IntervalCache interval_cache; ///< Cache of objects 208 IntervalCache interval_cache; ///< Cache of objects
178 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing 209 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
179 VideoCore::RasterizerInterface& rasterizer; 210 VideoCore::RasterizerInterface& rasterizer;
211 std::recursive_mutex mutex;
180}; 212};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6a1dc9cf6..76e292e87 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -35,14 +35,14 @@ public:
35 virtual void FlushAll() = 0; 35 virtual void FlushAll() = 0;
36 36
37 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 37 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
38 virtual void FlushRegion(VAddr addr, u64 size) = 0; 38 virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
39 39
40 /// Notify rasterizer that any caches of the specified region should be invalidated 40 /// Notify rasterizer that any caches of the specified region should be invalidated
41 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 41 virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
42 42
43 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 43 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
44 /// and invalidated 44 /// and invalidated
45 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 45 virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
46 46
47 /// Attempt to use a faster method to perform a surface copy 47 /// Attempt to use a faster method to perform a surface copy
48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 48 virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
@@ -63,7 +63,7 @@ public:
63 } 63 }
64 64
65 /// Increase/decrease the number of object in pages touching the specified region 65 /// Increase/decrease the number of object in pages touching the specified region
66 virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} 66 virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
67 67
68 /// Initialize disk cached resources for the game being emulated 68 /// Initialize disk cached resources for the game being emulated
69 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 69 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b3062e5ba..a4eea61a6 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -13,6 +13,11 @@
13 13
14namespace OpenGL { 14namespace OpenGL {
15 15
16CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
17 std::size_t alignment, u8* host_ptr)
18 : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
19 host_ptr} {}
20
16OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) 21OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
17 : RasterizerCache{rasterizer}, stream_buffer(size, true) {} 22 : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
18 23
@@ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
26 // TODO: Figure out which size is the best for given games. 31 // TODO: Figure out which size is the best for given games.
27 cache &= size >= 2048; 32 cache &= size >= 2048;
28 33
34 const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
29 if (cache) { 35 if (cache) {
30 auto entry = TryGet(*cpu_addr); 36 auto entry = TryGet(host_ptr);
31 if (entry) { 37 if (entry) {
32 if (entry->size >= size && entry->alignment == alignment) { 38 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
33 return entry->offset; 39 return entry->GetOffset();
34 } 40 }
35 Unregister(entry); 41 Unregister(entry);
36 } 42 }
@@ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
39 AlignBuffer(alignment); 45 AlignBuffer(alignment);
40 const GLintptr uploaded_offset = buffer_offset; 46 const GLintptr uploaded_offset = buffer_offset;
41 47
42 Memory::ReadBlock(*cpu_addr, buffer_ptr, size); 48 if (!host_ptr) {
49 return uploaded_offset;
50 }
43 51
52 std::memcpy(buffer_ptr, host_ptr, size);
44 buffer_ptr += size; 53 buffer_ptr += size;
45 buffer_offset += size; 54 buffer_offset += size;
46 55
47 if (cache) { 56 if (cache) {
48 auto entry = std::make_shared<CachedBufferEntry>(); 57 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
49 entry->offset = uploaded_offset; 58 alignment, host_ptr);
50 entry->size = size;
51 entry->alignment = alignment;
52 entry->addr = *cpu_addr;
53 Register(entry); 59 Register(entry);
54 } 60 }
55 61
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index c11acfb79..1de1f84ae 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -17,22 +17,39 @@ namespace OpenGL {
17 17
18class RasterizerOpenGL; 18class RasterizerOpenGL;
19 19
20struct CachedBufferEntry final : public RasterizerCacheObject { 20class CachedBufferEntry final : public RasterizerCacheObject {
21 VAddr GetAddr() const override { 21public:
22 return addr; 22 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset,
23 std::size_t alignment, u8* host_ptr);
24
25 VAddr GetCpuAddr() const override {
26 return cpu_addr;
23 } 27 }
24 28
25 std::size_t GetSizeInBytes() const override { 29 std::size_t GetSizeInBytes() const override {
26 return size; 30 return size;
27 } 31 }
28 32
33 std::size_t GetSize() const {
34 return size;
35 }
36
37 GLintptr GetOffset() const {
38 return offset;
39 }
40
41 std::size_t GetAlignment() const {
42 return alignment;
43 }
44
29 // We do not have to flush this cache as things in it are never modified by us. 45 // We do not have to flush this cache as things in it are never modified by us.
30 void Flush() override {} 46 void Flush() override {}
31 47
32 VAddr addr; 48private:
33 std::size_t size; 49 VAddr cpu_addr{};
34 GLintptr offset; 50 std::size_t size{};
35 std::size_t alignment; 51 GLintptr offset{};
52 std::size_t alignment{};
36}; 53};
37 54
38class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 55class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index 7161d1dea..a2c509c24 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -15,12 +15,13 @@
15 15
16namespace OpenGL { 16namespace OpenGL {
17 17
18CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { 18CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr)
19 : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} {
19 buffer.Create(); 20 buffer.Create();
20 // Bind and unbind the buffer so it gets allocated by the driver 21 // Bind and unbind the buffer so it gets allocated by the driver
21 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
22 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); 23 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
23 LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); 24 LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory");
24} 25}
25 26
26void CachedGlobalRegion::Reload(u32 size_) { 27void CachedGlobalRegion::Reload(u32 size_) {
@@ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
35 36
36 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer 37 // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
37 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); 38 glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); 39 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
39} 40}
40 41
41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { 42GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const {
@@ -46,11 +47,11 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
46 return search->second; 47 return search->second;
47} 48}
48 49
49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { 50GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) {
50 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; 51 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)};
51 if (!region) { 52 if (!region) {
52 // No reserved surface available, create a new one and reserve it 53 // No reserved surface available, create a new one and reserve it
53 region = std::make_shared<CachedGlobalRegion>(addr, size); 54 region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr);
54 ReserveGlobalRegion(region); 55 ReserveGlobalRegion(region);
55 } 56 }
56 region->Reload(size); 57 region->Reload(size);
@@ -58,7 +59,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
58} 59}
59 60
60void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { 61void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
61 reserve.insert_or_assign(region->GetAddr(), std::move(region)); 62 reserve.insert_or_assign(region->GetCpuAddr(), std::move(region));
62} 63}
63 64
64GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 65GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
80 ASSERT(actual_addr); 81 ASSERT(actual_addr);
81 82
82 // Look up global region in the cache based on address 83 // Look up global region in the cache based on address
83 GlobalRegion region = TryGet(*actual_addr); 84 const auto& host_ptr{Memory::GetPointer(*actual_addr)};
85 GlobalRegion region{TryGet(host_ptr)};
84 86
85 if (!region) { 87 if (!region) {
86 // No global region found - create a new one 88 // No global region found - create a new one
87 region = GetUncachedGlobalRegion(*actual_addr, size); 89 region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr);
88 Register(region); 90 Register(region);
89 } 91 }
90 92
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index ba2bdc60c..e497a0619 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
27 27
28class CachedGlobalRegion final : public RasterizerCacheObject { 28class CachedGlobalRegion final : public RasterizerCacheObject {
29public: 29public:
30 explicit CachedGlobalRegion(VAddr addr, u32 size); 30 explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr);
31 31
32 /// Gets the address of the shader in guest memory, required for cache management 32 VAddr GetCpuAddr() const override {
33 VAddr GetAddr() const override { 33 return cpu_addr;
34 return addr;
35 } 34 }
36 35
37 /// Gets the size of the shader in guest memory, required for cache management
38 std::size_t GetSizeInBytes() const override { 36 std::size_t GetSizeInBytes() const override {
39 return size; 37 return size;
40 } 38 }
@@ -53,9 +51,8 @@ public:
53 } 51 }
54 52
55private: 53private:
56 VAddr addr{}; 54 VAddr cpu_addr{};
57 u32 size{}; 55 u32 size{};
58
59 OGLBuffer buffer; 56 OGLBuffer buffer;
60}; 57};
61 58
@@ -69,7 +66,7 @@ public:
69 66
70private: 67private:
71 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; 68 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const;
72 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); 69 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr);
73 void ReserveGlobalRegion(GlobalRegion region); 70 void ReserveGlobalRegion(GlobalRegion region);
74 71
75 std::unordered_map<VAddr, GlobalRegion> reserve; 72 std::unordered_map<VAddr, GlobalRegion> reserve;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 976f64c24..bb6de5477 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
449 return boost::make_iterator_range(map.equal_range(interval)); 449 return boost::make_iterator_range(map.equal_range(interval));
450} 450}
451 451
452void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { 452void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
453 const u64 page_start{addr >> Memory::PAGE_BITS}; 453 const u64 page_start{addr >> Memory::PAGE_BITS};
454 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; 454 const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
455 455
@@ -747,12 +747,12 @@ void RasterizerOpenGL::DrawArrays() {
747 747
748void RasterizerOpenGL::FlushAll() {} 748void RasterizerOpenGL::FlushAll() {}
749 749
750void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { 750void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
751 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 751 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
752 res_cache.FlushRegion(addr, size); 752 res_cache.FlushRegion(addr, size);
753} 753}
754 754
755void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 755void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
756 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 756 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
757 res_cache.InvalidateRegion(addr, size); 757 res_cache.InvalidateRegion(addr, size);
758 shader_cache.InvalidateRegion(addr, size); 758 shader_cache.InvalidateRegion(addr, size);
@@ -760,7 +760,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
760 buffer_cache.InvalidateRegion(addr, size); 760 buffer_cache.InvalidateRegion(addr, size);
761} 761}
762 762
763void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 763void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
764 FlushRegion(addr, size); 764 FlushRegion(addr, size);
765 InvalidateRegion(addr, size); 765 InvalidateRegion(addr, size);
766} 766}
@@ -782,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
782 782
783 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 783 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
784 784
785 const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; 785 const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))};
786 if (!surface) { 786 if (!surface) {
787 return {}; 787 return {};
788 } 788 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ca3de0592..30f3e8acb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -57,9 +57,9 @@ public:
57 void DrawArrays() override; 57 void DrawArrays() override;
58 void Clear() override; 58 void Clear() override;
59 void FlushAll() override; 59 void FlushAll() override;
60 void FlushRegion(VAddr addr, u64 size) override; 60 void FlushRegion(CacheAddr addr, u64 size) override;
61 void InvalidateRegion(VAddr addr, u64 size) override; 61 void InvalidateRegion(CacheAddr addr, u64 size) override;
62 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 62 void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, 63 bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
64 const Tegra::Engines::Fermi2D::Regs::Surface& dst, 64 const Tegra::Engines::Fermi2D::Regs::Surface& dst,
65 const Common::Rectangle<u32>& src_rect, 65 const Common::Rectangle<u32>& src_rect,
@@ -67,7 +67,7 @@ public:
67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, 67 bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
68 u32 pixel_stride) override; 68 u32 pixel_stride) override;
69 bool AccelerateDrawBatch(bool is_indexed) override; 69 bool AccelerateDrawBatch(bool is_indexed) override;
70 void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; 70 void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
71 void LoadDiskResources(const std::atomic_bool& stop_loading, 71 void LoadDiskResources(const std::atomic_bool& stop_loading,
72 const VideoCore::DiskResourceLoadCallback& callback) override; 72 const VideoCore::DiskResourceLoadCallback& callback) override;
73 73
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index bd1409660..451de00e8 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -61,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
61 61
62 addr = cpu_addr ? *cpu_addr : 0; 62 addr = cpu_addr ? *cpu_addr : 0;
63 gpu_addr = gpu_addr_; 63 gpu_addr = gpu_addr_;
64 host_ptr = Memory::GetPointer(addr);
64 size_in_bytes = SizeInBytesRaw(); 65 size_in_bytes = SizeInBytesRaw();
65 66
66 if (IsPixelFormatASTC(pixel_format)) { 67 if (IsPixelFormatASTC(pixel_format)) {
@@ -563,8 +564,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
563} 564}
564 565
565CachedSurface::CachedSurface(const SurfaceParams& params) 566CachedSurface::CachedSurface(const SurfaceParams& params)
566 : params(params), gl_target(SurfaceTargetToGL(params.target)), 567 : params{params}, gl_target{SurfaceTargetToGL(params.target)},
567 cached_size_in_bytes(params.size_in_bytes) { 568 cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} {
568 texture.Create(gl_target); 569 texture.Create(gl_target);
569 570
570 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) 571 // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
@@ -633,10 +634,9 @@ void CachedSurface::LoadGLBuffer() {
633 const u32 bpp = params.GetFormatBpp() / 8; 634 const u32 bpp = params.GetFormatBpp() / 8;
634 const u32 copy_size = params.width * bpp; 635 const u32 copy_size = params.width * bpp;
635 if (params.pitch == copy_size) { 636 if (params.pitch == copy_size) {
636 std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr), 637 std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl);
637 params.size_in_bytes_gl);
638 } else { 638 } else {
639 const u8* start = Memory::GetPointer(params.addr); 639 const u8* start{params.host_ptr};
640 u8* write_to = gl_buffer[0].data(); 640 u8* write_to = gl_buffer[0].data();
641 for (u32 h = params.height; h > 0; h--) { 641 for (u32 h = params.height; h > 0; h--) {
642 std::memcpy(write_to, start, copy_size); 642 std::memcpy(write_to, start, copy_size);
@@ -680,8 +680,6 @@ void CachedSurface::FlushGLBuffer() {
680 glPixelStorei(GL_PACK_ROW_LENGTH, 0); 680 glPixelStorei(GL_PACK_ROW_LENGTH, 0);
681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, 681 Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width,
682 params.height, params.depth, true, true); 682 params.height, params.depth, true, true);
683 const u8* const texture_src_data = Memory::GetPointer(params.addr);
684 ASSERT(texture_src_data);
685 if (params.is_tiled) { 683 if (params.is_tiled) {
686 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", 684 ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}",
687 params.block_width, static_cast<u32>(params.target)); 685 params.block_width, static_cast<u32>(params.target));
@@ -691,9 +689,9 @@ void CachedSurface::FlushGLBuffer() {
691 const u32 bpp = params.GetFormatBpp() / 8; 689 const u32 bpp = params.GetFormatBpp() / 8;
692 const u32 copy_size = params.width * bpp; 690 const u32 copy_size = params.width * bpp;
693 if (params.pitch == copy_size) { 691 if (params.pitch == copy_size) {
694 std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes()); 692 std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes());
695 } else { 693 } else {
696 u8* start = Memory::GetPointer(params.addr); 694 u8* start{params.host_ptr};
697 const u8* read_to = gl_buffer[0].data(); 695 const u8* read_to = gl_buffer[0].data();
698 for (u32 h = params.height; h > 0; h--) { 696 for (u32 h = params.height; h > 0; h--) {
699 std::memcpy(start, read_to, copy_size); 697 std::memcpy(start, read_to, copy_size);
@@ -932,7 +930,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres
932 } 930 }
933 931
934 // Look up surface in the cache based on address 932 // Look up surface in the cache based on address
935 Surface surface{TryGet(params.addr)}; 933 Surface surface{TryGet(params.host_ptr)};
936 if (surface) { 934 if (surface) {
937 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { 935 if (surface->GetSurfaceParams().IsCompatibleSurface(params)) {
938 // Use the cached surface as-is unless it's not synced with memory 936 // Use the cached surface as-is unless it's not synced with memory
@@ -986,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
986 for (u32 layer = 0; layer < dst_params.depth; layer++) { 984 for (u32 layer = 0; layer < dst_params.depth; layer++) {
987 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { 985 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
988 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); 986 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap);
989 const Surface& copy = TryGet(sub_address); 987 const Surface& copy = TryGet(Memory::GetPointer(sub_address));
990 if (!copy) 988 if (!copy)
991 continue; 989 continue;
992 const auto& src_params{copy->GetSurfaceParams()}; 990 const auto& src_params{copy->GetSurfaceParams()};
@@ -1163,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface,
1163 const auto& dst_params{dst_surface->GetSurfaceParams()}; 1161 const auto& dst_params{dst_surface->GetSurfaceParams()};
1164 1162
1165 // Flush enough memory for both the source and destination surface 1163 // Flush enough memory for both the source and destination surface
1166 FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); 1164 FlushRegion(ToCacheAddr(src_params.host_ptr),
1165 std::max(src_params.MemorySize(), dst_params.MemorySize()));
1167 1166
1168 LoadSurface(dst_surface); 1167 LoadSurface(dst_surface);
1169} 1168}
@@ -1215,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface,
1215 return new_surface; 1214 return new_surface;
1216} 1215}
1217 1216
1218Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { 1217Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const {
1219 return TryGet(addr); 1218 return TryGet(host_ptr);
1220} 1219}
1221 1220
1222void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { 1221void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) {
@@ -1267,7 +1266,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
1267 src_params.height == dst_params.MipHeight(*level) && 1266 src_params.height == dst_params.MipHeight(*level) &&
1268 src_params.block_height >= dst_params.MipBlockHeight(*level)) { 1267 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1269 const std::optional<u32> slot = 1268 const std::optional<u32> slot =
1270 TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); 1269 TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level);
1271 if (slot.has_value()) { 1270 if (slot.has_value()) {
1272 glCopyImageSubData(render_surface->Texture().handle, 1271 glCopyImageSubData(render_surface->Texture().handle,
1273 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, 1272 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
@@ -1283,8 +1282,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
1283} 1282}
1284 1283
1285static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { 1284static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) {
1286 const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); 1285 const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize();
1287 const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); 1286 const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize();
1288 if (bound2 > bound1) 1287 if (bound2 > bound1)
1289 return true; 1288 return true;
1290 const auto& dst_params = blitted_surface->GetSurfaceParams(); 1289 const auto& dst_params = blitted_surface->GetSurfaceParams();
@@ -1327,7 +1326,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() {
1327void RasterizerCacheOpenGL::SignalPostDrawCall() { 1326void RasterizerCacheOpenGL::SignalPostDrawCall() {
1328 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { 1327 for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) {
1329 if (current_color_buffers[i] != nullptr) { 1328 if (current_color_buffers[i] != nullptr) {
1330 Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); 1329 Surface intersect =
1330 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr());
1331 if (intersect != nullptr) { 1331 if (intersect != nullptr) {
1332 PartialReinterpretSurface(current_color_buffers[i], intersect); 1332 PartialReinterpretSurface(current_color_buffers[i], intersect);
1333 texception = true; 1333 texception = true;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 9cf6f50be..b3afad139 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -297,6 +297,7 @@ struct SurfaceParams {
297 bool srgb_conversion; 297 bool srgb_conversion;
298 // Parameters used for caching 298 // Parameters used for caching
299 VAddr addr; 299 VAddr addr;
300 u8* host_ptr;
300 Tegra::GPUVAddr gpu_addr; 301 Tegra::GPUVAddr gpu_addr;
301 std::size_t size_in_bytes; 302 std::size_t size_in_bytes;
302 std::size_t size_in_bytes_gl; 303 std::size_t size_in_bytes_gl;
@@ -345,9 +346,9 @@ class RasterizerOpenGL;
345 346
346class CachedSurface final : public RasterizerCacheObject { 347class CachedSurface final : public RasterizerCacheObject {
347public: 348public:
348 CachedSurface(const SurfaceParams& params); 349 explicit CachedSurface(const SurfaceParams& params);
349 350
350 VAddr GetAddr() const override { 351 VAddr GetCpuAddr() const override {
351 return params.addr; 352 return params.addr;
352 } 353 }
353 354
@@ -449,7 +450,7 @@ public:
449 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); 450 Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);
450 451
451 /// Tries to find a framebuffer using on the provided CPU address 452 /// Tries to find a framebuffer using on the provided CPU address
452 Surface TryFindFramebufferSurface(VAddr addr) const; 453 Surface TryFindFramebufferSurface(const u8* host_ptr) const;
453 454
454 /// Copies the contents of one surface to another 455 /// Copies the contents of one surface to another
455 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, 456 void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
@@ -506,12 +507,12 @@ private:
506 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; 507 std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
507 Surface last_depth_buffer; 508 Surface last_depth_buffer;
508 509
509 using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; 510 using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
510 using SurfaceInterval = typename SurfaceIntervalCache::interval_type; 511 using SurfaceInterval = typename SurfaceIntervalCache::interval_type;
511 512
512 static auto GetReinterpretInterval(const Surface& object) { 513 static auto GetReinterpretInterval(const Surface& object) {
513 return SurfaceInterval::right_open(object->GetAddr() + 1, 514 return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
514 object->GetAddr() + object->GetMemorySize() - 1); 515 object->GetCacheAddr() + object->GetMemorySize() - 1);
515 } 516 }
516 517
517 // Reinterpreted surfaces are very fragil as the game may keep rendering into them. 518 // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
@@ -523,7 +524,7 @@ private:
523 reinterpret_surface->MarkReinterpreted(); 524 reinterpret_surface->MarkReinterpreted();
524 } 525 }
525 526
526 Surface CollideOnReinterpretedSurface(VAddr addr) const { 527 Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
527 const SurfaceInterval interval{addr}; 528 const SurfaceInterval interval{addr};
528 for (auto& pair : 529 for (auto& pair :
529 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { 530 boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 4883e4f62..60a04e146 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
42} 42}
43 43
44/// Gets the shader program code from memory for the specified address 44/// Gets the shader program code from memory for the specified address
45ProgramCode GetShaderCode(VAddr addr) { 45ProgramCode GetShaderCode(const u8* host_ptr) {
46 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); 46 ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
47 Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); 47 std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64));
48 return program_code; 48 return program_code;
49} 49}
50 50
@@ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() {
214 214
215} // namespace 215} // namespace
216 216
217CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 217CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
218 ShaderDiskCacheOpenGL& disk_cache, 218 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
219 const PrecompiledPrograms& precompiled_programs, 219 const PrecompiledPrograms& precompiled_programs,
220 ProgramCode&& program_code, ProgramCode&& program_code_b) 220 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
221 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, 221 : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier},
222 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { 222 program_type{program_type}, disk_cache{disk_cache},
223 precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
223 224
224 const std::size_t code_size = CalculateProgramSize(program_code); 225 const std::size_t code_size = CalculateProgramSize(program_code);
225 const std::size_t code_size_b = 226 const std::size_t code_size_b =
@@ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro
243 disk_cache.SaveRaw(raw); 244 disk_cache.SaveRaw(raw);
244} 245}
245 246
246CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 247CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
247 ShaderDiskCacheOpenGL& disk_cache, 248 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
248 const PrecompiledPrograms& precompiled_programs, 249 const PrecompiledPrograms& precompiled_programs,
249 GLShader::ProgramResult result) 250 GLShader::ProgramResult result, u8* host_ptr)
250 : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, 251 : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type},
251 disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { 252 disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
253 host_ptr} {
252 254
253 code = std::move(result.first); 255 code = std::move(result.first);
254 entries = result.second; 256 entries = result.second;
@@ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
271 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 273 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
272 } 274 }
273 275
274 LabelGLObject(GL_PROGRAM, program->handle, addr); 276 LabelGLObject(GL_PROGRAM, program->handle, guest_addr);
275 } 277 }
276 278
277 handle = program->handle; 279 handle = program->handle;
@@ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
323 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 325 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
324 } 326 }
325 327
326 LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); 328 LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name);
327 329
328 return target_program->handle; 330 return target_program->handle;
329}; 331};
@@ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
489 const VAddr program_addr{GetShaderAddress(program)}; 491 const VAddr program_addr{GetShaderAddress(program)};
490 492
491 // Look up shader in the cache based on address 493 // Look up shader in the cache based on address
492 Shader shader{TryGet(program_addr)}; 494 const auto& host_ptr{Memory::GetPointer(program_addr)};
495 Shader shader{TryGet(host_ptr)};
493 496
494 if (!shader) { 497 if (!shader) {
495 // No shader found - create a new one 498 // No shader found - create a new one
496 ProgramCode program_code = GetShaderCode(program_addr); 499 const auto& host_ptr{Memory::GetPointer(program_addr)};
500 ProgramCode program_code{GetShaderCode(host_ptr)};
497 ProgramCode program_code_b; 501 ProgramCode program_code_b;
498 if (program == Maxwell::ShaderProgram::VertexA) { 502 if (program == Maxwell::ShaderProgram::VertexA) {
499 program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); 503 program_code_b = GetShaderCode(
504 Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
500 } 505 }
501 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 506 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
502 507
@@ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
504 if (found != precompiled_shaders.end()) { 509 if (found != precompiled_shaders.end()) {
505 shader = 510 shader =
506 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, 511 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache,
507 precompiled_programs, found->second); 512 precompiled_programs, found->second, host_ptr);
508 } else { 513 } else {
509 shader = std::make_shared<CachedShader>( 514 shader = std::make_shared<CachedShader>(
510 program_addr, unique_identifier, program, disk_cache, precompiled_programs, 515 program_addr, unique_identifier, program, disk_cache, precompiled_programs,
511 std::move(program_code), std::move(program_code_b)); 516 std::move(program_code), std::move(program_code_b), host_ptr);
512 } 517 }
513 Register(shader); 518 Register(shader);
514 } 519 }
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 97eed192f..81fe716b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
39 39
40class CachedShader final : public RasterizerCacheObject { 40class CachedShader final : public RasterizerCacheObject {
41public: 41public:
42 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 42 explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
43 ShaderDiskCacheOpenGL& disk_cache, 43 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
44 const PrecompiledPrograms& precompiled_programs, 44 const PrecompiledPrograms& precompiled_programs,
45 ProgramCode&& program_code, ProgramCode&& program_code_b); 45 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
46 46
47 explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, 47 explicit CachedShader(VAddr guest_addr, u64 unique_identifier,
48 ShaderDiskCacheOpenGL& disk_cache, 48 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
49 const PrecompiledPrograms& precompiled_programs, 49 const PrecompiledPrograms& precompiled_programs,
50 GLShader::ProgramResult result); 50 GLShader::ProgramResult result, u8* host_ptr);
51 51
52 VAddr GetAddr() const override { 52 VAddr GetCpuAddr() const override {
53 return addr; 53 return guest_addr;
54 } 54 }
55 55
56 std::size_t GetSizeInBytes() const override { 56 std::size_t GetSizeInBytes() const override {
@@ -91,7 +91,8 @@ private:
91 91
92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; 92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
93 93
94 VAddr addr{}; 94 u8* host_ptr{};
95 VAddr guest_addr{};
95 u64 unique_identifier{}; 96 u64 unique_identifier{};
96 Maxwell::ShaderProgram program_type{}; 97 Maxwell::ShaderProgram program_type{};
97 ShaderDiskCacheOpenGL& disk_cache; 98 ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 4a33a6c84..95eab3fec 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -17,6 +17,11 @@
17 17
18namespace Vulkan { 18namespace Vulkan {
19 19
20CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset,
21 std::size_t alignment, u8* host_ptr)
22 : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{
23 host_ptr} {}
24
20VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, 25VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager,
21 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, 26 VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
22 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) 27 VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size)
@@ -37,16 +42,18 @@ VKBufferCache::~VKBufferCache() = default;
37u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, 42u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment,
38 bool cache) { 43 bool cache) {
39 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; 44 const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)};
40 ASSERT(cpu_addr); 45 ASSERT_MSG(cpu_addr, "Invalid GPU address");
41 46
42 // Cache management is a big overhead, so only cache entries with a given size. 47 // Cache management is a big overhead, so only cache entries with a given size.
43 // TODO: Figure out which size is the best for given games. 48 // TODO: Figure out which size is the best for given games.
44 cache &= size >= 2048; 49 cache &= size >= 2048;
45 50
51 const auto& host_ptr{Memory::GetPointer(*cpu_addr)};
46 if (cache) { 52 if (cache) {
47 if (auto entry = TryGet(*cpu_addr); entry) { 53 auto entry = TryGet(host_ptr);
48 if (entry->size >= size && entry->alignment == alignment) { 54 if (entry) {
49 return entry->offset; 55 if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
56 return entry->GetOffset();
50 } 57 }
51 Unregister(entry); 58 Unregister(entry);
52 } 59 }
@@ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64
55 AlignBuffer(alignment); 62 AlignBuffer(alignment);
56 const u64 uploaded_offset = buffer_offset; 63 const u64 uploaded_offset = buffer_offset;
57 64
58 Memory::ReadBlock(*cpu_addr, buffer_ptr, size); 65 if (!host_ptr) {
66 return uploaded_offset;
67 }
59 68
69 std::memcpy(buffer_ptr, host_ptr, size);
60 buffer_ptr += size; 70 buffer_ptr += size;
61 buffer_offset += size; 71 buffer_offset += size;
62 72
63 if (cache) { 73 if (cache) {
64 auto entry = std::make_shared<CachedBufferEntry>(); 74 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset,
65 entry->offset = uploaded_offset; 75 alignment, host_ptr);
66 entry->size = size;
67 entry->alignment = alignment;
68 entry->addr = *cpu_addr;
69 Register(entry); 76 Register(entry);
70 } 77 }
71 78
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index d8e916f31..8b415744b 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -24,22 +24,39 @@ class VKFence;
24class VKMemoryManager; 24class VKMemoryManager;
25class VKStreamBuffer; 25class VKStreamBuffer;
26 26
27struct CachedBufferEntry final : public RasterizerCacheObject { 27class CachedBufferEntry final : public RasterizerCacheObject {
28 VAddr GetAddr() const override { 28public:
29 return addr; 29 explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment,
30 u8* host_ptr);
31
32 VAddr GetCpuAddr() const override {
33 return cpu_addr;
30 } 34 }
31 35
32 std::size_t GetSizeInBytes() const override { 36 std::size_t GetSizeInBytes() const override {
33 return size; 37 return size;
34 } 38 }
35 39
40 std::size_t GetSize() const {
41 return size;
42 }
43
44 u64 GetOffset() const {
45 return offset;
46 }
47
48 std::size_t GetAlignment() const {
49 return alignment;
50 }
51
36 // We do not have to flush this cache as things in it are never modified by us. 52 // We do not have to flush this cache as things in it are never modified by us.
37 void Flush() override {} 53 void Flush() override {}
38 54
39 VAddr addr; 55private:
40 std::size_t size; 56 VAddr cpu_addr{};
41 u64 offset; 57 std::size_t size{};
42 std::size_t alignment; 58 u64 offset{};
59 std::size_t alignment{};
43}; 60};
44 61
45class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { 62class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {