diff options
26 files changed, 394 insertions, 294 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index a34b9e753..b031ebc66 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" | 11 | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" |
| 12 | #include "core/hle/service/nvdrv/devices/nvmap.h" | 12 | #include "core/hle/service/nvdrv/devices/nvmap.h" |
| 13 | #include "core/memory.h" | ||
| 13 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 14 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| 15 | #include "video_core/renderer_base.h" | 16 | #include "video_core/renderer_base.h" |
| @@ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | |||
| 178 | auto& gpu = system_instance.GPU(); | 179 | auto& gpu = system_instance.GPU(); |
| 179 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); | 180 | auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); |
| 180 | ASSERT(cpu_addr); | 181 | ASSERT(cpu_addr); |
| 181 | gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); | 182 | gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size); |
| 182 | 183 | ||
| 183 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); | 184 | params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); |
| 184 | 185 | ||
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 6591c45d2..4fde53033 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -67,8 +67,11 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa | |||
| 67 | LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, | 67 | LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, |
| 68 | (base + size) * PAGE_SIZE); | 68 | (base + size) * PAGE_SIZE); |
| 69 | 69 | ||
| 70 | RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, | 70 | // During boot, current_page_table might not be set yet, in which case we need not flush |
| 71 | FlushMode::FlushAndInvalidate); | 71 | if (current_page_table) { |
| 72 | RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, | ||
| 73 | FlushMode::FlushAndInvalidate); | ||
| 74 | } | ||
| 72 | 75 | ||
| 73 | VAddr end = base + size; | 76 | VAddr end = base + size; |
| 74 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | 77 | ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |
| @@ -359,13 +362,13 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | |||
| 359 | auto& gpu = system_instance.GPU(); | 362 | auto& gpu = system_instance.GPU(); |
| 360 | switch (mode) { | 363 | switch (mode) { |
| 361 | case FlushMode::Flush: | 364 | case FlushMode::Flush: |
| 362 | gpu.FlushRegion(overlap_start, overlap_size); | 365 | gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); |
| 363 | break; | 366 | break; |
| 364 | case FlushMode::Invalidate: | 367 | case FlushMode::Invalidate: |
| 365 | gpu.InvalidateRegion(overlap_start, overlap_size); | 368 | gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); |
| 366 | break; | 369 | break; |
| 367 | case FlushMode::FlushAndInvalidate: | 370 | case FlushMode::FlushAndInvalidate: |
| 368 | gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); | 371 | gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); |
| 369 | break; | 372 | break; |
| 370 | } | 373 | } |
| 371 | }; | 374 | }; |
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index aae2a4019..daefa43a6 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "video_core/engines/kepler_memory.h" | 9 | #include "video_core/engines/kepler_memory.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_base.h" | ||
| 12 | 13 | ||
| 13 | namespace Tegra::Engines { | 14 | namespace Tegra::Engines { |
| 14 | 15 | ||
| @@ -48,7 +49,8 @@ void KeplerMemory::ProcessData(u32 data) { | |||
| 48 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. | 49 | // We have to invalidate the destination region to evict any outdated surfaces from the cache. |
| 49 | // We do this before actually writing the new data because the destination address might contain | 50 | // We do this before actually writing the new data because the destination address might contain |
| 50 | // a dirty surface that will have to be written back to memory. | 51 | // a dirty surface that will have to be written back to memory. |
| 51 | Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); | 52 | system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), |
| 53 | sizeof(u32)); | ||
| 52 | 54 | ||
| 53 | Memory::Write32(*dest_address, data); | 55 | Memory::Write32(*dest_address, data); |
| 54 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | 56 | system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 144e7fa82..49979694e 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -396,7 +396,10 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 396 | const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | 396 | const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |
| 397 | ASSERT_MSG(address, "Invalid GPU address"); | 397 | ASSERT_MSG(address, "Invalid GPU address"); |
| 398 | 398 | ||
| 399 | Memory::Write32(*address, value); | 399 | u8* ptr{Memory::GetPointer(*address)}; |
| 400 | rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | ||
| 401 | std::memcpy(ptr, &value, sizeof(u32)); | ||
| 402 | |||
| 400 | dirty_flags.OnMemoryWrite(); | 403 | dirty_flags.OnMemoryWrite(); |
| 401 | 404 | ||
| 402 | // Increment the current buffer position. | 405 | // Increment the current buffer position. |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 9dfea5999..415a6319a 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "video_core/engines/maxwell_3d.h" | 9 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/engines/maxwell_dma.h" | 10 | #include "video_core/engines/maxwell_dma.h" |
| 11 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| 12 | #include "video_core/renderer_base.h" | ||
| 12 | #include "video_core/textures/decoders.h" | 13 | #include "video_core/textures/decoders.h" |
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| @@ -92,12 +93,14 @@ void MaxwellDMA::HandleCopy() { | |||
| 92 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | 93 | const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |
| 93 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated | 94 | // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated |
| 94 | // copying. | 95 | // copying. |
| 95 | Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); | 96 | Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( |
| 97 | ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size); | ||
| 96 | 98 | ||
| 97 | // We have to invalidate the destination region to evict any outdated surfaces from the | 99 | // We have to invalidate the destination region to evict any outdated surfaces from the |
| 98 | // cache. We do this before actually writing the new data because the destination address | 100 | // cache. We do this before actually writing the new data because the destination address |
| 99 | // might contain a dirty surface that will have to be written back to memory. | 101 | // might contain a dirty surface that will have to be written back to memory. |
| 100 | Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); | 102 | Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( |
| 103 | ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size); | ||
| 101 | }; | 104 | }; |
| 102 | 105 | ||
| 103 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | 106 | if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 56a203275..a14b95c30 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -11,6 +11,11 @@ | |||
| 11 | #include "video_core/dma_pusher.h" | 11 | #include "video_core/dma_pusher.h" |
| 12 | #include "video_core/memory_manager.h" | 12 | #include "video_core/memory_manager.h" |
| 13 | 13 | ||
| 14 | using CacheAddr = std::uintptr_t; | ||
| 15 | inline CacheAddr ToCacheAddr(const void* host_ptr) { | ||
| 16 | return reinterpret_cast<CacheAddr>(host_ptr); | ||
| 17 | } | ||
| 18 | |||
| 14 | namespace Core { | 19 | namespace Core { |
| 15 | class System; | 20 | class System; |
| 16 | } | 21 | } |
| @@ -209,13 +214,13 @@ public: | |||
| 209 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | 214 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; |
| 210 | 215 | ||
| 211 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 216 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 212 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | 217 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; |
| 213 | 218 | ||
| 214 | /// Notify rasterizer that any caches of the specified region should be invalidated | 219 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 215 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | 220 | virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 216 | 221 | ||
| 217 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 222 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 218 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 223 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 219 | 224 | ||
| 220 | private: | 225 | private: |
| 221 | void ProcessBindMethod(const MethodCall& method_call); | 226 | void ProcessBindMethod(const MethodCall& method_call); |
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index ad0a747e3..8b355cf7b 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers( | |||
| 22 | gpu_thread.SwapBuffers(std::move(framebuffer)); | 22 | gpu_thread.SwapBuffers(std::move(framebuffer)); |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | void GPUAsynch::FlushRegion(VAddr addr, u64 size) { | 25 | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { |
| 26 | gpu_thread.FlushRegion(addr, size); | 26 | gpu_thread.FlushRegion(addr, size); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { | 29 | void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { |
| 30 | gpu_thread.InvalidateRegion(addr, size); | 30 | gpu_thread.InvalidateRegion(addr, size); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 33 | void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| 34 | gpu_thread.FlushAndInvalidateRegion(addr, size); | 34 | gpu_thread.FlushAndInvalidateRegion(addr, size); |
| 35 | } | 35 | } |
| 36 | 36 | ||
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index e6a807aba..1dcc61a6c 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -26,9 +26,9 @@ public: | |||
| 26 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 26 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 27 | void SwapBuffers( | 27 | void SwapBuffers( |
| 28 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | 28 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; |
| 29 | void FlushRegion(VAddr addr, u64 size) override; | 29 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 30 | void InvalidateRegion(VAddr addr, u64 size) override; | 30 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 31 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 31 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 32 | 32 | ||
| 33 | private: | 33 | private: |
| 34 | GPUThread::ThreadManager gpu_thread; | 34 | GPUThread::ThreadManager gpu_thread; |
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 4c00b96c7..2cfc900ed 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp | |||
| @@ -22,15 +22,15 @@ void GPUSynch::SwapBuffers( | |||
| 22 | renderer.SwapBuffers(std::move(framebuffer)); | 22 | renderer.SwapBuffers(std::move(framebuffer)); |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | void GPUSynch::FlushRegion(VAddr addr, u64 size) { | 25 | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { |
| 26 | renderer.Rasterizer().FlushRegion(addr, size); | 26 | renderer.Rasterizer().FlushRegion(addr, size); |
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { | 29 | void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { |
| 30 | renderer.Rasterizer().InvalidateRegion(addr, size); | 30 | renderer.Rasterizer().InvalidateRegion(addr, size); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 33 | void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| 34 | renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); | 34 | renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); |
| 35 | } | 35 | } |
| 36 | 36 | ||
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 7d5a241ff..766b5631c 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h | |||
| @@ -21,9 +21,9 @@ public: | |||
| 21 | void PushGPUEntries(Tegra::CommandList&& entries) override; | 21 | void PushGPUEntries(Tegra::CommandList&& entries) override; |
| 22 | void SwapBuffers( | 22 | void SwapBuffers( |
| 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | 23 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; |
| 24 | void FlushRegion(VAddr addr, u64 size) override; | 24 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 25 | void InvalidateRegion(VAddr addr, u64 size) override; | 25 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 26 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 26 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | } // namespace VideoCommon | 29 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c5bdd2a17..086b2f625 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -5,7 +5,6 @@ | |||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/frontend/scope_acquire_window_context.h" | 7 | #include "core/frontend/scope_acquire_window_context.h" |
| 8 | #include "core/settings.h" | ||
| 9 | #include "video_core/dma_pusher.h" | 8 | #include "video_core/dma_pusher.h" |
| 10 | #include "video_core/gpu.h" | 9 | #include "video_core/gpu.h" |
| 11 | #include "video_core/gpu_thread.h" | 10 | #include "video_core/gpu_thread.h" |
| @@ -13,38 +12,13 @@ | |||
| 13 | 12 | ||
| 14 | namespace VideoCommon::GPUThread { | 13 | namespace VideoCommon::GPUThread { |
| 15 | 14 | ||
| 16 | /// Executes a single GPU thread command | ||
| 17 | static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, | ||
| 18 | Tegra::DmaPusher& dma_pusher) { | ||
| 19 | if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { | ||
| 20 | dma_pusher.Push(std::move(submit_list->entries)); | ||
| 21 | dma_pusher.DispatchCalls(); | ||
| 22 | } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { | ||
| 23 | renderer.SwapBuffers(data->framebuffer); | ||
| 24 | } else if (const auto data = std::get_if<FlushRegionCommand>(command)) { | ||
| 25 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | ||
| 26 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { | ||
| 27 | renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | ||
| 28 | } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { | ||
| 29 | renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); | ||
| 30 | } else { | ||
| 31 | UNREACHABLE(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | /// Runs the GPU thread | 15 | /// Runs the GPU thread |
| 36 | static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, | 16 | static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, |
| 37 | SynchState& state) { | 17 | SynchState& state) { |
| 38 | |||
| 39 | MicroProfileOnThreadCreate("GpuThread"); | 18 | MicroProfileOnThreadCreate("GpuThread"); |
| 40 | 19 | ||
| 41 | auto WaitForWakeup = [&]() { | ||
| 42 | std::unique_lock<std::mutex> lock{state.signal_mutex}; | ||
| 43 | state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); | ||
| 44 | }; | ||
| 45 | |||
| 46 | // Wait for first GPU command before acquiring the window context | 20 | // Wait for first GPU command before acquiring the window context |
| 47 | WaitForWakeup(); | 21 | state.WaitForCommands(); |
| 48 | 22 | ||
| 49 | // If emulation was stopped during disk shader loading, abort before trying to acquire context | 23 | // If emulation was stopped during disk shader loading, abort before trying to acquire context |
| 50 | if (!state.is_running) { | 24 | if (!state.is_running) { |
| @@ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | |||
| 53 | 27 | ||
| 54 | Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; | 28 | Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; |
| 55 | 29 | ||
| 30 | CommandDataContainer next; | ||
| 56 | while (state.is_running) { | 31 | while (state.is_running) { |
| 57 | if (!state.is_running) { | 32 | state.WaitForCommands(); |
| 58 | return; | 33 | while (!state.queue.Empty()) { |
| 59 | } | 34 | state.queue.Pop(next); |
| 60 | 35 | if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | |
| 61 | { | 36 | dma_pusher.Push(std::move(submit_list->entries)); |
| 62 | // Thread has been woken up, so make the previous write queue the next read queue | 37 | dma_pusher.DispatchCalls(); |
| 63 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | 38 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 64 | std::swap(state.push_queue, state.pop_queue); | 39 | state.DecrementFramesCounter(); |
| 65 | } | 40 | renderer.SwapBuffers(std::move(data->framebuffer)); |
| 66 | 41 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | |
| 67 | // Execute all of the GPU commands | 42 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 68 | while (!state.pop_queue->empty()) { | 43 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 69 | ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); | 44 | renderer.Rasterizer().InvalidateRegion(data->addr, data->size); |
| 70 | state.pop_queue->pop(); | 45 | } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) { |
| 46 | return; | ||
| 47 | } else { | ||
| 48 | UNREACHABLE(); | ||
| 49 | } | ||
| 71 | } | 50 | } |
| 72 | |||
| 73 | state.UpdateIdleState(); | ||
| 74 | |||
| 75 | // Signal that the GPU thread has finished processing commands | ||
| 76 | if (state.is_idle) { | ||
| 77 | state.idle_condition.notify_one(); | ||
| 78 | } | ||
| 79 | |||
| 80 | // Wait for CPU thread to send more GPU commands | ||
| 81 | WaitForWakeup(); | ||
| 82 | } | 51 | } |
| 83 | } | 52 | } |
| 84 | 53 | ||
| 85 | ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) | 54 | ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) |
| 86 | : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), | 55 | : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), |
| 87 | std::ref(dma_pusher), std::ref(state)}, | 56 | std::ref(dma_pusher), std::ref(state)} {} |
| 88 | thread_id{thread.get_id()} {} | ||
| 89 | 57 | ||
| 90 | ThreadManager::~ThreadManager() { | 58 | ThreadManager::~ThreadManager() { |
| 91 | { | 59 | // Notify GPU thread that a shutdown is pending |
| 92 | // Notify GPU thread that a shutdown is pending | 60 | PushCommand(EndProcessingCommand()); |
| 93 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | ||
| 94 | state.is_running = false; | ||
| 95 | } | ||
| 96 | |||
| 97 | state.signal_condition.notify_one(); | ||
| 98 | thread.join(); | 61 | thread.join(); |
| 99 | } | 62 | } |
| 100 | 63 | ||
| 101 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 64 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| 102 | if (entries.empty()) { | 65 | PushCommand(SubmitListCommand(std::move(entries))); |
| 103 | return; | ||
| 104 | } | ||
| 105 | |||
| 106 | PushCommand(SubmitListCommand(std::move(entries)), false, false); | ||
| 107 | } | 66 | } |
| 108 | 67 | ||
| 109 | void ThreadManager::SwapBuffers( | 68 | void ThreadManager::SwapBuffers( |
| 110 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | 69 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |
| 111 | PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); | 70 | state.IncrementFramesCounter(); |
| 71 | PushCommand(SwapBuffersCommand(std::move(framebuffer))); | ||
| 72 | state.WaitForFrames(); | ||
| 112 | } | 73 | } |
| 113 | 74 | ||
| 114 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 75 | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { |
| 115 | // Block the CPU when using accurate emulation | 76 | PushCommand(FlushRegionCommand(addr, size)); |
| 116 | PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); | ||
| 117 | } | 77 | } |
| 118 | 78 | ||
| 119 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 79 | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { |
| 120 | PushCommand(InvalidateRegionCommand(addr, size), true, true); | 80 | if (state.queue.Empty()) { |
| 81 | // It's quicker to invalidate a single region on the CPU if the queue is already empty | ||
| 82 | renderer.Rasterizer().InvalidateRegion(addr, size); | ||
| 83 | } else { | ||
| 84 | PushCommand(InvalidateRegionCommand(addr, size)); | ||
| 85 | } | ||
| 121 | } | 86 | } |
| 122 | 87 | ||
| 123 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 88 | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| 89 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | ||
| 124 | InvalidateRegion(addr, size); | 90 | InvalidateRegion(addr, size); |
| 125 | } | 91 | } |
| 126 | 92 | ||
| 127 | void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { | 93 | void ThreadManager::PushCommand(CommandData&& command_data) { |
| 128 | { | 94 | state.queue.Push(CommandDataContainer(std::move(command_data))); |
| 129 | std::lock_guard<std::mutex> lock{state.signal_mutex}; | 95 | state.SignalCommands(); |
| 130 | |||
| 131 | if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { | ||
| 132 | // Execute the command synchronously on the current thread | ||
| 133 | ExecuteCommand(&command_data, renderer, dma_pusher); | ||
| 134 | return; | ||
| 135 | } | ||
| 136 | |||
| 137 | // Push the command to the GPU thread | ||
| 138 | state.UpdateIdleState(); | ||
| 139 | state.push_queue->emplace(command_data); | ||
| 140 | } | ||
| 141 | |||
| 142 | // Signal the GPU thread that commands are pending | ||
| 143 | state.signal_condition.notify_one(); | ||
| 144 | |||
| 145 | if (wait_for_idle) { | ||
| 146 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 147 | std::unique_lock<std::mutex> lock{state.idle_mutex}; | ||
| 148 | state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); }); | ||
| 149 | } | ||
| 150 | } | 96 | } |
| 151 | 97 | ||
| 152 | } // namespace VideoCommon::GPUThread | 98 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index edb148b14..8cd7db1c6 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -13,6 +13,9 @@ | |||
| 13 | #include <thread> | 13 | #include <thread> |
| 14 | #include <variant> | 14 | #include <variant> |
| 15 | 15 | ||
| 16 | #include "common/threadsafe_queue.h" | ||
| 17 | #include "video_core/gpu.h" | ||
| 18 | |||
| 16 | namespace Tegra { | 19 | namespace Tegra { |
| 17 | struct FramebufferConfig; | 20 | struct FramebufferConfig; |
| 18 | class DmaPusher; | 21 | class DmaPusher; |
| @@ -24,6 +27,9 @@ class RendererBase; | |||
| 24 | 27 | ||
| 25 | namespace VideoCommon::GPUThread { | 28 | namespace VideoCommon::GPUThread { |
| 26 | 29 | ||
| 30 | /// Command to signal to the GPU thread that processing has ended | ||
| 31 | struct EndProcessingCommand final {}; | ||
| 32 | |||
| 27 | /// Command to signal to the GPU thread that a command list is ready for processing | 33 | /// Command to signal to the GPU thread that a command list is ready for processing |
| 28 | struct SubmitListCommand final { | 34 | struct SubmitListCommand final { |
| 29 | explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} | 35 | explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} |
| @@ -36,59 +42,110 @@ struct SwapBuffersCommand final { | |||
| 36 | explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) | 42 | explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) |
| 37 | : framebuffer{std::move(framebuffer)} {} | 43 | : framebuffer{std::move(framebuffer)} {} |
| 38 | 44 | ||
| 39 | std::optional<const Tegra::FramebufferConfig> framebuffer; | 45 | std::optional<Tegra::FramebufferConfig> framebuffer; |
| 40 | }; | 46 | }; |
| 41 | 47 | ||
| 42 | /// Command to signal to the GPU thread to flush a region | 48 | /// Command to signal to the GPU thread to flush a region |
| 43 | struct FlushRegionCommand final { | 49 | struct FlushRegionCommand final { |
| 44 | explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} | 50 | explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} |
| 45 | 51 | ||
| 46 | const VAddr addr; | 52 | CacheAddr addr; |
| 47 | const u64 size; | 53 | u64 size; |
| 48 | }; | 54 | }; |
| 49 | 55 | ||
| 50 | /// Command to signal to the GPU thread to invalidate a region | 56 | /// Command to signal to the GPU thread to invalidate a region |
| 51 | struct InvalidateRegionCommand final { | 57 | struct InvalidateRegionCommand final { |
| 52 | explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} | 58 | explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} |
| 53 | 59 | ||
| 54 | const VAddr addr; | 60 | CacheAddr addr; |
| 55 | const u64 size; | 61 | u64 size; |
| 56 | }; | 62 | }; |
| 57 | 63 | ||
| 58 | /// Command to signal to the GPU thread to flush and invalidate a region | 64 | /// Command to signal to the GPU thread to flush and invalidate a region |
| 59 | struct FlushAndInvalidateRegionCommand final { | 65 | struct FlushAndInvalidateRegionCommand final { |
| 60 | explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) | 66 | explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) |
| 61 | : addr{addr}, size{size} {} | 67 | : addr{addr}, size{size} {} |
| 62 | 68 | ||
| 63 | const VAddr addr; | 69 | CacheAddr addr; |
| 64 | const u64 size; | 70 | u64 size; |
| 65 | }; | 71 | }; |
| 66 | 72 | ||
| 67 | using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | 73 | using CommandData = |
| 68 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | 74 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |
| 75 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | ||
| 76 | |||
| 77 | struct CommandDataContainer { | ||
| 78 | CommandDataContainer() = default; | ||
| 79 | |||
| 80 | CommandDataContainer(CommandData&& data) : data{std::move(data)} {} | ||
| 81 | |||
| 82 | CommandDataContainer& operator=(const CommandDataContainer& t) { | ||
| 83 | data = std::move(t.data); | ||
| 84 | return *this; | ||
| 85 | } | ||
| 86 | |||
| 87 | CommandData data; | ||
| 88 | }; | ||
| 69 | 89 | ||
| 70 | /// Struct used to synchronize the GPU thread | 90 | /// Struct used to synchronize the GPU thread |
| 71 | struct SynchState final { | 91 | struct SynchState final { |
| 72 | std::atomic<bool> is_running{true}; | 92 | std::atomic_bool is_running{true}; |
| 73 | std::atomic<bool> is_idle{true}; | 93 | std::atomic_int queued_frame_count{}; |
| 74 | std::condition_variable signal_condition; | 94 | std::mutex frames_mutex; |
| 75 | std::mutex signal_mutex; | 95 | std::mutex commands_mutex; |
| 76 | std::condition_variable idle_condition; | 96 | std::condition_variable commands_condition; |
| 77 | std::mutex idle_mutex; | 97 | std::condition_variable frames_condition; |
| 78 | 98 | ||
| 79 | // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and | 99 | void IncrementFramesCounter() { |
| 80 | // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes | 100 | std::lock_guard<std::mutex> lock{frames_mutex}; |
| 81 | // empty. This allows for efficient thread-safe access, as it does not require any copies. | 101 | ++queued_frame_count; |
| 82 | 102 | } | |
| 83 | using CommandQueue = std::queue<CommandData>; | 103 | |
| 84 | std::array<CommandQueue, 2> command_queues; | 104 | void DecrementFramesCounter() { |
| 85 | CommandQueue* push_queue{&command_queues[0]}; | 105 | { |
| 86 | CommandQueue* pop_queue{&command_queues[1]}; | 106 | std::lock_guard<std::mutex> lock{frames_mutex}; |
| 87 | 107 | --queued_frame_count; | |
| 88 | void UpdateIdleState() { | 108 | |
| 89 | std::lock_guard<std::mutex> lock{idle_mutex}; | 109 | if (queued_frame_count) { |
| 90 | is_idle = command_queues[0].empty() && command_queues[1].empty(); | 110 | return; |
| 111 | } | ||
| 112 | } | ||
| 113 | frames_condition.notify_one(); | ||
| 91 | } | 114 | } |
| 115 | |||
| 116 | void WaitForFrames() { | ||
| 117 | { | ||
| 118 | std::lock_guard<std::mutex> lock{frames_mutex}; | ||
| 119 | if (!queued_frame_count) { | ||
| 120 | return; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 | // Wait for the GPU to be idle (all commands to be executed) | ||
| 125 | { | ||
| 126 | std::unique_lock<std::mutex> lock{frames_mutex}; | ||
| 127 | frames_condition.wait(lock, [this] { return !queued_frame_count; }); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | |||
| 131 | void SignalCommands() { | ||
| 132 | { | ||
| 133 | std::unique_lock<std::mutex> lock{commands_mutex}; | ||
| 134 | if (queue.Empty()) { | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | } | ||
| 138 | |||
| 139 | commands_condition.notify_one(); | ||
| 140 | } | ||
| 141 | |||
| 142 | void WaitForCommands() { | ||
| 143 | std::unique_lock<std::mutex> lock{commands_mutex}; | ||
| 144 | commands_condition.wait(lock, [this] { return !queue.Empty(); }); | ||
| 145 | } | ||
| 146 | |||
| 147 | using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | ||
| 148 | CommandQueue queue; | ||
| 92 | }; | 149 | }; |
| 93 | 150 | ||
| 94 | /// Class used to manage the GPU thread | 151 | /// Class used to manage the GPU thread |
| @@ -105,22 +162,17 @@ public: | |||
| 105 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); | 162 | std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); |
| 106 | 163 | ||
| 107 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 164 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 108 | void FlushRegion(VAddr addr, u64 size); | 165 | void FlushRegion(CacheAddr addr, u64 size); |
| 109 | 166 | ||
| 110 | /// Notify rasterizer that any caches of the specified region should be invalidated | 167 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 111 | void InvalidateRegion(VAddr addr, u64 size); | 168 | void InvalidateRegion(CacheAddr addr, u64 size); |
| 112 | 169 | ||
| 113 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | 170 | /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |
| 114 | void FlushAndInvalidateRegion(VAddr addr, u64 size); | 171 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size); |
| 115 | 172 | ||
| 116 | private: | 173 | private: |
| 117 | /// Pushes a command to be executed by the GPU thread | 174 | /// Pushes a command to be executed by the GPU thread |
| 118 | void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); | 175 | void PushCommand(CommandData&& command_data); |
| 119 | |||
| 120 | /// Returns true if this is called by the GPU thread | ||
| 121 | bool IsGpuThread() const { | ||
| 122 | return std::this_thread::get_id() == thread_id; | ||
| 123 | } | ||
| 124 | 176 | ||
| 125 | private: | 177 | private: |
| 126 | SynchState state; | 178 | SynchState state; |
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index a7bcf26fb..ecd9986a0 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <mutex> | ||
| 7 | #include <set> | 8 | #include <set> |
| 8 | #include <unordered_map> | 9 | #include <unordered_map> |
| 9 | 10 | ||
| @@ -12,14 +13,26 @@ | |||
| 12 | 13 | ||
| 13 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 14 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 16 | #include "video_core/gpu.h" | ||
| 15 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 16 | 18 | ||
| 17 | class RasterizerCacheObject { | 19 | class RasterizerCacheObject { |
| 18 | public: | 20 | public: |
| 21 | explicit RasterizerCacheObject(const u8* host_ptr) | ||
| 22 | : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} | ||
| 23 | |||
| 19 | virtual ~RasterizerCacheObject(); | 24 | virtual ~RasterizerCacheObject(); |
| 20 | 25 | ||
| 26 | CacheAddr GetCacheAddr() const { | ||
| 27 | return cache_addr; | ||
| 28 | } | ||
| 29 | |||
| 30 | const u8* GetHostPtr() const { | ||
| 31 | return host_ptr; | ||
| 32 | } | ||
| 33 | |||
| 21 | /// Gets the address of the shader in guest memory, required for cache management | 34 | /// Gets the address of the shader in guest memory, required for cache management |
| 22 | virtual VAddr GetAddr() const = 0; | 35 | virtual VAddr GetCpuAddr() const = 0; |
| 23 | 36 | ||
| 24 | /// Gets the size of the shader in guest memory, required for cache management | 37 | /// Gets the size of the shader in guest memory, required for cache management |
| 25 | virtual std::size_t GetSizeInBytes() const = 0; | 38 | virtual std::size_t GetSizeInBytes() const = 0; |
| @@ -58,6 +71,8 @@ private: | |||
| 58 | bool is_registered{}; ///< Whether the object is currently registered with the cache | 71 | bool is_registered{}; ///< Whether the object is currently registered with the cache |
| 59 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) | 72 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) |
| 60 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing | 73 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing |
| 74 | CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space | ||
| 75 | const u8* host_ptr{}; ///< Pointer to the memory backing this cached region | ||
| 61 | }; | 76 | }; |
| 62 | 77 | ||
| 63 | template <class T> | 78 | template <class T> |
| @@ -68,7 +83,9 @@ public: | |||
| 68 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | 83 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 69 | 84 | ||
| 70 | /// Write any cached resources overlapping the specified region back to memory | 85 | /// Write any cached resources overlapping the specified region back to memory |
| 71 | void FlushRegion(Tegra::GPUVAddr addr, size_t size) { | 86 | void FlushRegion(CacheAddr addr, std::size_t size) { |
| 87 | std::lock_guard<std::recursive_mutex> lock{mutex}; | ||
| 88 | |||
| 72 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | 89 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
| 73 | for (auto& object : objects) { | 90 | for (auto& object : objects) { |
| 74 | FlushObject(object); | 91 | FlushObject(object); |
| @@ -76,7 +93,9 @@ public: | |||
| 76 | } | 93 | } |
| 77 | 94 | ||
| 78 | /// Mark the specified region as being invalidated | 95 | /// Mark the specified region as being invalidated |
| 79 | void InvalidateRegion(VAddr addr, u64 size) { | 96 | void InvalidateRegion(CacheAddr addr, u64 size) { |
| 97 | std::lock_guard<std::recursive_mutex> lock{mutex}; | ||
| 98 | |||
| 80 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | 99 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
| 81 | for (auto& object : objects) { | 100 | for (auto& object : objects) { |
| 82 | if (!object->IsRegistered()) { | 101 | if (!object->IsRegistered()) { |
| @@ -89,48 +108,60 @@ public: | |||
| 89 | 108 | ||
| 90 | /// Invalidates everything in the cache | 109 | /// Invalidates everything in the cache |
| 91 | void InvalidateAll() { | 110 | void InvalidateAll() { |
| 111 | std::lock_guard<std::recursive_mutex> lock{mutex}; | ||
| 112 | |||
| 92 | while (interval_cache.begin() != interval_cache.end()) { | 113 | while (interval_cache.begin() != interval_cache.end()) { |
| 93 | Unregister(*interval_cache.begin()->second.begin()); | 114 | Unregister(*interval_cache.begin()->second.begin()); |
| 94 | } | 115 | } |
| 95 | } | 116 | } |
| 96 | 117 | ||
| 97 | protected: | 118 | protected: |
| 98 | /// Tries to get an object from the cache with the specified address | 119 | /// Tries to get an object from the cache with the specified cache address |
| 99 | T TryGet(VAddr addr) const { | 120 | T TryGet(CacheAddr addr) const { |
| 100 | const auto iter = map_cache.find(addr); | 121 | const auto iter = map_cache.find(addr); |
| 101 | if (iter != map_cache.end()) | 122 | if (iter != map_cache.end()) |
| 102 | return iter->second; | 123 | return iter->second; |
| 103 | return nullptr; | 124 | return nullptr; |
| 104 | } | 125 | } |
| 105 | 126 | ||
| 127 | T TryGet(const void* addr) const { | ||
| 128 | const auto iter = map_cache.find(ToCacheAddr(addr)); | ||
| 129 | if (iter != map_cache.end()) | ||
| 130 | return iter->second; | ||
| 131 | return nullptr; | ||
| 132 | } | ||
| 133 | |||
| 106 | /// Register an object into the cache | 134 | /// Register an object into the cache |
| 107 | void Register(const T& object) { | 135 | void Register(const T& object) { |
| 136 | std::lock_guard<std::recursive_mutex> lock{mutex}; | ||
| 137 | |||
| 108 | object->SetIsRegistered(true); | 138 | object->SetIsRegistered(true); |
| 109 | interval_cache.add({GetInterval(object), ObjectSet{object}}); | 139 | interval_cache.add({GetInterval(object), ObjectSet{object}}); |
| 110 | map_cache.insert({object->GetAddr(), object}); | 140 | map_cache.insert({object->GetCacheAddr(), object}); |
| 111 | rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); | 141 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); |
| 112 | } | 142 | } |
| 113 | 143 | ||
| 114 | /// Unregisters an object from the cache | 144 | /// Unregisters an object from the cache |
| 115 | void Unregister(const T& object) { | 145 | void Unregister(const T& object) { |
| 116 | object->SetIsRegistered(false); | 146 | std::lock_guard<std::recursive_mutex> lock{mutex}; |
| 117 | rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); | ||
| 118 | // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit | ||
| 119 | if (Settings::values.use_accurate_gpu_emulation) { | ||
| 120 | FlushObject(object); | ||
| 121 | } | ||
| 122 | 147 | ||
| 148 | object->SetIsRegistered(false); | ||
| 149 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | ||
| 123 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | 150 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); |
| 124 | map_cache.erase(object->GetAddr()); | 151 | map_cache.erase(object->GetCacheAddr()); |
| 125 | } | 152 | } |
| 126 | 153 | ||
| 127 | /// Returns a ticks counter used for tracking when cached objects were last modified | 154 | /// Returns a ticks counter used for tracking when cached objects were last modified |
| 128 | u64 GetModifiedTicks() { | 155 | u64 GetModifiedTicks() { |
| 156 | std::lock_guard<std::recursive_mutex> lock{mutex}; | ||
| 157 | |||
| 129 | return ++modified_ticks; | 158 | return ++modified_ticks; |
| 130 | } | 159 | } |
| 131 | 160 | ||
| 132 | /// Flushes the specified object, updating appropriate cache state as needed | 161 | /// Flushes the specified object, updating appropriate cache state as needed |
| 133 | void FlushObject(const T& object) { | 162 | void FlushObject(const T& object) { |
| 163 | std::lock_guard<std::recursive_mutex> lock{mutex}; | ||
| 164 | |||
| 134 | if (!object->IsDirty()) { | 165 | if (!object->IsDirty()) { |
| 135 | return; | 166 | return; |
| 136 | } | 167 | } |
| @@ -140,7 +171,7 @@ protected: | |||
| 140 | 171 | ||
| 141 | private: | 172 | private: |
| 142 | /// Returns a list of cached objects from the specified memory region, ordered by access time | 173 | /// Returns a list of cached objects from the specified memory region, ordered by access time |
| 143 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { | 174 | std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { |
| 144 | if (size == 0) { | 175 | if (size == 0) { |
| 145 | return {}; | 176 | return {}; |
| 146 | } | 177 | } |
| @@ -164,17 +195,18 @@ private: | |||
| 164 | } | 195 | } |
| 165 | 196 | ||
| 166 | using ObjectSet = std::set<T>; | 197 | using ObjectSet = std::set<T>; |
| 167 | using ObjectCache = std::unordered_map<VAddr, T>; | 198 | using ObjectCache = std::unordered_map<CacheAddr, T>; |
| 168 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; | 199 | using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; |
| 169 | using ObjectInterval = typename IntervalCache::interval_type; | 200 | using ObjectInterval = typename IntervalCache::interval_type; |
| 170 | 201 | ||
| 171 | static auto GetInterval(const T& object) { | 202 | static auto GetInterval(const T& object) { |
| 172 | return ObjectInterval::right_open(object->GetAddr(), | 203 | return ObjectInterval::right_open(object->GetCacheAddr(), |
| 173 | object->GetAddr() + object->GetSizeInBytes()); | 204 | object->GetCacheAddr() + object->GetSizeInBytes()); |
| 174 | } | 205 | } |
| 175 | 206 | ||
| 176 | ObjectCache map_cache; | 207 | ObjectCache map_cache; |
| 177 | IntervalCache interval_cache; ///< Cache of objects | 208 | IntervalCache interval_cache; ///< Cache of objects |
| 178 | u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing | 209 | u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing |
| 179 | VideoCore::RasterizerInterface& rasterizer; | 210 | VideoCore::RasterizerInterface& rasterizer; |
| 211 | std::recursive_mutex mutex; | ||
| 180 | }; | 212 | }; |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 6a1dc9cf6..76e292e87 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -35,14 +35,14 @@ public: | |||
| 35 | virtual void FlushAll() = 0; | 35 | virtual void FlushAll() = 0; |
| 36 | 36 | ||
| 37 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 37 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 38 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | 38 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; |
| 39 | 39 | ||
| 40 | /// Notify rasterizer that any caches of the specified region should be invalidated | 40 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 41 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | 41 | virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 42 | 42 | ||
| 43 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 43 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 44 | /// and invalidated | 44 | /// and invalidated |
| 45 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 45 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; |
| 46 | 46 | ||
| 47 | /// Attempt to use a faster method to perform a surface copy | 47 | /// Attempt to use a faster method to perform a surface copy |
| 48 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 48 | virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| @@ -63,7 +63,7 @@ public: | |||
| 63 | } | 63 | } |
| 64 | 64 | ||
| 65 | /// Increase/decrease the number of object in pages touching the specified region | 65 | /// Increase/decrease the number of object in pages touching the specified region |
| 66 | virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} | 66 | virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} |
| 67 | 67 | ||
| 68 | /// Initialize disk cached resources for the game being emulated | 68 | /// Initialize disk cached resources for the game being emulated |
| 69 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, | 69 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b3062e5ba..a4eea61a6 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -13,6 +13,11 @@ | |||
| 13 | 13 | ||
| 14 | namespace OpenGL { | 14 | namespace OpenGL { |
| 15 | 15 | ||
| 16 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||
| 17 | std::size_t alignment, u8* host_ptr) | ||
| 18 | : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ | ||
| 19 | host_ptr} {} | ||
| 20 | |||
| 16 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | 21 | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) |
| 17 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | 22 | : RasterizerCache{rasterizer}, stream_buffer(size, true) {} |
| 18 | 23 | ||
| @@ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | |||
| 26 | // TODO: Figure out which size is the best for given games. | 31 | // TODO: Figure out which size is the best for given games. |
| 27 | cache &= size >= 2048; | 32 | cache &= size >= 2048; |
| 28 | 33 | ||
| 34 | const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; | ||
| 29 | if (cache) { | 35 | if (cache) { |
| 30 | auto entry = TryGet(*cpu_addr); | 36 | auto entry = TryGet(host_ptr); |
| 31 | if (entry) { | 37 | if (entry) { |
| 32 | if (entry->size >= size && entry->alignment == alignment) { | 38 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { |
| 33 | return entry->offset; | 39 | return entry->GetOffset(); |
| 34 | } | 40 | } |
| 35 | Unregister(entry); | 41 | Unregister(entry); |
| 36 | } | 42 | } |
| @@ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | |||
| 39 | AlignBuffer(alignment); | 45 | AlignBuffer(alignment); |
| 40 | const GLintptr uploaded_offset = buffer_offset; | 46 | const GLintptr uploaded_offset = buffer_offset; |
| 41 | 47 | ||
| 42 | Memory::ReadBlock(*cpu_addr, buffer_ptr, size); | 48 | if (!host_ptr) { |
| 49 | return uploaded_offset; | ||
| 50 | } | ||
| 43 | 51 | ||
| 52 | std::memcpy(buffer_ptr, host_ptr, size); | ||
| 44 | buffer_ptr += size; | 53 | buffer_ptr += size; |
| 45 | buffer_offset += size; | 54 | buffer_offset += size; |
| 46 | 55 | ||
| 47 | if (cache) { | 56 | if (cache) { |
| 48 | auto entry = std::make_shared<CachedBufferEntry>(); | 57 | auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, |
| 49 | entry->offset = uploaded_offset; | 58 | alignment, host_ptr); |
| 50 | entry->size = size; | ||
| 51 | entry->alignment = alignment; | ||
| 52 | entry->addr = *cpu_addr; | ||
| 53 | Register(entry); | 59 | Register(entry); |
| 54 | } | 60 | } |
| 55 | 61 | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index c11acfb79..1de1f84ae 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -17,22 +17,39 @@ namespace OpenGL { | |||
| 17 | 17 | ||
| 18 | class RasterizerOpenGL; | 18 | class RasterizerOpenGL; |
| 19 | 19 | ||
| 20 | struct CachedBufferEntry final : public RasterizerCacheObject { | 20 | class CachedBufferEntry final : public RasterizerCacheObject { |
| 21 | VAddr GetAddr() const override { | 21 | public: |
| 22 | return addr; | 22 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, |
| 23 | std::size_t alignment, u8* host_ptr); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const override { | ||
| 26 | return cpu_addr; | ||
| 23 | } | 27 | } |
| 24 | 28 | ||
| 25 | std::size_t GetSizeInBytes() const override { | 29 | std::size_t GetSizeInBytes() const override { |
| 26 | return size; | 30 | return size; |
| 27 | } | 31 | } |
| 28 | 32 | ||
| 33 | std::size_t GetSize() const { | ||
| 34 | return size; | ||
| 35 | } | ||
| 36 | |||
| 37 | GLintptr GetOffset() const { | ||
| 38 | return offset; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::size_t GetAlignment() const { | ||
| 42 | return alignment; | ||
| 43 | } | ||
| 44 | |||
| 29 | // We do not have to flush this cache as things in it are never modified by us. | 45 | // We do not have to flush this cache as things in it are never modified by us. |
| 30 | void Flush() override {} | 46 | void Flush() override {} |
| 31 | 47 | ||
| 32 | VAddr addr; | 48 | private: |
| 33 | std::size_t size; | 49 | VAddr cpu_addr{}; |
| 34 | GLintptr offset; | 50 | std::size_t size{}; |
| 35 | std::size_t alignment; | 51 | GLintptr offset{}; |
| 52 | std::size_t alignment{}; | ||
| 36 | }; | 53 | }; |
| 37 | 54 | ||
| 38 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 55 | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { |
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 7161d1dea..a2c509c24 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp | |||
| @@ -15,12 +15,13 @@ | |||
| 15 | 15 | ||
| 16 | namespace OpenGL { | 16 | namespace OpenGL { |
| 17 | 17 | ||
| 18 | CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { | 18 | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) |
| 19 | : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} { | ||
| 19 | buffer.Create(); | 20 | buffer.Create(); |
| 20 | // Bind and unbind the buffer so it gets allocated by the driver | 21 | // Bind and unbind the buffer so it gets allocated by the driver |
| 21 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | 22 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); |
| 22 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); | 23 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); |
| 23 | LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); | 24 | LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); |
| 24 | } | 25 | } |
| 25 | 26 | ||
| 26 | void CachedGlobalRegion::Reload(u32 size_) { | 27 | void CachedGlobalRegion::Reload(u32 size_) { |
| @@ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) { | |||
| 35 | 36 | ||
| 36 | // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer | 37 | // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer |
| 37 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | 38 | glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); |
| 38 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); | 39 | glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 41 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | 42 | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { |
| @@ -46,11 +47,11 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 | |||
| 46 | return search->second; | 47 | return search->second; |
| 47 | } | 48 | } |
| 48 | 49 | ||
| 49 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { | 50 | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { |
| 50 | GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; | 51 | GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; |
| 51 | if (!region) { | 52 | if (!region) { |
| 52 | // No reserved surface available, create a new one and reserve it | 53 | // No reserved surface available, create a new one and reserve it |
| 53 | region = std::make_shared<CachedGlobalRegion>(addr, size); | 54 | region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); |
| 54 | ReserveGlobalRegion(region); | 55 | ReserveGlobalRegion(region); |
| 55 | } | 56 | } |
| 56 | region->Reload(size); | 57 | region->Reload(size); |
| @@ -58,7 +59,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si | |||
| 58 | } | 59 | } |
| 59 | 60 | ||
| 60 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | 61 | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { |
| 61 | reserve.insert_or_assign(region->GetAddr(), std::move(region)); | 62 | reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); |
| 62 | } | 63 | } |
| 63 | 64 | ||
| 64 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | 65 | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) |
| @@ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | |||
| 80 | ASSERT(actual_addr); | 81 | ASSERT(actual_addr); |
| 81 | 82 | ||
| 82 | // Look up global region in the cache based on address | 83 | // Look up global region in the cache based on address |
| 83 | GlobalRegion region = TryGet(*actual_addr); | 84 | const auto& host_ptr{Memory::GetPointer(*actual_addr)}; |
| 85 | GlobalRegion region{TryGet(host_ptr)}; | ||
| 84 | 86 | ||
| 85 | if (!region) { | 87 | if (!region) { |
| 86 | // No global region found - create a new one | 88 | // No global region found - create a new one |
| 87 | region = GetUncachedGlobalRegion(*actual_addr, size); | 89 | region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); |
| 88 | Register(region); | 90 | Register(region); |
| 89 | } | 91 | } |
| 90 | 92 | ||
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h index ba2bdc60c..e497a0619 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ b/src/video_core/renderer_opengl/gl_global_cache.h | |||
| @@ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | |||
| 27 | 27 | ||
| 28 | class CachedGlobalRegion final : public RasterizerCacheObject { | 28 | class CachedGlobalRegion final : public RasterizerCacheObject { |
| 29 | public: | 29 | public: |
| 30 | explicit CachedGlobalRegion(VAddr addr, u32 size); | 30 | explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); |
| 31 | 31 | ||
| 32 | /// Gets the address of the shader in guest memory, required for cache management | 32 | VAddr GetCpuAddr() const override { |
| 33 | VAddr GetAddr() const override { | 33 | return cpu_addr; |
| 34 | return addr; | ||
| 35 | } | 34 | } |
| 36 | 35 | ||
| 37 | /// Gets the size of the shader in guest memory, required for cache management | ||
| 38 | std::size_t GetSizeInBytes() const override { | 36 | std::size_t GetSizeInBytes() const override { |
| 39 | return size; | 37 | return size; |
| 40 | } | 38 | } |
| @@ -53,9 +51,8 @@ public: | |||
| 53 | } | 51 | } |
| 54 | 52 | ||
| 55 | private: | 53 | private: |
| 56 | VAddr addr{}; | 54 | VAddr cpu_addr{}; |
| 57 | u32 size{}; | 55 | u32 size{}; |
| 58 | |||
| 59 | OGLBuffer buffer; | 56 | OGLBuffer buffer; |
| 60 | }; | 57 | }; |
| 61 | 58 | ||
| @@ -69,7 +66,7 @@ public: | |||
| 69 | 66 | ||
| 70 | private: | 67 | private: |
| 71 | GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; | 68 | GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; |
| 72 | GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); | 69 | GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); |
| 73 | void ReserveGlobalRegion(GlobalRegion region); | 70 | void ReserveGlobalRegion(GlobalRegion region); |
| 74 | 71 | ||
| 75 | std::unordered_map<VAddr, GlobalRegion> reserve; | 72 | std::unordered_map<VAddr, GlobalRegion> reserve; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 976f64c24..bb6de5477 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | |||
| 449 | return boost::make_iterator_range(map.equal_range(interval)); | 449 | return boost::make_iterator_range(map.equal_range(interval)); |
| 450 | } | 450 | } |
| 451 | 451 | ||
| 452 | void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | 452 | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { |
| 453 | const u64 page_start{addr >> Memory::PAGE_BITS}; | 453 | const u64 page_start{addr >> Memory::PAGE_BITS}; |
| 454 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | 454 | const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; |
| 455 | 455 | ||
| @@ -747,12 +747,12 @@ void RasterizerOpenGL::DrawArrays() { | |||
| 747 | 747 | ||
| 748 | void RasterizerOpenGL::FlushAll() {} | 748 | void RasterizerOpenGL::FlushAll() {} |
| 749 | 749 | ||
| 750 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | 750 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { |
| 751 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 751 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 752 | res_cache.FlushRegion(addr, size); | 752 | res_cache.FlushRegion(addr, size); |
| 753 | } | 753 | } |
| 754 | 754 | ||
| 755 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 755 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { |
| 756 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 756 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 757 | res_cache.InvalidateRegion(addr, size); | 757 | res_cache.InvalidateRegion(addr, size); |
| 758 | shader_cache.InvalidateRegion(addr, size); | 758 | shader_cache.InvalidateRegion(addr, size); |
| @@ -760,7 +760,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 760 | buffer_cache.InvalidateRegion(addr, size); | 760 | buffer_cache.InvalidateRegion(addr, size); |
| 761 | } | 761 | } |
| 762 | 762 | ||
| 763 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 763 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { |
| 764 | FlushRegion(addr, size); | 764 | FlushRegion(addr, size); |
| 765 | InvalidateRegion(addr, size); | 765 | InvalidateRegion(addr, size); |
| 766 | } | 766 | } |
| @@ -782,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 782 | 782 | ||
| 783 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 783 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 784 | 784 | ||
| 785 | const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; | 785 | const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; |
| 786 | if (!surface) { | 786 | if (!surface) { |
| 787 | return {}; | 787 | return {}; |
| 788 | } | 788 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ca3de0592..30f3e8acb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -57,9 +57,9 @@ public: | |||
| 57 | void DrawArrays() override; | 57 | void DrawArrays() override; |
| 58 | void Clear() override; | 58 | void Clear() override; |
| 59 | void FlushAll() override; | 59 | void FlushAll() override; |
| 60 | void FlushRegion(VAddr addr, u64 size) override; | 60 | void FlushRegion(CacheAddr addr, u64 size) override; |
| 61 | void InvalidateRegion(VAddr addr, u64 size) override; | 61 | void InvalidateRegion(CacheAddr addr, u64 size) override; |
| 62 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 62 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; |
| 63 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 63 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
| 64 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, | 64 | const Tegra::Engines::Fermi2D::Regs::Surface& dst, |
| 65 | const Common::Rectangle<u32>& src_rect, | 65 | const Common::Rectangle<u32>& src_rect, |
| @@ -67,7 +67,7 @@ public: | |||
| 67 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | 67 | bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |
| 68 | u32 pixel_stride) override; | 68 | u32 pixel_stride) override; |
| 69 | bool AccelerateDrawBatch(bool is_indexed) override; | 69 | bool AccelerateDrawBatch(bool is_indexed) override; |
| 70 | void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; | 70 | void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; |
| 71 | void LoadDiskResources(const std::atomic_bool& stop_loading, | 71 | void LoadDiskResources(const std::atomic_bool& stop_loading, |
| 72 | const VideoCore::DiskResourceLoadCallback& callback) override; | 72 | const VideoCore::DiskResourceLoadCallback& callback) override; |
| 73 | 73 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index bd1409660..451de00e8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | |||
| @@ -61,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { | |||
| 61 | 61 | ||
| 62 | addr = cpu_addr ? *cpu_addr : 0; | 62 | addr = cpu_addr ? *cpu_addr : 0; |
| 63 | gpu_addr = gpu_addr_; | 63 | gpu_addr = gpu_addr_; |
| 64 | host_ptr = Memory::GetPointer(addr); | ||
| 64 | size_in_bytes = SizeInBytesRaw(); | 65 | size_in_bytes = SizeInBytesRaw(); |
| 65 | 66 | ||
| 66 | if (IsPixelFormatASTC(pixel_format)) { | 67 | if (IsPixelFormatASTC(pixel_format)) { |
| @@ -563,8 +564,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac | |||
| 563 | } | 564 | } |
| 564 | 565 | ||
| 565 | CachedSurface::CachedSurface(const SurfaceParams& params) | 566 | CachedSurface::CachedSurface(const SurfaceParams& params) |
| 566 | : params(params), gl_target(SurfaceTargetToGL(params.target)), | 567 | : params{params}, gl_target{SurfaceTargetToGL(params.target)}, |
| 567 | cached_size_in_bytes(params.size_in_bytes) { | 568 | cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} { |
| 568 | texture.Create(gl_target); | 569 | texture.Create(gl_target); |
| 569 | 570 | ||
| 570 | // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) | 571 | // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) |
| @@ -633,10 +634,9 @@ void CachedSurface::LoadGLBuffer() { | |||
| 633 | const u32 bpp = params.GetFormatBpp() / 8; | 634 | const u32 bpp = params.GetFormatBpp() / 8; |
| 634 | const u32 copy_size = params.width * bpp; | 635 | const u32 copy_size = params.width * bpp; |
| 635 | if (params.pitch == copy_size) { | 636 | if (params.pitch == copy_size) { |
| 636 | std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr), | 637 | std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); |
| 637 | params.size_in_bytes_gl); | ||
| 638 | } else { | 638 | } else { |
| 639 | const u8* start = Memory::GetPointer(params.addr); | 639 | const u8* start{params.host_ptr}; |
| 640 | u8* write_to = gl_buffer[0].data(); | 640 | u8* write_to = gl_buffer[0].data(); |
| 641 | for (u32 h = params.height; h > 0; h--) { | 641 | for (u32 h = params.height; h > 0; h--) { |
| 642 | std::memcpy(write_to, start, copy_size); | 642 | std::memcpy(write_to, start, copy_size); |
| @@ -680,8 +680,6 @@ void CachedSurface::FlushGLBuffer() { | |||
| 680 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); | 680 | glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
| 681 | Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, | 681 | Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, |
| 682 | params.height, params.depth, true, true); | 682 | params.height, params.depth, true, true); |
| 683 | const u8* const texture_src_data = Memory::GetPointer(params.addr); | ||
| 684 | ASSERT(texture_src_data); | ||
| 685 | if (params.is_tiled) { | 683 | if (params.is_tiled) { |
| 686 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | 684 | ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |
| 687 | params.block_width, static_cast<u32>(params.target)); | 685 | params.block_width, static_cast<u32>(params.target)); |
| @@ -691,9 +689,9 @@ void CachedSurface::FlushGLBuffer() { | |||
| 691 | const u32 bpp = params.GetFormatBpp() / 8; | 689 | const u32 bpp = params.GetFormatBpp() / 8; |
| 692 | const u32 copy_size = params.width * bpp; | 690 | const u32 copy_size = params.width * bpp; |
| 693 | if (params.pitch == copy_size) { | 691 | if (params.pitch == copy_size) { |
| 694 | std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes()); | 692 | std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes()); |
| 695 | } else { | 693 | } else { |
| 696 | u8* start = Memory::GetPointer(params.addr); | 694 | u8* start{params.host_ptr}; |
| 697 | const u8* read_to = gl_buffer[0].data(); | 695 | const u8* read_to = gl_buffer[0].data(); |
| 698 | for (u32 h = params.height; h > 0; h--) { | 696 | for (u32 h = params.height; h > 0; h--) { |
| 699 | std::memcpy(start, read_to, copy_size); | 697 | std::memcpy(start, read_to, copy_size); |
| @@ -932,7 +930,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | |||
| 932 | } | 930 | } |
| 933 | 931 | ||
| 934 | // Look up surface in the cache based on address | 932 | // Look up surface in the cache based on address |
| 935 | Surface surface{TryGet(params.addr)}; | 933 | Surface surface{TryGet(params.host_ptr)}; |
| 936 | if (surface) { | 934 | if (surface) { |
| 937 | if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { | 935 | if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { |
| 938 | // Use the cached surface as-is unless it's not synced with memory | 936 | // Use the cached surface as-is unless it's not synced with memory |
| @@ -986,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, | |||
| 986 | for (u32 layer = 0; layer < dst_params.depth; layer++) { | 984 | for (u32 layer = 0; layer < dst_params.depth; layer++) { |
| 987 | for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { | 985 | for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { |
| 988 | const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); | 986 | const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); |
| 989 | const Surface& copy = TryGet(sub_address); | 987 | const Surface& copy = TryGet(Memory::GetPointer(sub_address)); |
| 990 | if (!copy) | 988 | if (!copy) |
| 991 | continue; | 989 | continue; |
| 992 | const auto& src_params{copy->GetSurfaceParams()}; | 990 | const auto& src_params{copy->GetSurfaceParams()}; |
| @@ -1163,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, | |||
| 1163 | const auto& dst_params{dst_surface->GetSurfaceParams()}; | 1161 | const auto& dst_params{dst_surface->GetSurfaceParams()}; |
| 1164 | 1162 | ||
| 1165 | // Flush enough memory for both the source and destination surface | 1163 | // Flush enough memory for both the source and destination surface |
| 1166 | FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); | 1164 | FlushRegion(ToCacheAddr(src_params.host_ptr), |
| 1165 | std::max(src_params.MemorySize(), dst_params.MemorySize())); | ||
| 1167 | 1166 | ||
| 1168 | LoadSurface(dst_surface); | 1167 | LoadSurface(dst_surface); |
| 1169 | } | 1168 | } |
| @@ -1215,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, | |||
| 1215 | return new_surface; | 1214 | return new_surface; |
| 1216 | } | 1215 | } |
| 1217 | 1216 | ||
| 1218 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { | 1217 | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { |
| 1219 | return TryGet(addr); | 1218 | return TryGet(host_ptr); |
| 1220 | } | 1219 | } |
| 1221 | 1220 | ||
| 1222 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | 1221 | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { |
| @@ -1267,7 +1266,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa | |||
| 1267 | src_params.height == dst_params.MipHeight(*level) && | 1266 | src_params.height == dst_params.MipHeight(*level) && |
| 1268 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { | 1267 | src_params.block_height >= dst_params.MipBlockHeight(*level)) { |
| 1269 | const std::optional<u32> slot = | 1268 | const std::optional<u32> slot = |
| 1270 | TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); | 1269 | TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); |
| 1271 | if (slot.has_value()) { | 1270 | if (slot.has_value()) { |
| 1272 | glCopyImageSubData(render_surface->Texture().handle, | 1271 | glCopyImageSubData(render_surface->Texture().handle, |
| 1273 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | 1272 | SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, |
| @@ -1283,8 +1282,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa | |||
| 1283 | } | 1282 | } |
| 1284 | 1283 | ||
| 1285 | static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { | 1284 | static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { |
| 1286 | const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); | 1285 | const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); |
| 1287 | const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); | 1286 | const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); |
| 1288 | if (bound2 > bound1) | 1287 | if (bound2 > bound1) |
| 1289 | return true; | 1288 | return true; |
| 1290 | const auto& dst_params = blitted_surface->GetSurfaceParams(); | 1289 | const auto& dst_params = blitted_surface->GetSurfaceParams(); |
| @@ -1327,7 +1326,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() { | |||
| 1327 | void RasterizerCacheOpenGL::SignalPostDrawCall() { | 1326 | void RasterizerCacheOpenGL::SignalPostDrawCall() { |
| 1328 | for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { | 1327 | for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { |
| 1329 | if (current_color_buffers[i] != nullptr) { | 1328 | if (current_color_buffers[i] != nullptr) { |
| 1330 | Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); | 1329 | Surface intersect = |
| 1330 | CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); | ||
| 1331 | if (intersect != nullptr) { | 1331 | if (intersect != nullptr) { |
| 1332 | PartialReinterpretSurface(current_color_buffers[i], intersect); | 1332 | PartialReinterpretSurface(current_color_buffers[i], intersect); |
| 1333 | texception = true; | 1333 | texception = true; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index 9cf6f50be..b3afad139 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h | |||
| @@ -297,6 +297,7 @@ struct SurfaceParams { | |||
| 297 | bool srgb_conversion; | 297 | bool srgb_conversion; |
| 298 | // Parameters used for caching | 298 | // Parameters used for caching |
| 299 | VAddr addr; | 299 | VAddr addr; |
| 300 | u8* host_ptr; | ||
| 300 | Tegra::GPUVAddr gpu_addr; | 301 | Tegra::GPUVAddr gpu_addr; |
| 301 | std::size_t size_in_bytes; | 302 | std::size_t size_in_bytes; |
| 302 | std::size_t size_in_bytes_gl; | 303 | std::size_t size_in_bytes_gl; |
| @@ -345,9 +346,9 @@ class RasterizerOpenGL; | |||
| 345 | 346 | ||
| 346 | class CachedSurface final : public RasterizerCacheObject { | 347 | class CachedSurface final : public RasterizerCacheObject { |
| 347 | public: | 348 | public: |
| 348 | CachedSurface(const SurfaceParams& params); | 349 | explicit CachedSurface(const SurfaceParams& params); |
| 349 | 350 | ||
| 350 | VAddr GetAddr() const override { | 351 | VAddr GetCpuAddr() const override { |
| 351 | return params.addr; | 352 | return params.addr; |
| 352 | } | 353 | } |
| 353 | 354 | ||
| @@ -449,7 +450,7 @@ public: | |||
| 449 | Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); | 450 | Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); |
| 450 | 451 | ||
| 451 | /// Tries to find a framebuffer using on the provided CPU address | 452 | /// Tries to find a framebuffer using on the provided CPU address |
| 452 | Surface TryFindFramebufferSurface(VAddr addr) const; | 453 | Surface TryFindFramebufferSurface(const u8* host_ptr) const; |
| 453 | 454 | ||
| 454 | /// Copies the contents of one surface to another | 455 | /// Copies the contents of one surface to another |
| 455 | void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | 456 | void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, |
| @@ -506,12 +507,12 @@ private: | |||
| 506 | std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; | 507 | std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; |
| 507 | Surface last_depth_buffer; | 508 | Surface last_depth_buffer; |
| 508 | 509 | ||
| 509 | using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; | 510 | using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; |
| 510 | using SurfaceInterval = typename SurfaceIntervalCache::interval_type; | 511 | using SurfaceInterval = typename SurfaceIntervalCache::interval_type; |
| 511 | 512 | ||
| 512 | static auto GetReinterpretInterval(const Surface& object) { | 513 | static auto GetReinterpretInterval(const Surface& object) { |
| 513 | return SurfaceInterval::right_open(object->GetAddr() + 1, | 514 | return SurfaceInterval::right_open(object->GetCacheAddr() + 1, |
| 514 | object->GetAddr() + object->GetMemorySize() - 1); | 515 | object->GetCacheAddr() + object->GetMemorySize() - 1); |
| 515 | } | 516 | } |
| 516 | 517 | ||
| 517 | // Reinterpreted surfaces are very fragil as the game may keep rendering into them. | 518 | // Reinterpreted surfaces are very fragil as the game may keep rendering into them. |
| @@ -523,7 +524,7 @@ private: | |||
| 523 | reinterpret_surface->MarkReinterpreted(); | 524 | reinterpret_surface->MarkReinterpreted(); |
| 524 | } | 525 | } |
| 525 | 526 | ||
| 526 | Surface CollideOnReinterpretedSurface(VAddr addr) const { | 527 | Surface CollideOnReinterpretedSurface(CacheAddr addr) const { |
| 527 | const SurfaceInterval interval{addr}; | 528 | const SurfaceInterval interval{addr}; |
| 528 | for (auto& pair : | 529 | for (auto& pair : |
| 529 | boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { | 530 | boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4883e4f62..60a04e146 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | |||
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | /// Gets the shader program code from memory for the specified address | 44 | /// Gets the shader program code from memory for the specified address |
| 45 | ProgramCode GetShaderCode(VAddr addr) { | 45 | ProgramCode GetShaderCode(const u8* host_ptr) { |
| 46 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | 46 | ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |
| 47 | Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); | 47 | std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); |
| 48 | return program_code; | 48 | return program_code; |
| 49 | } | 49 | } |
| 50 | 50 | ||
| @@ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 214 | 214 | ||
| 215 | } // namespace | 215 | } // namespace |
| 216 | 216 | ||
| 217 | CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | 217 | CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, |
| 218 | ShaderDiskCacheOpenGL& disk_cache, | 218 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 219 | const PrecompiledPrograms& precompiled_programs, | 219 | const PrecompiledPrograms& precompiled_programs, |
| 220 | ProgramCode&& program_code, ProgramCode&& program_code_b) | 220 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) |
| 221 | : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, | 221 | : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, |
| 222 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { | 222 | program_type{program_type}, disk_cache{disk_cache}, |
| 223 | precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { | ||
| 223 | 224 | ||
| 224 | const std::size_t code_size = CalculateProgramSize(program_code); | 225 | const std::size_t code_size = CalculateProgramSize(program_code); |
| 225 | const std::size_t code_size_b = | 226 | const std::size_t code_size_b = |
| @@ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro | |||
| 243 | disk_cache.SaveRaw(raw); | 244 | disk_cache.SaveRaw(raw); |
| 244 | } | 245 | } |
| 245 | 246 | ||
| 246 | CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | 247 | CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, |
| 247 | ShaderDiskCacheOpenGL& disk_cache, | 248 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 248 | const PrecompiledPrograms& precompiled_programs, | 249 | const PrecompiledPrograms& precompiled_programs, |
| 249 | GLShader::ProgramResult result) | 250 | GLShader::ProgramResult result, u8* host_ptr) |
| 250 | : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, | 251 | : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, |
| 251 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { | 252 | disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ |
| 253 | host_ptr} { | ||
| 252 | 254 | ||
| 253 | code = std::move(result.first); | 255 | code = std::move(result.first); |
| 254 | entries = result.second; | 256 | entries = result.second; |
| @@ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | |||
| 271 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | 273 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |
| 272 | } | 274 | } |
| 273 | 275 | ||
| 274 | LabelGLObject(GL_PROGRAM, program->handle, addr); | 276 | LabelGLObject(GL_PROGRAM, program->handle, guest_addr); |
| 275 | } | 277 | } |
| 276 | 278 | ||
| 277 | handle = program->handle; | 279 | handle = program->handle; |
| @@ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind | |||
| 323 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | 325 | disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |
| 324 | } | 326 | } |
| 325 | 327 | ||
| 326 | LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); | 328 | LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); |
| 327 | 329 | ||
| 328 | return target_program->handle; | 330 | return target_program->handle; |
| 329 | }; | 331 | }; |
| @@ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 489 | const VAddr program_addr{GetShaderAddress(program)}; | 491 | const VAddr program_addr{GetShaderAddress(program)}; |
| 490 | 492 | ||
| 491 | // Look up shader in the cache based on address | 493 | // Look up shader in the cache based on address |
| 492 | Shader shader{TryGet(program_addr)}; | 494 | const auto& host_ptr{Memory::GetPointer(program_addr)}; |
| 495 | Shader shader{TryGet(host_ptr)}; | ||
| 493 | 496 | ||
| 494 | if (!shader) { | 497 | if (!shader) { |
| 495 | // No shader found - create a new one | 498 | // No shader found - create a new one |
| 496 | ProgramCode program_code = GetShaderCode(program_addr); | 499 | const auto& host_ptr{Memory::GetPointer(program_addr)}; |
| 500 | ProgramCode program_code{GetShaderCode(host_ptr)}; | ||
| 497 | ProgramCode program_code_b; | 501 | ProgramCode program_code_b; |
| 498 | if (program == Maxwell::ShaderProgram::VertexA) { | 502 | if (program == Maxwell::ShaderProgram::VertexA) { |
| 499 | program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); | 503 | program_code_b = GetShaderCode( |
| 504 | Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | ||
| 500 | } | 505 | } |
| 501 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | 506 | const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); |
| 502 | 507 | ||
| @@ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 504 | if (found != precompiled_shaders.end()) { | 509 | if (found != precompiled_shaders.end()) { |
| 505 | shader = | 510 | shader = |
| 506 | std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, | 511 | std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, |
| 507 | precompiled_programs, found->second); | 512 | precompiled_programs, found->second, host_ptr); |
| 508 | } else { | 513 | } else { |
| 509 | shader = std::make_shared<CachedShader>( | 514 | shader = std::make_shared<CachedShader>( |
| 510 | program_addr, unique_identifier, program, disk_cache, precompiled_programs, | 515 | program_addr, unique_identifier, program, disk_cache, precompiled_programs, |
| 511 | std::move(program_code), std::move(program_code_b)); | 516 | std::move(program_code), std::move(program_code_b), host_ptr); |
| 512 | } | 517 | } |
| 513 | Register(shader); | 518 | Register(shader); |
| 514 | } | 519 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 97eed192f..81fe716b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | |||
| 39 | 39 | ||
| 40 | class CachedShader final : public RasterizerCacheObject { | 40 | class CachedShader final : public RasterizerCacheObject { |
| 41 | public: | 41 | public: |
| 42 | explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | 42 | explicit CachedShader(VAddr guest_addr, u64 unique_identifier, |
| 43 | ShaderDiskCacheOpenGL& disk_cache, | 43 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 44 | const PrecompiledPrograms& precompiled_programs, | 44 | const PrecompiledPrograms& precompiled_programs, |
| 45 | ProgramCode&& program_code, ProgramCode&& program_code_b); | 45 | ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); |
| 46 | 46 | ||
| 47 | explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | 47 | explicit CachedShader(VAddr guest_addr, u64 unique_identifier, |
| 48 | ShaderDiskCacheOpenGL& disk_cache, | 48 | Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, |
| 49 | const PrecompiledPrograms& precompiled_programs, | 49 | const PrecompiledPrograms& precompiled_programs, |
| 50 | GLShader::ProgramResult result); | 50 | GLShader::ProgramResult result, u8* host_ptr); |
| 51 | 51 | ||
| 52 | VAddr GetAddr() const override { | 52 | VAddr GetCpuAddr() const override { |
| 53 | return addr; | 53 | return guest_addr; |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | std::size_t GetSizeInBytes() const override { | 56 | std::size_t GetSizeInBytes() const override { |
| @@ -91,7 +91,8 @@ private: | |||
| 91 | 91 | ||
| 92 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | 92 | ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; |
| 93 | 93 | ||
| 94 | VAddr addr{}; | 94 | u8* host_ptr{}; |
| 95 | VAddr guest_addr{}; | ||
| 95 | u64 unique_identifier{}; | 96 | u64 unique_identifier{}; |
| 96 | Maxwell::ShaderProgram program_type{}; | 97 | Maxwell::ShaderProgram program_type{}; |
| 97 | ShaderDiskCacheOpenGL& disk_cache; | 98 | ShaderDiskCacheOpenGL& disk_cache; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 4a33a6c84..95eab3fec 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -17,6 +17,11 @@ | |||
| 17 | 17 | ||
| 18 | namespace Vulkan { | 18 | namespace Vulkan { |
| 19 | 19 | ||
| 20 | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, | ||
| 21 | std::size_t alignment, u8* host_ptr) | ||
| 22 | : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ | ||
| 23 | host_ptr} {} | ||
| 24 | |||
| 20 | VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, | 25 | VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, |
| 21 | VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | 26 | VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, |
| 22 | VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) | 27 | VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) |
| @@ -37,16 +42,18 @@ VKBufferCache::~VKBufferCache() = default; | |||
| 37 | u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, | 42 | u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, |
| 38 | bool cache) { | 43 | bool cache) { |
| 39 | const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; | 44 | const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; |
| 40 | ASSERT(cpu_addr); | 45 | ASSERT_MSG(cpu_addr, "Invalid GPU address"); |
| 41 | 46 | ||
| 42 | // Cache management is a big overhead, so only cache entries with a given size. | 47 | // Cache management is a big overhead, so only cache entries with a given size. |
| 43 | // TODO: Figure out which size is the best for given games. | 48 | // TODO: Figure out which size is the best for given games. |
| 44 | cache &= size >= 2048; | 49 | cache &= size >= 2048; |
| 45 | 50 | ||
| 51 | const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; | ||
| 46 | if (cache) { | 52 | if (cache) { |
| 47 | if (auto entry = TryGet(*cpu_addr); entry) { | 53 | auto entry = TryGet(host_ptr); |
| 48 | if (entry->size >= size && entry->alignment == alignment) { | 54 | if (entry) { |
| 49 | return entry->offset; | 55 | if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { |
| 56 | return entry->GetOffset(); | ||
| 50 | } | 57 | } |
| 51 | Unregister(entry); | 58 | Unregister(entry); |
| 52 | } | 59 | } |
| @@ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 | |||
| 55 | AlignBuffer(alignment); | 62 | AlignBuffer(alignment); |
| 56 | const u64 uploaded_offset = buffer_offset; | 63 | const u64 uploaded_offset = buffer_offset; |
| 57 | 64 | ||
| 58 | Memory::ReadBlock(*cpu_addr, buffer_ptr, size); | 65 | if (!host_ptr) { |
| 66 | return uploaded_offset; | ||
| 67 | } | ||
| 59 | 68 | ||
| 69 | std::memcpy(buffer_ptr, host_ptr, size); | ||
| 60 | buffer_ptr += size; | 70 | buffer_ptr += size; |
| 61 | buffer_offset += size; | 71 | buffer_offset += size; |
| 62 | 72 | ||
| 63 | if (cache) { | 73 | if (cache) { |
| 64 | auto entry = std::make_shared<CachedBufferEntry>(); | 74 | auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, |
| 65 | entry->offset = uploaded_offset; | 75 | alignment, host_ptr); |
| 66 | entry->size = size; | ||
| 67 | entry->alignment = alignment; | ||
| 68 | entry->addr = *cpu_addr; | ||
| 69 | Register(entry); | 76 | Register(entry); |
| 70 | } | 77 | } |
| 71 | 78 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index d8e916f31..8b415744b 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -24,22 +24,39 @@ class VKFence; | |||
| 24 | class VKMemoryManager; | 24 | class VKMemoryManager; |
| 25 | class VKStreamBuffer; | 25 | class VKStreamBuffer; |
| 26 | 26 | ||
| 27 | struct CachedBufferEntry final : public RasterizerCacheObject { | 27 | class CachedBufferEntry final : public RasterizerCacheObject { |
| 28 | VAddr GetAddr() const override { | 28 | public: |
| 29 | return addr; | 29 | explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, |
| 30 | u8* host_ptr); | ||
| 31 | |||
| 32 | VAddr GetCpuAddr() const override { | ||
| 33 | return cpu_addr; | ||
| 30 | } | 34 | } |
| 31 | 35 | ||
| 32 | std::size_t GetSizeInBytes() const override { | 36 | std::size_t GetSizeInBytes() const override { |
| 33 | return size; | 37 | return size; |
| 34 | } | 38 | } |
| 35 | 39 | ||
| 40 | std::size_t GetSize() const { | ||
| 41 | return size; | ||
| 42 | } | ||
| 43 | |||
| 44 | u64 GetOffset() const { | ||
| 45 | return offset; | ||
| 46 | } | ||
| 47 | |||
| 48 | std::size_t GetAlignment() const { | ||
| 49 | return alignment; | ||
| 50 | } | ||
| 51 | |||
| 36 | // We do not have to flush this cache as things in it are never modified by us. | 52 | // We do not have to flush this cache as things in it are never modified by us. |
| 37 | void Flush() override {} | 53 | void Flush() override {} |
| 38 | 54 | ||
| 39 | VAddr addr; | 55 | private: |
| 40 | std::size_t size; | 56 | VAddr cpu_addr{}; |
| 41 | u64 offset; | 57 | std::size_t size{}; |
| 42 | std::size_t alignment; | 58 | u64 offset{}; |
| 59 | std::size_t alignment{}; | ||
| 43 | }; | 60 | }; |
| 44 | 61 | ||
| 45 | class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | 62 | class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { |