summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Fernando Sahmkow2023-01-04 22:05:20 -0500
committerGravatar Fernando Sahmkow2023-01-05 05:23:39 -0500
commit6c7eb81f7d871f5c08a4844471633a67725aae73 (patch)
treed642b93700ff8527a7750d20551e96c58a36e90f /src
parentMerge pull request #9501 from FernandoS27/yfc-rel-2 (diff)
downloadyuzu-6c7eb81f7d871f5c08a4844471633a67725aae73.tar.gz
yuzu-6c7eb81f7d871f5c08a4844471633a67725aae73.tar.xz
yuzu-6c7eb81f7d871f5c08a4844471633a67725aae73.zip
video_core: Cache GPU internal writes.
Diffstat (limited to '')
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/engine_upload.cpp2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp7
-rw-r--r--src/video_core/engines/maxwell_dma.cpp17
-rw-r--r--src/video_core/invalidation_accumulator.h78
-rw-r--r--src/video_core/memory_manager.cpp62
-rw-r--r--src/video_core/memory_manager.h17
-rw-r--r--src/video_core/rasterizer_interface.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
10 files changed, 185 insertions, 30 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index aa271a377..b7095ae13 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -85,6 +85,7 @@ add_library(video_core STATIC
85 gpu.h 85 gpu.h
86 gpu_thread.cpp 86 gpu_thread.cpp
87 gpu_thread.h 87 gpu_thread.h
88 invalidation_accumulator.h
88 memory_manager.cpp 89 memory_manager.cpp
89 memory_manager.h 90 memory_manager.h
90 precompiled_headers.h 91 precompiled_headers.h
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index cea1dd8b0..7f5a0c29d 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
76 regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, 76 regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
77 x_elements, regs.line_count, regs.dest.BlockHeight(), 77 x_elements, regs.line_count, regs.dest.BlockHeight(),
78 regs.dest.BlockDepth(), regs.line_length_in); 78 regs.dest.BlockDepth(), regs.line_length_in);
79 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); 79 memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
80 } 80 }
81} 81}
82 82
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fbfd1ddd2..97f547789 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
485} 485}
486 486
487void Maxwell3D::ProcessQueryGet() { 487void Maxwell3D::ProcessQueryGet() {
488 // TODO(Subv): Support the other query units.
489 if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
490 LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
491 }
492
493 switch (regs.report_semaphore.query.operation) { 488 switch (regs.report_semaphore.query.operation) {
494 case Regs::ReportSemaphore::Operation::Release: 489 case Regs::ReportSemaphore::Operation::Release:
495 if (regs.report_semaphore.query.short_query != 0) { 490 if (regs.report_semaphore.query.short_query != 0) {
@@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
649 644
650 const GPUVAddr address{buffer_address + regs.const_buffer.offset}; 645 const GPUVAddr address{buffer_address + regs.const_buffer.offset};
651 const size_t copy_size = amount * sizeof(u32); 646 const size_t copy_size = amount * sizeof(u32);
652 memory_manager.WriteBlock(address, start_base, copy_size); 647 memory_manager.WriteBlockCached(address, start_base, copy_size);
653 648
654 // Increment the current buffer position. 649 // Increment the current buffer position.
655 regs.const_buffer.offset += static_cast<u32>(copy_size); 650 regs.const_buffer.offset += static_cast<u32>(copy_size);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 01f70ea9e..7bf08e3e0 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
69 if (launch.multi_line_enable) { 69 if (launch.multi_line_enable) {
70 const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; 70 const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
71 const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; 71 const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
72 72 memory_manager.FlushCaching();
73 if (!is_src_pitch && !is_dst_pitch) { 73 if (!is_src_pitch && !is_dst_pitch) {
74 // If both the source and the destination are in block layout, assert. 74 // If both the source and the destination are in block layout, assert.
75 CopyBlockLinearToBlockLinear(); 75 CopyBlockLinearToBlockLinear();
@@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
104 reinterpret_cast<u8*>(tmp_buffer.data()), 104 reinterpret_cast<u8*>(tmp_buffer.data()),
105 regs.line_length_in * sizeof(u32)); 105 regs.line_length_in * sizeof(u32));
106 } else { 106 } else {
107 memory_manager.FlushCaching();
107 const auto convert_linear_2_blocklinear_addr = [](u64 address) { 108 const auto convert_linear_2_blocklinear_addr = [](u64 address) {
108 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | 109 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
109 ((address & 0x180) >> 1) | ((address & 0x20) << 3); 110 ((address & 0x180) >> 1) | ((address & 0x20) << 3);
@@ -121,7 +122,7 @@ void MaxwellDMA::Launch() {
121 memory_manager.ReadBlockUnsafe( 122 memory_manager.ReadBlockUnsafe(
122 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 123 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
123 tmp_buffer.data(), tmp_buffer.size()); 124 tmp_buffer.data(), tmp_buffer.size());
124 memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), 125 memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
125 tmp_buffer.size()); 126 tmp_buffer.size());
126 } 127 }
127 } else if (is_src_pitch && !is_dst_pitch) { 128 } else if (is_src_pitch && !is_dst_pitch) {
@@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
132 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 133 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
133 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), 134 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
134 tmp_buffer.size()); 135 tmp_buffer.size());
135 memory_manager.WriteBlock( 136 memory_manager.WriteBlockCached(
136 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 137 convert_linear_2_blocklinear_addr(regs.offset_out + offset),
137 tmp_buffer.data(), tmp_buffer.size()); 138 tmp_buffer.data(), tmp_buffer.size());
138 } 139 }
@@ -141,7 +142,7 @@ void MaxwellDMA::Launch() {
141 std::vector<u8> tmp_buffer(regs.line_length_in); 142 std::vector<u8> tmp_buffer(regs.line_length_in);
142 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), 143 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
143 regs.line_length_in); 144 regs.line_length_in);
144 memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), 145 memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
145 regs.line_length_in); 146 regs.line_length_in);
146 } 147 }
147 } 148 }
@@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
204 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 205 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
205 regs.pitch_out); 206 regs.pitch_out);
206 207
207 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 208 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
208} 209}
209 210
210void MaxwellDMA::CopyPitchToBlockLinear() { 211void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
256 dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 257 dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
257 regs.pitch_in); 258 regs.pitch_in);
258 259
259 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 260 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
260} 261}
261 262
262void MaxwellDMA::FastCopyBlockLinearToPitch() { 263void MaxwellDMA::FastCopyBlockLinearToPitch() {
@@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
287 regs.src_params.block_size.height, regs.src_params.block_size.depth, 288 regs.src_params.block_size.height, regs.src_params.block_size.depth,
288 regs.pitch_out); 289 regs.pitch_out);
289 290
290 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 291 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
291} 292}
292 293
293void MaxwellDMA::CopyBlockLinearToBlockLinear() { 294void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
347 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, 348 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
348 dst.block_size.height, dst.block_size.depth, pitch); 349 dst.block_size.height, dst.block_size.depth, pitch);
349 350
350 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 351 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
351} 352}
352 353
353void MaxwellDMA::ReleaseSemaphore() { 354void MaxwellDMA::ReleaseSemaphore() {
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h
new file mode 100644
index 000000000..42420e31c
--- /dev/null
+++ b/src/video_core/invalidation_accumulator.h
@@ -0,0 +1,78 @@
1// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <vector>
7
8#include "common/common_types.h"
9
10namespace VideoCommon {
11
/// Coalesces written GPU-address ranges so that they can be reported in batches.
///
/// Every range passed to Add() is widened to a 32-byte granularity. A range that begins exactly
/// where the pending range ends is merged into it; any other range closes the pending range (it
/// is moved to an internal list) and becomes the new pending range. Callback() reports every
/// closed range followed by the pending one, and Clear() forgets everything.
class InvalidationAccumulator {
public:
    InvalidationAccumulator() = default;
    ~InvalidationAccumulator() = default;

    /// Records that `size` bytes starting at `address` were written.
    /// @param address First written address.
    /// @param size    Number of written bytes; a size of zero records nothing.
    void Add(GPUVAddr address, size_t size) {
        if (size == 0) {
            return;
        }
        // Fast path: the write lies entirely inside the pending range.
        if (address >= start_address && address + size <= last_collection) [[likely]] {
            return;
        }
        // Widen the write to the tracking granularity. Both ends are aligned before the size is
        // derived, so the widened range always covers the original write.
        const GPUVAddr aligned_begin = address & atomicy_mask;
        const GPUVAddr aligned_end = (address + size + atomicy_side_mask) & atomicy_mask;
        const size_t aligned_size = static_cast<size_t>(aligned_end - aligned_begin);

        // Contiguous with the pending range: just extend it.
        if (has_collected && aligned_begin == last_collection) {
            accumulated_size += aligned_size;
            last_collection = aligned_end;
            return;
        }
        // Otherwise close the pending range (if there is one) and start a new one.
        if (has_collected) [[unlikely]] {
            buffer.emplace_back(start_address, accumulated_size);
        }
        start_address = aligned_begin;
        accumulated_size = aligned_size;
        last_collection = aligned_end;
        has_collected = true;
    }

    /// Drops every recorded range, closed or pending.
    void Clear() {
        buffer.clear();
        start_address = 0;
        last_collection = 0;
        accumulated_size = 0;
        has_collected = false;
    }

    /// Returns true if at least one range has been recorded since the last Clear().
    bool AnyAccumulated() const {
        return has_collected;
    }

    /// Invokes `func(address, size)` for every closed range in insertion order, then for the
    /// pending range. Does nothing when no range has been recorded. The accumulator itself is
    /// not modified, so calling this repeatedly reports the same ranges each time.
    template <typename Func>
    void Callback(Func&& func) {
        if (!has_collected) {
            return;
        }
        for (const auto& [address, size] : buffer) {
            func(address, size);
        }
        func(start_address, accumulated_size);
    }

private:
    // Tracking granularity: 1 << 5 = 32 bytes. The masks are typed as addresses so that they can
    // never truncate an address on a platform with a narrower size_t.
    static constexpr size_t atomicy_bits = 5;
    static constexpr GPUVAddr atomicy_size = GPUVAddr{1} << atomicy_bits;
    static constexpr GPUVAddr atomicy_side_mask = atomicy_size - 1;
    static constexpr GPUVAddr atomicy_mask = ~atomicy_side_mask;

    GPUVAddr start_address{};   // Aligned start of the pending range.
    GPUVAddr last_collection{}; // Aligned one-past-the-end address of the pending range.
    size_t accumulated_size{};  // Size of the pending range in bytes.
    bool has_collected{};       // True once a pending range exists.
    // Closed ranges as (start address, size) pairs.
    // NOTE(review): stored as GPUVAddr because Add() receives GPU addresses; assumes GPUVAddr and
    // VAddr are aliases of the same 64-bit unsigned type - confirm in common_types.h.
    std::vector<std::pair<GPUVAddr, size_t>> buffer;
};
77
78} // namespace VideoCommon
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3a5cdeb39..83924475b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -11,6 +11,7 @@
11#include "core/hle/kernel/k_page_table.h" 11#include "core/hle/kernel/k_page_table.h"
12#include "core/hle/kernel/k_process.h" 12#include "core/hle/kernel/k_process.h"
13#include "core/memory.h" 13#include "core/memory.h"
14#include "video_core/invalidation_accumulator.h"
14#include "video_core/memory_manager.h" 15#include "video_core/memory_manager.h"
15#include "video_core/rasterizer_interface.h" 16#include "video_core/rasterizer_interface.h"
16#include "video_core/renderer_base.h" 17#include "video_core/renderer_base.h"
@@ -26,7 +27,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
26 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, 27 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
27 page_bits != big_page_bits ? page_bits : 0}, 28 page_bits != big_page_bits ? page_bits : 0},
28 kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( 29 kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
29 1, std::memory_order_acq_rel)} { 30 1, std::memory_order_acq_rel)},
31 accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
30 address_space_size = 1ULL << address_space_bits; 32 address_space_size = 1ULL << address_space_bits;
31 page_size = 1ULL << page_bits; 33 page_size = 1ULL << page_bits;
32 page_mask = page_size - 1ULL; 34 page_mask = page_size - 1ULL;
@@ -185,15 +187,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
185 if (size == 0) { 187 if (size == 0) {
186 return; 188 return;
187 } 189 }
188 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); 190 GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
189 191
190 for (const auto& [map_addr, map_size] : submapped_ranges) { 192 for (const auto& [map_addr, map_size] : page_stash) {
191 // Flush and invalidate through the GPU interface, to be asynchronous if possible. 193 rasterizer->UnmapMemory(map_addr, map_size);
192 const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
193 ASSERT(cpu_addr);
194
195 rasterizer->UnmapMemory(*cpu_addr, map_size);
196 } 194 }
195 page_stash.clear();
197 196
198 BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); 197 BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
199 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); 198 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
@@ -454,6 +453,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
454 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); 453 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
455} 454}
456 455
456void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
457 std::size_t size) {
458 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
459 accumulator->Add(gpu_dest_addr, size);
460}
461
457void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, 462void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
458 VideoCommon::CacheType which) const { 463 VideoCommon::CacheType which) const {
459 auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, 464 auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
@@ -663,7 +668,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
663std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( 668std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
664 GPUVAddr gpu_addr, std::size_t size) const { 669 GPUVAddr gpu_addr, std::size_t size) const {
665 std::vector<std::pair<GPUVAddr, std::size_t>> result{}; 670 std::vector<std::pair<GPUVAddr, std::size_t>> result{};
666 std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; 671 GetSubmappedRangeImpl<true>(gpu_addr, size, result);
672 return result;
673}
674
675template <bool is_gpu_address>
676void MemoryManager::GetSubmappedRangeImpl(
677 GPUVAddr gpu_addr, std::size_t size,
678 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
679 result) const {
680 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
681 last_segment{};
667 std::optional<VAddr> old_page_addr{}; 682 std::optional<VAddr> old_page_addr{};
668 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, 683 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
669 [[maybe_unused]] std::size_t offset, 684 [[maybe_unused]] std::size_t offset,
@@ -685,8 +700,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
685 } 700 }
686 old_page_addr = {cpu_addr_base + copy_amount}; 701 old_page_addr = {cpu_addr_base + copy_amount};
687 if (!last_segment) { 702 if (!last_segment) {
688 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; 703 if constexpr (is_gpu_address) {
689 last_segment = {new_base_addr, copy_amount}; 704 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
705 last_segment = {new_base_addr, copy_amount};
706 } else {
707 last_segment = {cpu_addr_base, copy_amount};
708 }
690 } else { 709 } else {
691 last_segment->second += copy_amount; 710 last_segment->second += copy_amount;
692 } 711 }
@@ -703,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
703 } 722 }
704 old_page_addr = {cpu_addr_base + copy_amount}; 723 old_page_addr = {cpu_addr_base + copy_amount};
705 if (!last_segment) { 724 if (!last_segment) {
706 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; 725 if constexpr (is_gpu_address) {
707 last_segment = {new_base_addr, copy_amount}; 726 const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
727 last_segment = {new_base_addr, copy_amount};
728 } else {
729 last_segment = {cpu_addr_base, copy_amount};
730 }
708 } else { 731 } else {
709 last_segment->second += copy_amount; 732 last_segment->second += copy_amount;
710 } 733 }
@@ -715,7 +738,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
715 }; 738 };
716 MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); 739 MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
717 split(0, 0, 0); 740 split(0, 0, 0);
718 return result; 741}
742
743void MemoryManager::FlushCaching() {
744 if (!accumulator->AnyAccumulated()) {
745 return;
746 }
747 accumulator->Callback([this](GPUVAddr addr, size_t size) {
748 GetSubmappedRangeImpl<false>(addr, size, page_stash);
749 });
750 rasterizer->InnerInvalidation(page_stash);
751 page_stash.clear();
752 accumulator->Clear();
719} 753}
720 754
721} // namespace Tegra 755} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 828e13439..e6de0d0cb 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -19,6 +19,10 @@ namespace VideoCore {
19class RasterizerInterface; 19class RasterizerInterface;
20} 20}
21 21
22namespace VideoCommon {
23class InvalidationAccumulator;
24}
25
22namespace Core { 26namespace Core {
23class DeviceMemory; 27class DeviceMemory;
24namespace Memory { 28namespace Memory {
@@ -80,6 +84,7 @@ public:
80 */ 84 */
81 void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; 85 void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
82 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); 86 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
87 void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
83 88
84 /** 89 /**
85 * Checks if a gpu region can be simply read with a pointer. 90 * Checks if a gpu region can be simply read with a pointer.
@@ -102,7 +107,7 @@ public:
102 * will be returned; 107 * will be returned;
103 */ 108 */
104 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, 109 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
105 std::size_t size) const; 110 std::size_t size) const;
106 111
107 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, 112 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
108 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); 113 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -129,6 +134,8 @@ public:
129 size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, 134 size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
130 size_t max_size = std::numeric_limits<size_t>::max()) const; 135 size_t max_size = std::numeric_limits<size_t>::max()) const;
131 136
137 void FlushCaching();
138
132private: 139private:
133 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> 140 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
134 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, 141 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
@@ -154,6 +161,12 @@ private:
154 inline bool IsBigPageContinous(size_t big_page_index) const; 161 inline bool IsBigPageContinous(size_t big_page_index) const;
155 inline void SetBigPageContinous(size_t big_page_index, bool value); 162 inline void SetBigPageContinous(size_t big_page_index, bool value);
156 163
164 template <bool is_gpu_address>
165 void GetSubmappedRangeImpl(
166 GPUVAddr gpu_addr, std::size_t size,
167 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
168 result) const;
169
157 Core::System& system; 170 Core::System& system;
158 Core::Memory::Memory& memory; 171 Core::Memory::Memory& memory;
159 Core::DeviceMemory& device_memory; 172 Core::DeviceMemory& device_memory;
@@ -201,10 +214,12 @@ private:
201 Common::VirtualBuffer<u32> big_page_table_cpu; 214 Common::VirtualBuffer<u32> big_page_table_cpu;
202 215
203 std::vector<u64> big_page_continous; 216 std::vector<u64> big_page_continous;
217 std::vector<std::pair<VAddr, std::size_t>> page_stash{};
204 218
205 constexpr static size_t continous_bits = 64; 219 constexpr static size_t continous_bits = 64;
206 220
207 const size_t unique_identifier; 221 const size_t unique_identifier;
222 std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
208 223
209 static std::atomic<size_t> unique_identifier_generator; 224 static std::atomic<size_t> unique_identifier_generator;
210}; 225};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f44c7df50..6b66ad7b6 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
6#include <functional> 6#include <functional>
7#include <optional> 7#include <optional>
8#include <span> 8#include <span>
9#include <utility>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "common/polyfill_thread.h" 11#include "common/polyfill_thread.h"
11#include "video_core/cache_types.h" 12#include "video_core/cache_types.h"
@@ -95,6 +96,12 @@ public:
95 virtual void InvalidateRegion(VAddr addr, u64 size, 96 virtual void InvalidateRegion(VAddr addr, u64 size,
96 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 97 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
97 98
99 virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
100 for (const auto [cpu_addr, size] : sequences) {
101 InvalidateRegion(cpu_addr, size);
102 }
103 }
104
98 /// Notify rasterizer that any caches of the specified region are desync with guest 105 /// Notify rasterizer that any caches of the specified region are desync with guest
99 virtual void OnCPUWrite(VAddr addr, u64 size) = 0; 106 virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
100 107
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 242bf9602..6c4d74564 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
186 186
187 SCOPE_EXIT({ gpu.TickWork(); }); 187 SCOPE_EXIT({ gpu.TickWork(); });
188 FlushWork(); 188 FlushWork();
189 gpu_memory->FlushCaching();
189 190
190 query_cache.UpdateCounters(); 191 query_cache.UpdateCounters();
191 192
@@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
393 394
394void RasterizerVulkan::DispatchCompute() { 395void RasterizerVulkan::DispatchCompute() {
395 FlushWork(); 396 FlushWork();
397 gpu_memory->FlushCaching();
396 398
397 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; 399 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
398 if (!pipeline) { 400 if (!pipeline) {
@@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
481 } 483 }
482} 484}
483 485
486void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
487 {
488 std::scoped_lock lock{texture_cache.mutex};
489 for (const auto [addr, size] : sequences) {
490 texture_cache.WriteMemory(addr, size);
491 }
492 }
493 {
494 std::scoped_lock lock{buffer_cache.mutex};
495 for (const auto [addr, size] : sequences) {
496 buffer_cache.WriteMemory(addr, size);
497 }
498 }
499 {
500 for (const auto [addr, size] : sequences) {
501 query_cache.InvalidateRegion(addr, size);
502 pipeline_cache.InvalidateRegion(addr, size);
503 }
504 }
505}
506
484void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { 507void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
485 if (addr == 0 || size == 0) { 508 if (addr == 0 || size == 0) {
486 return; 509 return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c661e5b19..472cc64d9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -79,6 +79,7 @@ public:
79 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 79 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
80 void InvalidateRegion(VAddr addr, u64 size, 80 void InvalidateRegion(VAddr addr, u64 size,
81 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 81 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
82 void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
82 void OnCPUWrite(VAddr addr, u64 size) override; 83 void OnCPUWrite(VAddr addr, u64 size) override;
83 void InvalidateGPUCache() override; 84 void InvalidateGPUCache() override;
84 void UnmapMemory(VAddr addr, u64 size) override; 85 void UnmapMemory(VAddr addr, u64 size) override;