diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 17 | ||||
| -rw-r--r-- | src/video_core/invalidation_accumulator.h | 78 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 62 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 17 | ||||
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.h | 1 |
10 files changed, 185 insertions, 30 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index aa271a377..b7095ae13 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -85,6 +85,7 @@ add_library(video_core STATIC | |||
| 85 | gpu.h | 85 | gpu.h |
| 86 | gpu_thread.cpp | 86 | gpu_thread.cpp |
| 87 | gpu_thread.h | 87 | gpu_thread.h |
| 88 | invalidation_accumulator.h | ||
| 88 | memory_manager.cpp | 89 | memory_manager.cpp |
| 89 | memory_manager.h | 90 | memory_manager.h |
| 90 | precompiled_headers.h | 91 | precompiled_headers.h |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index cea1dd8b0..7f5a0c29d 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, | 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, |
| 77 | x_elements, regs.line_count, regs.dest.BlockHeight(), | 77 | x_elements, regs.line_count, regs.dest.BlockHeight(), |
| 78 | regs.dest.BlockDepth(), regs.line_length_in); | 78 | regs.dest.BlockDepth(), regs.line_length_in); |
| 79 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | 79 | memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); |
| 80 | } | 80 | } |
| 81 | } | 81 | } |
| 82 | 82 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fbfd1ddd2..97f547789 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 485 | } | 485 | } |
| 486 | 486 | ||
| 487 | void Maxwell3D::ProcessQueryGet() { | 487 | void Maxwell3D::ProcessQueryGet() { |
| 488 | // TODO(Subv): Support the other query units. | ||
| 489 | if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) { | ||
| 490 | LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented"); | ||
| 491 | } | ||
| 492 | |||
| 493 | switch (regs.report_semaphore.query.operation) { | 488 | switch (regs.report_semaphore.query.operation) { |
| 494 | case Regs::ReportSemaphore::Operation::Release: | 489 | case Regs::ReportSemaphore::Operation::Release: |
| 495 | if (regs.report_semaphore.query.short_query != 0) { | 490 | if (regs.report_semaphore.query.short_query != 0) { |
| @@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { | |||
| 649 | 644 | ||
| 650 | const GPUVAddr address{buffer_address + regs.const_buffer.offset}; | 645 | const GPUVAddr address{buffer_address + regs.const_buffer.offset}; |
| 651 | const size_t copy_size = amount * sizeof(u32); | 646 | const size_t copy_size = amount * sizeof(u32); |
| 652 | memory_manager.WriteBlock(address, start_base, copy_size); | 647 | memory_manager.WriteBlockCached(address, start_base, copy_size); |
| 653 | 648 | ||
| 654 | // Increment the current buffer position. | 649 | // Increment the current buffer position. |
| 655 | regs.const_buffer.offset += static_cast<u32>(copy_size); | 650 | regs.const_buffer.offset += static_cast<u32>(copy_size); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01f70ea9e..7bf08e3e0 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -69,7 +69,7 @@ void MaxwellDMA::Launch() { | |||
| 69 | if (launch.multi_line_enable) { | 69 | if (launch.multi_line_enable) { |
| 70 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 70 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| 71 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 71 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| 72 | 72 | memory_manager.FlushCaching(); | |
| 73 | if (!is_src_pitch && !is_dst_pitch) { | 73 | if (!is_src_pitch && !is_dst_pitch) { |
| 74 | // If both the source and the destination are in block layout, assert. | 74 | // If both the source and the destination are in block layout, assert. |
| 75 | CopyBlockLinearToBlockLinear(); | 75 | CopyBlockLinearToBlockLinear(); |
| @@ -104,6 +104,7 @@ void MaxwellDMA::Launch() { | |||
| 104 | reinterpret_cast<u8*>(tmp_buffer.data()), | 104 | reinterpret_cast<u8*>(tmp_buffer.data()), |
| 105 | regs.line_length_in * sizeof(u32)); | 105 | regs.line_length_in * sizeof(u32)); |
| 106 | } else { | 106 | } else { |
| 107 | memory_manager.FlushCaching(); | ||
| 107 | const auto convert_linear_2_blocklinear_addr = [](u64 address) { | 108 | const auto convert_linear_2_blocklinear_addr = [](u64 address) { |
| 108 | return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | | 109 | return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | |
| 109 | ((address & 0x180) >> 1) | ((address & 0x20) << 3); | 110 | ((address & 0x180) >> 1) | ((address & 0x20) << 3); |
| @@ -121,7 +122,7 @@ void MaxwellDMA::Launch() { | |||
| 121 | memory_manager.ReadBlockUnsafe( | 122 | memory_manager.ReadBlockUnsafe( |
| 122 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 123 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 123 | tmp_buffer.data(), tmp_buffer.size()); | 124 | tmp_buffer.data(), tmp_buffer.size()); |
| 124 | memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), | 125 | memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), |
| 125 | tmp_buffer.size()); | 126 | tmp_buffer.size()); |
| 126 | } | 127 | } |
| 127 | } else if (is_src_pitch && !is_dst_pitch) { | 128 | } else if (is_src_pitch && !is_dst_pitch) { |
| @@ -132,7 +133,7 @@ void MaxwellDMA::Launch() { | |||
| 132 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 133 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 133 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), | 134 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), |
| 134 | tmp_buffer.size()); | 135 | tmp_buffer.size()); |
| 135 | memory_manager.WriteBlock( | 136 | memory_manager.WriteBlockCached( |
| 136 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), | 137 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), |
| 137 | tmp_buffer.data(), tmp_buffer.size()); | 138 | tmp_buffer.data(), tmp_buffer.size()); |
| 138 | } | 139 | } |
| @@ -141,7 +142,7 @@ void MaxwellDMA::Launch() { | |||
| 141 | std::vector<u8> tmp_buffer(regs.line_length_in); | 142 | std::vector<u8> tmp_buffer(regs.line_length_in); |
| 142 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), | 143 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), |
| 143 | regs.line_length_in); | 144 | regs.line_length_in); |
| 144 | memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), | 145 | memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), |
| 145 | regs.line_length_in); | 146 | regs.line_length_in); |
| 146 | } | 147 | } |
| 147 | } | 148 | } |
| @@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 204 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 205 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 205 | regs.pitch_out); | 206 | regs.pitch_out); |
| 206 | 207 | ||
| 207 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 208 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 208 | } | 209 | } |
| 209 | 210 | ||
| 210 | void MaxwellDMA::CopyPitchToBlockLinear() { | 211 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| @@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 256 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 257 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 257 | regs.pitch_in); | 258 | regs.pitch_in); |
| 258 | 259 | ||
| 259 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 260 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 260 | } | 261 | } |
| 261 | 262 | ||
| 262 | void MaxwellDMA::FastCopyBlockLinearToPitch() { | 263 | void MaxwellDMA::FastCopyBlockLinearToPitch() { |
| @@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 287 | regs.src_params.block_size.height, regs.src_params.block_size.depth, | 288 | regs.src_params.block_size.height, regs.src_params.block_size.depth, |
| 288 | regs.pitch_out); | 289 | regs.pitch_out); |
| 289 | 290 | ||
| 290 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 291 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 291 | } | 292 | } |
| 292 | 293 | ||
| 293 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | 294 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { |
| @@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 347 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | 348 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, |
| 348 | dst.block_size.height, dst.block_size.depth, pitch); | 349 | dst.block_size.height, dst.block_size.depth, pitch); |
| 349 | 350 | ||
| 350 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 351 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 351 | } | 352 | } |
| 352 | 353 | ||
| 353 | void MaxwellDMA::ReleaseSemaphore() { | 354 | void MaxwellDMA::ReleaseSemaphore() { |
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h new file mode 100644 index 000000000..42420e31c --- /dev/null +++ b/src/video_core/invalidation_accumulator.h | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | |||
| 10 | namespace VideoCommon { | ||
| 11 | |||
| 12 | class InvalidationAccumulator { | ||
| 13 | public: | ||
| 14 | InvalidationAccumulator() = default; | ||
| 15 | ~InvalidationAccumulator() = default; | ||
| 16 | |||
| 17 | void Add(GPUVAddr address, size_t size) { | ||
| 18 | const auto reset_values = [&]() { | ||
| 19 | if (has_collected) { | ||
| 20 | buffer.emplace_back(start_address, accumulated_size); | ||
| 21 | } | ||
| 22 | start_address = address; | ||
| 23 | accumulated_size = size; | ||
| 24 | last_collection = start_address + size; | ||
| 25 | }; | ||
| 26 | if (address >= start_address && address + size <= last_collection) [[likely]] { | ||
| 27 | return; | ||
| 28 | } | ||
| 29 | size = (address + size + atomicy_side_mask) & atomicy_mask - address; | ||
| 30 | address = address & atomicy_mask; | ||
| 31 | if (!has_collected) [[unlikely]] { | ||
| 32 | reset_values(); | ||
| 33 | has_collected = true; | ||
| 34 | return; | ||
| 35 | } | ||
| 36 | if (address != last_collection) [[unlikely]] { | ||
| 37 | reset_values(); | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | accumulated_size += size; | ||
| 41 | last_collection += size; | ||
| 42 | } | ||
| 43 | |||
| 44 | void Clear() { | ||
| 45 | buffer.clear(); | ||
| 46 | start_address = 0; | ||
| 47 | last_collection = 0; | ||
| 48 | has_collected = false; | ||
| 49 | } | ||
| 50 | |||
| 51 | bool AnyAccumulated() const { | ||
| 52 | return has_collected; | ||
| 53 | } | ||
| 54 | |||
| 55 | template <typename Func> | ||
| 56 | void Callback(Func&& func) { | ||
| 57 | if (!has_collected) { | ||
| 58 | return; | ||
| 59 | } | ||
| 60 | buffer.emplace_back(start_address, accumulated_size); | ||
| 61 | for (auto& [address, size] : buffer) { | ||
| 62 | func(address, size); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | private: | ||
| 67 | static constexpr size_t atomicy_bits = 5; | ||
| 68 | static constexpr size_t atomicy_size = 1ULL << atomicy_bits; | ||
| 69 | static constexpr size_t atomicy_side_mask = atomicy_size - 1; | ||
| 70 | static constexpr size_t atomicy_mask = ~atomicy_side_mask; | ||
| 71 | GPUVAddr start_address{}; | ||
| 72 | GPUVAddr last_collection{}; | ||
| 73 | size_t accumulated_size{}; | ||
| 74 | bool has_collected{}; | ||
| 75 | std::vector<std::pair<VAddr, size_t>> buffer; | ||
| 76 | }; | ||
| 77 | |||
| 78 | } // namespace VideoCommon | ||
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3a5cdeb39..83924475b 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "core/hle/kernel/k_page_table.h" | 11 | #include "core/hle/kernel/k_page_table.h" |
| 12 | #include "core/hle/kernel/k_process.h" | 12 | #include "core/hle/kernel/k_process.h" |
| 13 | #include "core/memory.h" | 13 | #include "core/memory.h" |
| 14 | #include "video_core/invalidation_accumulator.h" | ||
| 14 | #include "video_core/memory_manager.h" | 15 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/rasterizer_interface.h" | 16 | #include "video_core/rasterizer_interface.h" |
| 16 | #include "video_core/renderer_base.h" | 17 | #include "video_core/renderer_base.h" |
| @@ -26,7 +27,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 26 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | 27 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |
| 27 | page_bits != big_page_bits ? page_bits : 0}, | 28 | page_bits != big_page_bits ? page_bits : 0}, |
| 28 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( | 29 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |
| 29 | 1, std::memory_order_acq_rel)} { | 30 | 1, std::memory_order_acq_rel)}, |
| 31 | accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { | ||
| 30 | address_space_size = 1ULL << address_space_bits; | 32 | address_space_size = 1ULL << address_space_bits; |
| 31 | page_size = 1ULL << page_bits; | 33 | page_size = 1ULL << page_bits; |
| 32 | page_mask = page_size - 1ULL; | 34 | page_mask = page_size - 1ULL; |
| @@ -185,15 +187,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 185 | if (size == 0) { | 187 | if (size == 0) { |
| 186 | return; | 188 | return; |
| 187 | } | 189 | } |
| 188 | const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); | 190 | GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash); |
| 189 | 191 | ||
| 190 | for (const auto& [map_addr, map_size] : submapped_ranges) { | 192 | for (const auto& [map_addr, map_size] : page_stash) { |
| 191 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | 193 | rasterizer->UnmapMemory(map_addr, map_size); |
| 192 | const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr); | ||
| 193 | ASSERT(cpu_addr); | ||
| 194 | |||
| 195 | rasterizer->UnmapMemory(*cpu_addr, map_size); | ||
| 196 | } | 194 | } |
| 195 | page_stash.clear(); | ||
| 197 | 196 | ||
| 198 | BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | 197 | BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
| 199 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | 198 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
| @@ -454,6 +453,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf | |||
| 454 | WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); | 453 | WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); |
| 455 | } | 454 | } |
| 456 | 455 | ||
| 456 | void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, | ||
| 457 | std::size_t size) { | ||
| 458 | WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); | ||
| 459 | accumulator->Add(gpu_dest_addr, size); | ||
| 460 | } | ||
| 461 | |||
| 457 | void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | 462 | void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, |
| 458 | VideoCommon::CacheType which) const { | 463 | VideoCommon::CacheType which) const { |
| 459 | auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, | 464 | auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, |
| @@ -663,7 +668,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons | |||
| 663 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | 668 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
| 664 | GPUVAddr gpu_addr, std::size_t size) const { | 669 | GPUVAddr gpu_addr, std::size_t size) const { |
| 665 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; | 670 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; |
| 666 | std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; | 671 | GetSubmappedRangeImpl<true>(gpu_addr, size, result); |
| 672 | return result; | ||
| 673 | } | ||
| 674 | |||
| 675 | template <bool is_gpu_address> | ||
| 676 | void MemoryManager::GetSubmappedRangeImpl( | ||
| 677 | GPUVAddr gpu_addr, std::size_t size, | ||
| 678 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | ||
| 679 | result) const { | ||
| 680 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | ||
| 681 | last_segment{}; | ||
| 667 | std::optional<VAddr> old_page_addr{}; | 682 | std::optional<VAddr> old_page_addr{}; |
| 668 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | 683 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |
| 669 | [[maybe_unused]] std::size_t offset, | 684 | [[maybe_unused]] std::size_t offset, |
| @@ -685,8 +700,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 685 | } | 700 | } |
| 686 | old_page_addr = {cpu_addr_base + copy_amount}; | 701 | old_page_addr = {cpu_addr_base + copy_amount}; |
| 687 | if (!last_segment) { | 702 | if (!last_segment) { |
| 688 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | 703 | if constexpr (is_gpu_address) { |
| 689 | last_segment = {new_base_addr, copy_amount}; | 704 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |
| 705 | last_segment = {new_base_addr, copy_amount}; | ||
| 706 | } else { | ||
| 707 | last_segment = {cpu_addr_base, copy_amount}; | ||
| 708 | } | ||
| 690 | } else { | 709 | } else { |
| 691 | last_segment->second += copy_amount; | 710 | last_segment->second += copy_amount; |
| 692 | } | 711 | } |
| @@ -703,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 703 | } | 722 | } |
| 704 | old_page_addr = {cpu_addr_base + copy_amount}; | 723 | old_page_addr = {cpu_addr_base + copy_amount}; |
| 705 | if (!last_segment) { | 724 | if (!last_segment) { |
| 706 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | 725 | if constexpr (is_gpu_address) { |
| 707 | last_segment = {new_base_addr, copy_amount}; | 726 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |
| 727 | last_segment = {new_base_addr, copy_amount}; | ||
| 728 | } else { | ||
| 729 | last_segment = {cpu_addr_base, copy_amount}; | ||
| 730 | } | ||
| 708 | } else { | 731 | } else { |
| 709 | last_segment->second += copy_amount; | 732 | last_segment->second += copy_amount; |
| 710 | } | 733 | } |
| @@ -715,7 +738,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 715 | }; | 738 | }; |
| 716 | MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); | 739 | MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); |
| 717 | split(0, 0, 0); | 740 | split(0, 0, 0); |
| 718 | return result; | 741 | } |
| 742 | |||
| 743 | void MemoryManager::FlushCaching() { | ||
| 744 | if (!accumulator->AnyAccumulated()) { | ||
| 745 | return; | ||
| 746 | } | ||
| 747 | accumulator->Callback([this](GPUVAddr addr, size_t size) { | ||
| 748 | GetSubmappedRangeImpl<false>(addr, size, page_stash); | ||
| 749 | }); | ||
| 750 | rasterizer->InnerInvalidation(page_stash); | ||
| 751 | page_stash.clear(); | ||
| 752 | accumulator->Clear(); | ||
| 719 | } | 753 | } |
| 720 | 754 | ||
| 721 | } // namespace Tegra | 755 | } // namespace Tegra |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 828e13439..e6de0d0cb 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -19,6 +19,10 @@ namespace VideoCore { | |||
| 19 | class RasterizerInterface; | 19 | class RasterizerInterface; |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | namespace VideoCommon { | ||
| 23 | class InvalidationAccumulator; | ||
| 24 | } | ||
| 25 | |||
| 22 | namespace Core { | 26 | namespace Core { |
| 23 | class DeviceMemory; | 27 | class DeviceMemory; |
| 24 | namespace Memory { | 28 | namespace Memory { |
| @@ -80,6 +84,7 @@ public: | |||
| 80 | */ | 84 | */ |
| 81 | void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; | 85 | void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; |
| 82 | void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); | 86 | void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); |
| 87 | void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); | ||
| 83 | 88 | ||
| 84 | /** | 89 | /** |
| 85 | * Checks if a gpu region can be simply read with a pointer. | 90 | * Checks if a gpu region can be simply read with a pointer. |
| @@ -102,7 +107,7 @@ public: | |||
| 102 | * will be returned; | 107 | * will be returned; |
| 103 | */ | 108 | */ |
| 104 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, | 109 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, |
| 105 | std::size_t size) const; | 110 | std::size_t size) const; |
| 106 | 111 | ||
| 107 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, | 112 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, |
| 108 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | 113 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); |
| @@ -129,6 +134,8 @@ public: | |||
| 129 | size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, | 134 | size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, |
| 130 | size_t max_size = std::numeric_limits<size_t>::max()) const; | 135 | size_t max_size = std::numeric_limits<size_t>::max()) const; |
| 131 | 136 | ||
| 137 | void FlushCaching(); | ||
| 138 | |||
| 132 | private: | 139 | private: |
| 133 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | 140 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |
| 134 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, | 141 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, |
| @@ -154,6 +161,12 @@ private: | |||
| 154 | inline bool IsBigPageContinous(size_t big_page_index) const; | 161 | inline bool IsBigPageContinous(size_t big_page_index) const; |
| 155 | inline void SetBigPageContinous(size_t big_page_index, bool value); | 162 | inline void SetBigPageContinous(size_t big_page_index, bool value); |
| 156 | 163 | ||
| 164 | template <bool is_gpu_address> | ||
| 165 | void GetSubmappedRangeImpl( | ||
| 166 | GPUVAddr gpu_addr, std::size_t size, | ||
| 167 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | ||
| 168 | result) const; | ||
| 169 | |||
| 157 | Core::System& system; | 170 | Core::System& system; |
| 158 | Core::Memory::Memory& memory; | 171 | Core::Memory::Memory& memory; |
| 159 | Core::DeviceMemory& device_memory; | 172 | Core::DeviceMemory& device_memory; |
| @@ -201,10 +214,12 @@ private: | |||
| 201 | Common::VirtualBuffer<u32> big_page_table_cpu; | 214 | Common::VirtualBuffer<u32> big_page_table_cpu; |
| 202 | 215 | ||
| 203 | std::vector<u64> big_page_continous; | 216 | std::vector<u64> big_page_continous; |
| 217 | std::vector<std::pair<VAddr, std::size_t>> page_stash{}; | ||
| 204 | 218 | ||
| 205 | constexpr static size_t continous_bits = 64; | 219 | constexpr static size_t continous_bits = 64; |
| 206 | 220 | ||
| 207 | const size_t unique_identifier; | 221 | const size_t unique_identifier; |
| 222 | std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; | ||
| 208 | 223 | ||
| 209 | static std::atomic<size_t> unique_identifier_generator; | 224 | static std::atomic<size_t> unique_identifier_generator; |
| 210 | }; | 225 | }; |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f44c7df50..6b66ad7b6 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <functional> | 6 | #include <functional> |
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | #include <utility> | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "common/polyfill_thread.h" | 11 | #include "common/polyfill_thread.h" |
| 11 | #include "video_core/cache_types.h" | 12 | #include "video_core/cache_types.h" |
| @@ -95,6 +96,12 @@ public: | |||
| 95 | virtual void InvalidateRegion(VAddr addr, u64 size, | 96 | virtual void InvalidateRegion(VAddr addr, u64 size, |
| 96 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 97 | 98 | ||
| 99 | virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||
| 100 | for (const auto [cpu_addr, size] : sequences) { | ||
| 101 | InvalidateRegion(cpu_addr, size); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 98 | /// Notify rasterizer that any caches of the specified region are desync with guest | 105 | /// Notify rasterizer that any caches of the specified region are desync with guest |
| 99 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | 106 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; |
| 100 | 107 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 242bf9602..6c4d74564 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 186 | 186 | ||
| 187 | SCOPE_EXIT({ gpu.TickWork(); }); | 187 | SCOPE_EXIT({ gpu.TickWork(); }); |
| 188 | FlushWork(); | 188 | FlushWork(); |
| 189 | gpu_memory->FlushCaching(); | ||
| 189 | 190 | ||
| 190 | query_cache.UpdateCounters(); | 191 | query_cache.UpdateCounters(); |
| 191 | 192 | ||
| @@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 393 | 394 | ||
| 394 | void RasterizerVulkan::DispatchCompute() { | 395 | void RasterizerVulkan::DispatchCompute() { |
| 395 | FlushWork(); | 396 | FlushWork(); |
| 397 | gpu_memory->FlushCaching(); | ||
| 396 | 398 | ||
| 397 | ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; | 399 | ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; |
| 398 | if (!pipeline) { | 400 | if (!pipeline) { |
| @@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 481 | } | 483 | } |
| 482 | } | 484 | } |
| 483 | 485 | ||
| 486 | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||
| 487 | { | ||
| 488 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 489 | for (const auto [addr, size] : sequences) { | ||
| 490 | texture_cache.WriteMemory(addr, size); | ||
| 491 | } | ||
| 492 | } | ||
| 493 | { | ||
| 494 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 495 | for (const auto [addr, size] : sequences) { | ||
| 496 | buffer_cache.WriteMemory(addr, size); | ||
| 497 | } | ||
| 498 | } | ||
| 499 | { | ||
| 500 | for (const auto [addr, size] : sequences) { | ||
| 501 | query_cache.InvalidateRegion(addr, size); | ||
| 502 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 503 | } | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 484 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | 507 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { |
| 485 | if (addr == 0 || size == 0) { | 508 | if (addr == 0 || size == 0) { |
| 486 | return; | 509 | return; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c661e5b19..472cc64d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -79,6 +79,7 @@ public: | |||
| 79 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 79 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 80 | void InvalidateRegion(VAddr addr, u64 size, | 80 | void InvalidateRegion(VAddr addr, u64 size, |
| 81 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 81 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 82 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | ||
| 82 | void OnCPUWrite(VAddr addr, u64 size) override; | 83 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 83 | void InvalidateGPUCache() override; | 84 | void InvalidateGPUCache() override; |
| 84 | void UnmapMemory(VAddr addr, u64 size) override; | 85 | void UnmapMemory(VAddr addr, u64 size) override; |