diff options
| author | 2023-07-10 18:54:19 -0700 | |
|---|---|---|
| committer | 2023-07-10 18:54:19 -0700 | |
| commit | ce7c418e0cc05d92c18ad69c7cb37fecfa71b037 (patch) | |
| tree | ea1852111c1b3c3c340608ae518fc8711a4fcfe3 /src/video_core | |
| parent | Merge pull request #11050 from SuperSamus/sdl-button-labels (diff) | |
| parent | Fix ScratchBuffer moves (diff) | |
| download | yuzu-ce7c418e0cc05d92c18ad69c7cb37fecfa71b037.tar.gz yuzu-ce7c418e0cc05d92c18ad69c7cb37fecfa71b037.tar.xz yuzu-ce7c418e0cc05d92c18ad69c7cb37fecfa71b037.zip | |
Merge pull request #10996 from Kelebek1/readblock_optimisation
Use spans over guest memory where possible instead of copying data
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 7 | ||||
| -rw-r--r-- | src/video_core/dma_pusher.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/engines/engine_upload.cpp | 28 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 87 | ||||
| -rw-r--r-- | src/video_core/engines/sw_blitter/blitter.cpp | 29 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 30 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 18 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 24 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/texture_cache/util.h | 3 |
12 files changed, 144 insertions, 139 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b5ed3380f..6ed4b78f2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -234,9 +234,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 234 | if (has_new_downloads) { | 234 | if (has_new_downloads) { |
| 235 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 235 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 236 | } | 236 | } |
| 237 | tmp_buffer.resize_destructive(amount); | 237 | |
| 238 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | 238 | Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( |
| 239 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); | 239 | cpu_memory, *cpu_src_address, amount, &tmp_buffer); |
| 240 | tmp.SetAddressAndSize(*cpu_dest_address, amount); | ||
| 240 | return true; | 241 | return true; |
| 241 | } | 242 | } |
| 242 | 243 | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 551929824..9f1b340a9 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include "common/microprofile.h" | 5 | #include "common/microprofile.h" |
| 6 | #include "common/settings.h" | 6 | #include "common/settings.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/memory.h" | ||
| 8 | #include "video_core/dma_pusher.h" | 9 | #include "video_core/dma_pusher.h" |
| 9 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 10 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| @@ -12,6 +13,8 @@ | |||
| 12 | 13 | ||
| 13 | namespace Tegra { | 14 | namespace Tegra { |
| 14 | 15 | ||
| 16 | constexpr u32 MacroRegistersStart = 0xE00; | ||
| 17 | |||
| 15 | DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, | 18 | DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, |
| 16 | Control::ChannelState& channel_state_) | 19 | Control::ChannelState& channel_state_) |
| 17 | : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, | 20 | : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, |
| @@ -74,25 +77,16 @@ bool DmaPusher::Step() { | |||
| 74 | } | 77 | } |
| 75 | 78 | ||
| 76 | // Push buffer non-empty, read a word | 79 | // Push buffer non-empty, read a word |
| 77 | command_headers.resize_destructive(command_list_header.size); | 80 | if (dma_state.method >= MacroRegistersStart) { |
| 78 | constexpr u32 MacroRegistersStart = 0xE00; | ||
| 79 | if (dma_state.method < MacroRegistersStart) { | ||
| 80 | if (Settings::IsGPULevelHigh()) { | ||
| 81 | memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), | ||
| 82 | command_list_header.size * sizeof(u32)); | ||
| 83 | } else { | ||
| 84 | memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), | ||
| 85 | command_list_header.size * sizeof(u32)); | ||
| 86 | } | ||
| 87 | } else { | ||
| 88 | const size_t copy_size = command_list_header.size * sizeof(u32); | ||
| 89 | if (subchannels[dma_state.subchannel]) { | 81 | if (subchannels[dma_state.subchannel]) { |
| 90 | subchannels[dma_state.subchannel]->current_dirty = | 82 | subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty( |
| 91 | memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); | 83 | dma_state.dma_get, command_list_header.size * sizeof(u32)); |
| 92 | } | 84 | } |
| 93 | memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); | ||
| 94 | } | 85 | } |
| 95 | ProcessCommands(command_headers); | 86 | Core::Memory::GpuGuestMemory<Tegra::CommandHeader, |
| 87 | Core::Memory::GuestMemoryFlags::UnsafeRead> | ||
| 88 | headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); | ||
| 89 | ProcessCommands(headers); | ||
| 96 | } | 90 | } |
| 97 | 91 | ||
| 98 | return true; | 92 | return true; |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 7f5a0c29d..bc64d4486 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include "common/algorithm.h" | 6 | #include "common/algorithm.h" |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "core/memory.h" | ||
| 8 | #include "video_core/engines/engine_upload.h" | 9 | #include "video_core/engines/engine_upload.h" |
| 9 | #include "video_core/memory_manager.h" | 10 | #include "video_core/memory_manager.h" |
| 10 | #include "video_core/rasterizer_interface.h" | 11 | #include "video_core/rasterizer_interface.h" |
| @@ -46,15 +47,11 @@ void State::ProcessData(const u32* data, size_t num_data) { | |||
| 46 | void State::ProcessData(std::span<const u8> read_buffer) { | 47 | void State::ProcessData(std::span<const u8> read_buffer) { |
| 47 | const GPUVAddr address{regs.dest.Address()}; | 48 | const GPUVAddr address{regs.dest.Address()}; |
| 48 | if (is_linear) { | 49 | if (is_linear) { |
| 49 | if (regs.line_count == 1) { | 50 | for (size_t line = 0; line < regs.line_count; ++line) { |
| 50 | rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); | 51 | const GPUVAddr dest_line = address + line * regs.dest.pitch; |
| 51 | } else { | 52 | std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, |
| 52 | for (size_t line = 0; line < regs.line_count; ++line) { | 53 | regs.line_length_in); |
| 53 | const GPUVAddr dest_line = address + line * regs.dest.pitch; | 54 | rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); |
| 54 | std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, | ||
| 55 | regs.line_length_in); | ||
| 56 | rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); | ||
| 57 | } | ||
| 58 | } | 55 | } |
| 59 | } else { | 56 | } else { |
| 60 | u32 width = regs.dest.width; | 57 | u32 width = regs.dest.width; |
| @@ -70,13 +67,14 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 70 | const std::size_t dst_size = Tegra::Texture::CalculateSize( | 67 | const std::size_t dst_size = Tegra::Texture::CalculateSize( |
| 71 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, | 68 | true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, |
| 72 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); | 69 | regs.dest.BlockHeight(), regs.dest.BlockDepth()); |
| 73 | tmp_buffer.resize_destructive(dst_size); | 70 | |
| 74 | memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | 71 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 75 | Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, | 72 | tmp(memory_manager, address, dst_size, &tmp_buffer); |
| 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, | 73 | |
| 77 | x_elements, regs.line_count, regs.dest.BlockHeight(), | 74 | Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, |
| 75 | regs.dest.depth, x_offset, regs.dest.y, x_elements, | ||
| 76 | regs.line_count, regs.dest.BlockHeight(), | ||
| 78 | regs.dest.BlockDepth(), regs.line_length_in); | 77 | regs.dest.BlockDepth(), regs.line_length_in); |
| 79 | memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); | ||
| 80 | } | 78 | } |
| 81 | } | 79 | } |
| 82 | 80 | ||
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 601095f03..a38d9528a 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -84,7 +84,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { | |||
| 84 | 84 | ||
| 85 | Texture::TICEntry tic_entry; | 85 | Texture::TICEntry tic_entry; |
| 86 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 86 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 87 | |||
| 88 | return tic_entry; | 87 | return tic_entry; |
| 89 | } | 88 | } |
| 90 | 89 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 62d70e9f3..c3696096d 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/settings.h" | 9 | #include "common/settings.h" |
| 10 | #include "core/core.h" | 10 | #include "core/core.h" |
| 11 | #include "core/core_timing.h" | 11 | #include "core/core_timing.h" |
| 12 | #include "core/memory.h" | ||
| 12 | #include "video_core/dirty_flags.h" | 13 | #include "video_core/dirty_flags.h" |
| 13 | #include "video_core/engines/draw_manager.h" | 14 | #include "video_core/engines/draw_manager.h" |
| 14 | #include "video_core/engines/maxwell_3d.h" | 15 | #include "video_core/engines/maxwell_3d.h" |
| @@ -679,17 +680,14 @@ void Maxwell3D::ProcessCBData(u32 value) { | |||
| 679 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { | 680 | Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { |
| 680 | const GPUVAddr tic_address_gpu{regs.tex_header.Address() + | 681 | const GPUVAddr tic_address_gpu{regs.tex_header.Address() + |
| 681 | tic_index * sizeof(Texture::TICEntry)}; | 682 | tic_index * sizeof(Texture::TICEntry)}; |
| 682 | |||
| 683 | Texture::TICEntry tic_entry; | 683 | Texture::TICEntry tic_entry; |
| 684 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); | 684 | memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); |
| 685 | |||
| 686 | return tic_entry; | 685 | return tic_entry; |
| 687 | } | 686 | } |
| 688 | 687 | ||
| 689 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { | 688 | Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { |
| 690 | const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + | 689 | const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + |
| 691 | tsc_index * sizeof(Texture::TSCEntry)}; | 690 | tsc_index * sizeof(Texture::TSCEntry)}; |
| 692 | |||
| 693 | Texture::TSCEntry tsc_entry; | 691 | Texture::TSCEntry tsc_entry; |
| 694 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); | 692 | memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); |
| 695 | return tsc_entry; | 693 | return tsc_entry; |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index f8598fd98..cd8e24b0b 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "common/microprofile.h" | 7 | #include "common/microprofile.h" |
| 8 | #include "common/settings.h" | 8 | #include "common/settings.h" |
| 9 | #include "core/core.h" | 9 | #include "core/core.h" |
| 10 | #include "core/memory.h" | ||
| 10 | #include "video_core/engines/maxwell_3d.h" | 11 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/maxwell_dma.h" | 12 | #include "video_core/engines/maxwell_dma.h" |
| 12 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| @@ -130,11 +131,12 @@ void MaxwellDMA::Launch() { | |||
| 130 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 131 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 131 | read_buffer.resize_destructive(16); | 132 | read_buffer.resize_destructive(16); |
| 132 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 133 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 133 | memory_manager.ReadBlock( | 134 | Core::Memory::GpuGuestMemoryScoped< |
| 134 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 135 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 135 | read_buffer.data(), read_buffer.size()); | 136 | tmp_write_buffer(memory_manager, |
| 136 | memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), | 137 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 137 | read_buffer.size()); | 138 | 16, &read_buffer); |
| 139 | tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16); | ||
| 138 | } | 140 | } |
| 139 | } else if (is_src_pitch && !is_dst_pitch) { | 141 | } else if (is_src_pitch && !is_dst_pitch) { |
| 140 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 142 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| @@ -142,20 +144,19 @@ void MaxwellDMA::Launch() { | |||
| 142 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 144 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 143 | read_buffer.resize_destructive(16); | 145 | read_buffer.resize_destructive(16); |
| 144 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 146 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 145 | memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), | 147 | Core::Memory::GpuGuestMemoryScoped< |
| 146 | read_buffer.size()); | 148 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 147 | memory_manager.WriteBlockCached( | 149 | tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); |
| 148 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), | 150 | tmp_write_buffer.SetAddressAndSize( |
| 149 | read_buffer.data(), read_buffer.size()); | 151 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); |
| 150 | } | 152 | } |
| 151 | } else { | 153 | } else { |
| 152 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | 154 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |
| 153 | read_buffer.resize_destructive(regs.line_length_in); | 155 | Core::Memory::GpuGuestMemoryScoped< |
| 154 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), | 156 | u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 155 | regs.line_length_in, | 157 | tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, |
| 156 | VideoCommon::CacheType::NoBufferCache); | 158 | &read_buffer); |
| 157 | memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), | 159 | tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); |
| 158 | regs.line_length_in); | ||
| 159 | } | 160 | } |
| 160 | } | 161 | } |
| 161 | } | 162 | } |
| @@ -222,17 +223,15 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 222 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 223 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 223 | 224 | ||
| 224 | const size_t dst_size = dst_operand.pitch * regs.line_count; | 225 | const size_t dst_size = dst_operand.pitch * regs.line_count; |
| 225 | read_buffer.resize_destructive(src_size); | ||
| 226 | write_buffer.resize_destructive(dst_size); | ||
| 227 | 226 | ||
| 228 | memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); | 227 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 229 | memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size); | 228 | memory_manager, src_operand.address, src_size, &read_buffer); |
| 229 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||
| 230 | tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); | ||
| 230 | 231 | ||
| 231 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | 232 | UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, |
| 232 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 233 | x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, |
| 233 | dst_operand.pitch); | 234 | block_depth, dst_operand.pitch); |
| 234 | |||
| 235 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||
| 236 | } | 235 | } |
| 237 | 236 | ||
| 238 | void MaxwellDMA::CopyPitchToBlockLinear() { | 237 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| @@ -287,18 +286,17 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 287 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 286 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 288 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; | 287 | const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; |
| 289 | 288 | ||
| 290 | read_buffer.resize_destructive(src_size); | 289 | GPUVAddr src_addr = regs.offset_in; |
| 291 | write_buffer.resize_destructive(dst_size); | 290 | GPUVAddr dst_addr = regs.offset_out; |
| 292 | | 291 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 293 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 292 | memory_manager, src_addr, src_size, &read_buffer); |
| 294 | memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); | 293 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> |
| 295 | | 294 | tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); |
| 296 | // If the input is linear and the output is tiled, swizzle the input and copy it over. | 295 | |
| 297 | SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | 296 | // If the input is linear and the output is tiled, swizzle the input and copy it over. |
| 298 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 297 | SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, |
| 299 | regs.pitch_in); | 298 | x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height, |
| 300 | | 299 | block_depth, regs.pitch_in); |
| 301 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||
| 302 | } | 300 | } |
| 303 | 301 | ||
| 304 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | 302 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { |
| @@ -342,23 +340,20 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 342 | const u32 pitch = x_elements * bytes_per_pixel; | 340 | const u32 pitch = x_elements * bytes_per_pixel; |
| 343 | const size_t mid_buffer_size = pitch * regs.line_count; | 341 | const size_t mid_buffer_size = pitch * regs.line_count; |
| 344 | 342 | ||
| 345 | read_buffer.resize_destructive(src_size); | ||
| 346 | write_buffer.resize_destructive(dst_size); | ||
| 347 | |||
| 348 | intermediate_buffer.resize_destructive(mid_buffer_size); | 343 | intermediate_buffer.resize_destructive(mid_buffer_size); |
| 349 | 344 | ||
| 350 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | 345 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( |
| 351 | memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | 346 | memory_manager, regs.offset_in, src_size, &read_buffer); |
| 347 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> | ||
| 348 | tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); | ||
| 352 | 349 | ||
| 353 | UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, | 350 | UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, |
| 354 | src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, | 351 | src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, |
| 355 | src.block_size.height, src.block_size.depth, pitch); | 352 | src.block_size.height, src.block_size.depth, pitch); |
| 356 | 353 | ||
| 357 | SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, | 354 | SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, |
| 358 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | 355 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, |
| 359 | dst.block_size.height, dst.block_size.depth, pitch); | 356 | dst.block_size.height, dst.block_size.depth, pitch); |
| 360 | |||
| 361 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||
| 362 | } | 357 | } |
| 363 | 358 | ||
| 364 | void MaxwellDMA::ReleaseSemaphore() { | 359 | void MaxwellDMA::ReleaseSemaphore() { |
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index ff88cd03d..3a599f466 100644 --- a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp | |||
| @@ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 159 | const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); | 159 | const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); |
| 160 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); | 160 | const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); |
| 161 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); | 161 | const size_t src_size = get_surface_size(src, src_bytes_per_pixel); |
| 162 | impl->tmp_buffer.resize_destructive(src_size); | ||
| 163 | memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); | ||
| 164 | 162 | ||
| 165 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | 163 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( |
| 164 | memory_manager, src.Address(), src_size, &impl->tmp_buffer); | ||
| 166 | 165 | ||
| 166 | const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; | ||
| 167 | const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; | 167 | const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; |
| 168 | 168 | ||
| 169 | impl->src_buffer.resize_destructive(src_copy_size); | 169 | impl->src_buffer.resize_destructive(src_copy_size); |
| @@ -200,12 +200,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 200 | 200 | ||
| 201 | impl->dst_buffer.resize_destructive(dst_copy_size); | 201 | impl->dst_buffer.resize_destructive(dst_copy_size); |
| 202 | if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { | 202 | if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { |
| 203 | UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, | 203 | UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height, |
| 204 | src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, | 204 | src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y, |
| 205 | src_extent_y, src.block_height, src.block_depth, | 205 | src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel); |
| 206 | src_extent_x * src_bytes_per_pixel); | ||
| 207 | } else { | 206 | } else { |
| 208 | process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, | 207 | process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, |
| 209 | src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); | 208 | src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); |
| 210 | } | 209 | } |
| 211 | 210 | ||
| @@ -221,20 +220,18 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, | |||
| 221 | } | 220 | } |
| 222 | 221 | ||
| 223 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); | 222 | const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); |
| 224 | impl->tmp_buffer.resize_destructive(dst_size); | 223 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> |
| 225 | memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); | 224 | tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); |
| 226 | 225 | ||
| 227 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { | 226 | if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { |
| 228 | SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, | 227 | SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height, |
| 229 | dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, | 228 | dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y, |
| 230 | dst_extent_y, dst.block_height, dst.block_depth, | 229 | dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel); |
| 231 | dst_extent_x * dst_bytes_per_pixel); | ||
| 232 | } else { | 230 | } else { |
| 233 | process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, | 231 | process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y, |
| 234 | dst.pitch, config.dst_x0, config.dst_y0, | 232 | dst.pitch, config.dst_x0, config.dst_y0, |
| 235 | static_cast<size_t>(dst_bytes_per_pixel)); | 233 | static_cast<size_t>(dst_bytes_per_pixel)); |
| 236 | } | 234 | } |
| 237 | memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); | ||
| 238 | return true; | 235 | return true; |
| 239 | } | 236 | } |
| 240 | 237 | ||
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 45141e488..d16040613 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -10,13 +10,13 @@ | |||
| 10 | #include "core/device_memory.h" | 10 | #include "core/device_memory.h" |
| 11 | #include "core/hle/kernel/k_page_table.h" | 11 | #include "core/hle/kernel/k_page_table.h" |
| 12 | #include "core/hle/kernel/k_process.h" | 12 | #include "core/hle/kernel/k_process.h" |
| 13 | #include "core/memory.h" | ||
| 14 | #include "video_core/invalidation_accumulator.h" | 13 | #include "video_core/invalidation_accumulator.h" |
| 15 | #include "video_core/memory_manager.h" | 14 | #include "video_core/memory_manager.h" |
| 16 | #include "video_core/rasterizer_interface.h" | 15 | #include "video_core/rasterizer_interface.h" |
| 17 | #include "video_core/renderer_base.h" | 16 | #include "video_core/renderer_base.h" |
| 18 | 17 | ||
| 19 | namespace Tegra { | 18 | namespace Tegra { |
| 19 | using Core::Memory::GuestMemoryFlags; | ||
| 20 | 20 | ||
| 21 | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; | 21 | std::atomic<size_t> MemoryManager::unique_identifier_generator{}; |
| 22 | 22 | ||
| @@ -587,13 +587,10 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 587 | 587 | ||
| 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, |
| 589 | VideoCommon::CacheType which) { | 589 | VideoCommon::CacheType which) { |
| 590 | tmp_buffer.resize_destructive(size); | 590 | Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( |
| 591 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); | 591 | *this, gpu_src_addr, size); |
| 592 | | 592 | data.SetAddressAndSize(gpu_dest_addr, size); |
| 593 | // The output block must be flushed in case it has data modified from the GPU. | ||
| 594 | // Fixes NPC geometry in Zombie Panic in Wonderland DX | ||
| 595 | FlushRegion(gpu_dest_addr, size, which); | 593 | FlushRegion(gpu_dest_addr, size, which); |
| 596 | WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); | ||
| 597 | } | 594 | } |
| 598 | 595 | ||
| 599 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { | 596 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { |
| @@ -758,4 +755,23 @@ void MemoryManager::FlushCaching() { | |||
| 758 | accumulator->Clear(); | 755 | accumulator->Clear(); |
| 759 | } | 756 | } |
| 760 | 757 | ||
| 758 | const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { | ||
| 759 | auto cpu_addr = GpuToCpuAddress(src_addr); | ||
| 760 | if (cpu_addr) { | ||
| 761 | return memory.GetSpan(*cpu_addr, size); | ||
| 762 | } | ||
| 763 | return nullptr; | ||
| 764 | } | ||
| 765 | |||
| 766 | u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { | ||
| 767 | if (!IsContinuousRange(src_addr, size)) { | ||
| 768 | return nullptr; | ||
| 769 | } | ||
| 770 | auto cpu_addr = GpuToCpuAddress(src_addr); | ||
| 771 | if (cpu_addr) { | ||
| 772 | return memory.GetSpan(*cpu_addr, size); | ||
| 773 | } | ||
| 774 | return nullptr; | ||
| 775 | } | ||
| 776 | |||
| 761 | } // namespace Tegra | 777 | } // namespace Tegra |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 4202c26ff..9b311b9e5 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include "common/range_map.h" | 15 | #include "common/range_map.h" |
| 16 | #include "common/scratch_buffer.h" | 16 | #include "common/scratch_buffer.h" |
| 17 | #include "common/virtual_buffer.h" | 17 | #include "common/virtual_buffer.h" |
| 18 | #include "core/memory.h" | ||
| 18 | #include "video_core/cache_types.h" | 19 | #include "video_core/cache_types.h" |
| 19 | #include "video_core/pte_kind.h" | 20 | #include "video_core/pte_kind.h" |
| 20 | 21 | ||
| @@ -62,6 +63,20 @@ public: | |||
| 62 | [[nodiscard]] u8* GetPointer(GPUVAddr addr); | 63 | [[nodiscard]] u8* GetPointer(GPUVAddr addr); |
| 63 | [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; | 64 | [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; |
| 64 | 65 | ||
| 66 | template <typename T> | ||
| 67 | [[nodiscard]] T* GetPointer(GPUVAddr addr) { | ||
| 68 | const auto address{GpuToCpuAddress(addr)}; | ||
| 69 | if (!address) { | ||
| 70 | return {}; | ||
| 71 | } | ||
| 72 | return memory.GetPointer(*address); | ||
| 73 | } | ||
| 74 | |||
| 75 | template <typename T> | ||
| 76 | [[nodiscard]] const T* GetPointer(GPUVAddr addr) const { | ||
| 77 | return GetPointer<T*>(addr); | ||
| 78 | } | ||
| 79 | |||
| 65 | /** | 80 | /** |
| 66 | * ReadBlock and WriteBlock are full read and write operations over virtual | 81 | * ReadBlock and WriteBlock are full read and write operations over virtual |
| 67 | * GPU Memory. It's important to use these when GPU memory may not be continuous | 82 | * GPU Memory. It's important to use these when GPU memory may not be continuous |
| @@ -139,6 +154,9 @@ public: | |||
| 139 | 154 | ||
| 140 | void FlushCaching(); | 155 | void FlushCaching(); |
| 141 | 156 | ||
| 157 | const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const; | ||
| 158 | u8* GetSpan(const GPUVAddr src_addr, const std::size_t size); | ||
| 159 | |||
| 142 | private: | 160 | private: |
| 143 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | 161 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |
| 144 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, | 162 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3a859139c..4457b366f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 10 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| 11 | #include "core/memory.h" | ||
| 11 | #include "video_core/control/channel_state.h" | 12 | #include "video_core/control/channel_state.h" |
| 12 | #include "video_core/dirty_flags.h" | 13 | #include "video_core/dirty_flags.h" |
| 13 | #include "video_core/engines/kepler_compute.h" | 14 | #include "video_core/engines/kepler_compute.h" |
| @@ -1026,19 +1027,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) | |||
| 1026 | runtime.AccelerateImageUpload(image, staging, uploads); | 1027 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 1027 | return; | 1028 | return; |
| 1028 | } | 1029 | } |
| 1029 | const size_t guest_size_bytes = image.guest_size_bytes; | 1030 | |
| 1030 | swizzle_data_buffer.resize_destructive(guest_size_bytes); | 1031 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1031 | gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); | 1032 | *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1032 | 1033 | ||
| 1033 | if (True(image.flags & ImageFlagBits::Converted)) { | 1034 | if (True(image.flags & ImageFlagBits::Converted)) { |
| 1034 | unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); | 1035 | unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |
| 1035 | auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, | 1036 | auto copies = |
| 1036 | unswizzle_data_buffer); | 1037 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); |
| 1037 | ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); | 1038 | ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); |
| 1038 | image.UploadMemory(staging, copies); | 1039 | image.UploadMemory(staging, copies); |
| 1039 | } else { | 1040 | } else { |
| 1040 | const auto copies = | 1041 | const auto copies = |
| 1041 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); | 1042 | UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); |
| 1042 | image.UploadMemory(staging, copies); | 1043 | image.UploadMemory(staging, copies); |
| 1043 | } | 1044 | } |
| 1044 | } | 1045 | } |
| @@ -1231,11 +1232,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) { | |||
| 1231 | decode->image_id = image_id; | 1232 | decode->image_id = image_id; |
| 1232 | async_decodes.push_back(std::move(decode)); | 1233 | async_decodes.push_back(std::move(decode)); |
| 1233 | 1234 | ||
| 1234 | Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); | 1235 | static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; |
| 1235 | const size_t guest_size_bytes = image.guest_size_bytes; | 1236 | local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); |
| 1236 | swizzle_data_buffer.resize_destructive(guest_size_bytes); | 1237 | Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( |
| 1237 | gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); | 1238 | *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); |
| 1238 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, | 1239 | |
| 1240 | auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, | ||
| 1239 | local_unswizzle_data_buffer); | 1241 | local_unswizzle_data_buffer); |
| 1240 | const size_t out_size = MapSizeBytes(image); | 1242 | const size_t out_size = MapSizeBytes(image); |
| 1241 | 1243 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0de6ed09d..a83f5d41c 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "common/div_ceil.h" | 20 | #include "common/div_ceil.h" |
| 21 | #include "common/scratch_buffer.h" | 21 | #include "common/scratch_buffer.h" |
| 22 | #include "common/settings.h" | 22 | #include "common/settings.h" |
| 23 | #include "core/memory.h" | ||
| 23 | #include "video_core/compatible_formats.h" | 24 | #include "video_core/compatible_formats.h" |
| 24 | #include "video_core/engines/maxwell_3d.h" | 25 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | 26 | #include "video_core/memory_manager.h" |
| @@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr | |||
| 544 | tile_size.height, info.tile_width_spacing); | 545 | tile_size.height, info.tile_width_spacing); |
| 545 | const size_t subresource_size = sizes[level]; | 546 | const size_t subresource_size = sizes[level]; |
| 546 | 547 | ||
| 547 | tmp_buffer.resize_destructive(subresource_size); | ||
| 548 | const std::span<u8> dst(tmp_buffer); | ||
| 549 | |||
| 550 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { | 548 | for (s32 layer = 0; layer < info.resources.layers; ++layer) { |
| 551 | const std::span<const u8> src = input.subspan(host_offset); | 549 | const std::span<const u8> src = input.subspan(host_offset); |
| 552 | gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | 550 | { |
| 553 | 551 | Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> | |
| 554 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, | 552 | dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); |
| 555 | num_tiles.depth, block.height, block.depth); | ||
| 556 | 553 | ||
| 557 | gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); | 554 | SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, |
| 555 | num_tiles.depth, block.height, block.depth); | ||
| 556 | } | ||
| 558 | 557 | ||
| 559 | host_offset += host_bytes_per_layer; | 558 | host_offset += host_bytes_per_layer; |
| 560 | guest_offset += layer_stride; | 559 | guest_offset += layer_stride; |
| @@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 837 | const Extent3D size = info.size; | 836 | const Extent3D size = info.size; |
| 838 | 837 | ||
| 839 | if (info.type == ImageType::Linear) { | 838 | if (info.type == ImageType::Linear) { |
| 839 | ASSERT(output.size_bytes() >= guest_size_bytes); | ||
| 840 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); | 840 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); |
| 841 | 841 | ||
| 842 | ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); | 842 | ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); |
| @@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory | |||
| 904 | return copies; | 904 | return copies; |
| 905 | } | 905 | } |
| 906 | 906 | ||
| 907 | BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 908 | const ImageBase& image, std::span<u8> output) { | ||
| 909 | gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); | ||
| 910 | return BufferCopy{ | ||
| 911 | .src_offset = 0, | ||
| 912 | .dst_offset = 0, | ||
| 913 | .size = image.guest_size_bytes, | ||
| 914 | }; | ||
| 915 | } | ||
| 916 | |||
| 917 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 907 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 918 | std::span<BufferImageCopy> copies) { | 908 | std::span<BufferImageCopy> copies) { |
| 919 | u32 output_offset = 0; | 909 | u32 output_offset = 0; |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index ab45a43c4..5a0649d24 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -66,9 +66,6 @@ struct OverlapResult { | |||
| 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 67 | std::span<const u8> input, std::span<u8> output); | 67 | std::span<const u8> input, std::span<u8> output); |
| 68 | 68 | ||
| 69 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | ||
| 70 | const ImageBase& image, std::span<u8> output); | ||
| 71 | |||
| 72 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 69 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 73 | std::span<BufferImageCopy> copies); | 70 | std::span<BufferImageCopy> copies); |
| 74 | 71 | ||