diff options
| author | 2023-06-22 21:53:07 -0700 | |
|---|---|---|
| committer | 2023-06-22 21:53:07 -0700 | |
| commit | 2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8 (patch) | |
| tree | d82f2cf4f7a5e9773616846c095a941b282a84f6 /src/video_core/engines | |
| parent | Merge pull request #10806 from liamwhite/worst-fs-implementation-ever (diff) | |
| parent | Remove memory allocations in some hot paths (diff) | |
| download | yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.tar.gz yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.tar.xz yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.zip | |
Merge pull request #10457 from Kelebek1/optimise
Remove memory allocations in some hot paths
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 35 |
1 file changed, 19 insertions, 16 deletions
```
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
```
| @@ -108,9 +108,11 @@ void MaxwellDMA::Launch() { | |||
| 108 | if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { | 108 | if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { |
| 109 | ASSERT(regs.remap_const.component_size_minus_one == 3); | 109 | ASSERT(regs.remap_const.component_size_minus_one == 3); |
| 110 | accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); | 110 | accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); |
| 111 | std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); | 111 | read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); |
| 112 | std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); | ||
| 113 | std::ranges::fill(span, regs.remap_consta_value); | ||
| 112 | memory_manager.WriteBlockUnsafe(regs.offset_out, | 114 | memory_manager.WriteBlockUnsafe(regs.offset_out, |
| 113 | reinterpret_cast<u8*>(tmp_buffer.data()), | 115 | reinterpret_cast<u8*>(read_buffer.data()), |
| 114 | regs.line_length_in * sizeof(u32)); | 116 | regs.line_length_in * sizeof(u32)); |
| 115 | } else { | 117 | } else { |
| 116 | memory_manager.FlushCaching(); | 118 | memory_manager.FlushCaching(); |
| @@ -126,32 +128,32 @@ void MaxwellDMA::Launch() { | |||
| 126 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 128 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| 127 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); | 129 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); |
| 128 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 130 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 129 | std::vector<u8> tmp_buffer(16); | 131 | read_buffer.resize_destructive(16); |
| 130 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 132 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 131 | memory_manager.ReadBlockUnsafe( | 133 | memory_manager.ReadBlockUnsafe( |
| 132 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 134 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 133 | tmp_buffer.data(), tmp_buffer.size()); | 135 | read_buffer.data(), read_buffer.size()); |
| 134 | memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), | 136 | memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), |
| 135 | tmp_buffer.size()); | 137 | read_buffer.size()); |
| 136 | } | 138 | } |
| 137 | } else if (is_src_pitch && !is_dst_pitch) { | 139 | } else if (is_src_pitch && !is_dst_pitch) { |
| 138 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 140 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| 139 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); | 141 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); |
| 140 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 142 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 141 | std::vector<u8> tmp_buffer(16); | 143 | read_buffer.resize_destructive(16); |
| 142 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 144 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 143 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), | 145 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), |
| 144 | tmp_buffer.size()); | 146 | read_buffer.size()); |
| 145 | memory_manager.WriteBlockCached( | 147 | memory_manager.WriteBlockCached( |
| 146 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), | 148 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), |
| 147 | tmp_buffer.data(), tmp_buffer.size()); | 149 | read_buffer.data(), read_buffer.size()); |
| 148 | } | 150 | } |
| 149 | } else { | 151 | } else { |
| 150 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | 152 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |
| 151 | std::vector<u8> tmp_buffer(regs.line_length_in); | 153 | read_buffer.resize_destructive(regs.line_length_in); |
| 152 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), | 154 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), |
| 153 | regs.line_length_in); | 155 | regs.line_length_in); |
| 154 | memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), | 156 | memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), |
| 155 | regs.line_length_in); | 157 | regs.line_length_in); |
| 156 | } | 158 | } |
| 157 | } | 159 | } |
| @@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 171 | src_operand.address = regs.offset_in; | 173 | src_operand.address = regs.offset_in; |
| 172 | 174 | ||
| 173 | DMA::BufferOperand dst_operand; | 175 | DMA::BufferOperand dst_operand; |
| 174 | dst_operand.pitch = regs.pitch_out; | 176 | u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out)); |
| 177 | dst_operand.pitch = abs_pitch_out; | ||
| 175 | dst_operand.width = regs.line_length_in; | 178 | dst_operand.width = regs.line_length_in; |
| 176 | dst_operand.height = regs.line_count; | 179 | dst_operand.height = regs.line_count; |
| 177 | dst_operand.address = regs.offset_out; | 180 | dst_operand.address = regs.offset_out; |
| @@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 218 | const size_t src_size = | 221 | const size_t src_size = |
| 219 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 222 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 220 | 223 | ||
| 221 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 224 | const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count; |
| 222 | read_buffer.resize_destructive(src_size); | 225 | read_buffer.resize_destructive(src_size); |
| 223 | write_buffer.resize_destructive(dst_size); | 226 | write_buffer.resize_destructive(dst_size); |
| 224 | 227 | ||
| @@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 227 | 230 | ||
| 228 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | 231 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 229 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 232 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 230 | regs.pitch_out); | 233 | abs_pitch_out); |
| 231 | 234 | ||
| 232 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | 235 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 233 | } | 236 | } |