summary | refs | log | tree | commit | diff
path: root/src/video_core/engines
diff options
context:
space:
mode:
author: bunnei, 2023-06-22 21:53:07 -0700
committer: GitHub, 2023-06-22 21:53:07 -0700
commit: 2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8 (patch)
tree: d82f2cf4f7a5e9773616846c095a941b282a84f6 /src/video_core/engines
parent: Merge pull request #10806 from liamwhite/worst-fs-implementation-ever (diff)
parent: Remove memory allocations in some hot paths (diff)
download: yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.tar.gz
yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.tar.xz
yuzu-2fc5dedf6996d4a5c93ddf1ccd67a6963e4827e8.zip
Merge pull request #10457 from Kelebek1/optimise
Remove memory allocations in some hot paths
Diffstat (limited to 'src/video_core/engines')
-rw-r--r-- src/video_core/engines/maxwell_dma.cpp 35
1 files changed, 19 insertions, 16 deletions
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
108 if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { 108 if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
109 ASSERT(regs.remap_const.component_size_minus_one == 3); 109 ASSERT(regs.remap_const.component_size_minus_one == 3);
110 accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); 110 accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
111 std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); 111 read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
112 std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
113 std::ranges::fill(span, regs.remap_consta_value);
112 memory_manager.WriteBlockUnsafe(regs.offset_out, 114 memory_manager.WriteBlockUnsafe(regs.offset_out,
113 reinterpret_cast<u8*>(tmp_buffer.data()), 115 reinterpret_cast<u8*>(read_buffer.data()),
114 regs.line_length_in * sizeof(u32)); 116 regs.line_length_in * sizeof(u32));
115 } else { 117 } else {
116 memory_manager.FlushCaching(); 118 memory_manager.FlushCaching();
@@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
126 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 128 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
127 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); 129 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
128 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 130 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
129 std::vector<u8> tmp_buffer(16); 131 read_buffer.resize_destructive(16);
130 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 132 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
131 memory_manager.ReadBlockUnsafe( 133 memory_manager.ReadBlockUnsafe(
132 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 134 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
133 tmp_buffer.data(), tmp_buffer.size()); 135 read_buffer.data(), read_buffer.size());
134 memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), 136 memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
135 tmp_buffer.size()); 137 read_buffer.size());
136 } 138 }
137 } else if (is_src_pitch && !is_dst_pitch) { 139 } else if (is_src_pitch && !is_dst_pitch) {
138 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 140 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
139 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); 141 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
140 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 142 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
141 std::vector<u8> tmp_buffer(16); 143 read_buffer.resize_destructive(16);
142 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 144 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
143 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), 145 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
144 tmp_buffer.size()); 146 read_buffer.size());
145 memory_manager.WriteBlockCached( 147 memory_manager.WriteBlockCached(
146 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 148 convert_linear_2_blocklinear_addr(regs.offset_out + offset),
147 tmp_buffer.data(), tmp_buffer.size()); 149 read_buffer.data(), read_buffer.size());
148 } 150 }
149 } else { 151 } else {
150 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { 152 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
151 std::vector<u8> tmp_buffer(regs.line_length_in); 153 read_buffer.resize_destructive(regs.line_length_in);
152 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), 154 memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
153 regs.line_length_in); 155 regs.line_length_in);
154 memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), 156 memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
155 regs.line_length_in); 157 regs.line_length_in);
156 } 158 }
157 } 159 }
@@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
171 src_operand.address = regs.offset_in; 173 src_operand.address = regs.offset_in;
172 174
173 DMA::BufferOperand dst_operand; 175 DMA::BufferOperand dst_operand;
174 dst_operand.pitch = regs.pitch_out; 176 u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
177 dst_operand.pitch = abs_pitch_out;
175 dst_operand.width = regs.line_length_in; 178 dst_operand.width = regs.line_length_in;
176 dst_operand.height = regs.line_count; 179 dst_operand.height = regs.line_count;
177 dst_operand.address = regs.offset_out; 180 dst_operand.address = regs.offset_out;
@@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
218 const size_t src_size = 221 const size_t src_size =
219 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 222 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
220 223
221 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 224 const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
222 read_buffer.resize_destructive(src_size); 225 read_buffer.resize_destructive(src_size);
223 write_buffer.resize_destructive(dst_size); 226 write_buffer.resize_destructive(dst_size);
224 227
@@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
227 230
228 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, 231 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
229 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 232 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
230 regs.pitch_out); 233 abs_pitch_out);
231 234
232 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); 235 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
233} 236}