summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Yuri Kunde Schlesner, 2015-08-18 20:52:40 -0700
committer: Yuri Kunde Schlesner, 2015-08-18 20:52:40 -0700
commit: a9fc6598092f2d4f3105ebf7284685b687fa4e75 (patch)
tree: cd6b5895a3ddf1edc18ac6eb498a103e5c4a95b0 /src
parent: Merge pull request #1047 from aroulin/shader-ex2-lg2 (diff)
parent: GPU: Implement TextureCopy-mode display transfers (diff)
download: yuzu-a9fc6598092f2d4f3105ebf7284685b687fa4e75.tar.gz
yuzu-a9fc6598092f2d4f3105ebf7284685b687fa4e75.tar.xz
yuzu-a9fc6598092f2d4f3105ebf7284685b687fa4e75.zip
Merge pull request #996 from yuriks/texture-copy
GPU: Implement TextureCopy-mode display transfers
Diffstat (limited to 'src')
-rw-r--r-- src/core/hle/service/gsp_gpu.cpp 25
-rw-r--r-- src/core/hle/service/gsp_gpu.h 11
-rw-r--r-- src/core/hw/gpu.cpp 69
-rw-r--r-- src/core/hw/gpu.h 32
4 files changed, 101 insertions, 36 deletions
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index e93c1b436..3c41e656c 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -418,7 +418,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
418 418
419 case CommandId::SET_DISPLAY_TRANSFER: 419 case CommandId::SET_DISPLAY_TRANSFER:
420 { 420 {
421 auto& params = command.image_copy; 421 auto& params = command.display_transfer;
422 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), 422 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)),
423 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); 423 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
424 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), 424 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)),
@@ -433,17 +433,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) {
433 // TODO: Check if texture copies are implemented correctly.. 433 // TODO: Check if texture copies are implemented correctly..
434 case CommandId::SET_TEXTURE_COPY: 434 case CommandId::SET_TEXTURE_COPY:
435 { 435 {
436 auto& params = command.image_copy; 436 auto& params = command.texture_copy;
437 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), 437 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address),
438 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); 438 Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3);
439 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), 439 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address),
440 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); 440 Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3);
441 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); 441 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size),
442 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); 442 params.size);
443 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); 443 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size),
444 444 params.in_width_gap);
445 // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? 445 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size),
446 WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); 446 params.out_width_gap);
447 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags),
448 params.flags);
449
450 // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter.
451 WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1);
447 break; 452 break;
448 } 453 }
449 454
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h
index c89d0a467..8bcb30ad1 100644
--- a/src/core/hle/service/gsp_gpu.h
+++ b/src/core/hle/service/gsp_gpu.h
@@ -127,7 +127,16 @@ struct Command {
127 u32 in_buffer_size; 127 u32 in_buffer_size;
128 u32 out_buffer_size; 128 u32 out_buffer_size;
129 u32 flags; 129 u32 flags;
130 } image_copy; 130 } display_transfer;
131
132 struct {
133 u32 in_buffer_address;
134 u32 out_buffer_address;
135 u32 size;
136 u32 in_width_gap;
137 u32 out_width_gap;
138 u32 flags;
139 } texture_copy;
131 140
132 u8 raw_data[0x1C]; 141 u8 raw_data[0x1C];
133 }; 142 };
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 3ccbc03b2..68ae38289 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <numeric>
6#include <type_traits> 7#include <type_traits>
7 8
8#include "common/color.h" 9#include "common/color.h"
@@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) {
158 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); 159 u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
159 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); 160 u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
160 161
162 if (config.is_texture_copy) {
163 u32 input_width = config.texture_copy.input_width * 16;
164 u32 input_gap = config.texture_copy.input_gap * 16;
165 u32 output_width = config.texture_copy.output_width * 16;
166 u32 output_gap = config.texture_copy.output_gap * 16;
167
168 size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap);
169 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size);
170
171 u32 remaining_size = config.texture_copy.size;
172 u32 remaining_input = input_width;
173 u32 remaining_output = output_width;
174 while (remaining_size > 0) {
175 u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size });
176
177 std::memcpy(dst_pointer, src_pointer, copy_size);
178 src_pointer += copy_size;
179 dst_pointer += copy_size;
180
181 remaining_input -= copy_size;
182 remaining_output -= copy_size;
183 remaining_size -= copy_size;
184
185 if (remaining_input == 0) {
186 remaining_input = input_width;
187 src_pointer += input_gap;
188 }
189 if (remaining_output == 0) {
190 remaining_output = output_width;
191 dst_pointer += output_gap;
192 }
193 }
194
195 LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X",
196 config.texture_copy.size,
197 config.GetPhysicalInputAddress(), input_width, input_gap,
198 config.GetPhysicalOutputAddress(), output_width, output_gap,
199 config.flags);
200
201 size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap);
202 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size);
203
204 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
205 break;
206 }
207
161 if (config.scaling > config.ScaleXY) { 208 if (config.scaling > config.ScaleXY) {
162 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); 209 LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value());
163 UNIMPLEMENTED(); 210 UNIMPLEMENTED();
164 break; 211 break;
165 } 212 }
166 213
167 if (config.output_tiled && 214 if (config.input_linear && config.scaling != config.NoScale) {
168 (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
169 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); 215 LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
170 UNIMPLEMENTED(); 216 UNIMPLEMENTED();
171 break; 217 break;
@@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) {
182 228
183 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); 229 VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size);
184 230
185 if (config.raw_copy) {
186 // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions
187 // TODO(Subv): Verify if raw copies perform scaling
188 memcpy(dst_pointer, src_pointer, output_size);
189
190 LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy",
191 output_size,
192 config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(),
193 config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(),
194 config.output_format.Value(), config.flags);
195
196 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
197
198 VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
199 break;
200 }
201
202 for (u32 y = 0; y < output_height; ++y) { 231 for (u32 y = 0; y < output_height; ++y) {
203 for (u32 x = 0; x < output_width; ++x) { 232 for (u32 x = 0; x < output_width; ++x) {
204 Math::Vec4<u8> src_color; 233 Math::Vec4<u8> src_color;
@@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) {
220 u32 src_offset; 249 u32 src_offset;
221 u32 dst_offset; 250 u32 dst_offset;
222 251
223 if (config.output_tiled) { 252 if (config.input_linear) {
224 if (!config.dont_swizzle) { 253 if (!config.dont_swizzle) {
225 // Interpret the input as linear and the output as tiled 254 // Interpret the input as linear and the output as tiled
226 u32 coarse_y = y & ~7; 255 u32 coarse_y = y & ~7;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index daad506fe..2e3a9f779 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -201,12 +201,14 @@ struct Regs {
201 u32 flags; 201 u32 flags;
202 202
203 BitField< 0, 1, u32> flip_vertically; // flips input data vertically 203 BitField< 0, 1, u32> flip_vertically; // flips input data vertically
204 BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format 204 BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format
205 BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing 205 BitField< 2, 1, u32> crop_input_lines;
206 BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields
206 BitField< 5, 1, u32> dont_swizzle; 207 BitField< 5, 1, u32> dont_swizzle;
207 BitField< 8, 3, PixelFormat> input_format; 208 BitField< 8, 3, PixelFormat> input_format;
208 BitField<12, 3, PixelFormat> output_format; 209 BitField<12, 3, PixelFormat> output_format;
209 210 /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one.
211 BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented
210 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer 212 BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer
211 }; 213 };
212 214
@@ -214,10 +216,30 @@ struct Regs {
214 216
215 // it seems that writing to this field triggers the display transfer 217 // it seems that writing to this field triggers the display transfer
216 u32 trigger; 218 u32 trigger;
219
220 INSERT_PADDING_WORDS(0x1);
221
222 struct {
223 u32 size;
224
225 union {
226 u32 input_size;
227
228 BitField< 0, 16, u32> input_width;
229 BitField<16, 16, u32> input_gap;
230 };
231
232 union {
233 u32 output_size;
234
235 BitField< 0, 16, u32> output_width;
236 BitField<16, 16, u32> output_gap;
237 };
238 } texture_copy;
217 } display_transfer_config; 239 } display_transfer_config;
218 ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); 240 ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c);
219 241
220 INSERT_PADDING_WORDS(0x331); 242 INSERT_PADDING_WORDS(0x32D);
221 243
222 struct { 244 struct {
223 // command list size (in bytes) 245 // command list size (in bytes)