diff options
| author | 2015-08-18 20:52:40 -0700 | |
|---|---|---|
| committer | 2015-08-18 20:52:40 -0700 | |
| commit | a9fc6598092f2d4f3105ebf7284685b687fa4e75 (patch) | |
| tree | cd6b5895a3ddf1edc18ac6eb498a103e5c4a95b0 /src | |
| parent | Merge pull request #1047 from aroulin/shader-ex2-lg2 (diff) | |
| parent | GPU: Implement TextureCopy-mode display transfers (diff) | |
| download | yuzu-a9fc6598092f2d4f3105ebf7284685b687fa4e75.tar.gz yuzu-a9fc6598092f2d4f3105ebf7284685b687fa4e75.tar.xz yuzu-a9fc6598092f2d4f3105ebf7284685b687fa4e75.zip | |
Merge pull request #996 from yuriks/texture-copy
GPU: Implement TextureCopy-mode display transfers
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/service/gsp_gpu.cpp | 25 | ||||
| -rw-r--r-- | src/core/hle/service/gsp_gpu.h | 11 | ||||
| -rw-r--r-- | src/core/hw/gpu.cpp | 69 | ||||
| -rw-r--r-- | src/core/hw/gpu.h | 32 |
4 files changed, 101 insertions, 36 deletions
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp index e93c1b436..3c41e656c 100644 --- a/src/core/hle/service/gsp_gpu.cpp +++ b/src/core/hle/service/gsp_gpu.cpp | |||
| @@ -418,7 +418,7 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||
| 418 | 418 | ||
| 419 | case CommandId::SET_DISPLAY_TRANSFER: | 419 | case CommandId::SET_DISPLAY_TRANSFER: |
| 420 | { | 420 | { |
| 421 | auto& params = command.image_copy; | 421 | auto& params = command.display_transfer; |
| 422 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | 422 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), |
| 423 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | 423 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 424 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | 424 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), |
| @@ -433,17 +433,22 @@ static void ExecuteCommand(const Command& command, u32 thread_id) { | |||
| 433 | // TODO: Check if texture copies are implemented correctly.. | 433 | // TODO: Check if texture copies are implemented correctly.. |
| 434 | case CommandId::SET_TEXTURE_COPY: | 434 | case CommandId::SET_TEXTURE_COPY: |
| 435 | { | 435 | { |
| 436 | auto& params = command.image_copy; | 436 | auto& params = command.texture_copy; |
| 437 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_address)), | 437 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.input_address), |
| 438 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); | 438 | Memory::VirtualToPhysicalAddress(params.in_buffer_address) >> 3); |
| 439 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_address)), | 439 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.output_address), |
| 440 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); | 440 | Memory::VirtualToPhysicalAddress(params.out_buffer_address) >> 3); |
| 441 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.input_size)), params.in_buffer_size); | 441 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.size), |
| 442 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.output_size)), params.out_buffer_size); | 442 | params.size); |
| 443 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.flags)), params.flags); | 443 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.input_size), |
| 444 | 444 | params.in_width_gap); | |
| 445 | // TODO: Should this register be set to 1 or should instead its value be OR-ed with 1? | 445 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.texture_copy.output_size), |
| 446 | WriteGPURegister(static_cast<u32>(GPU_REG_INDEX(display_transfer_config.trigger)), 1); | 446 | params.out_width_gap); |
| 447 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.flags), | ||
| 448 | params.flags); | ||
| 449 | |||
| 450 | // NOTE: Actual GSP ORs 1 with current register instead of overwriting. Doesn't seem to matter. | ||
| 451 | WriteGPURegister((u32)GPU_REG_INDEX(display_transfer_config.trigger), 1); | ||
| 447 | break; | 452 | break; |
| 448 | } | 453 | } |
| 449 | 454 | ||
diff --git a/src/core/hle/service/gsp_gpu.h b/src/core/hle/service/gsp_gpu.h index c89d0a467..8bcb30ad1 100644 --- a/src/core/hle/service/gsp_gpu.h +++ b/src/core/hle/service/gsp_gpu.h | |||
| @@ -127,7 +127,16 @@ struct Command { | |||
| 127 | u32 in_buffer_size; | 127 | u32 in_buffer_size; |
| 128 | u32 out_buffer_size; | 128 | u32 out_buffer_size; |
| 129 | u32 flags; | 129 | u32 flags; |
| 130 | } image_copy; | 130 | } display_transfer; |
| 131 | |||
| 132 | struct { | ||
| 133 | u32 in_buffer_address; | ||
| 134 | u32 out_buffer_address; | ||
| 135 | u32 size; | ||
| 136 | u32 in_width_gap; | ||
| 137 | u32 out_width_gap; | ||
| 138 | u32 flags; | ||
| 139 | } texture_copy; | ||
| 131 | 140 | ||
| 132 | u8 raw_data[0x1C]; | 141 | u8 raw_data[0x1C]; |
| 133 | }; | 142 | }; |
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 3ccbc03b2..68ae38289 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <numeric> | ||
| 6 | #include <type_traits> | 7 | #include <type_traits> |
| 7 | 8 | ||
| 8 | #include "common/color.h" | 9 | #include "common/color.h" |
| @@ -158,14 +159,59 @@ inline void Write(u32 addr, const T data) { | |||
| 158 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); | 159 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); |
| 159 | u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); | 160 | u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); |
| 160 | 161 | ||
| 162 | if (config.is_texture_copy) { | ||
| 163 | u32 input_width = config.texture_copy.input_width * 16; | ||
| 164 | u32 input_gap = config.texture_copy.input_gap * 16; | ||
| 165 | u32 output_width = config.texture_copy.output_width * 16; | ||
| 166 | u32 output_gap = config.texture_copy.output_gap * 16; | ||
| 167 | |||
| 168 | size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); | ||
| 169 | VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), contiguous_input_size); | ||
| 170 | |||
| 171 | u32 remaining_size = config.texture_copy.size; | ||
| 172 | u32 remaining_input = input_width; | ||
| 173 | u32 remaining_output = output_width; | ||
| 174 | while (remaining_size > 0) { | ||
| 175 | u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); | ||
| 176 | |||
| 177 | std::memcpy(dst_pointer, src_pointer, copy_size); | ||
| 178 | src_pointer += copy_size; | ||
| 179 | dst_pointer += copy_size; | ||
| 180 | |||
| 181 | remaining_input -= copy_size; | ||
| 182 | remaining_output -= copy_size; | ||
| 183 | remaining_size -= copy_size; | ||
| 184 | |||
| 185 | if (remaining_input == 0) { | ||
| 186 | remaining_input = input_width; | ||
| 187 | src_pointer += input_gap; | ||
| 188 | } | ||
| 189 | if (remaining_output == 0) { | ||
| 190 | remaining_output = output_width; | ||
| 191 | dst_pointer += output_gap; | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", | ||
| 196 | config.texture_copy.size, | ||
| 197 | config.GetPhysicalInputAddress(), input_width, input_gap, | ||
| 198 | config.GetPhysicalOutputAddress(), output_width, output_gap, | ||
| 199 | config.flags); | ||
| 200 | |||
| 201 | size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); | ||
| 202 | VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), contiguous_output_size); | ||
| 203 | |||
| 204 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||
| 205 | break; | ||
| 206 | } | ||
| 207 | |||
| 161 | if (config.scaling > config.ScaleXY) { | 208 | if (config.scaling > config.ScaleXY) { |
| 162 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); | 209 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); |
| 163 | UNIMPLEMENTED(); | 210 | UNIMPLEMENTED(); |
| 164 | break; | 211 | break; |
| 165 | } | 212 | } |
| 166 | 213 | ||
| 167 | if (config.output_tiled && | 214 | if (config.input_linear && config.scaling != config.NoScale) { |
| 168 | (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { | ||
| 169 | LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | 215 | LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); |
| 170 | UNIMPLEMENTED(); | 216 | UNIMPLEMENTED(); |
| 171 | break; | 217 | break; |
| @@ -182,23 +228,6 @@ inline void Write(u32 addr, const T data) { | |||
| 182 | 228 | ||
| 183 | VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); | 229 | VideoCore::g_renderer->hw_rasterizer->NotifyPreRead(config.GetPhysicalInputAddress(), input_size); |
| 184 | 230 | ||
| 185 | if (config.raw_copy) { | ||
| 186 | // Raw copies do not perform color conversion nor tiled->linear / linear->tiled conversions | ||
| 187 | // TODO(Subv): Verify if raw copies perform scaling | ||
| 188 | memcpy(dst_pointer, src_pointer, output_size); | ||
| 189 | |||
| 190 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), output format: %x, flags 0x%08X, Raw copy", | ||
| 191 | output_size, | ||
| 192 | config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), | ||
| 193 | config.GetPhysicalOutputAddress(), config.output_width.Value(), config.output_height.Value(), | ||
| 194 | config.output_format.Value(), config.flags); | ||
| 195 | |||
| 196 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||
| 197 | |||
| 198 | VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); | ||
| 199 | break; | ||
| 200 | } | ||
| 201 | |||
| 202 | for (u32 y = 0; y < output_height; ++y) { | 231 | for (u32 y = 0; y < output_height; ++y) { |
| 203 | for (u32 x = 0; x < output_width; ++x) { | 232 | for (u32 x = 0; x < output_width; ++x) { |
| 204 | Math::Vec4<u8> src_color; | 233 | Math::Vec4<u8> src_color; |
| @@ -220,7 +249,7 @@ inline void Write(u32 addr, const T data) { | |||
| 220 | u32 src_offset; | 249 | u32 src_offset; |
| 221 | u32 dst_offset; | 250 | u32 dst_offset; |
| 222 | 251 | ||
| 223 | if (config.output_tiled) { | 252 | if (config.input_linear) { |
| 224 | if (!config.dont_swizzle) { | 253 | if (!config.dont_swizzle) { |
| 225 | // Interpret the input as linear and the output as tiled | 254 | // Interpret the input as linear and the output as tiled |
| 226 | u32 coarse_y = y & ~7; | 255 | u32 coarse_y = y & ~7; |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index daad506fe..2e3a9f779 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -201,12 +201,14 @@ struct Regs { | |||
| 201 | u32 flags; | 201 | u32 flags; |
| 202 | 202 | ||
| 203 | BitField< 0, 1, u32> flip_vertically; // flips input data vertically | 203 | BitField< 0, 1, u32> flip_vertically; // flips input data vertically |
| 204 | BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format | 204 | BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format |
| 205 | BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing | 205 | BitField< 2, 1, u32> crop_input_lines; |
| 206 | BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields | ||
| 206 | BitField< 5, 1, u32> dont_swizzle; | 207 | BitField< 5, 1, u32> dont_swizzle; |
| 207 | BitField< 8, 3, PixelFormat> input_format; | 208 | BitField< 8, 3, PixelFormat> input_format; |
| 208 | BitField<12, 3, PixelFormat> output_format; | 209 | BitField<12, 3, PixelFormat> output_format; |
| 209 | 210 | /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. | |
| 211 | BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented | ||
| 210 | BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer | 212 | BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer |
| 211 | }; | 213 | }; |
| 212 | 214 | ||
| @@ -214,10 +216,30 @@ struct Regs { | |||
| 214 | 216 | ||
| 215 | // it seems that writing to this field triggers the display transfer | 217 | // it seems that writing to this field triggers the display transfer |
| 216 | u32 trigger; | 218 | u32 trigger; |
| 219 | |||
| 220 | INSERT_PADDING_WORDS(0x1); | ||
| 221 | |||
| 222 | struct { | ||
| 223 | u32 size; | ||
| 224 | |||
| 225 | union { | ||
| 226 | u32 input_size; | ||
| 227 | |||
| 228 | BitField< 0, 16, u32> input_width; | ||
| 229 | BitField<16, 16, u32> input_gap; | ||
| 230 | }; | ||
| 231 | |||
| 232 | union { | ||
| 233 | u32 output_size; | ||
| 234 | |||
| 235 | BitField< 0, 16, u32> output_width; | ||
| 236 | BitField<16, 16, u32> output_gap; | ||
| 237 | }; | ||
| 238 | } texture_copy; | ||
| 217 | } display_transfer_config; | 239 | } display_transfer_config; |
| 218 | ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c); | 240 | ASSERT_MEMBER_SIZE(display_transfer_config, 0x2c); |
| 219 | 241 | ||
| 220 | INSERT_PADDING_WORDS(0x331); | 242 | INSERT_PADDING_WORDS(0x32D); |
| 221 | 243 | ||
| 222 | struct { | 244 | struct { |
| 223 | // command list size (in bytes) | 245 | // command list size (in bytes) |