Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hw/gpu.cpp | 573 |
| -rw-r--r-- | src/core/memory.cpp | 3 |
| -rw-r--r-- | src/video_core/rasterizer_interface.h | 7 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 |
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 1 |
5 files changed, 347 insertions, 247 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 0e6b91e3a..28cb97d8e 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
| @@ -80,6 +80,319 @@ static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_ | |||
| 80 | MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); | 80 | MICROPROFILE_DEFINE(GPU_DisplayTransfer, "GPU", "DisplayTransfer", MP_RGB(100, 100, 255)); |
| 81 | MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); | 81 | MICROPROFILE_DEFINE(GPU_CmdlistProcessing, "GPU", "Cmdlist Processing", MP_RGB(100, 255, 100)); |
| 82 | 82 | ||
| 83 | static void MemoryFill(const Regs::MemoryFillConfig& config) { | ||
| 84 | const PAddr start_addr = config.GetStartAddress(); | ||
| 85 | const PAddr end_addr = config.GetEndAddress(); | ||
| 86 | |||
| 87 | // TODO: do hwtest with these cases | ||
| 88 | if (!Memory::IsValidPhysicalAddress(start_addr)) { | ||
| 89 | LOG_CRITICAL(HW_GPU, "invalid start address 0x%08X", start_addr); | ||
| 90 | return; | ||
| 91 | } | ||
| 92 | |||
| 93 | if (!Memory::IsValidPhysicalAddress(end_addr)) { | ||
| 94 | LOG_CRITICAL(HW_GPU, "invalid end address 0x%08X", end_addr); | ||
| 95 | return; | ||
| 96 | } | ||
| 97 | |||
| 98 | if (end_addr <= start_addr) { | ||
| 99 | LOG_CRITICAL(HW_GPU, "invalid memory range from 0x%08X to 0x%08X", start_addr, end_addr); | ||
| 100 | return; | ||
| 101 | } | ||
| 102 | |||
| 103 | u8* start = Memory::GetPhysicalPointer(start_addr); | ||
| 104 | u8* end = Memory::GetPhysicalPointer(end_addr); | ||
| 105 | |||
| 106 | // TODO: Consider always accelerating and returning vector of | ||
| 107 | // regions that the accelerated fill did not cover to | ||
| 108 | // reduce/eliminate the fill that the cpu has to do. | ||
| 109 | // This would also mean that the flush below is not needed. | ||
| 110 | // Fill should first flush all surfaces that touch but are | ||
| 111 | // not completely within the fill range. | ||
| 112 | // Then fill all completely covered surfaces, and return the | ||
| 113 | // regions that were between surfaces or within the touching | ||
| 114 | // ones for cpu to manually fill here. | ||
| 115 | if (VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) | ||
| 116 | return; | ||
| 117 | |||
| 118 | Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), | ||
| 119 | config.GetEndAddress() - config.GetStartAddress()); | ||
| 120 | |||
| 121 | if (config.fill_24bit) { | ||
| 122 | // fill with 24-bit values | ||
| 123 | for (u8* ptr = start; ptr < end; ptr += 3) { | ||
| 124 | ptr[0] = config.value_24bit_r; | ||
| 125 | ptr[1] = config.value_24bit_g; | ||
| 126 | ptr[2] = config.value_24bit_b; | ||
| 127 | } | ||
| 128 | } else if (config.fill_32bit) { | ||
| 129 | // fill with 32-bit values | ||
| 130 | if (end > start) { | ||
| 131 | u32 value = config.value_32bit; | ||
| 132 | size_t len = (end - start) / sizeof(u32); | ||
| 133 | for (size_t i = 0; i < len; ++i) | ||
| 134 | memcpy(&start[i * sizeof(u32)], &value, sizeof(u32)); | ||
| 135 | } | ||
| 136 | } else { | ||
| 137 | // fill with 16-bit values | ||
| 138 | u16 value_16bit = config.value_16bit.Value(); | ||
| 139 | for (u8* ptr = start; ptr < end; ptr += sizeof(u16)) | ||
| 140 | memcpy(ptr, &value_16bit, sizeof(u16)); | ||
| 141 | } | ||
| 142 | } | ||
| 143 | |||
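The new MemoryFill helper above writes the configured fill value at one of three widths: 3-byte R/G/B triplets when fill_24bit is set, raw 32-bit words when fill_32bit is set, and 16-bit halfwords otherwise. A minimal, self-contained sketch of the same loop over an ordinary byte range (the struct and function names here are illustrative stand-ins, not part of the commit):

#include <cstdint>
#include <cstring>

// Illustrative stand-in for the fill-related fields of Regs::MemoryFillConfig.
struct FillValue {
    bool fill_24bit = false;
    bool fill_32bit = false;
    uint8_t r = 0, g = 0, b = 0; // components used by the 24-bit path
    uint32_t value_32bit = 0;
    uint16_t value_16bit = 0;
};

// Fill [start, end) with the configured pattern, mirroring the three widths
// handled by MemoryFill: RGB triplets, 32-bit words, or 16-bit halfwords.
void FillRange(uint8_t* start, uint8_t* end, const FillValue& v) {
    if (v.fill_24bit) {
        for (uint8_t* ptr = start; ptr + 3 <= end; ptr += 3) {
            ptr[0] = v.r;
            ptr[1] = v.g;
            ptr[2] = v.b;
        }
    } else if (v.fill_32bit) {
        for (uint8_t* ptr = start; ptr + 4 <= end; ptr += 4)
            std::memcpy(ptr, &v.value_32bit, sizeof(v.value_32bit));
    } else {
        for (uint8_t* ptr = start; ptr + 2 <= end; ptr += 2)
            std::memcpy(ptr, &v.value_16bit, sizeof(v.value_16bit));
    }
}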
| 144 | static void DisplayTransfer(const Regs::DisplayTransferConfig& config) { | ||
| 145 | const PAddr src_addr = config.GetPhysicalInputAddress(); | ||
| 146 | const PAddr dst_addr = config.GetPhysicalOutputAddress(); | ||
| 147 | |||
| 148 | // TODO: do hwtest with these cases | ||
| 149 | if (!Memory::IsValidPhysicalAddress(src_addr)) { | ||
| 150 | LOG_CRITICAL(HW_GPU, "invalid input address 0x%08X", src_addr); | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | |||
| 154 | if (!Memory::IsValidPhysicalAddress(dst_addr)) { | ||
| 155 | LOG_CRITICAL(HW_GPU, "invalid output address 0x%08X", dst_addr); | ||
| 156 | return; | ||
| 157 | } | ||
| 158 | |||
| 159 | if (config.input_width == 0) { | ||
| 160 | LOG_CRITICAL(HW_GPU, "zero input width"); | ||
| 161 | return; | ||
| 162 | } | ||
| 163 | |||
| 164 | if (config.input_height == 0) { | ||
| 165 | LOG_CRITICAL(HW_GPU, "zero input height"); | ||
| 166 | return; | ||
| 167 | } | ||
| 168 | |||
| 169 | if (config.output_width == 0) { | ||
| 170 | LOG_CRITICAL(HW_GPU, "zero output width"); | ||
| 171 | return; | ||
| 172 | } | ||
| 173 | |||
| 174 | if (config.output_height == 0) { | ||
| 175 | LOG_CRITICAL(HW_GPU, "zero output height"); | ||
| 176 | return; | ||
| 177 | } | ||
| 178 | |||
| 179 | if (VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) | ||
| 180 | return; | ||
| 181 | |||
| 182 | u8* src_pointer = Memory::GetPhysicalPointer(src_addr); | ||
| 183 | u8* dst_pointer = Memory::GetPhysicalPointer(dst_addr); | ||
| 184 | |||
| 185 | if (config.scaling > config.ScaleXY) { | ||
| 186 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", | ||
| 187 | config.scaling.Value()); | ||
| 188 | UNIMPLEMENTED(); | ||
| 189 | return; | ||
| 190 | } | ||
| 191 | |||
| 192 | if (config.input_linear && config.scaling != config.NoScale) { | ||
| 193 | LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | ||
| 194 | UNIMPLEMENTED(); | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | |||
| 198 | int horizontal_scale = config.scaling != config.NoScale ? 1 : 0; | ||
| 199 | int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0; | ||
| 200 | |||
| 201 | u32 output_width = config.output_width >> horizontal_scale; | ||
| 202 | u32 output_height = config.output_height >> vertical_scale; | ||
| 203 | |||
| 204 | u32 input_size = | ||
| 205 | config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); | ||
| 206 | u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); | ||
| 207 | |||
| 208 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); | ||
| 209 | Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); | ||
| 210 | |||
| 211 | for (u32 y = 0; y < output_height; ++y) { | ||
| 212 | for (u32 x = 0; x < output_width; ++x) { | ||
| 213 | Math::Vec4<u8> src_color; | ||
| 214 | |||
| 215 | // Calculate the [x,y] position of the input image | ||
| 216 | // based on the current output position and the scale | ||
| 217 | u32 input_x = x << horizontal_scale; | ||
| 218 | u32 input_y = y << vertical_scale; | ||
| 219 | |||
| 220 | u32 output_y; | ||
| 221 | if (config.flip_vertically) { | ||
| 222 | // Flip the y value of the output data, | ||
| 223 | // we do this after calculating the [x,y] position of the input image | ||
| 224 | // to account for the scaling options. | ||
| 225 | output_y = output_height - y - 1; | ||
| 226 | } else { | ||
| 227 | output_y = y; | ||
| 228 | } | ||
| 229 | |||
| 230 | u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); | ||
| 231 | u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); | ||
| 232 | u32 src_offset; | ||
| 233 | u32 dst_offset; | ||
| 234 | |||
| 235 | if (config.input_linear) { | ||
| 236 | if (!config.dont_swizzle) { | ||
| 237 | // Interpret the input as linear and the output as tiled | ||
| 238 | u32 coarse_y = output_y & ~7; | ||
| 239 | u32 stride = output_width * dst_bytes_per_pixel; | ||
| 240 | |||
| 241 | src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||
| 242 | dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + | ||
| 243 | coarse_y * stride; | ||
| 244 | } else { | ||
| 245 | // Both input and output are linear | ||
| 246 | src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||
| 247 | dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; | ||
| 248 | } | ||
| 249 | } else { | ||
| 250 | if (!config.dont_swizzle) { | ||
| 251 | // Interpret the input as tiled and the output as linear | ||
| 252 | u32 coarse_y = input_y & ~7; | ||
| 253 | u32 stride = config.input_width * src_bytes_per_pixel; | ||
| 254 | |||
| 255 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + | ||
| 256 | coarse_y * stride; | ||
| 257 | dst_offset = (x + output_y * output_width) * dst_bytes_per_pixel; | ||
| 258 | } else { | ||
| 259 | // Both input and output are tiled | ||
| 260 | u32 out_coarse_y = output_y & ~7; | ||
| 261 | u32 out_stride = output_width * dst_bytes_per_pixel; | ||
| 262 | |||
| 263 | u32 in_coarse_y = input_y & ~7; | ||
| 264 | u32 in_stride = config.input_width * src_bytes_per_pixel; | ||
| 265 | |||
| 266 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + | ||
| 267 | in_coarse_y * in_stride; | ||
| 268 | dst_offset = VideoCore::GetMortonOffset(x, output_y, dst_bytes_per_pixel) + | ||
| 269 | out_coarse_y * out_stride; | ||
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 | const u8* src_pixel = src_pointer + src_offset; | ||
| 274 | src_color = DecodePixel(config.input_format, src_pixel); | ||
| 275 | if (config.scaling == config.ScaleX) { | ||
| 276 | Math::Vec4<u8> pixel = | ||
| 277 | DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); | ||
| 278 | src_color = ((src_color + pixel) / 2).Cast<u8>(); | ||
| 279 | } else if (config.scaling == config.ScaleXY) { | ||
| 280 | Math::Vec4<u8> pixel1 = | ||
| 281 | DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); | ||
| 282 | Math::Vec4<u8> pixel2 = | ||
| 283 | DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); | ||
| 284 | Math::Vec4<u8> pixel3 = | ||
| 285 | DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); | ||
| 286 | src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); | ||
| 287 | } | ||
| 288 | |||
| 289 | u8* dst_pixel = dst_pointer + dst_offset; | ||
| 290 | switch (config.output_format) { | ||
| 291 | case Regs::PixelFormat::RGBA8: | ||
| 292 | Color::EncodeRGBA8(src_color, dst_pixel); | ||
| 293 | break; | ||
| 294 | |||
| 295 | case Regs::PixelFormat::RGB8: | ||
| 296 | Color::EncodeRGB8(src_color, dst_pixel); | ||
| 297 | break; | ||
| 298 | |||
| 299 | case Regs::PixelFormat::RGB565: | ||
| 300 | Color::EncodeRGB565(src_color, dst_pixel); | ||
| 301 | break; | ||
| 302 | |||
| 303 | case Regs::PixelFormat::RGB5A1: | ||
| 304 | Color::EncodeRGB5A1(src_color, dst_pixel); | ||
| 305 | break; | ||
| 306 | |||
| 307 | case Regs::PixelFormat::RGBA4: | ||
| 308 | Color::EncodeRGBA4(src_color, dst_pixel); | ||
| 309 | break; | ||
| 310 | |||
| 311 | default: | ||
| 312 | LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", | ||
| 313 | config.output_format.Value()); | ||
| 314 | break; | ||
| 315 | } | ||
| 316 | } | ||
| 317 | } | ||
| 318 | } | ||
| 319 | |||
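Both swizzle branches above build the tiled offset as VideoCore::GetMortonOffset(x, y, bytes_per_pixel) plus a coarse_y * stride term: pixels live in 8x8 tiles with the low three bits of x and y Morton-interleaved, and whole rows of tiles laid out linearly. A rough, self-contained sketch of that addressing scheme, written under the assumption that GetMortonOffset covers the in-tile interleave plus the horizontal tile step (the real helper lives in video_core and is not part of this diff):

#include <cstdint>

// Z-order (Morton) index of a pixel inside an 8x8 tile: bits interleave as
// y2 x2 y1 x1 y0 x0, with x occupying the even bit positions.
static uint32_t MortonInterleave8x8(uint32_t x, uint32_t y) {
    uint32_t i = 0;
    for (uint32_t bit = 0; bit < 3; ++bit) {
        i |= ((x >> bit) & 1) << (2 * bit);
        i |= ((y >> bit) & 1) << (2 * bit + 1);
    }
    return i;
}

// Byte offset of pixel (x, y) in a tiled image `width` pixels wide. The
// coarse_y * stride term is the same one added explicitly in DisplayTransfer.
static uint32_t TiledOffset(uint32_t x, uint32_t y, uint32_t width, uint32_t bytes_per_pixel) {
    const uint32_t coarse_y = y & ~7u;                             // first row of the tile
    const uint32_t stride = width * bytes_per_pixel;               // one pixel row in bytes
    const uint32_t tile_column = (x & ~7u) * 8 * bytes_per_pixel;  // whole tiles to the left
    const uint32_t inside_tile = MortonInterleave8x8(x & 7, y & 7) * bytes_per_pixel;
    return coarse_y * stride + tile_column + inside_tile;
}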
| 320 | static void TextureCopy(const Regs::DisplayTransferConfig& config) { | ||
| 321 | const PAddr src_addr = config.GetPhysicalInputAddress(); | ||
| 322 | const PAddr dst_addr = config.GetPhysicalOutputAddress(); | ||
| 323 | |||
| 324 | // TODO: do hwtest with these cases | ||
| 325 | if (!Memory::IsValidPhysicalAddress(src_addr)) { | ||
| 326 | LOG_CRITICAL(HW_GPU, "invalid input address 0x%08X", src_addr); | ||
| 327 | return; | ||
| 328 | } | ||
| 329 | |||
| 330 | if (!Memory::IsValidPhysicalAddress(dst_addr)) { | ||
| 331 | LOG_CRITICAL(HW_GPU, "invalid output address 0x%08X", dst_addr); | ||
| 332 | return; | ||
| 333 | } | ||
| 334 | |||
| 335 | if (config.texture_copy.input_width == 0) { | ||
| 336 | LOG_CRITICAL(HW_GPU, "zero input width"); | ||
| 337 | return; | ||
| 338 | } | ||
| 339 | |||
| 340 | if (config.texture_copy.output_width == 0) { | ||
| 341 | LOG_CRITICAL(HW_GPU, "zero output width"); | ||
| 342 | return; | ||
| 343 | } | ||
| 344 | |||
| 345 | if (config.texture_copy.size == 0) { | ||
| 346 | LOG_CRITICAL(HW_GPU, "zero size"); | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | |||
| 350 | if (VideoCore::g_renderer->Rasterizer()->AccelerateTextureCopy(config)) | ||
| 351 | return; | ||
| 352 | |||
| 353 | u8* src_pointer = Memory::GetPhysicalPointer(src_addr); | ||
| 354 | u8* dst_pointer = Memory::GetPhysicalPointer(dst_addr); | ||
| 355 | |||
| 356 | u32 input_width = config.texture_copy.input_width * 16; | ||
| 357 | u32 input_gap = config.texture_copy.input_gap * 16; | ||
| 358 | u32 output_width = config.texture_copy.output_width * 16; | ||
| 359 | u32 output_gap = config.texture_copy.output_gap * 16; | ||
| 360 | |||
| 361 | size_t contiguous_input_size = | ||
| 362 | config.texture_copy.size / input_width * (input_width + input_gap); | ||
| 363 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), | ||
| 364 | static_cast<u32>(contiguous_input_size)); | ||
| 365 | |||
| 366 | size_t contiguous_output_size = | ||
| 367 | config.texture_copy.size / output_width * (output_width + output_gap); | ||
| 368 | Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), | ||
| 369 | static_cast<u32>(contiguous_output_size)); | ||
| 370 | |||
| 371 | u32 remaining_size = config.texture_copy.size; | ||
| 372 | u32 remaining_input = input_width; | ||
| 373 | u32 remaining_output = output_width; | ||
| 374 | while (remaining_size > 0) { | ||
| 375 | u32 copy_size = std::min({remaining_input, remaining_output, remaining_size}); | ||
| 376 | |||
| 377 | std::memcpy(dst_pointer, src_pointer, copy_size); | ||
| 378 | src_pointer += copy_size; | ||
| 379 | dst_pointer += copy_size; | ||
| 380 | |||
| 381 | remaining_input -= copy_size; | ||
| 382 | remaining_output -= copy_size; | ||
| 383 | remaining_size -= copy_size; | ||
| 384 | |||
| 385 | if (remaining_input == 0) { | ||
| 386 | remaining_input = input_width; | ||
| 387 | src_pointer += input_gap; | ||
| 388 | } | ||
| 389 | if (remaining_output == 0) { | ||
| 390 | remaining_output = output_width; | ||
| 391 | dst_pointer += output_gap; | ||
| 392 | } | ||
| 393 | } | ||
| 394 | } | ||
| 395 | |||
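TextureCopy above is a strided copy: config.texture_copy.size bytes of payload are read in runs of input_width bytes separated by input_gap bytes and written in runs of output_width bytes separated by output_gap bytes, with all four register fields scaled by 16. A standalone sketch of the same loop over plain pointers, useful for reasoning about the run/gap bookkeeping (illustrative only; it relies on the non-zero widths that TextureCopy already validates):

#include <algorithm>
#include <cstdint>
#include <cstring>

// Copy `size` payload bytes from src to dst, skipping input_gap bytes after
// every input_width bytes read and output_gap bytes after every output_width
// bytes written. Mirrors the copy loop in TextureCopy.
void GapCopy(const uint8_t* src, uint8_t* dst, uint32_t size,
             uint32_t input_width, uint32_t input_gap,
             uint32_t output_width, uint32_t output_gap) {
    uint32_t remaining_input = input_width;
    uint32_t remaining_output = output_width;
    while (size > 0) {
        const uint32_t copy_size = std::min({remaining_input, remaining_output, size});

        std::memcpy(dst, src, copy_size);
        src += copy_size;
        dst += copy_size;

        remaining_input -= copy_size;
        remaining_output -= copy_size;
        size -= copy_size;

        if (remaining_input == 0) {
            remaining_input = input_width;
            src += input_gap;
        }
        if (remaining_output == 0) {
            remaining_output = output_width;
            dst += output_gap;
        }
    }
}

With both gaps set to zero this degenerates to one contiguous copy of size bytes, which matches how the flushed region sizes are computed above (size / width * (width + gap)).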
| 83 | template <typename T> | 396 | template <typename T> |
| 84 | inline void Write(u32 addr, const T data) { | 397 | inline void Write(u32 addr, const T data) { |
| 85 | addr -= HW::VADDR_GPU; | 398 | addr -= HW::VADDR_GPU; |
| @@ -102,50 +415,13 @@ inline void Write(u32 addr, const T data) { |
| 102 | auto& config = g_regs.memory_fill_config[is_second_filler]; | 415 | auto& config = g_regs.memory_fill_config[is_second_filler]; |
| 103 | 416 | ||
| 104 | if (config.trigger) { | 417 | if (config.trigger) { |
| 105 | if (config.address_start) { // Some games pass invalid values here | 418 | MemoryFill(config); |
| 106 | u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); | 419 | LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), |
| 107 | u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); | 420 | config.GetEndAddress()); |
| 108 | |||
| 109 | // TODO: Consider always accelerating and returning vector of | ||
| 110 | // regions that the accelerated fill did not cover to | ||
| 111 | // reduce/eliminate the fill that the cpu has to do. | ||
| 112 | // This would also mean that the flush below is not needed. | ||
| 113 | // Fill should first flush all surfaces that touch but are | ||
| 114 | // not completely within the fill range. | ||
| 115 | // Then fill all completely covered surfaces, and return the | ||
| 116 | // regions that were between surfaces or within the touching | ||
| 117 | // ones for cpu to manually fill here. | ||
| 118 | if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { | ||
| 119 | Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), | ||
| 120 | config.GetEndAddress() - | ||
| 121 | config.GetStartAddress()); | ||
| 122 | |||
| 123 | if (config.fill_24bit) { | ||
| 124 | // fill with 24-bit values | ||
| 125 | for (u8* ptr = start; ptr < end; ptr += 3) { | ||
| 126 | ptr[0] = config.value_24bit_r; | ||
| 127 | ptr[1] = config.value_24bit_g; | ||
| 128 | ptr[2] = config.value_24bit_b; | ||
| 129 | } | ||
| 130 | } else if (config.fill_32bit) { | ||
| 131 | // fill with 32-bit values | ||
| 132 | if (end > start) { | ||
| 133 | u32 value = config.value_32bit; | ||
| 134 | size_t len = (end - start) / sizeof(u32); | ||
| 135 | for (size_t i = 0; i < len; ++i) | ||
| 136 | memcpy(&start[i * sizeof(u32)], &value, sizeof(u32)); | ||
| 137 | } | ||
| 138 | } else { | ||
| 139 | // fill with 16-bit values | ||
| 140 | u16 value_16bit = config.value_16bit.Value(); | ||
| 141 | for (u8* ptr = start; ptr < end; ptr += sizeof(u16)) | ||
| 142 | memcpy(ptr, &value_16bit, sizeof(u16)); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | |||
| 146 | LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), | ||
| 147 | config.GetEndAddress()); | ||
| 148 | 421 | ||
| 422 | // It seems that it won't signal interrupt if "address_start" is zero. | ||
| 423 | // TODO: hwtest this | ||
| 424 | if (config.GetStartAddress() != 0) { | ||
| 149 | if (!is_second_filler) { | 425 | if (!is_second_filler) { |
| 150 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); | 426 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); |
| 151 | } else { | 427 | } else { |
| @@ -171,207 +447,22 @@ inline void Write(u32 addr, const T data) { | |||
| 171 | Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, | 447 | Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, |
| 172 | nullptr); | 448 | nullptr); |
| 173 | 449 | ||
| 174 | if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { | 450 | if (config.is_texture_copy) { |
| 175 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); | 451 | TextureCopy(config); |
| 176 | u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); | 452 | LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> " |
| 177 | 453 | "0x%08X(%u+%u), flags 0x%08X", | |
| 178 | if (config.is_texture_copy) { | 454 | config.texture_copy.size, config.GetPhysicalInputAddress(), |
| 179 | u32 input_width = config.texture_copy.input_width * 16; | 455 | config.texture_copy.input_width * 16, config.texture_copy.input_gap * 16, |
| 180 | u32 input_gap = config.texture_copy.input_gap * 16; | 456 | config.GetPhysicalOutputAddress(), config.texture_copy.output_width * 16, |
| 181 | u32 output_width = config.texture_copy.output_width * 16; | 457 | config.texture_copy.output_gap * 16, config.flags); |
| 182 | u32 output_gap = config.texture_copy.output_gap * 16; | 458 | } else { |
| 183 | 459 | DisplayTransfer(config); | |
| 184 | size_t contiguous_input_size = | 460 | LOG_TRACE(HW_GPU, "DisplayTransfer: 0x%08x(%ux%u)-> " |
| 185 | config.texture_copy.size / input_width * (input_width + input_gap); | ||
| 186 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), | ||
| 187 | static_cast<u32>(contiguous_input_size)); | ||
| 188 | |||
| 189 | size_t contiguous_output_size = | ||
| 190 | config.texture_copy.size / output_width * (output_width + output_gap); | ||
| 191 | Memory::RasterizerFlushAndInvalidateRegion( | ||
| 192 | config.GetPhysicalOutputAddress(), | ||
| 193 | static_cast<u32>(contiguous_output_size)); | ||
| 194 | |||
| 195 | u32 remaining_size = config.texture_copy.size; | ||
| 196 | u32 remaining_input = input_width; | ||
| 197 | u32 remaining_output = output_width; | ||
| 198 | while (remaining_size > 0) { | ||
| 199 | u32 copy_size = | ||
| 200 | std::min({remaining_input, remaining_output, remaining_size}); | ||
| 201 | |||
| 202 | std::memcpy(dst_pointer, src_pointer, copy_size); | ||
| 203 | src_pointer += copy_size; | ||
| 204 | dst_pointer += copy_size; | ||
| 205 | |||
| 206 | remaining_input -= copy_size; | ||
| 207 | remaining_output -= copy_size; | ||
| 208 | remaining_size -= copy_size; | ||
| 209 | |||
| 210 | if (remaining_input == 0) { | ||
| 211 | remaining_input = input_width; | ||
| 212 | src_pointer += input_gap; | ||
| 213 | } | ||
| 214 | if (remaining_output == 0) { | ||
| 215 | remaining_output = output_width; | ||
| 216 | dst_pointer += output_gap; | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | LOG_TRACE( | ||
| 221 | HW_GPU, | ||
| 222 | "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", | ||
| 223 | config.texture_copy.size, config.GetPhysicalInputAddress(), input_width, | ||
| 224 | input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap, | ||
| 225 | config.flags); | ||
| 226 | |||
| 227 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | ||
| 228 | break; | ||
| 229 | } | ||
| 230 | |||
| 231 | if (config.scaling > config.ScaleXY) { | ||
| 232 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", | ||
| 233 | config.scaling.Value()); | ||
| 234 | UNIMPLEMENTED(); | ||
| 235 | break; | ||
| 236 | } | ||
| 237 | |||
| 238 | if (config.input_linear && config.scaling != config.NoScale) { | ||
| 239 | LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); | ||
| 240 | UNIMPLEMENTED(); | ||
| 241 | break; | ||
| 242 | } | ||
| 243 | |||
| 244 | int horizontal_scale = config.scaling != config.NoScale ? 1 : 0; | ||
| 245 | int vertical_scale = config.scaling == config.ScaleXY ? 1 : 0; | ||
| 246 | |||
| 247 | u32 output_width = config.output_width >> horizontal_scale; | ||
| 248 | u32 output_height = config.output_height >> vertical_scale; | ||
| 249 | |||
| 250 | u32 input_size = config.input_width * config.input_height * | ||
| 251 | GPU::Regs::BytesPerPixel(config.input_format); | ||
| 252 | u32 output_size = | ||
| 253 | output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); | ||
| 254 | |||
| 255 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); | ||
| 256 | Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), | ||
| 257 | output_size); | ||
| 258 | |||
| 259 | for (u32 y = 0; y < output_height; ++y) { | ||
| 260 | for (u32 x = 0; x < output_width; ++x) { | ||
| 261 | Math::Vec4<u8> src_color; | ||
| 262 | |||
| 263 | // Calculate the [x,y] position of the input image | ||
| 264 | // based on the current output position and the scale | ||
| 265 | u32 input_x = x << horizontal_scale; | ||
| 266 | u32 input_y = y << vertical_scale; | ||
| 267 | |||
| 268 | if (config.flip_vertically) { | ||
| 269 | // Flip the y value of the output data, | ||
| 270 | // we do this after calculating the [x,y] position of the input image | ||
| 271 | // to account for the scaling options. | ||
| 272 | y = output_height - y - 1; | ||
| 273 | } | ||
| 274 | |||
| 275 | u32 dst_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.output_format); | ||
| 276 | u32 src_bytes_per_pixel = GPU::Regs::BytesPerPixel(config.input_format); | ||
| 277 | u32 src_offset; | ||
| 278 | u32 dst_offset; | ||
| 279 | |||
| 280 | if (config.input_linear) { | ||
| 281 | if (!config.dont_swizzle) { | ||
| 282 | // Interpret the input as linear and the output as tiled | ||
| 283 | u32 coarse_y = y & ~7; | ||
| 284 | u32 stride = output_width * dst_bytes_per_pixel; | ||
| 285 | |||
| 286 | src_offset = | ||
| 287 | (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||
| 288 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + | ||
| 289 | coarse_y * stride; | ||
| 290 | } else { | ||
| 291 | // Both input and output are linear | ||
| 292 | src_offset = | ||
| 293 | (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||
| 294 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; | ||
| 295 | } | ||
| 296 | } else { | ||
| 297 | if (!config.dont_swizzle) { | ||
| 298 | // Interpret the input as tiled and the output as linear | ||
| 299 | u32 coarse_y = input_y & ~7; | ||
| 300 | u32 stride = config.input_width * src_bytes_per_pixel; | ||
| 301 | |||
| 302 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, | ||
| 303 | src_bytes_per_pixel) + | ||
| 304 | coarse_y * stride; | ||
| 305 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; | ||
| 306 | } else { | ||
| 307 | // Both input and output are tiled | ||
| 308 | u32 out_coarse_y = y & ~7; | ||
| 309 | u32 out_stride = output_width * dst_bytes_per_pixel; | ||
| 310 | |||
| 311 | u32 in_coarse_y = input_y & ~7; | ||
| 312 | u32 in_stride = config.input_width * src_bytes_per_pixel; | ||
| 313 | |||
| 314 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, | ||
| 315 | src_bytes_per_pixel) + | ||
| 316 | in_coarse_y * in_stride; | ||
| 317 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + | ||
| 318 | out_coarse_y * out_stride; | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 322 | const u8* src_pixel = src_pointer + src_offset; | ||
| 323 | src_color = DecodePixel(config.input_format, src_pixel); | ||
| 324 | if (config.scaling == config.ScaleX) { | ||
| 325 | Math::Vec4<u8> pixel = | ||
| 326 | DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); | ||
| 327 | src_color = ((src_color + pixel) / 2).Cast<u8>(); | ||
| 328 | } else if (config.scaling == config.ScaleXY) { | ||
| 329 | Math::Vec4<u8> pixel1 = DecodePixel( | ||
| 330 | config.input_format, src_pixel + 1 * src_bytes_per_pixel); | ||
| 331 | Math::Vec4<u8> pixel2 = DecodePixel( | ||
| 332 | config.input_format, src_pixel + 2 * src_bytes_per_pixel); | ||
| 333 | Math::Vec4<u8> pixel3 = DecodePixel( | ||
| 334 | config.input_format, src_pixel + 3 * src_bytes_per_pixel); | ||
| 335 | src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); | ||
| 336 | } | ||
| 337 | |||
| 338 | u8* dst_pixel = dst_pointer + dst_offset; | ||
| 339 | switch (config.output_format) { | ||
| 340 | case Regs::PixelFormat::RGBA8: | ||
| 341 | Color::EncodeRGBA8(src_color, dst_pixel); | ||
| 342 | break; | ||
| 343 | |||
| 344 | case Regs::PixelFormat::RGB8: | ||
| 345 | Color::EncodeRGB8(src_color, dst_pixel); | ||
| 346 | break; | ||
| 347 | |||
| 348 | case Regs::PixelFormat::RGB565: | ||
| 349 | Color::EncodeRGB565(src_color, dst_pixel); | ||
| 350 | break; | ||
| 351 | |||
| 352 | case Regs::PixelFormat::RGB5A1: | ||
| 353 | Color::EncodeRGB5A1(src_color, dst_pixel); | ||
| 354 | break; | ||
| 355 | |||
| 356 | case Regs::PixelFormat::RGBA4: | ||
| 357 | Color::EncodeRGBA4(src_color, dst_pixel); | ||
| 358 | break; | ||
| 359 | |||
| 360 | default: | ||
| 361 | LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", | ||
| 362 | config.output_format.Value()); | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> " | ||
| 369 | "0x%08x(%ux%u), dst format %x, flags 0x%08X", | 461 | "0x%08x(%ux%u), dst format %x, flags 0x%08X", |
| 370 | config.output_height * output_width * | ||
| 371 | GPU::Regs::BytesPerPixel(config.output_format), | ||
| 372 | config.GetPhysicalInputAddress(), config.input_width.Value(), | 462 | config.GetPhysicalInputAddress(), config.input_width.Value(), |
| 373 | config.input_height.Value(), config.GetPhysicalOutputAddress(), | 463 | config.input_height.Value(), config.GetPhysicalOutputAddress(), |
| 374 | output_width, output_height, config.output_format.Value(), config.flags); | 464 | config.output_width.Value(), config.output_height.Value(), |
| 465 | config.output_format.Value(), config.flags); | ||
| 375 | } | 466 | } |
| 376 | 467 | ||
| 377 | g_regs.display_transfer_config.trigger = 0; | 468 | g_regs.display_transfer_config.trigger = 0; |
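With the three helpers factored out, the register write handler is reduced to dispatch: pick the helper, log, reset the trigger, and raise the matching GSP interrupt (PSC0/PSC1 for fills, now skipped when the start address is zero; PPF for transfers, as before). A condensed, self-contained sketch of that shape, using stand-in types and a stubbed interrupt call rather than the real g_regs/GSP_GPU plumbing:

#include <cstdint>
#include <cstdio>

// Stand-ins for the real config structs and helpers in gpu.cpp.
struct MemoryFillConfig { bool trigger; uint32_t start_address; };
struct DisplayTransferConfig { bool trigger; bool is_texture_copy; };

static void MemoryFill(const MemoryFillConfig&) {}
static void TextureCopy(const DisplayTransferConfig&) {}
static void DisplayTransfer(const DisplayTransferConfig&) {}
static void SignalInterrupt(const char* id) { std::printf("signal %s\n", id); }

void OnMemoryFillTrigger(MemoryFillConfig& config, bool is_second_filler) {
    if (!config.trigger)
        return;
    MemoryFill(config); // validates addresses and falls back to the CPU loop itself
    // Hardware appears not to signal the interrupt for a zero start address
    // (still marked as a hwtest TODO in the hunk above).
    if (config.start_address != 0)
        SignalInterrupt(is_second_filler ? "PSC1" : "PSC0");
    config.trigger = false;
}

void OnDisplayTransferTrigger(DisplayTransferConfig& config) {
    if (!config.trigger)
        return;
    if (config.is_texture_copy)
        TextureCopy(config);
    else
        DisplayTransfer(config);
    config.trigger = false;
    SignalInterrupt("PPF");
}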
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index df029d655..64c388374 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
| @@ -251,6 +251,9 @@ bool IsValidVirtualAddress(const VAddr vaddr) { | |||
| 251 | if (page_pointer) | 251 | if (page_pointer) |
| 252 | return true; | 252 | return true; |
| 253 | 253 | ||
| 254 | if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) | ||
| 255 | return true; | ||
| 256 | |||
| 254 | if (current_page_table->attributes[vaddr >> PAGE_BITS] != PageType::Special) | 257 | if (current_page_table->attributes[vaddr >> PAGE_BITS] != PageType::Special) |
| 255 | return false; | 258 | return false; |
| 256 | 259 | ||
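The memory.cpp hunk makes IsValidVirtualAddress() accept pages whose host pointer has been taken over by the rasterizer cache: such pages have a null page pointer but still back real guest memory, so the attribute check has to come before the Special/MMIO fallback. A reduced sketch of the resulting check order (the page-table layout and PageType values here are simplified stand-ins for the real ones in memory.cpp):

#include <cstdint>
#include <vector>

enum class PageType { Unmapped, Memory, RasterizerCachedMemory, Special };

constexpr uint32_t PAGE_BITS = 12;
constexpr uint32_t NUM_PAGES = 1u << (32 - PAGE_BITS);

struct PageTable {
    std::vector<uint8_t*> pointers = std::vector<uint8_t*>(NUM_PAGES, nullptr);
    std::vector<PageType> attributes = std::vector<PageType>(NUM_PAGES, PageType::Unmapped);
};

bool IsValidVirtualAddress(const PageTable& table, uint32_t vaddr) {
    // A directly mapped page is always valid.
    if (table.pointers[vaddr >> PAGE_BITS] != nullptr)
        return true;
    // Pages currently owned by the rasterizer cache have no host pointer but
    // still back valid guest memory (this is the case added by the diff).
    if (table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory)
        return true;
    // Anything else is valid only if an MMIO ("Special") handler claims it;
    // that handler lookup is elided in this sketch.
    return table.attributes[vaddr >> PAGE_BITS] == PageType::Special;
}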
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 71df233b5..8ef7e74c7 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
| @@ -42,11 +42,16 @@ public: | |||
| 42 | /// and invalidated | 42 | /// and invalidated |
| 43 | virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; | 43 | virtual void FlushAndInvalidateRegion(PAddr addr, u32 size) = 0; |
| 44 | 44 | ||
| 45 | /// Attempt to use a faster method to perform a display transfer | 45 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0 |
| 46 | virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { | 46 | virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { |
| 47 | return false; | 47 | return false; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1 | ||
| 51 | virtual bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { | ||
| 52 | return false; | ||
| 53 | } | ||
| 54 | |||
| 50 | /// Attempt to use a faster method to fill a region | 55 | /// Attempt to use a faster method to fill a region |
| 51 | virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { | 56 | virtual bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { |
| 52 | return false; | 57 | return false; |
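The interface now exposes a separate hook for the is_texture_copy path, with a default that keeps the CPU fallback. A sketch of how a backend would opt in (the derived class below is hypothetical; the RasterizerOpenGL override added in this commit simply returns false for now):

namespace GPU { namespace Regs { struct DisplayTransferConfig; } }

// Shape of the relevant part of rasterizer_interface.h after this change.
class RasterizerInterface {
public:
    virtual ~RasterizerInterface() = default;
    /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 0
    virtual bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig&) {
        return false;
    }
    /// Attempt to use a faster method to perform a display transfer with is_texture_copy = 1
    virtual bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig&) {
        return false;
    }
};

// Hypothetical backend: return true only when the whole copy was performed on
// the GPU; returning false sends gpu.cpp down the memcpy loop in TextureCopy().
class MyRasterizer : public RasterizerInterface {
public:
    bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override {
        (void)config; // a real implementation would consult its surface cache here
        return false;
    }
};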
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 62c9af28c..7cc3b407a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
| @@ -709,11 +709,6 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe | |||
| 709 | using PixelFormat = CachedSurface::PixelFormat; | 709 | using PixelFormat = CachedSurface::PixelFormat; |
| 710 | using SurfaceType = CachedSurface::SurfaceType; | 710 | using SurfaceType = CachedSurface::SurfaceType; |
| 711 | 711 | ||
| 712 | if (config.is_texture_copy) { | ||
| 713 | // TODO(tfarley): Try to hardware accelerate this | ||
| 714 | return false; | ||
| 715 | } | ||
| 716 | |||
| 717 | CachedSurface src_params; | 712 | CachedSurface src_params; |
| 718 | src_params.addr = config.GetPhysicalInputAddress(); | 713 | src_params.addr = config.GetPhysicalInputAddress(); |
| 719 | src_params.width = config.output_width; | 714 | src_params.width = config.output_width; |
| @@ -768,6 +763,11 @@ bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransfe | |||
| 768 | return true; | 763 | return true; |
| 769 | } | 764 | } |
| 770 | 765 | ||
| 766 | bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) { | ||
| 767 | // TODO(tfarley): Try to hardware accelerate this | ||
| 768 | return false; | ||
| 769 | } | ||
| 770 | |||
| 771 | bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { | 771 | bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { |
| 772 | using PixelFormat = CachedSurface::PixelFormat; | 772 | using PixelFormat = CachedSurface::PixelFormat; |
| 773 | using SurfaceType = CachedSurface::SurfaceType; | 773 | using SurfaceType = CachedSurface::SurfaceType; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7b4ce2ac5..e1a9cb361 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
| @@ -238,6 +238,7 @@ public: | |||
| 238 | void FlushRegion(PAddr addr, u32 size) override; | 238 | void FlushRegion(PAddr addr, u32 size) override; |
| 239 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override; | 239 | void FlushAndInvalidateRegion(PAddr addr, u32 size) override; |
| 240 | bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; | 240 | bool AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) override; |
| 241 | bool AccelerateTextureCopy(const GPU::Regs::DisplayTransferConfig& config) override; | ||
| 241 | bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; | 242 | bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config) override; |
| 242 | bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, | 243 | bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config, PAddr framebuffer_addr, |
| 243 | u32 pixel_stride, ScreenInfo& screen_info) override; | 244 | u32 pixel_stride, ScreenInfo& screen_info) override; |