diff options
Diffstat (limited to 'src/core/hw')
| -rw-r--r-- | src/core/hw/gpu.cpp | 142 | ||||
| -rw-r--r-- | src/core/hw/gpu.h | 84 | ||||
| -rw-r--r-- | src/core/hw/hw.cpp | 13 | ||||
| -rw-r--r-- | src/core/hw/hw.h | 42 | ||||
| -rw-r--r-- | src/core/hw/lcd.cpp | 13 | ||||
| -rw-r--r-- | src/core/hw/lcd.h | 19 | ||||
| -rw-r--r-- | src/core/hw/y2r.cpp | 77 | ||||
| -rw-r--r-- | src/core/hw/y2r.h | 3 |
8 files changed, 209 insertions, 184 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index a4dfb7e43..bf2c066f4 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp | |||
| @@ -12,15 +12,15 @@ | |||
| 12 | #include "common/microprofile.h" | 12 | #include "common/microprofile.h" |
| 13 | #include "common/vector_math.h" | 13 | #include "common/vector_math.h" |
| 14 | 14 | ||
| 15 | #include "core/settings.h" | ||
| 16 | #include "core/memory.h" | ||
| 17 | #include "core/core_timing.h" | 15 | #include "core/core_timing.h" |
| 16 | #include "core/memory.h" | ||
| 17 | #include "core/settings.h" | ||
| 18 | 18 | ||
| 19 | #include "core/hle/service/gsp_gpu.h" | 19 | #include "core/hle/service/gsp_gpu.h" |
| 20 | #include "core/hle/service/hid/hid.h" | 20 | #include "core/hle/service/hid/hid.h" |
| 21 | 21 | ||
| 22 | #include "core/hw/hw.h" | ||
| 23 | #include "core/hw/gpu.h" | 22 | #include "core/hw/gpu.h" |
| 23 | #include "core/hw/hw.h" | ||
| 24 | 24 | ||
| 25 | #include "core/tracer/recorder.h" | 25 | #include "core/tracer/recorder.h" |
| 26 | 26 | ||
| @@ -32,7 +32,6 @@ | |||
| 32 | 32 | ||
| 33 | #include "video_core/debug_utils/debug_utils.h" | 33 | #include "video_core/debug_utils/debug_utils.h" |
| 34 | 34 | ||
| 35 | |||
| 36 | namespace GPU { | 35 | namespace GPU { |
| 37 | 36 | ||
| 38 | Regs g_regs; | 37 | Regs g_regs; |
| @@ -49,7 +48,7 @@ static u64 frame_count; | |||
| 49 | static bool last_skip_frame; | 48 | static bool last_skip_frame; |
| 50 | 49 | ||
| 51 | template <typename T> | 50 | template <typename T> |
| 52 | inline void Read(T &var, const u32 raw_addr) { | 51 | inline void Read(T& var, const u32 raw_addr) { |
| 53 | u32 addr = raw_addr - HW::VADDR_GPU; | 52 | u32 addr = raw_addr - HW::VADDR_GPU; |
| 54 | u32 index = addr / 4; | 53 | u32 index = addr / 4; |
| 55 | 54 | ||
| @@ -105,8 +104,7 @@ inline void Write(u32 addr, const T data) { | |||
| 105 | 104 | ||
| 106 | // Memory fills are triggered once the fill value is written. | 105 | // Memory fills are triggered once the fill value is written. |
| 107 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): | 106 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): |
| 108 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): | 107 | case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): { |
| 109 | { | ||
| 110 | const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); | 108 | const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); |
| 111 | auto& config = g_regs.memory_fill_config[is_second_filler]; | 109 | auto& config = g_regs.memory_fill_config[is_second_filler]; |
| 112 | 110 | ||
| @@ -125,7 +123,9 @@ inline void Write(u32 addr, const T data) { | |||
| 125 | // regions that were between surfaces or within the touching | 123 | // regions that were between surfaces or within the touching |
| 126 | // ones for cpu to manually fill here. | 124 | // ones for cpu to manually fill here. |
| 127 | if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { | 125 | if (!VideoCore::g_renderer->Rasterizer()->AccelerateFill(config)) { |
| 128 | Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); | 126 | Memory::RasterizerFlushAndInvalidateRegion(config.GetStartAddress(), |
| 127 | config.GetEndAddress() - | ||
| 128 | config.GetStartAddress()); | ||
| 129 | 129 | ||
| 130 | if (config.fill_24bit) { | 130 | if (config.fill_24bit) { |
| 131 | // fill with 24-bit values | 131 | // fill with 24-bit values |
| @@ -150,7 +150,8 @@ inline void Write(u32 addr, const T data) { | |||
| 150 | } | 150 | } |
| 151 | } | 151 | } |
| 152 | 152 | ||
| 153 | LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); | 153 | LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), |
| 154 | config.GetEndAddress()); | ||
| 154 | 155 | ||
| 155 | if (!is_second_filler) { | 156 | if (!is_second_filler) { |
| 156 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); | 157 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); |
| @@ -167,15 +168,15 @@ inline void Write(u32 addr, const T data) { | |||
| 167 | break; | 168 | break; |
| 168 | } | 169 | } |
| 169 | 170 | ||
| 170 | case GPU_REG_INDEX(display_transfer_config.trigger): | 171 | case GPU_REG_INDEX(display_transfer_config.trigger): { |
| 171 | { | ||
| 172 | MICROPROFILE_SCOPE(GPU_DisplayTransfer); | 172 | MICROPROFILE_SCOPE(GPU_DisplayTransfer); |
| 173 | 173 | ||
| 174 | const auto& config = g_regs.display_transfer_config; | 174 | const auto& config = g_regs.display_transfer_config; |
| 175 | if (config.trigger & 1) { | 175 | if (config.trigger & 1) { |
| 176 | 176 | ||
| 177 | if (Pica::g_debug_context) | 177 | if (Pica::g_debug_context) |
| 178 | Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); | 178 | Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, |
| 179 | nullptr); | ||
| 179 | 180 | ||
| 180 | if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { | 181 | if (!VideoCore::g_renderer->Rasterizer()->AccelerateDisplayTransfer(config)) { |
| 181 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); | 182 | u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); |
| @@ -187,17 +188,23 @@ inline void Write(u32 addr, const T data) { | |||
| 187 | u32 output_width = config.texture_copy.output_width * 16; | 188 | u32 output_width = config.texture_copy.output_width * 16; |
| 188 | u32 output_gap = config.texture_copy.output_gap * 16; | 189 | u32 output_gap = config.texture_copy.output_gap * 16; |
| 189 | 190 | ||
| 190 | size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); | 191 | size_t contiguous_input_size = |
| 191 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast<u32>(contiguous_input_size)); | 192 | config.texture_copy.size / input_width * (input_width + input_gap); |
| 193 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), | ||
| 194 | static_cast<u32>(contiguous_input_size)); | ||
| 192 | 195 | ||
| 193 | size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); | 196 | size_t contiguous_output_size = |
| 194 | Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast<u32>(contiguous_output_size)); | 197 | config.texture_copy.size / output_width * (output_width + output_gap); |
| 198 | Memory::RasterizerFlushAndInvalidateRegion( | ||
| 199 | config.GetPhysicalOutputAddress(), | ||
| 200 | static_cast<u32>(contiguous_output_size)); | ||
| 195 | 201 | ||
| 196 | u32 remaining_size = config.texture_copy.size; | 202 | u32 remaining_size = config.texture_copy.size; |
| 197 | u32 remaining_input = input_width; | 203 | u32 remaining_input = input_width; |
| 198 | u32 remaining_output = output_width; | 204 | u32 remaining_output = output_width; |
| 199 | while (remaining_size > 0) { | 205 | while (remaining_size > 0) { |
| 200 | u32 copy_size = std::min({ remaining_input, remaining_output, remaining_size }); | 206 | u32 copy_size = |
| 207 | std::min({remaining_input, remaining_output, remaining_size}); | ||
| 201 | 208 | ||
| 202 | std::memcpy(dst_pointer, src_pointer, copy_size); | 209 | std::memcpy(dst_pointer, src_pointer, copy_size); |
| 203 | src_pointer += copy_size; | 210 | src_pointer += copy_size; |
| @@ -217,10 +224,11 @@ inline void Write(u32 addr, const T data) { | |||
| 217 | } | 224 | } |
| 218 | } | 225 | } |
| 219 | 226 | ||
| 220 | LOG_TRACE(HW_GPU, "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", | 227 | LOG_TRACE( |
| 221 | config.texture_copy.size, | 228 | HW_GPU, |
| 222 | config.GetPhysicalInputAddress(), input_width, input_gap, | 229 | "TextureCopy: 0x%X bytes from 0x%08X(%u+%u)-> 0x%08X(%u+%u), flags 0x%08X", |
| 223 | config.GetPhysicalOutputAddress(), output_width, output_gap, | 230 | config.texture_copy.size, config.GetPhysicalInputAddress(), input_width, |
| 231 | input_gap, config.GetPhysicalOutputAddress(), output_width, output_gap, | ||
| 224 | config.flags); | 232 | config.flags); |
| 225 | 233 | ||
| 226 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); | 234 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); |
| @@ -228,7 +236,8 @@ inline void Write(u32 addr, const T data) { | |||
| 228 | } | 236 | } |
| 229 | 237 | ||
| 230 | if (config.scaling > config.ScaleXY) { | 238 | if (config.scaling > config.ScaleXY) { |
| 231 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", config.scaling.Value()); | 239 | LOG_CRITICAL(HW_GPU, "Unimplemented display transfer scaling mode %u", |
| 240 | config.scaling.Value()); | ||
| 232 | UNIMPLEMENTED(); | 241 | UNIMPLEMENTED(); |
| 233 | break; | 242 | break; |
| 234 | } | 243 | } |
| @@ -245,11 +254,14 @@ inline void Write(u32 addr, const T data) { | |||
| 245 | u32 output_width = config.output_width >> horizontal_scale; | 254 | u32 output_width = config.output_width >> horizontal_scale; |
| 246 | u32 output_height = config.output_height >> vertical_scale; | 255 | u32 output_height = config.output_height >> vertical_scale; |
| 247 | 256 | ||
| 248 | u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); | 257 | u32 input_size = config.input_width * config.input_height * |
| 249 | u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); | 258 | GPU::Regs::BytesPerPixel(config.input_format); |
| 259 | u32 output_size = | ||
| 260 | output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); | ||
| 250 | 261 | ||
| 251 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); | 262 | Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), input_size); |
| 252 | Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), output_size); | 263 | Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), |
| 264 | output_size); | ||
| 253 | 265 | ||
| 254 | for (u32 y = 0; y < output_height; ++y) { | 266 | for (u32 y = 0; y < output_height; ++y) { |
| 255 | for (u32 x = 0; x < output_width; ++x) { | 267 | for (u32 x = 0; x < output_width; ++x) { |
| @@ -278,11 +290,14 @@ inline void Write(u32 addr, const T data) { | |||
| 278 | u32 coarse_y = y & ~7; | 290 | u32 coarse_y = y & ~7; |
| 279 | u32 stride = output_width * dst_bytes_per_pixel; | 291 | u32 stride = output_width * dst_bytes_per_pixel; |
| 280 | 292 | ||
| 281 | src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; | 293 | src_offset = |
| 282 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; | 294 | (input_x + input_y * config.input_width) * src_bytes_per_pixel; |
| 295 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + | ||
| 296 | coarse_y * stride; | ||
| 283 | } else { | 297 | } else { |
| 284 | // Both input and output are linear | 298 | // Both input and output are linear |
| 285 | src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; | 299 | src_offset = |
| 300 | (input_x + input_y * config.input_width) * src_bytes_per_pixel; | ||
| 286 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; | 301 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; |
| 287 | } | 302 | } |
| 288 | } else { | 303 | } else { |
| @@ -291,7 +306,9 @@ inline void Write(u32 addr, const T data) { | |||
| 291 | u32 coarse_y = input_y & ~7; | 306 | u32 coarse_y = input_y & ~7; |
| 292 | u32 stride = config.input_width * src_bytes_per_pixel; | 307 | u32 stride = config.input_width * src_bytes_per_pixel; |
| 293 | 308 | ||
| 294 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; | 309 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, |
| 310 | src_bytes_per_pixel) + | ||
| 311 | coarse_y * stride; | ||
| 295 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; | 312 | dst_offset = (x + y * output_width) * dst_bytes_per_pixel; |
| 296 | } else { | 313 | } else { |
| 297 | // Both input and output are tiled | 314 | // Both input and output are tiled |
| @@ -301,20 +318,27 @@ inline void Write(u32 addr, const T data) { | |||
| 301 | u32 in_coarse_y = input_y & ~7; | 318 | u32 in_coarse_y = input_y & ~7; |
| 302 | u32 in_stride = config.input_width * src_bytes_per_pixel; | 319 | u32 in_stride = config.input_width * src_bytes_per_pixel; |
| 303 | 320 | ||
| 304 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; | 321 | src_offset = VideoCore::GetMortonOffset(input_x, input_y, |
| 305 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; | 322 | src_bytes_per_pixel) + |
| 323 | in_coarse_y * in_stride; | ||
| 324 | dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + | ||
| 325 | out_coarse_y * out_stride; | ||
| 306 | } | 326 | } |
| 307 | } | 327 | } |
| 308 | 328 | ||
| 309 | const u8* src_pixel = src_pointer + src_offset; | 329 | const u8* src_pixel = src_pointer + src_offset; |
| 310 | src_color = DecodePixel(config.input_format, src_pixel); | 330 | src_color = DecodePixel(config.input_format, src_pixel); |
| 311 | if (config.scaling == config.ScaleX) { | 331 | if (config.scaling == config.ScaleX) { |
| 312 | Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); | 332 | Math::Vec4<u8> pixel = |
| 333 | DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); | ||
| 313 | src_color = ((src_color + pixel) / 2).Cast<u8>(); | 334 | src_color = ((src_color + pixel) / 2).Cast<u8>(); |
| 314 | } else if (config.scaling == config.ScaleXY) { | 335 | } else if (config.scaling == config.ScaleXY) { |
| 315 | Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); | 336 | Math::Vec4<u8> pixel1 = DecodePixel( |
| 316 | Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); | 337 | config.input_format, src_pixel + 1 * src_bytes_per_pixel); |
| 317 | Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); | 338 | Math::Vec4<u8> pixel2 = DecodePixel( |
| 339 | config.input_format, src_pixel + 2 * src_bytes_per_pixel); | ||
| 340 | Math::Vec4<u8> pixel3 = DecodePixel( | ||
| 341 | config.input_format, src_pixel + 3 * src_bytes_per_pixel); | ||
| 318 | src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); | 342 | src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); |
| 319 | } | 343 | } |
| 320 | 344 | ||
| @@ -341,17 +365,20 @@ inline void Write(u32 addr, const T data) { | |||
| 341 | break; | 365 | break; |
| 342 | 366 | ||
| 343 | default: | 367 | default: |
| 344 | LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); | 368 | LOG_ERROR(HW_GPU, "Unknown destination framebuffer format %x", |
| 369 | config.output_format.Value()); | ||
| 345 | break; | 370 | break; |
| 346 | } | 371 | } |
| 347 | } | 372 | } |
| 348 | } | 373 | } |
| 349 | 374 | ||
| 350 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x, flags 0x%08X", | 375 | LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> " |
| 351 | config.output_height * output_width * GPU::Regs::BytesPerPixel(config.output_format), | 376 | "0x%08x(%ux%u), dst format %x, flags 0x%08X", |
| 352 | config.GetPhysicalInputAddress(), config.input_width.Value(), config.input_height.Value(), | 377 | config.output_height * output_width * |
| 353 | config.GetPhysicalOutputAddress(), output_width, output_height, | 378 | GPU::Regs::BytesPerPixel(config.output_format), |
| 354 | config.output_format.Value(), config.flags); | 379 | config.GetPhysicalInputAddress(), config.input_width.Value(), |
| 380 | config.input_height.Value(), config.GetPhysicalOutputAddress(), | ||
| 381 | output_width, output_height, config.output_format.Value(), config.flags); | ||
| 355 | } | 382 | } |
| 356 | 383 | ||
| 357 | g_regs.display_transfer_config.trigger = 0; | 384 | g_regs.display_transfer_config.trigger = 0; |
| @@ -361,17 +388,16 @@ inline void Write(u32 addr, const T data) { | |||
| 361 | } | 388 | } |
| 362 | 389 | ||
| 363 | // Seems like writing to this register triggers processing | 390 | // Seems like writing to this register triggers processing |
| 364 | case GPU_REG_INDEX(command_processor_config.trigger): | 391 | case GPU_REG_INDEX(command_processor_config.trigger): { |
| 365 | { | ||
| 366 | const auto& config = g_regs.command_processor_config; | 392 | const auto& config = g_regs.command_processor_config; |
| 367 | if (config.trigger & 1) | 393 | if (config.trigger & 1) { |
| 368 | { | ||
| 369 | MICROPROFILE_SCOPE(GPU_CmdlistProcessing); | 394 | MICROPROFILE_SCOPE(GPU_CmdlistProcessing); |
| 370 | 395 | ||
| 371 | u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); | 396 | u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); |
| 372 | 397 | ||
| 373 | if (Pica::g_debug_context && Pica::g_debug_context->recorder) { | 398 | if (Pica::g_debug_context && Pica::g_debug_context->recorder) { |
| 374 | Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); | 399 | Pica::g_debug_context->recorder->MemoryAccessed( |
| 400 | (u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); | ||
| 375 | } | 401 | } |
| 376 | 402 | ||
| 377 | Pica::CommandProcessor::ProcessCommandList(buffer, config.size); | 403 | Pica::CommandProcessor::ProcessCommandList(buffer, config.size); |
| @@ -389,16 +415,17 @@ inline void Write(u32 addr, const T data) { | |||
| 389 | // This is happening *after* handling the write to make sure we properly catch all memory reads. | 415 | // This is happening *after* handling the write to make sure we properly catch all memory reads. |
| 390 | if (Pica::g_debug_context && Pica::g_debug_context->recorder) { | 416 | if (Pica::g_debug_context && Pica::g_debug_context->recorder) { |
| 391 | // addr + GPU VBase - IO VBase + IO PBase | 417 | // addr + GPU VBase - IO VBase + IO PBase |
| 392 | Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); | 418 | Pica::g_debug_context->recorder->RegisterWritten<T>( |
| 419 | addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); | ||
| 393 | } | 420 | } |
| 394 | } | 421 | } |
| 395 | 422 | ||
| 396 | // Explicitly instantiate template functions because we aren't defining this in the header: | 423 | // Explicitly instantiate template functions because we aren't defining this in the header: |
| 397 | 424 | ||
| 398 | template void Read<u64>(u64 &var, const u32 addr); | 425 | template void Read<u64>(u64& var, const u32 addr); |
| 399 | template void Read<u32>(u32 &var, const u32 addr); | 426 | template void Read<u32>(u32& var, const u32 addr); |
| 400 | template void Read<u16>(u16 &var, const u32 addr); | 427 | template void Read<u16>(u16& var, const u32 addr); |
| 401 | template void Read<u8>(u8 &var, const u32 addr); | 428 | template void Read<u8>(u8& var, const u32 addr); |
| 402 | 429 | ||
| 403 | template void Write<u64>(u32 addr, const u64 data); | 430 | template void Write<u64>(u32 addr, const u64 data); |
| 404 | template void Write<u32>(u32 addr, const u32 data); | 431 | template void Write<u32>(u32 addr, const u32 data); |
| @@ -417,8 +444,9 @@ static void VBlankCallback(u64 userdata, int cycles_late) { | |||
| 417 | // - If frameskip == 0 (disabled), always swap buffers | 444 | // - If frameskip == 0 (disabled), always swap buffers |
| 418 | // - If frameskip == 1, swap buffers every other frame (starting from the first frame) | 445 | // - If frameskip == 1, swap buffers every other frame (starting from the first frame) |
| 419 | // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) | 446 | // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) |
| 420 | if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || | 447 | if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && |
| 421 | Settings::values.frame_skip == 0) { | 448 | last_skip_frame != g_skip_frame) || |
| 449 | Settings::values.frame_skip == 0) { | ||
| 422 | VideoCore::g_renderer->SwapBuffers(); | 450 | VideoCore::g_renderer->SwapBuffers(); |
| 423 | } | 451 | } |
| 424 | 452 | ||
| @@ -448,12 +476,12 @@ void Init() { | |||
| 448 | // .. or at least these are the ones used by system applets. | 476 | // .. or at least these are the ones used by system applets. |
| 449 | // There's probably a smarter way to come up with addresses | 477 | // There's probably a smarter way to come up with addresses |
| 450 | // like this which does not require hardcoding. | 478 | // like this which does not require hardcoding. |
| 451 | framebuffer_top.address_left1 = 0x181E6000; | 479 | framebuffer_top.address_left1 = 0x181E6000; |
| 452 | framebuffer_top.address_left2 = 0x1822C800; | 480 | framebuffer_top.address_left2 = 0x1822C800; |
| 453 | framebuffer_top.address_right1 = 0x18273000; | 481 | framebuffer_top.address_right1 = 0x18273000; |
| 454 | framebuffer_top.address_right2 = 0x182B9800; | 482 | framebuffer_top.address_right2 = 0x182B9800; |
| 455 | framebuffer_sub.address_left1 = 0x1848F000; | 483 | framebuffer_sub.address_left1 = 0x1848F000; |
| 456 | framebuffer_sub.address_left2 = 0x184C7800; | 484 | framebuffer_sub.address_left2 = 0x184C7800; |
| 457 | 485 | ||
| 458 | framebuffer_top.width.Assign(240); | 486 | framebuffer_top.width.Assign(240); |
| 459 | framebuffer_top.height.Assign(400); | 487 | framebuffer_top.height.Assign(400); |
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index da4c345b4..077b6255f 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h | |||
| @@ -17,7 +17,8 @@ namespace GPU { | |||
| 17 | // Returns index corresponding to the Regs member labeled by field_name | 17 | // Returns index corresponding to the Regs member labeled by field_name |
| 18 | // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions | 18 | // TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions |
| 19 | // when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). | 19 | // when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])). |
| 20 | // For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members | 20 | // For details cf. |
| 21 | // https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members | ||
| 21 | // Hopefully, this will be fixed sometime in the future. | 22 | // Hopefully, this will be fixed sometime in the future. |
| 22 | // For lack of better alternatives, we currently hardcode the offsets when constant | 23 | // For lack of better alternatives, we currently hardcode the offsets when constant |
| 23 | // expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts | 24 | // expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts |
| @@ -30,8 +31,9 @@ namespace GPU { | |||
| 30 | // really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX | 31 | // really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX |
| 31 | // and then performs a (no-op) cast to size_t iff the second argument matches the expected | 32 | // and then performs a (no-op) cast to size_t iff the second argument matches the expected |
| 32 | // field offset. Otherwise, the compiler will fail to compile this code. | 33 | // field offset. Otherwise, the compiler will fail to compile this code. |
| 33 | #define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ | 34 | #define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \ |
| 34 | ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name)) | 35 | ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type) \ |
| 36 | GPU_REG_INDEX(field_name)) | ||
| 35 | #endif | 37 | #endif |
| 36 | 38 | ||
| 37 | // MMIO region 0x1EFxxxxx | 39 | // MMIO region 0x1EFxxxxx |
| @@ -44,18 +46,18 @@ struct Regs { | |||
| 44 | // support for that. | 46 | // support for that. |
| 45 | #define ASSERT_MEMBER_SIZE(name, size_in_bytes) | 47 | #define ASSERT_MEMBER_SIZE(name, size_in_bytes) |
| 46 | #else | 48 | #else |
| 47 | #define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ | 49 | #define ASSERT_MEMBER_SIZE(name, size_in_bytes) \ |
| 48 | static_assert(sizeof(name) == size_in_bytes, \ | 50 | static_assert(sizeof(name) == size_in_bytes, \ |
| 49 | "Structure size and register block length don't match") | 51 | "Structure size and register block length don't match") |
| 50 | #endif | 52 | #endif |
| 51 | 53 | ||
| 52 | // Components are laid out in reverse byte order, most significant bits first. | 54 | // Components are laid out in reverse byte order, most significant bits first. |
| 53 | enum class PixelFormat : u32 { | 55 | enum class PixelFormat : u32 { |
| 54 | RGBA8 = 0, | 56 | RGBA8 = 0, |
| 55 | RGB8 = 1, | 57 | RGB8 = 1, |
| 56 | RGB565 = 2, | 58 | RGB565 = 2, |
| 57 | RGB5A1 = 3, | 59 | RGB5A1 = 3, |
| 58 | RGBA4 = 4, | 60 | RGBA4 = 4, |
| 59 | }; | 61 | }; |
| 60 | 62 | ||
| 61 | /** | 63 | /** |
| @@ -88,8 +90,8 @@ struct Regs { | |||
| 88 | BitField<0, 16, u32> value_16bit; | 90 | BitField<0, 16, u32> value_16bit; |
| 89 | 91 | ||
| 90 | // TODO: Verify component order | 92 | // TODO: Verify component order |
| 91 | BitField< 0, 8, u32> value_24bit_r; | 93 | BitField<0, 8, u32> value_24bit_r; |
| 92 | BitField< 8, 8, u32> value_24bit_g; | 94 | BitField<8, 8, u32> value_24bit_g; |
| 93 | BitField<16, 8, u32> value_24bit_b; | 95 | BitField<16, 8, u32> value_24bit_b; |
| 94 | }; | 96 | }; |
| 95 | 97 | ||
| @@ -126,7 +128,7 @@ struct Regs { | |||
| 126 | union { | 128 | union { |
| 127 | u32 size; | 129 | u32 size; |
| 128 | 130 | ||
| 129 | BitField< 0, 16, u32> width; | 131 | BitField<0, 16, u32> width; |
| 130 | BitField<16, 16, u32> height; | 132 | BitField<16, 16, u32> height; |
| 131 | }; | 133 | }; |
| 132 | 134 | ||
| @@ -138,7 +140,7 @@ struct Regs { | |||
| 138 | union { | 140 | union { |
| 139 | u32 format; | 141 | u32 format; |
| 140 | 142 | ||
| 141 | BitField< 0, 3, PixelFormat> color_format; | 143 | BitField<0, 3, PixelFormat> color_format; |
| 142 | }; | 144 | }; |
| 143 | 145 | ||
| 144 | INSERT_PADDING_WORDS(0x1); | 146 | INSERT_PADDING_WORDS(0x1); |
| @@ -180,35 +182,37 @@ struct Regs { | |||
| 180 | union { | 182 | union { |
| 181 | u32 output_size; | 183 | u32 output_size; |
| 182 | 184 | ||
| 183 | BitField< 0, 16, u32> output_width; | 185 | BitField<0, 16, u32> output_width; |
| 184 | BitField<16, 16, u32> output_height; | 186 | BitField<16, 16, u32> output_height; |
| 185 | }; | 187 | }; |
| 186 | 188 | ||
| 187 | union { | 189 | union { |
| 188 | u32 input_size; | 190 | u32 input_size; |
| 189 | 191 | ||
| 190 | BitField< 0, 16, u32> input_width; | 192 | BitField<0, 16, u32> input_width; |
| 191 | BitField<16, 16, u32> input_height; | 193 | BitField<16, 16, u32> input_height; |
| 192 | }; | 194 | }; |
| 193 | 195 | ||
| 194 | enum ScalingMode : u32 { | 196 | enum ScalingMode : u32 { |
| 195 | NoScale = 0, // Doesn't scale the image | 197 | NoScale = 0, // Doesn't scale the image |
| 196 | ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter | 198 | ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter |
| 197 | ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter | 199 | ScaleXY = |
| 200 | 2, // Downscales the image in half in both the X and Y axes and applies a box filter | ||
| 198 | }; | 201 | }; |
| 199 | 202 | ||
| 200 | union { | 203 | union { |
| 201 | u32 flags; | 204 | u32 flags; |
| 202 | 205 | ||
| 203 | BitField< 0, 1, u32> flip_vertically; // flips input data vertically | 206 | BitField<0, 1, u32> flip_vertically; // flips input data vertically |
| 204 | BitField< 1, 1, u32> input_linear; // Converts from linear to tiled format | 207 | BitField<1, 1, u32> input_linear; // Converts from linear to tiled format |
| 205 | BitField< 2, 1, u32> crop_input_lines; | 208 | BitField<2, 1, u32> crop_input_lines; |
| 206 | BitField< 3, 1, u32> is_texture_copy; // Copies the data without performing any processing and respecting texture copy fields | 209 | BitField<3, 1, u32> is_texture_copy; // Copies the data without performing any |
| 207 | BitField< 5, 1, u32> dont_swizzle; | 210 | // processing and respecting texture copy fields |
| 208 | BitField< 8, 3, PixelFormat> input_format; | 211 | BitField<5, 1, u32> dont_swizzle; |
| 212 | BitField<8, 3, PixelFormat> input_format; | ||
| 209 | BitField<12, 3, PixelFormat> output_format; | 213 | BitField<12, 3, PixelFormat> output_format; |
| 210 | /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. | 214 | /// Uses some kind of 32x32 block swizzling mode, instead of the usual 8x8 one. |
| 211 | BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented | 215 | BitField<16, 1, u32> block_32; // TODO(yuriks): unimplemented |
| 212 | BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer | 216 | BitField<24, 2, ScalingMode> scaling; // Determines the scaling mode of the transfer |
| 213 | }; | 217 | }; |
| 214 | 218 | ||
| @@ -225,14 +229,14 @@ struct Regs { | |||
| 225 | union { | 229 | union { |
| 226 | u32 input_size; | 230 | u32 input_size; |
| 227 | 231 | ||
| 228 | BitField< 0, 16, u32> input_width; | 232 | BitField<0, 16, u32> input_width; |
| 229 | BitField<16, 16, u32> input_gap; | 233 | BitField<16, 16, u32> input_gap; |
| 230 | }; | 234 | }; |
| 231 | 235 | ||
| 232 | union { | 236 | union { |
| 233 | u32 output_size; | 237 | u32 output_size; |
| 234 | 238 | ||
| 235 | BitField< 0, 16, u32> output_width; | 239 | BitField<0, 16, u32> output_width; |
| 236 | BitField<16, 16, u32> output_gap; | 240 | BitField<16, 16, u32> output_gap; |
| 237 | }; | 241 | }; |
| 238 | } texture_copy; | 242 | } texture_copy; |
| @@ -267,12 +271,12 @@ struct Regs { | |||
| 267 | return sizeof(Regs) / sizeof(u32); | 271 | return sizeof(Regs) / sizeof(u32); |
| 268 | } | 272 | } |
| 269 | 273 | ||
| 270 | const u32& operator [] (int index) const { | 274 | const u32& operator[](int index) const { |
| 271 | const u32* content = reinterpret_cast<const u32*>(this); | 275 | const u32* content = reinterpret_cast<const u32*>(this); |
| 272 | return content[index]; | 276 | return content[index]; |
| 273 | } | 277 | } |
| 274 | 278 | ||
| 275 | u32& operator [] (int index) { | 279 | u32& operator[](int index) { |
| 276 | u32* content = reinterpret_cast<u32*>(this); | 280 | u32* content = reinterpret_cast<u32*>(this); |
| 277 | return content[index]; | 281 | return content[index]; |
| 278 | } | 282 | } |
| @@ -294,28 +298,29 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan | |||
| 294 | // is technically allowed since C++11. This macro should be enabled once MSVC adds | 298 | // is technically allowed since C++11. This macro should be enabled once MSVC adds |
| 295 | // support for that. | 299 | // support for that. |
| 296 | #ifndef _MSC_VER | 300 | #ifndef _MSC_VER |
| 297 | #define ASSERT_REG_POSITION(field_name, position) \ | 301 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 298 | static_assert(offsetof(Regs, field_name) == position * 4, \ | 302 | static_assert(offsetof(Regs, field_name) == position * 4, \ |
| 299 | "Field "#field_name" has invalid position") | 303 | "Field " #field_name " has invalid position") |
| 300 | 304 | ||
| 301 | ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); | 305 | ASSERT_REG_POSITION(memory_fill_config[0], 0x00004); |
| 302 | ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); | 306 | ASSERT_REG_POSITION(memory_fill_config[1], 0x00008); |
| 303 | ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); | 307 | ASSERT_REG_POSITION(framebuffer_config[0], 0x00117); |
| 304 | ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); | 308 | ASSERT_REG_POSITION(framebuffer_config[1], 0x00157); |
| 305 | ASSERT_REG_POSITION(display_transfer_config, 0x00300); | 309 | ASSERT_REG_POSITION(display_transfer_config, 0x00300); |
| 306 | ASSERT_REG_POSITION(command_processor_config, 0x00638); | 310 | ASSERT_REG_POSITION(command_processor_config, 0x00638); |
| 307 | 311 | ||
| 308 | #undef ASSERT_REG_POSITION | 312 | #undef ASSERT_REG_POSITION |
| 309 | #endif // !defined(_MSC_VER) | 313 | #endif // !defined(_MSC_VER) |
| 310 | 314 | ||
| 311 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. | 315 | // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value |
| 316 | // anyway. | ||
| 312 | static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); | 317 | static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set"); |
| 313 | 318 | ||
| 314 | extern Regs g_regs; | 319 | extern Regs g_regs; |
| 315 | extern bool g_skip_frame; | 320 | extern bool g_skip_frame; |
| 316 | 321 | ||
| 317 | template <typename T> | 322 | template <typename T> |
| 318 | void Read(T &var, const u32 addr); | 323 | void Read(T& var, const u32 addr); |
| 319 | 324 | ||
| 320 | template <typename T> | 325 | template <typename T> |
| 321 | void Write(u32 addr, const T data); | 326 | void Write(u32 addr, const T data); |
| @@ -326,5 +331,4 @@ void Init(); | |||
| 326 | /// Shutdown hardware | 331 | /// Shutdown hardware |
| 327 | void Shutdown(); | 332 | void Shutdown(); |
| 328 | 333 | ||
| 329 | |||
| 330 | } // namespace | 334 | } // namespace |
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp index b5fdbf9c1..db224c9aa 100644 --- a/src/core/hw/hw.cpp +++ b/src/core/hw/hw.cpp | |||
| @@ -5,14 +5,14 @@ | |||
| 5 | #include "common/common_types.h" | 5 | #include "common/common_types.h" |
| 6 | #include "common/logging/log.h" | 6 | #include "common/logging/log.h" |
| 7 | 7 | ||
| 8 | #include "core/hw/hw.h" | ||
| 9 | #include "core/hw/gpu.h" | 8 | #include "core/hw/gpu.h" |
| 9 | #include "core/hw/hw.h" | ||
| 10 | #include "core/hw/lcd.h" | 10 | #include "core/hw/lcd.h" |
| 11 | 11 | ||
| 12 | namespace HW { | 12 | namespace HW { |
| 13 | 13 | ||
| 14 | template <typename T> | 14 | template <typename T> |
| 15 | inline void Read(T &var, const u32 addr) { | 15 | inline void Read(T& var, const u32 addr) { |
| 16 | switch (addr & 0xFFFFF000) { | 16 | switch (addr & 0xFFFFF000) { |
| 17 | case VADDR_GPU: | 17 | case VADDR_GPU: |
| 18 | case VADDR_GPU + 0x1000: | 18 | case VADDR_GPU + 0x1000: |
| @@ -71,10 +71,10 @@ inline void Write(u32 addr, const T data) { | |||
| 71 | 71 | ||
| 72 | // Explicitly instantiate template functions because we aren't defining this in the header: | 72 | // Explicitly instantiate template functions because we aren't defining this in the header: |
| 73 | 73 | ||
| 74 | template void Read<u64>(u64 &var, const u32 addr); | 74 | template void Read<u64>(u64& var, const u32 addr); |
| 75 | template void Read<u32>(u32 &var, const u32 addr); | 75 | template void Read<u32>(u32& var, const u32 addr); |
| 76 | template void Read<u16>(u16 &var, const u32 addr); | 76 | template void Read<u16>(u16& var, const u32 addr); |
| 77 | template void Read<u8>(u8 &var, const u32 addr); | 77 | template void Read<u8>(u8& var, const u32 addr); |
| 78 | 78 | ||
| 79 | template void Write<u64>(u32 addr, const u64 data); | 79 | template void Write<u64>(u32 addr, const u64 data); |
| 80 | template void Write<u32>(u32 addr, const u32 data); | 80 | template void Write<u32>(u32 addr, const u32 data); |
| @@ -98,5 +98,4 @@ void Shutdown() { | |||
| 98 | LCD::Shutdown(); | 98 | LCD::Shutdown(); |
| 99 | LOG_DEBUG(HW, "shutdown OK"); | 99 | LOG_DEBUG(HW, "shutdown OK"); |
| 100 | } | 100 | } |
| 101 | |||
| 102 | } | 101 | } |
diff --git a/src/core/hw/hw.h b/src/core/hw/hw.h index d65608910..a3c5d2ea3 100644 --- a/src/core/hw/hw.h +++ b/src/core/hw/hw.h | |||
| @@ -10,30 +10,30 @@ namespace HW { | |||
| 10 | 10 | ||
| 11 | /// Beginnings of IO register regions, in the user VA space. | 11 | /// Beginnings of IO register regions, in the user VA space. |
| 12 | enum : u32 { | 12 | enum : u32 { |
| 13 | VADDR_HASH = 0x1EC01000, | 13 | VADDR_HASH = 0x1EC01000, |
| 14 | VADDR_CSND = 0x1EC03000, | 14 | VADDR_CSND = 0x1EC03000, |
| 15 | VADDR_DSP = 0x1EC40000, | 15 | VADDR_DSP = 0x1EC40000, |
| 16 | VADDR_PDN = 0x1EC41000, | 16 | VADDR_PDN = 0x1EC41000, |
| 17 | VADDR_CODEC = 0x1EC41000, | 17 | VADDR_CODEC = 0x1EC41000, |
| 18 | VADDR_SPI = 0x1EC42000, | 18 | VADDR_SPI = 0x1EC42000, |
| 19 | VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? | 19 | VADDR_SPI_2 = 0x1EC43000, // Only used under TWL_FIRM? |
| 20 | VADDR_I2C = 0x1EC44000, | 20 | VADDR_I2C = 0x1EC44000, |
| 21 | VADDR_CODEC_2 = 0x1EC45000, | 21 | VADDR_CODEC_2 = 0x1EC45000, |
| 22 | VADDR_HID = 0x1EC46000, | 22 | VADDR_HID = 0x1EC46000, |
| 23 | VADDR_GPIO = 0x1EC47000, | 23 | VADDR_GPIO = 0x1EC47000, |
| 24 | VADDR_I2C_2 = 0x1EC48000, | 24 | VADDR_I2C_2 = 0x1EC48000, |
| 25 | VADDR_SPI_3 = 0x1EC60000, | 25 | VADDR_SPI_3 = 0x1EC60000, |
| 26 | VADDR_I2C_3 = 0x1EC61000, | 26 | VADDR_I2C_3 = 0x1EC61000, |
| 27 | VADDR_MIC = 0x1EC62000, | 27 | VADDR_MIC = 0x1EC62000, |
| 28 | VADDR_PXI = 0x1EC63000, | 28 | VADDR_PXI = 0x1EC63000, |
| 29 | VADDR_LCD = 0x1ED02000, | 29 | VADDR_LCD = 0x1ED02000, |
| 30 | VADDR_DSP_2 = 0x1ED03000, | 30 | VADDR_DSP_2 = 0x1ED03000, |
| 31 | VADDR_HASH_2 = 0x1EE01000, | 31 | VADDR_HASH_2 = 0x1EE01000, |
| 32 | VADDR_GPU = 0x1EF00000, | 32 | VADDR_GPU = 0x1EF00000, |
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | template <typename T> | 35 | template <typename T> |
| 36 | void Read(T &var, const u32 addr); | 36 | void Read(T& var, const u32 addr); |
| 37 | 37 | ||
| 38 | template <typename T> | 38 | template <typename T> |
| 39 | void Write(u32 addr, const T data); | 39 | void Write(u32 addr, const T data); |
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp index 6f93709e3..0e3aa7cfd 100644 --- a/src/core/hw/lcd.cpp +++ b/src/core/hw/lcd.cpp | |||
| @@ -18,7 +18,7 @@ namespace LCD { | |||
| 18 | Regs g_regs; | 18 | Regs g_regs; |
| 19 | 19 | ||
| 20 | template <typename T> | 20 | template <typename T> |
| 21 | inline void Read(T &var, const u32 raw_addr) { | 21 | inline void Read(T& var, const u32 raw_addr) { |
| 22 | u32 addr = raw_addr - HW::VADDR_LCD; | 22 | u32 addr = raw_addr - HW::VADDR_LCD; |
| 23 | u32 index = addr / 4; | 23 | u32 index = addr / 4; |
| 24 | 24 | ||
| @@ -48,16 +48,17 @@ inline void Write(u32 addr, const T data) { | |||
| 48 | // This is happening *after* handling the write to make sure we properly catch all memory reads. | 48 | // This is happening *after* handling the write to make sure we properly catch all memory reads. |
| 49 | if (Pica::g_debug_context && Pica::g_debug_context->recorder) { | 49 | if (Pica::g_debug_context && Pica::g_debug_context->recorder) { |
| 50 | // addr + GPU VBase - IO VBase + IO PBase | 50 | // addr + GPU VBase - IO VBase + IO PBase |
| 51 | Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); | 51 | Pica::g_debug_context->recorder->RegisterWritten<T>( |
| 52 | addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); | ||
| 52 | } | 53 | } |
| 53 | } | 54 | } |
| 54 | 55 | ||
| 55 | // Explicitly instantiate template functions because we aren't defining this in the header: | 56 | // Explicitly instantiate template functions because we aren't defining this in the header: |
| 56 | 57 | ||
| 57 | template void Read<u64>(u64 &var, const u32 addr); | 58 | template void Read<u64>(u64& var, const u32 addr); |
| 58 | template void Read<u32>(u32 &var, const u32 addr); | 59 | template void Read<u32>(u32& var, const u32 addr); |
| 59 | template void Read<u16>(u16 &var, const u32 addr); | 60 | template void Read<u16>(u16& var, const u32 addr); |
| 60 | template void Read<u8>(u8 &var, const u32 addr); | 61 | template void Read<u8>(u8& var, const u32 addr); |
| 61 | 62 | ||
| 62 | template void Write<u64>(u32 addr, const u64 data); | 63 | template void Write<u64>(u32 addr, const u64 data); |
| 63 | template void Write<u32>(u32 addr, const u32 data); | 64 | template void Write<u32>(u32 addr, const u32 data); |
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h index 57029c5e8..404833165 100644 --- a/src/core/hw/lcd.h +++ b/src/core/hw/lcd.h | |||
| @@ -42,16 +42,15 @@ struct Regs { | |||
| 42 | return sizeof(Regs) / sizeof(u32); | 42 | return sizeof(Regs) / sizeof(u32); |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | const u32& operator [] (int index) const { | 45 | const u32& operator[](int index) const { |
| 46 | const u32* content = reinterpret_cast<const u32*>(this); | 46 | const u32* content = reinterpret_cast<const u32*>(this); |
| 47 | return content[index]; | 47 | return content[index]; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | u32& operator [] (int index) { | 50 | u32& operator[](int index) { |
| 51 | u32* content = reinterpret_cast<u32*>(this); | 51 | u32* content = reinterpret_cast<u32*>(this); |
| 52 | return content[index]; | 52 | return content[index]; |
| 53 | } | 53 | } |
| 54 | |||
| 55 | }; | 54 | }; |
| 56 | static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); | 55 | static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout"); |
| 57 | 56 | ||
| @@ -59,14 +58,14 @@ static_assert(std::is_standard_layout<Regs>::value, "Structure does not use stan | |||
| 59 | // is technically allowed since C++11. This macro should be enabled once MSVC adds | 58 | // is technically allowed since C++11. This macro should be enabled once MSVC adds |
| 60 | // support for that. | 59 | // support for that. |
| 61 | #ifndef _MSC_VER | 60 | #ifndef _MSC_VER |
| 62 | #define ASSERT_REG_POSITION(field_name, position) \ | 61 | #define ASSERT_REG_POSITION(field_name, position) \ |
| 63 | static_assert(offsetof(Regs, field_name) == position * 4, \ | 62 | static_assert(offsetof(Regs, field_name) == position * 4, \ |
| 64 | "Field "#field_name" has invalid position") | 63 | "Field " #field_name " has invalid position") |
| 65 | 64 | ||
| 66 | ASSERT_REG_POSITION(color_fill_top, 0x81); | 65 | ASSERT_REG_POSITION(color_fill_top, 0x81); |
| 67 | ASSERT_REG_POSITION(backlight_top, 0x90); | 66 | ASSERT_REG_POSITION(backlight_top, 0x90); |
| 68 | ASSERT_REG_POSITION(color_fill_bottom, 0x281); | 67 | ASSERT_REG_POSITION(color_fill_bottom, 0x281); |
| 69 | ASSERT_REG_POSITION(backlight_bottom, 0x290); | 68 | ASSERT_REG_POSITION(backlight_bottom, 0x290); |
| 70 | 69 | ||
| 71 | #undef ASSERT_REG_POSITION | 70 | #undef ASSERT_REG_POSITION |
| 72 | #endif // !defined(_MSC_VER) | 71 | #endif // !defined(_MSC_VER) |
| @@ -74,7 +73,7 @@ ASSERT_REG_POSITION(backlight_bottom, 0x290); | |||
| 74 | extern Regs g_regs; | 73 | extern Regs g_regs; |
| 75 | 74 | ||
| 76 | template <typename T> | 75 | template <typename T> |
| 77 | void Read(T &var, const u32 addr); | 76 | void Read(T& var, const u32 addr); |
| 78 | 77 | ||
| 79 | template <typename T> | 78 | template <typename T> |
| 80 | void Write(u32 addr, const T data); | 79 | void Write(u32 addr, const T data); |
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 083391e83..5a68d7e65 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp | |||
| @@ -27,9 +27,9 @@ static const size_t TILE_SIZE = 8 * 8; | |||
| 27 | using ImageTile = std::array<u32, TILE_SIZE>; | 27 | using ImageTile = std::array<u32, TILE_SIZE>; |
| 28 | 28 | ||
| 29 | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. | 29 | /// Converts a image strip from the source YUV format into individual 8x8 RGB32 tiles. |
| 30 | static void ConvertYUVToRGB(InputFormat input_format, | 30 | static void ConvertYUVToRGB(InputFormat input_format, const u8* input_Y, const u8* input_U, |
| 31 | const u8* input_Y, const u8* input_U, const u8* input_V, ImageTile output[], | 31 | const u8* input_V, ImageTile output[], unsigned int width, |
| 32 | unsigned int width, unsigned int height, const CoefficientSet& coefficients) { | 32 | unsigned int height, const CoefficientSet& coefficients) { |
| 33 | 33 | ||
| 34 | for (unsigned int y = 0; y < height; ++y) { | 34 | for (unsigned int y = 0; y < height; ++y) { |
| 35 | for (unsigned int x = 0; x < width; ++x) { | 35 | for (unsigned int x = 0; x < width; ++x) { |
| @@ -58,11 +58,11 @@ static void ConvertYUVToRGB(InputFormat input_format, | |||
| 58 | 58 | ||
| 59 | // This conversion process is bit-exact with hardware, as far as could be tested. | 59 | // This conversion process is bit-exact with hardware, as far as could be tested. |
| 60 | auto& c = coefficients; | 60 | auto& c = coefficients; |
| 61 | s32 cY = c[0]*Y; | 61 | s32 cY = c[0] * Y; |
| 62 | 62 | ||
| 63 | s32 r = cY + c[1]*V; | 63 | s32 r = cY + c[1] * V; |
| 64 | s32 g = cY - c[3]*U - c[2]*V; | 64 | s32 g = cY - c[3] * U - c[2] * V; |
| 65 | s32 b = cY + c[4]*U; | 65 | s32 b = cY + c[4] * U; |
| 66 | 66 | ||
| 67 | const s32 rounding_offset = 0x18; | 67 | const s32 rounding_offset = 0x18; |
| 68 | r = (r >> 3) + c[5] + rounding_offset; | 68 | r = (r >> 3) + c[5] + rounding_offset; |
| @@ -74,14 +74,14 @@ static void ConvertYUVToRGB(InputFormat input_format, | |||
| 74 | u32* out = &output[tile][y * 8 + tile_x]; | 74 | u32* out = &output[tile][y * 8 + tile_x]; |
| 75 | 75 | ||
| 76 | using MathUtil::Clamp; | 76 | using MathUtil::Clamp; |
| 77 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | | 77 | *out = ((u32)Clamp(r >> 5, 0, 0xFF) << 24) | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | |
| 78 | ((u32)Clamp(g >> 5, 0, 0xFF) << 16) | | ||
| 79 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); | 78 | ((u32)Clamp(b >> 5, 0, 0xFF) << 8); |
| 80 | } | 79 | } |
| 81 | } | 80 | } |
| 82 | } | 81 | } |
| 83 | 82 | ||
| 84 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit formats to 8-bit. | 83 | /// Simulates an incoming CDMA transfer. The N parameter is used to automatically convert 16-bit |
| 84 | /// formats to 8-bit. | ||
| 85 | template <size_t N> | 85 | template <size_t N> |
| 86 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { | 86 | static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data) { |
| 87 | const u8* input = Memory::GetPointer(buf.address); | 87 | const u8* input = Memory::GetPointer(buf.address); |
| @@ -103,9 +103,10 @@ static void ReceiveData(u8* output, ConversionBuffer& buf, size_t amount_of_data | |||
| 103 | } | 103 | } |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA transfer. | 106 | /// Convert intermediate RGB32 format to the final output format while simulating an outgoing CDMA |
| 107 | /// transfer. | ||
| 107 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, | 108 | static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data, |
| 108 | OutputFormat output_format, u8 alpha) { | 109 | OutputFormat output_format, u8 alpha) { |
| 109 | 110 | ||
| 110 | u8* output = Memory::GetPointer(buf.address); | 111 | u8* output = Memory::GetPointer(buf.address); |
| 111 | 112 | ||
| @@ -113,9 +114,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 113 | u8* unit_end = output + buf.transfer_unit; | 114 | u8* unit_end = output + buf.transfer_unit; |
| 114 | while (output < unit_end) { | 115 | while (output < unit_end) { |
| 115 | u32 color = *input++; | 116 | u32 color = *input++; |
| 116 | Math::Vec4<u8> col_vec{ | 117 | Math::Vec4<u8> col_vec{(u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha}; |
| 117 | (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha | ||
| 118 | }; | ||
| 119 | 118 | ||
| 120 | switch (output_format) { | 119 | switch (output_format) { |
| 121 | case OutputFormat::RGBA8: | 120 | case OutputFormat::RGBA8: |
| @@ -146,34 +145,26 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 146 | } | 145 | } |
| 147 | 146 | ||
| 148 | static const u8 linear_lut[64] = { | 147 | static const u8 linear_lut[64] = { |
| 149 | 0, 1, 2, 3, 4, 5, 6, 7, | 148 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, |
| 150 | 8, 9, 10, 11, 12, 13, 14, 15, | 149 | 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, |
| 151 | 16, 17, 18, 19, 20, 21, 22, 23, | 150 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, |
| 152 | 24, 25, 26, 27, 28, 29, 30, 31, | ||
| 153 | 32, 33, 34, 35, 36, 37, 38, 39, | ||
| 154 | 40, 41, 42, 43, 44, 45, 46, 47, | ||
| 155 | 48, 49, 50, 51, 52, 53, 54, 55, | ||
| 156 | 56, 57, 58, 59, 60, 61, 62, 63, | ||
| 157 | }; | 151 | }; |
| 158 | 152 | ||
| 159 | static const u8 morton_lut[64] = { | 153 | static const u8 morton_lut[64] = { |
| 160 | 0, 1, 4, 5, 16, 17, 20, 21, | 154 | 0, 1, 4, 5, 16, 17, 20, 21, 2, 3, 6, 7, 18, 19, 22, 23, 8, 9, 12, 13, 24, 25, |
| 161 | 2, 3, 6, 7, 18, 19, 22, 23, | 155 | 28, 29, 10, 11, 14, 15, 26, 27, 30, 31, 32, 33, 36, 37, 48, 49, 52, 53, 34, 35, 38, 39, |
| 162 | 8, 9, 12, 13, 24, 25, 28, 29, | 156 | 50, 51, 54, 55, 40, 41, 44, 45, 56, 57, 60, 61, 42, 43, 46, 47, 58, 59, 62, 63, |
| 163 | 10, 11, 14, 15, 26, 27, 30, 31, | ||
| 164 | 32, 33, 36, 37, 48, 49, 52, 53, | ||
| 165 | 34, 35, 38, 39, 50, 51, 54, 55, | ||
| 166 | 40, 41, 44, 45, 56, 57, 60, 61, | ||
| 167 | 42, 43, 46, 47, 58, 59, 62, 63, | ||
| 168 | }; | 157 | }; |
| 169 | 158 | ||
| 170 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 159 | static void RotateTile0(const ImageTile& input, ImageTile& output, int height, |
| 160 | const u8 out_map[64]) { | ||
| 171 | for (int i = 0; i < height * 8; ++i) { | 161 | for (int i = 0; i < height * 8; ++i) { |
| 172 | output[out_map[i]] = input[i]; | 162 | output[out_map[i]] = input[i]; |
| 173 | } | 163 | } |
| 174 | } | 164 | } |
| 175 | 165 | ||
| 176 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 166 | static void RotateTile90(const ImageTile& input, ImageTile& output, int height, |
| 167 | const u8 out_map[64]) { | ||
| 177 | int out_i = 0; | 168 | int out_i = 0; |
| 178 | for (int x = 0; x < 8; ++x) { | 169 | for (int x = 0; x < 8; ++x) { |
| 179 | for (int y = height - 1; y >= 0; --y) { | 170 | for (int y = height - 1; y >= 0; --y) { |
| @@ -182,16 +173,18 @@ static void RotateTile90(const ImageTile& input, ImageTile& output, int height, | |||
| 182 | } | 173 | } |
| 183 | } | 174 | } |
| 184 | 175 | ||
| 185 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 176 | static void RotateTile180(const ImageTile& input, ImageTile& output, int height, |
| 177 | const u8 out_map[64]) { | ||
| 186 | int out_i = 0; | 178 | int out_i = 0; |
| 187 | for (int i = height * 8 - 1; i >= 0; --i) { | 179 | for (int i = height * 8 - 1; i >= 0; --i) { |
| 188 | output[out_map[out_i++]] = input[i]; | 180 | output[out_map[out_i++]] = input[i]; |
| 189 | } | 181 | } |
| 190 | } | 182 | } |
| 191 | 183 | ||
| 192 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, const u8 out_map[64]) { | 184 | static void RotateTile270(const ImageTile& input, ImageTile& output, int height, |
| 185 | const u8 out_map[64]) { | ||
| 193 | int out_i = 0; | 186 | int out_i = 0; |
| 194 | for (int x = 8-1; x >= 0; --x) { | 187 | for (int x = 8 - 1; x >= 0; --x) { |
| 195 | for (int y = 0; y < height; ++y) { | 188 | for (int y = 0; y < height; ++y) { |
| 196 | output[out_map[out_i++]] = input[y * 8 + x]; | 189 | output[out_map[out_i++]] = input[y * 8 + x]; |
| 197 | } | 190 | } |
| @@ -274,9 +267,11 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 274 | const u8* tile_remap = nullptr; | 267 | const u8* tile_remap = nullptr; |
| 275 | switch (cvt.block_alignment) { | 268 | switch (cvt.block_alignment) { |
| 276 | case BlockAlignment::Linear: | 269 | case BlockAlignment::Linear: |
| 277 | tile_remap = linear_lut; break; | 270 | tile_remap = linear_lut; |
| 271 | break; | ||
| 278 | case BlockAlignment::Block8x8: | 272 | case BlockAlignment::Block8x8: |
| 279 | tile_remap = morton_lut; break; | 273 | tile_remap = morton_lut; |
| 274 | break; | ||
| 280 | } | 275 | } |
| 281 | 276 | ||
| 282 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { | 277 | for (unsigned int y = 0; y < cvt.input_lines; y += 8) { |
| @@ -320,7 +315,7 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 320 | // Note(yuriks): If additional optimization is required, input_format can be moved to a | 315 | // Note(yuriks): If additional optimization is required, input_format can be moved to a |
| 321 | // template parameter, so that its dispatch can be moved to outside the inner loop. | 316 | // template parameter, so that its dispatch can be moved to outside the inner loop. |
| 322 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), | 317 | ConvertYUVToRGB(cvt.input_format, input_Y, input_U, input_V, tiles.get(), |
| 323 | cvt.input_line_width, row_height, cvt.coefficients); | 318 | cvt.input_line_width, row_height, cvt.coefficients); |
| 324 | 319 | ||
| 325 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); | 320 | u32* output_buffer = reinterpret_cast<u32*>(data_buffer.get()); |
| 326 | 321 | ||
| @@ -367,9 +362,9 @@ void PerformConversion(ConversionConfiguration& cvt) { | |||
| 367 | 362 | ||
| 368 | // Note(yuriks): If additional optimization is required, output_format can be moved to a | 363 | // Note(yuriks): If additional optimization is required, output_format can be moved to a |
| 369 | // template parameter, so that its dispatch can be moved to outside the inner loop. | 364 | // template parameter, so that its dispatch can be moved to outside the inner loop. |
| 370 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, cvt.output_format, (u8)cvt.alpha); | 365 | SendData(reinterpret_cast<u32*>(data_buffer.get()), cvt.dst, (int)row_data_size, |
| 366 | cvt.output_format, (u8)cvt.alpha); | ||
| 371 | } | 367 | } |
| 372 | } | 368 | } |
| 373 | |||
| 374 | } | 369 | } |
| 375 | } | 370 | } |
diff --git a/src/core/hw/y2r.h b/src/core/hw/y2r.h index 729e1eee3..6b6e71bec 100644 --- a/src/core/hw/y2r.h +++ b/src/core/hw/y2r.h | |||
| @@ -3,13 +3,12 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | namespace Y2R_U { | 5 | namespace Y2R_U { |
| 6 | struct ConversionConfiguration; | 6 | struct ConversionConfiguration; |
| 7 | } | 7 | } |
| 8 | 8 | ||
| 9 | namespace HW { | 9 | namespace HW { |
| 10 | namespace Y2R { | 10 | namespace Y2R { |
| 11 | 11 | ||
| 12 | void PerformConversion(Y2R_U::ConversionConfiguration& cvt); | 12 | void PerformConversion(Y2R_U::ConversionConfiguration& cvt); |
| 13 | |||
| 14 | } | 13 | } |
| 15 | } | 14 | } |