diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hw/y2r.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/clipper.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/command_processor.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/pica.h | 1 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 13 |
5 files changed, 55 insertions, 43 deletions
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index b40f13cae..f80e26ecd 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp | |||
| @@ -111,7 +111,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data | |||
| 111 | while (output < unit_end) { | 111 | while (output < unit_end) { |
| 112 | u32 color = *input++; | 112 | u32 color = *input++; |
| 113 | Math::Vec4<u8> col_vec{ | 113 | Math::Vec4<u8> col_vec{ |
| 114 | (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha, | 114 | (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha |
| 115 | }; | 115 | }; |
| 116 | 116 | ||
| 117 | switch (output_format) { | 117 | switch (output_format) { |
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 224132d71..558b49d60 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp | |||
| @@ -153,7 +153,7 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { | |||
| 153 | "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " | 153 | "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " |
| 154 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " | 154 | "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " |
| 155 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", | 155 | "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", |
| 156 | i, output_list->size(), | 156 | i + 1, output_list->size() - 2, |
| 157 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), | 157 | vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), |
| 158 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), | 158 | vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), |
| 159 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), | 159 | vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), |
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 2a1c885a7..f2e3aee85 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp | |||
| @@ -60,6 +60,46 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 60 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); | 60 | GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); |
| 61 | break; | 61 | break; |
| 62 | 62 | ||
| 63 | // Load default vertex input attributes | ||
| 64 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): | ||
| 65 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): | ||
| 66 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): | ||
| 67 | { | ||
| 68 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 69 | // it directly write the values? | ||
| 70 | default_attr_write_buffer[default_attr_counter++] = value; | ||
| 71 | |||
| 72 | // Default attributes are written in a packed format such that four float24 values are encoded in | ||
| 73 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 74 | // written. | ||
| 75 | if (default_attr_counter >= 3) { | ||
| 76 | default_attr_counter = 0; | ||
| 77 | |||
| 78 | auto& setup = regs.vs_default_attributes_setup; | ||
| 79 | |||
| 80 | if (setup.index >= 16) { | ||
| 81 | LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); | ||
| 82 | break; | ||
| 83 | } | ||
| 84 | |||
| 85 | Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; | ||
| 86 | |||
| 87 | // NOTE: The destination component order indeed is "backwards" | ||
| 88 | attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); | ||
| 89 | attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 90 | attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); | ||
| 91 | attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); | ||
| 92 | |||
| 93 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, | ||
| 94 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | ||
| 95 | attribute.w.ToFloat32()); | ||
| 96 | |||
| 97 | // TODO: Verify that this actually modifies the register! | ||
| 98 | setup.index = setup.index + 1; | ||
| 99 | } | ||
| 100 | break; | ||
| 101 | } | ||
| 102 | |||
| 63 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): | 103 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): |
| 64 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): | 104 | case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): |
| 65 | { | 105 | { |
| @@ -351,46 +391,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| 351 | break; | 391 | break; |
| 352 | } | 392 | } |
| 353 | 393 | ||
| 354 | // Load default vertex input attributes | ||
| 355 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): | ||
| 356 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): | ||
| 357 | case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): | ||
| 358 | { | ||
| 359 | // TODO: Does actual hardware indeed keep an intermediate buffer or does | ||
| 360 | // it directly write the values? | ||
| 361 | default_attr_write_buffer[default_attr_counter++] = value; | ||
| 362 | |||
| 363 | // Default attributes are written in a packed format such that four float24 values are encoded in | ||
| 364 | // three 32-bit numbers. We write to internal memory once a full such vector is | ||
| 365 | // written. | ||
| 366 | if (default_attr_counter >= 3) { | ||
| 367 | default_attr_counter = 0; | ||
| 368 | |||
| 369 | auto& setup = regs.vs_default_attributes_setup; | ||
| 370 | |||
| 371 | if (setup.index >= 16) { | ||
| 372 | LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); | ||
| 373 | break; | ||
| 374 | } | ||
| 375 | |||
| 376 | Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; | ||
| 377 | |||
| 378 | // NOTE: The destination component order indeed is "backwards" | ||
| 379 | attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); | ||
| 380 | attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); | ||
| 381 | attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); | ||
| 382 | attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); | ||
| 383 | |||
| 384 | LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, | ||
| 385 | attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), | ||
| 386 | attribute.w.ToFloat32()); | ||
| 387 | |||
| 388 | // TODO: Verify that this actually modifies the register! | ||
| 389 | setup.index = setup.index + 1; | ||
| 390 | } | ||
| 391 | break; | ||
| 392 | } | ||
| 393 | |||
| 394 | // Load shader program code | 394 | // Load shader program code |
| 395 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): | 395 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): |
| 396 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): | 396 | case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): |
diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 46a7b21dc..026b10a62 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h | |||
| @@ -290,6 +290,7 @@ struct Regs { | |||
| 290 | AddSigned = 3, | 290 | AddSigned = 3, |
| 291 | Lerp = 4, | 291 | Lerp = 4, |
| 292 | Subtract = 5, | 292 | Subtract = 5, |
| 293 | Dot3_RGB = 6, | ||
| 293 | 294 | ||
| 294 | MultiplyThenAdd = 8, | 295 | MultiplyThenAdd = 8, |
| 295 | AddThenMultiply = 9, | 296 | AddThenMultiply = 9, |
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index c381c2bd9..a6b7997ce 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp | |||
| @@ -641,7 +641,18 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, | |||
| 641 | result = (result * input[2].Cast<int>()) / 255; | 641 | result = (result * input[2].Cast<int>()) / 255; |
| 642 | return result.Cast<u8>(); | 642 | return result.Cast<u8>(); |
| 643 | } | 643 | } |
| 644 | 644 | case Operation::Dot3_RGB: | |
| 645 | { | ||
| 646 | // Not fully accurate. | ||
| 647 | // Worst case scenario seems to yield a +/-3 error | ||
| 648 | // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, | ||
| 649 | // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results | ||
| 650 | int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + | ||
| 651 | ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + | ||
| 652 | ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; | ||
| 653 | result = std::max(0, std::min(255, result)); | ||
| 654 | return { (u8)result, (u8)result, (u8)result }; | ||
| 655 | } | ||
| 645 | default: | 656 | default: |
| 646 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); | 657 | LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); |
| 647 | UNIMPLEMENTED(); | 658 | UNIMPLEMENTED(); |