summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/core/hw/y2r.cpp 2
-rw-r--r-- src/video_core/clipper.cpp 2
-rw-r--r-- src/video_core/command_processor.cpp 80
-rw-r--r-- src/video_core/pica.h 1
-rw-r--r-- src/video_core/rasterizer.cpp 13
5 files changed, 55 insertions, 43 deletions
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index b40f13cae..f80e26ecd 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -111,7 +111,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
111 while (output < unit_end) { 111 while (output < unit_end) {
112 u32 color = *input++; 112 u32 color = *input++;
113 Math::Vec4<u8> col_vec{ 113 Math::Vec4<u8> col_vec{
114 (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha, 114 (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
115 }; 115 };
116 116
117 switch (output_format) { 117 switch (output_format) {
diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp
index 224132d71..558b49d60 100644
--- a/src/video_core/clipper.cpp
+++ b/src/video_core/clipper.cpp
@@ -153,7 +153,7 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) {
153 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " 153 "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), "
154 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " 154 "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and "
155 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", 155 "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)",
156 i, output_list->size(), 156 i + 1, output_list->size() - 2,
157 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), 157 vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(),
158 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), 158 vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(),
159 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), 159 vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(),
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 2a1c885a7..f2e3aee85 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -60,6 +60,46 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
60 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); 60 GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D);
61 break; 61 break;
62 62
63 // Load default vertex input attributes
64 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
65 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
66 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235):
67 {
68 // TODO: Does actual hardware indeed keep an intermediate buffer or does
69 // it directly write the values?
70 default_attr_write_buffer[default_attr_counter++] = value;
71
72 // Default attributes are written in a packed format such that four float24 values are encoded in
73 // three 32-bit numbers. We write to internal memory once a full such vector is
74 // written.
75 if (default_attr_counter >= 3) {
76 default_attr_counter = 0;
77
78 auto& setup = regs.vs_default_attributes_setup;
79
80 if (setup.index >= 16) {
81 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
82 break;
83 }
84
85 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
86
87 // NOTE: The destination component order indeed is "backwards"
88 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8);
89 attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
90 attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
91 attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF);
92
93 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
94 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
95 attribute.w.ToFloat32());
96
97 // TODO: Verify that this actually modifies the register!
98 setup.index = setup.index + 1;
99 }
100 break;
101 }
102
63 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): 103 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c):
64 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): 104 case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d):
65 { 105 {
@@ -351,46 +391,6 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
351 break; 391 break;
352 } 392 }
353 393
354 // Load default vertex input attributes
355 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233):
356 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234):
357 case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235):
358 {
359 // TODO: Does actual hardware indeed keep an intermediate buffer or does
360 // it directly write the values?
361 default_attr_write_buffer[default_attr_counter++] = value;
362
363 // Default attributes are written in a packed format such that four float24 values are encoded in
364 // three 32-bit numbers. We write to internal memory once a full such vector is
365 // written.
366 if (default_attr_counter >= 3) {
367 default_attr_counter = 0;
368
369 auto& setup = regs.vs_default_attributes_setup;
370
371 if (setup.index >= 16) {
372 LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
373 break;
374 }
375
376 Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
377
378 // NOTE: The destination component order indeed is "backwards"
379 attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8);
380 attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
381 attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
382 attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF);
383
384 LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
385 attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
386 attribute.w.ToFloat32());
387
388 // TODO: Verify that this actually modifies the register!
389 setup.index = setup.index + 1;
390 }
391 break;
392 }
393
394 // Load shader program code 394 // Load shader program code
395 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): 395 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc):
396 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): 396 case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd):
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 46a7b21dc..026b10a62 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -290,6 +290,7 @@ struct Regs {
290 AddSigned = 3, 290 AddSigned = 3,
291 Lerp = 4, 291 Lerp = 4,
292 Subtract = 5, 292 Subtract = 5,
293 Dot3_RGB = 6,
293 294
294 MultiplyThenAdd = 8, 295 MultiplyThenAdd = 8,
295 AddThenMultiply = 9, 296 AddThenMultiply = 9,
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index c381c2bd9..a6b7997ce 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -641,7 +641,18 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
641 result = (result * input[2].Cast<int>()) / 255; 641 result = (result * input[2].Cast<int>()) / 255;
642 return result.Cast<u8>(); 642 return result.Cast<u8>();
643 } 643 }
644 644 case Operation::Dot3_RGB:
645 {
646 // Not fully accurate.
647 // Worst case scenario seems to yield a +/-3 error
648 // Some HW results indicate that the per-component computation can't have a higher precision than 1/256,
649 // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results
650 int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
651 ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
652 ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
653 result = std::max(0, std::min(255, result));
654 return { (u8)result, (u8)result, (u8)result };
655 }
645 default: 656 default:
646 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); 657 LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
647 UNIMPLEMENTED(); 658 UNIMPLEMENTED();