summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Tony Wasserka2015-03-12 14:18:46 +0100
committerGravatar Tony Wasserka2015-03-12 14:18:46 +0100
commite4f5ec6272016dd34afe4e8901a9e8027324ba21 (patch)
tree60f2db6f42538875bf8e552926e5df4dbddfab58 /src
parentMerge pull request #642 from bunnei/touchpad (diff)
downloadyuzu-e4f5ec6272016dd34afe4e8901a9e8027324ba21.tar.gz
yuzu-e4f5ec6272016dd34afe4e8901a9e8027324ba21.tar.xz
yuzu-e4f5ec6272016dd34afe4e8901a9e8027324ba21.zip
Pica/VertexShader: Fix a bug caused by incorrect assumptions about consecutive output register tables.
We now create a temporary buffer for output registers and copy all of them to the actual output vertex structure after the shader has run. This is technically not necessary, but it will be easier to vectorize in the future.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/vertex_shader.cpp44
1 files changed, 24 insertions, 20 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 4eb3e743e..e8d865172 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -72,7 +72,7 @@ struct VertexShaderState {
72 u32* program_counter; 72 u32* program_counter;
73 73
74 const float24* input_register_table[16]; 74 const float24* input_register_table[16];
75 float24* output_register_table[7*4]; 75 Math::Vec4<float24> output_registers[16];
76 76
77 Math::Vec4<float24> temporary_registers[16]; 77 Math::Vec4<float24> temporary_registers[16];
78 bool conditional_code[2]; 78 bool conditional_code[2];
@@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
198 src2[3] = src2[3] * float24::FromFloat32(-1); 198 src2[3] = src2[3] * float24::FromFloat32(-1);
199 } 199 }
200 200
201 float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()] 201 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
202 : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
203 : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] 202 : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
204 : dummy_vec4_float24; 203 : dummy_vec4_float24;
205 204
@@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
409 src3[3] = src3[3] * float24::FromFloat32(-1); 408 src3[3] = src3[3] * float24::FromFloat32(-1);
410 } 409 }
411 410
412 float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()] 411 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
413 : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
414 : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] 412 : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
415 : dummy_vec4_float24; 413 : dummy_vec4_float24;
416 414
@@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
587 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; 585 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
588 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; 586 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
589 587
590 // Setup output register table 588 state.conditional_code[0] = false;
591 OutputVertex ret; 589 state.conditional_code[1] = false;
592 // Zero output so that attributes which aren't output won't have denormals in them, which will 590
593 // slow us down later. 591 ProcessShaderCode(state);
594 memset(&ret, 0, sizeof(ret)); 592 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
593 state.debug.max_opdesc_id, registers.vs_main_offset,
594 registers.vs_output_attributes);
595 595
596 // Setup output data
597 OutputVertex ret;
598 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
599 // figure out what those circumstances are and enable the remaining outputs then.
596 for (int i = 0; i < 7; ++i) { 600 for (int i = 0; i < 7; ++i) {
597 const auto& output_register_map = registers.vs_output_attributes[i]; 601 const auto& output_register_map = registers.vs_output_attributes[i];
598 602
@@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
601 output_register_map.map_z, output_register_map.map_w 605 output_register_map.map_z, output_register_map.map_w
602 }; 606 };
603 607
604 for (int comp = 0; comp < 4; ++comp) 608 for (int comp = 0; comp < 4; ++comp) {
605 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; 609 float24* out = ((float24*)&ret) + semantics[comp];
610 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
611 *out = state.output_registers[i][comp];
612 } else {
613 // Zero output so that attributes which aren't output won't have denormals in them,
614 // which would slow us down later.
615 memset(out, 0, sizeof(*out));
616 }
617 }
606 } 618 }
607 619
608 state.conditional_code[0] = false;
609 state.conditional_code[1] = false;
610
611 ProcessShaderCode(state);
612 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
613 state.debug.max_opdesc_id, registers.vs_main_offset,
614 registers.vs_output_attributes);
615
616 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 620 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
617 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 621 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
618 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 622 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),