summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2015-03-16 18:35:34 -0400
committer: bunnei, 2015-03-16 18:35:34 -0400
commit: c1f5cb7dd5447acd4533c2b8affc438a94443006 (patch)
tree: b125c422965ea5ac99335e05cf5906e52805f027 /src
parent: Merge pull request #662 from linkmauve/video_core-warnings (diff)
parent: Pica/VertexShader: Fix a bug caused due to incorrect assumptions of consecuti... (diff)
download: yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.gz
yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.xz
yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.zip
Merge pull request #652 from neobrain/shader_output_fix
Pica/VertexShader: Fix a bug caused by incorrect assumptions about consecutive output register tables.
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/vertex_shader.cpp 44
1 files changed, 24 insertions, 20 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 4eb3e743e..e8d865172 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -72,7 +72,7 @@ struct VertexShaderState {
72 u32* program_counter; 72 u32* program_counter;
73 73
74 const float24* input_register_table[16]; 74 const float24* input_register_table[16];
75 float24* output_register_table[7*4]; 75 Math::Vec4<float24> output_registers[16];
76 76
77 Math::Vec4<float24> temporary_registers[16]; 77 Math::Vec4<float24> temporary_registers[16];
78 bool conditional_code[2]; 78 bool conditional_code[2];
@@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
198 src2[3] = src2[3] * float24::FromFloat32(-1); 198 src2[3] = src2[3] * float24::FromFloat32(-1);
199 } 199 }
200 200
201 float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()] 201 float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
202 : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
203 : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] 202 : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
204 : dummy_vec4_float24; 203 : dummy_vec4_float24;
205 204
@@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
409 src3[3] = src3[3] * float24::FromFloat32(-1); 408 src3[3] = src3[3] * float24::FromFloat32(-1);
410 } 409 }
411 410
412 float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()] 411 float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
413 : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
414 : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] 412 : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
415 : dummy_vec4_float24; 413 : dummy_vec4_float24;
416 414
@@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
587 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; 585 if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
588 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; 586 if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
589 587
590 // Setup output register table 588 state.conditional_code[0] = false;
591 OutputVertex ret; 589 state.conditional_code[1] = false;
592 // Zero output so that attributes which aren't output won't have denormals in them, which will 590
593 // slow us down later. 591 ProcessShaderCode(state);
594 memset(&ret, 0, sizeof(ret)); 592 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
593 state.debug.max_opdesc_id, registers.vs_main_offset,
594 registers.vs_output_attributes);
595 595
596 // Setup output data
597 OutputVertex ret;
598 // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
599 // figure out what those circumstances are and enable the remaining outputs then.
596 for (int i = 0; i < 7; ++i) { 600 for (int i = 0; i < 7; ++i) {
597 const auto& output_register_map = registers.vs_output_attributes[i]; 601 const auto& output_register_map = registers.vs_output_attributes[i];
598 602
@@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
601 output_register_map.map_z, output_register_map.map_w 605 output_register_map.map_z, output_register_map.map_w
602 }; 606 };
603 607
604 for (int comp = 0; comp < 4; ++comp) 608 for (int comp = 0; comp < 4; ++comp) {
605 state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; 609 float24* out = ((float24*)&ret) + semantics[comp];
610 if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
611 *out = state.output_registers[i][comp];
612 } else {
613 // Zero output so that attributes which aren't output won't have denormals in them,
614 // which would slow us down later.
615 memset(out, 0, sizeof(*out));
616 }
617 }
606 } 618 }
607 619
608 state.conditional_code[0] = false;
609 state.conditional_code[1] = false;
610
611 ProcessShaderCode(state);
612 DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
613 state.debug.max_opdesc_id, registers.vs_main_offset,
614 registers.vs_output_attributes);
615
616 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", 620 LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
617 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), 621 ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
618 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), 622 ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),