diff options
| author | 2015-03-16 18:35:34 -0400 | |
|---|---|---|
| committer | 2015-03-16 18:35:34 -0400 | |
| commit | c1f5cb7dd5447acd4533c2b8affc438a94443006 (patch) | |
| tree | b125c422965ea5ac99335e05cf5906e52805f027 /src | |
| parent | Merge pull request #662 from linkmauve/video_core-warnings (diff) | |
| parent | Pica/VertexShader: Fix a bug caused due to incorrect assumptions of consecuti... (diff) | |
| download | yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.gz yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.xz yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.zip | |
Merge pull request #652 from neobrain/shader_output_fix
Pica/VertexShader: Fix a bug caused by incorrect assumptions about consecutive output register tables.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/vertex_shader.cpp | 44 |
1 files changed, 24 insertions, 20 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 4eb3e743e..e8d865172 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp | |||
| @@ -72,7 +72,7 @@ struct VertexShaderState { | |||
| 72 | u32* program_counter; | 72 | u32* program_counter; |
| 73 | 73 | ||
| 74 | const float24* input_register_table[16]; | 74 | const float24* input_register_table[16]; |
| 75 | float24* output_register_table[7*4]; | 75 | Math::Vec4<float24> output_registers[16]; |
| 76 | 76 | ||
| 77 | Math::Vec4<float24> temporary_registers[16]; | 77 | Math::Vec4<float24> temporary_registers[16]; |
| 78 | bool conditional_code[2]; | 78 | bool conditional_code[2]; |
| @@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 198 | src2[3] = src2[3] * float24::FromFloat32(-1); | 198 | src2[3] = src2[3] * float24::FromFloat32(-1); |
| 199 | } | 199 | } |
| 200 | 200 | ||
| 201 | float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()] | 201 | float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0] |
| 202 | : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24 | ||
| 203 | : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] | 202 | : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0] |
| 204 | : dummy_vec4_float24; | 203 | : dummy_vec4_float24; |
| 205 | 204 | ||
| @@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) { | |||
| 409 | src3[3] = src3[3] * float24::FromFloat32(-1); | 408 | src3[3] = src3[3] * float24::FromFloat32(-1); |
| 410 | } | 409 | } |
| 411 | 410 | ||
| 412 | float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()] | 411 | float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0] |
| 413 | : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24 | ||
| 414 | : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] | 412 | : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0] |
| 415 | : dummy_vec4_float24; | 413 | : dummy_vec4_float24; |
| 416 | 414 | ||
| @@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { | |||
| 587 | if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; | 585 | if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x; |
| 588 | if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; | 586 | if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x; |
| 589 | 587 | ||
| 590 | // Setup output register table | 588 | state.conditional_code[0] = false; |
| 591 | OutputVertex ret; | 589 | state.conditional_code[1] = false; |
| 592 | // Zero output so that attributes which aren't output won't have denormals in them, which will | 590 | |
| 593 | // slow us down later. | 591 | ProcessShaderCode(state); |
| 594 | memset(&ret, 0, sizeof(ret)); | 592 | DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), |
| 593 | state.debug.max_opdesc_id, registers.vs_main_offset, | ||
| 594 | registers.vs_output_attributes); | ||
| 595 | 595 | ||
| 596 | // Setup output data | ||
| 597 | OutputVertex ret; | ||
| 598 | // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to | ||
| 599 | // figure out what those circumstances are and enable the remaining outputs then. | ||
| 596 | for (int i = 0; i < 7; ++i) { | 600 | for (int i = 0; i < 7; ++i) { |
| 597 | const auto& output_register_map = registers.vs_output_attributes[i]; | 601 | const auto& output_register_map = registers.vs_output_attributes[i]; |
| 598 | 602 | ||
| @@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { | |||
| 601 | output_register_map.map_z, output_register_map.map_w | 605 | output_register_map.map_z, output_register_map.map_w |
| 602 | }; | 606 | }; |
| 603 | 607 | ||
| 604 | for (int comp = 0; comp < 4; ++comp) | 608 | for (int comp = 0; comp < 4; ++comp) { |
| 605 | state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; | 609 | float24* out = ((float24*)&ret) + semantics[comp]; |
| 610 | if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { | ||
| 611 | *out = state.output_registers[i][comp]; | ||
| 612 | } else { | ||
| 613 | // Zero output so that attributes which aren't output won't have denormals in them, | ||
| 614 | // which would slow us down later. | ||
| 615 | memset(out, 0, sizeof(*out)); | ||
| 616 | } | ||
| 617 | } | ||
| 606 | } | 618 | } |
| 607 | 619 | ||
| 608 | state.conditional_code[0] = false; | ||
| 609 | state.conditional_code[1] = false; | ||
| 610 | |||
| 611 | ProcessShaderCode(state); | ||
| 612 | DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), | ||
| 613 | state.debug.max_opdesc_id, registers.vs_main_offset, | ||
| 614 | registers.vs_output_attributes); | ||
| 615 | |||
| 616 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", | 620 | LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", |
| 617 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), | 621 | ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), |
| 618 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), | 622 | ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), |