Merge pull request #652 from neobrain/shader_output_fix

Pica/VertexShader: Fix a bug caused due to incorrect assumptions of consecutive output register tables.
author: bunnei 2015-03-16 18:35:34 -0400
committer: bunnei 2015-03-16 18:35:34 -0400
commit: c1f5cb7dd5447acd4533c2b8affc438a94443006 (patch)
tree: b125c422965ea5ac99335e05cf5906e52805f027 /src
parent: Merge pull request #662 from linkmauve/video_core-warnings (diff)
parent: Pica/VertexShader: Fix a bug caused due to incorrect assumptions of consecuti... (diff)
download: yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.gz
yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.xz
yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.zip
1 files changed, 24 insertions, 20 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 4eb3e743e..e8d865172 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -72,7 +72,7 @@ struct VertexShaderState {
    u32* program_counter;
    const float24* input_register_table[16];
-    float24* output_register_table[7*4];
+    Math::Vec4<float24> output_registers[16];
    Math::Vec4<float24> temporary_registers[16];
    bool conditional_code[2];
@@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
                src2[3] = src2[3] * float24::FromFloat32(-1);
            }
-            float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()]
+            float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
-                        : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
                        : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
                        : dummy_vec4_float24;
@@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
                    src3[3] = src3[3] * float24::FromFloat32(-1);
                }
-                float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()]
+                float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
-                            : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
                            : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
                            : dummy_vec4_float24;
@@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
    if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
    if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
-    // Setup output register table
+    state.conditional_code[0] = false;
-    OutputVertex ret;
+    state.conditional_code[1] = false;
-    // Zero output so that attributes which aren't output won't have denormals in them, which will
-    // slow us down later.
+    ProcessShaderCode(state);
-    memset(&ret, 0, sizeof(ret));
+    DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
+                           state.debug.max_opdesc_id, registers.vs_main_offset,
+                           registers.vs_output_attributes);
+    // Setup output data
+    OutputVertex ret;
+    // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
+    // figure out what those circumstances are and enable the remaining outputs then.
    for (int i = 0; i < 7; ++i) {
        const auto& output_register_map = registers.vs_output_attributes[i];
@@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
            output_register_map.map_z, output_register_map.map_w
        };
-        for (int comp = 0; comp < 4; ++comp)
+        for (int comp = 0; comp < 4; ++comp) {
-            state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
+            float24* out = ((float24*)&ret) + semantics[comp];
+            if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
+                *out = state.output_registers[i][comp];
+            } else {
+                // Zero output so that attributes which aren't output won't have denormals in them,
+                // which would slow us down later.
+                memset(out, 0, sizeof(*out));
+            }
+        }
    }
-    state.conditional_code[0] = false;
-    state.conditional_code[1] = false;
-    ProcessShaderCode(state);
-    DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
-                           state.debug.max_opdesc_id, registers.vs_main_offset,
-                           registers.vs_output_attributes);
    LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
        ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
        ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
author	bunnei	2015-03-16 18:35:34 -0400
committer	bunnei	2015-03-16 18:35:34 -0400
commit	c1f5cb7dd5447acd4533c2b8affc438a94443006 (patch)
tree	b125c422965ea5ac99335e05cf5906e52805f027 /src
parent	Merge pull request #662 from linkmauve/video_core-warnings (diff)
parent	Pica/VertexShader: Fix a bug caused due to incorrect assumptions of consecuti... (diff)
download	yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.gz yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.tar.xz yuzu-c1f5cb7dd5447acd4533c2b8affc438a94443006.zip

diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 4eb3e743e..e8d865172 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -72,7 +72,7 @@ struct VertexShaderState {
72	u32* program_counter;	72	u32* program_counter;
73		73
74	const float24* input_register_table[16];	74	const float24* input_register_table[16];
75	float24* output_register_table[7*4];	75	Math::Vec4<float24> output_registers[16];
76		76
77	Math::Vec4<float24> temporary_registers[16];	77	Math::Vec4<float24> temporary_registers[16];
78	bool conditional_code[2];	78	bool conditional_code[2];
@@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
198	src2[3] = src2[3] * float24::FromFloat32(-1);	198	src2[3] = src2[3] * float24::FromFloat32(-1);
199	}	199	}
200		200
201	float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()]	201	float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
202	: (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
203	: (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]	202	: (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
204	: dummy_vec4_float24;	203	: dummy_vec4_float24;
205		204
@@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
409	src3[3] = src3[3] * float24::FromFloat32(-1);	408	src3[3] = src3[3] * float24::FromFloat32(-1);
410	}	409	}
411		410
412	float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()]	411	float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
413	: (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
414	: (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]	412	: (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
415	: dummy_vec4_float24;	413	: dummy_vec4_float24;
416		414
@@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
587	if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;	585	if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
588	if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;	586	if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
589		587
590	// Setup output register table	588	state.conditional_code[0] = false;
591	OutputVertex ret;	589	state.conditional_code[1] = false;
592	// Zero output so that attributes which aren't output won't have denormals in them, which will	590
593	// slow us down later.	591	ProcessShaderCode(state);
594	memset(&ret, 0, sizeof(ret));	592	DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
		593	state.debug.max_opdesc_id, registers.vs_main_offset,
		594	registers.vs_output_attributes);
595		595
		596	// Setup output data
		597	OutputVertex ret;
		598	// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
		599	// figure out what those circumstances are and enable the remaining outputs then.
596	for (int i = 0; i < 7; ++i) {	600	for (int i = 0; i < 7; ++i) {
597	const auto& output_register_map = registers.vs_output_attributes[i];	601	const auto& output_register_map = registers.vs_output_attributes[i];
598		602
@@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
601	output_register_map.map_z, output_register_map.map_w	605	output_register_map.map_z, output_register_map.map_w
602	};	606	};
603		607
604	for (int comp = 0; comp < 4; ++comp)	608	for (int comp = 0; comp < 4; ++comp) {
605	state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];	609	float24* out = ((float24*)&ret) + semantics[comp];
		610	if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
		611	*out = state.output_registers[i][comp];
		612	} else {
		613	// Zero output so that attributes which aren't output won't have denormals in them,
		614	// which would slow us down later.
		615	memset(out, 0, sizeof(*out));
		616	}
		617	}
606	}	618	}
607		619
608	state.conditional_code[0] = false;
609	state.conditional_code[1] = false;
610
611	ProcessShaderCode(state);
612	DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
613	state.debug.max_opdesc_id, registers.vs_main_offset,
614	registers.vs_output_attributes);
615
616	LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",	620	LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
617	ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),	621	ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
618	ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),	622	ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),