Merge pull request #2671 from wwylele/dot3-rgba

rasterizer: implement combiner operation 7 (Dot3_RGBA)
author: bunnei 2017-04-21 17:03:22 -0400
committer: GitHub 2017-04-21 17:03:22 -0400
commit: ea53d6085a454215b4279e1c365273ef0b0202c8 (patch)
tree: 541a9579b76d955a5a543cd6c8451e489424c20c /src/video_core
parent: Merge pull request #2666 from yuriks/gl-cleanups (diff)
parent: gl_shader_gen: remove TODO about Lerp behaviour verification. The implementat... (diff)
download: yuzu-ea53d6085a454215b4279e1c365273ef0b0202c8.tar.gz
yuzu-ea53d6085a454215b4279e1c365273ef0b0202c8.tar.xz
yuzu-ea53d6085a454215b4279e1c365273ef0b0202c8.zip
4 files changed, 39 insertions, 22 deletions
diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h
index be8bc6826..0b62da145 100644
--- a/src/video_core/regs_texturing.h
+++ b/src/video_core/regs_texturing.h
@@ -199,7 +199,7 @@ struct TexturingRegs {
            Lerp = 4,
            Subtract = 5,
            Dot3_RGB = 6,
-
+            Dot3_RGBA = 7,
            MultiplyThenAdd = 8,
            AddThenMultiply = 9,
        };
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 54a8dde15..0f889b172 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -306,8 +306,6 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
        out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";
        break;
    case Operation::Lerp:
-        // TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use
-        // builtin lerp
        out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
               "[1] * (vec3(1.0) - " + variable_name + "[2])";
        break;
@@ -322,6 +320,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
               variable_name + "[2]";
        break;
    case Operation::Dot3_RGB:
+    case Operation::Dot3_RGBA:
        out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +
               "[1] - vec3(0.5)) * 4.0)";
        break;
@@ -421,17 +420,25 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
        AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
        out += ";\n";
-        out += "float alpha_results_" + index_name + "[3] = float[3](";
-        AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);
-        out += ", ";
-        AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);
-        out += ", ";
-        AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);
-        out += ");\n";
-
-        out += "float alpha_output_" + index_name + " = ";
-        AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
-        out += ";\n";
+        if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
+            // result of Dot3_RGBA operation is also placed to the alpha component
+            out += "float alpha_output_" + index_name + " = color_output_" + index_name + "[0];\n";
+        } else {
+            out += "float alpha_results_" + index_name + "[3] = float[3](";
+            AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
+                                index_name);
+            out += ", ";
+            AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
+                                index_name);
+            out += ", ";
+            AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
+                                index_name);
+            out += ");\n";
+
+            out += "float alpha_output_" + index_name + " = ";
+            AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
+            out += ";\n";
+        }
        out += "last_tex_env_out = vec4("
               "clamp(color_output_" +
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 7557fcb89..cb1b90a81 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -403,13 +403,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                };
                auto color_output = ColorCombine(tev_stage.color_op, color_result);
-                // alpha combiner
-                std::array<u8, 3> alpha_result = {{
-                    GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)),
-                    GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)),
-                    GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)),
-                }};
-                auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
+                u8 alpha_output;
+                if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
+                    // result of Dot3_RGBA operation is also placed to the alpha component
+                    alpha_output = color_output.x;
+                } else {
+                    // alpha combiner
+                    std::array<u8, 3> alpha_result = {{
+                        GetAlphaModifier(tev_stage.alpha_modifier1,
+                                         GetSource(tev_stage.alpha_source1)),
+                        GetAlphaModifier(tev_stage.alpha_modifier2,
+                                         GetSource(tev_stage.alpha_source2)),
+                        GetAlphaModifier(tev_stage.alpha_modifier3,
+                                         GetSource(tev_stage.alpha_source3)),
+                    }};
+                    alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
+                }
                combiner_output[0] =
                    std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
index eb18e4ba4..aeb6aeb8c 100644
--- a/src/video_core/swrasterizer/texturing.cpp
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -169,7 +169,8 @@ Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> i
        result = (result * input[2].Cast<int>()) / 255;
        return result.Cast<u8>();
    }
-    case Operation::Dot3_RGB: {
+    case Operation::Dot3_RGB:
+    case Operation::Dot3_RGBA: {
        // Not fully accurate.  Worst case scenario seems to yield a +/-3 error.  Some HW results
        // indicate that the per-component computation can't have a higher precision than 1/256,
        // while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give
author	bunnei	2017-04-21 17:03:22 -0400
committer	GitHub	2017-04-21 17:03:22 -0400
commit	ea53d6085a454215b4279e1c365273ef0b0202c8 (patch)
tree	541a9579b76d955a5a543cd6c8451e489424c20c /src/video_core
parent	Merge pull request #2666 from yuriks/gl-cleanups (diff)
parent	gl_shader_gen: remove TODO about Lerp behaviour verification. The implementat... (diff)
download	yuzu-ea53d6085a454215b4279e1c365273ef0b0202c8.tar.gz yuzu-ea53d6085a454215b4279e1c365273ef0b0202c8.tar.xz yuzu-ea53d6085a454215b4279e1c365273ef0b0202c8.zip

diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index be8bc6826..0b62da145 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h
@@ -199,7 +199,7 @@ struct TexturingRegs {
199	Lerp = 4,	199	Lerp = 4,
200	Subtract = 5,	200	Subtract = 5,
201	Dot3_RGB = 6,	201	Dot3_RGB = 6,
202		202	Dot3_RGBA = 7,
203	MultiplyThenAdd = 8,	203	MultiplyThenAdd = 8,
204	AddThenMultiply = 9,	204	AddThenMultiply = 9,
205	};	205	};


diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 54a8dde15..0f889b172 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -306,8 +306,6 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
306	out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";	306	out += variable_name + "[0] + " + variable_name + "[1] - vec3(0.5)";
307	break;	307	break;
308	case Operation::Lerp:	308	case Operation::Lerp:
309	// TODO(bunnei): Verify if HW actually does this per-component, otherwise we can just use
310	// builtin lerp
311	out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +	309	out += variable_name + "[0] * " + variable_name + "[2] + " + variable_name +
312	"[1] * (vec3(1.0) - " + variable_name + "[2])";	310	"[1] * (vec3(1.0) - " + variable_name + "[2])";
313	break;	311	break;
@@ -322,6 +320,7 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
322	variable_name + "[2]";	320	variable_name + "[2]";
323	break;	321	break;
324	case Operation::Dot3_RGB:	322	case Operation::Dot3_RGB:
		323	case Operation::Dot3_RGBA:
325	out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +	324	out += "vec3(dot(" + variable_name + "[0] - vec3(0.5), " + variable_name +
326	"[1] - vec3(0.5)) * 4.0)";	325	"[1] - vec3(0.5)) * 4.0)";
327	break;	326	break;
@@ -421,17 +420,25 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
421	AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);	420	AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
422	out += ";\n";	421	out += ";\n";
423		422
424	out += "float alpha_results_" + index_name + "[3] = float[3](";	423	if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
425	AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name);	424	// result of Dot3_RGBA operation is also placed to the alpha component
426	out += ", ";	425	out += "float alpha_output_" + index_name + " = color_output_" + index_name + "[0];\n";
427	AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name);	426	} else {
428	out += ", ";	427	out += "float alpha_results_" + index_name + "[3] = float[3](";
429	AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name);	428	AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
430	out += ");\n";	429	index_name);
431		430	out += ", ";
432	out += "float alpha_output_" + index_name + " = ";	431	AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
433	AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);	432	index_name);
434	out += ";\n";	433	out += ", ";
		434	AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
		435	index_name);
		436	out += ");\n";
		437
		438	out += "float alpha_output_" + index_name + " = ";
		439	AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
		440	out += ";\n";
		441	}
435		442
436	out += "last_tex_env_out = vec4("	443	out += "last_tex_env_out = vec4("
437	"clamp(color_output_" +	444	"clamp(color_output_" +


diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 7557fcb89..cb1b90a81 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -403,13 +403,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
403	};	403	};
404	auto color_output = ColorCombine(tev_stage.color_op, color_result);	404	auto color_output = ColorCombine(tev_stage.color_op, color_result);
405		405
406	// alpha combiner	406	u8 alpha_output;
407	std::array<u8, 3> alpha_result = {{	407	if (tev_stage.color_op == TexturingRegs::TevStageConfig::Operation::Dot3_RGBA) {
408	GetAlphaModifier(tev_stage.alpha_modifier1, GetSource(tev_stage.alpha_source1)),	408	// result of Dot3_RGBA operation is also placed to the alpha component
409	GetAlphaModifier(tev_stage.alpha_modifier2, GetSource(tev_stage.alpha_source2)),	409	alpha_output = color_output.x;
410	GetAlphaModifier(tev_stage.alpha_modifier3, GetSource(tev_stage.alpha_source3)),	410	} else {
411	}};	411	// alpha combiner
412	auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);	412	std::array<u8, 3> alpha_result = {{
		413	GetAlphaModifier(tev_stage.alpha_modifier1,
		414	GetSource(tev_stage.alpha_source1)),
		415	GetAlphaModifier(tev_stage.alpha_modifier2,
		416	GetSource(tev_stage.alpha_source2)),
		417	GetAlphaModifier(tev_stage.alpha_modifier3,
		418	GetSource(tev_stage.alpha_source3)),
		419	}};
		420	alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
		421	}
413		422
414	combiner_output[0] =	423	combiner_output[0] =
415	std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());	424	std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());


diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp index eb18e4ba4..aeb6aeb8c 100644 --- a/src/video_core/swrasterizer/texturing.cpp +++ b/src/video_core/swrasterizer/texturing.cpp
@@ -169,7 +169,8 @@ Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> i
169	result = (result * input[2].Cast<int>()) / 255;	169	result = (result * input[2].Cast<int>()) / 255;
170	return result.Cast<u8>();	170	return result.Cast<u8>();
171	}	171	}
172	case Operation::Dot3_RGB: {	172	case Operation::Dot3_RGB:
		173	case Operation::Dot3_RGBA: {
173	// Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results	174	// Not fully accurate. Worst case scenario seems to yield a +/-3 error. Some HW results
174	// indicate that the per-component computation can't have a higher precision than 1/256,	175	// indicate that the per-component computation can't have a higher precision than 1/256,
175	// while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give	176	// while dot3_rgb((0x80,g0,b0), (0x7F,g1,b1)) and dot3_rgb((0x80,g0,b0), (0x80,g1,b1)) give