shader: FMUL switch to using LUT (#3441)

* shader: add FmulPostFactor LUT table
* shader: FMUL apply LUT
* Update src/video_core/engines/shader_bytecode.h
  Co-Authored-By: Mat M. <mathew1800@gmail.com>
* nit: mistype
* clang-format & add missing import
* shader: remove post factor LUT.
* shader: move post factor LUT to function and fix incorrect order.
* clang-format
* shader: FMUL: add static to post factor LUT
* nit: typo

Co-authored-by: Mat M. <mathew1800@gmail.com>
author: Nguyen Dac Nam 2020-02-27 23:14:25 +0700
committer: GitHub 2020-02-27 11:14:25 -0500
commit: db2f547434cd989bc76f3fc579be692e6756b006 (patch)
tree: 4138f7946834ef60a931dce7a4d70b6f6089fc62 /src
parent: Merge pull request #3440 from namkazt/patch-6 (diff)
download: yuzu-db2f547434cd989bc76f3fc579be692e6756b006.tar.gz
yuzu-db2f547434cd989bc76f3fc579be692e6756b006.tar.xz
yuzu-db2f547434cd989bc76f3fc579be692e6756b006.zip
1 files changed, 14 insertions, 19 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 90240c765..478394682 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
-        // TODO(Rodrigo): Should precise be used when there's a postfactor?
+        static constexpr std::array FmulPostFactor = {
-        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
+            1.000f, // None
+            0.500f, // Divide 2
+            0.250f, // Divide 4
+            0.125f, // Divide 8
+            8.000f, // Mul 8
+            4.000f, // Mul 4
+            2.000f, // Mul 2
+        };
        if (instr.fmul.postfactor != 0) {
-            auto postfactor = static_cast<s32>(instr.fmul.postfactor);
+            op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
+                             Immediate(FmulPostFactor[instr.fmul.postfactor]));
-            // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
-            // logic.
-            if (postfactor >= 4) {
-                postfactor = 7 - postfactor;
-            } else {
-                postfactor = 0 - postfactor;
-            }
-            if (postfactor > 0) {
-                value = Operation(OperationCode::FMul, NO_PRECISE, value,
-                                  Immediate(static_cast<f32>(1 << postfactor)));
-            } else {
-                value = Operation(OperationCode::FDiv, NO_PRECISE, value,
-                                  Immediate(static_cast<f32>(1 << -postfactor)));
-            }
        }
+        // TODO(Rodrigo): Should precise be used when there's a postfactor?
+        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
author	Nguyen Dac Nam	2020-02-27 23:14:25 +0700
committer	GitHub	2020-02-27 11:14:25 -0500
commit	db2f547434cd989bc76f3fc579be692e6756b006 (patch)
tree	4138f7946834ef60a931dce7a4d70b6f6089fc62 /src
parent	Merge pull request #3440 from namkazt/patch-6 (diff)
download	yuzu-db2f547434cd989bc76f3fc579be692e6756b006.tar.gz yuzu-db2f547434cd989bc76f3fc579be692e6756b006.tar.xz yuzu-db2f547434cd989bc76f3fc579be692e6756b006.zip

diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 90240c765..478394682 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
53		53
54	op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);	54	op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55		55
56	// TODO(Rodrigo): Should precise be used when there's a postfactor?	56	static constexpr std::array FmulPostFactor = {
57	Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);	57	1.000f, // None
		58	0.500f, // Divide 2
		59	0.250f, // Divide 4
		60	0.125f, // Divide 8
		61	8.000f, // Mul 8
		62	4.000f, // Mul 4
		63	2.000f, // Mul 2
		64	};
58		65
59	if (instr.fmul.postfactor != 0) {	66	if (instr.fmul.postfactor != 0) {
60	auto postfactor = static_cast<s32>(instr.fmul.postfactor);	67	op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
61		68	Immediate(FmulPostFactor[instr.fmul.postfactor]));
62	// Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below
63	// logic.
64	if (postfactor >= 4) {
65	postfactor = 7 - postfactor;
66	} else {
67	postfactor = 0 - postfactor;
68	}
69
70	if (postfactor > 0) {
71	value = Operation(OperationCode::FMul, NO_PRECISE, value,
72	Immediate(static_cast<f32>(1 << postfactor)));
73	} else {
74	value = Operation(OperationCode::FDiv, NO_PRECISE, value,
75	Immediate(static_cast<f32>(1 << -postfactor)));
76	}
77	}	69	}
78		70
		71	// TODO(Rodrigo): Should precise be used when there's a postfactor?
		72	Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
		73
79	value = GetSaturatedFloat(value, instr.alu.saturate_d);	74	value = GetSaturatedFloat(value, instr.alu.saturate_d);
80		75
81	SetInternalFlagsFromFloat(bb, value, instr.generates_cc);	76	SetInternalFlagsFromFloat(bb, value, instr.generates_cc);