diff options
| author | 2020-02-27 23:14:25 +0700 | |
|---|---|---|
| committer | 2020-02-27 11:14:25 -0500 | |
| commit | db2f547434cd989bc76f3fc579be692e6756b006 (patch) | |
| tree | 4138f7946834ef60a931dce7a4d70b6f6089fc62 /src | |
| parent | Merge pull request #3440 from namkazt/patch-6 (diff) | |
| download | yuzu-db2f547434cd989bc76f3fc579be692e6756b006.tar.gz yuzu-db2f547434cd989bc76f3fc579be692e6756b006.tar.xz yuzu-db2f547434cd989bc76f3fc579be692e6756b006.zip | |
shader: FMUL switch to using LUT (#3441)
* shader: add FmulPostFactor lookup table (LUT)
* shader: FMUL apply LUT
* Update src/video_core/engines/shader_bytecode.h
Co-Authored-By: Mat M. <mathew1800@gmail.com>
* nit: mistype
* clang-format & add missing import
* shader: remove post factor LUT.
* shader: move post factor LUT to function and fix incorrect order.
* clang-format
* shader: FMUL: add static to post factor LUT
* nit: typo
Co-authored-by: Mat M. <mathew1800@gmail.com>
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/decode/arithmetic.cpp | 33 |
1 file changed, 14 insertions, 19 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp index 90240c765..478394682 100644 --- a/src/video_core/shader/decode/arithmetic.cpp +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -53,29 +53,24 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | |||
| 53 | 53 | ||
| 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); |
| 55 | 55 | ||
| 56 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | 56 | static constexpr std::array FmulPostFactor = { |
| 57 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | 57 | 1.000f, // None |
| 58 | 0.500f, // Divide 2 | ||
| 59 | 0.250f, // Divide 4 | ||
| 60 | 0.125f, // Divide 8 | ||
| 61 | 8.000f, // Mul 8 | ||
| 62 | 4.000f, // Mul 4 | ||
| 63 | 2.000f, // Mul 2 | ||
| 64 | }; | ||
| 58 | 65 | ||
| 59 | if (instr.fmul.postfactor != 0) { | 66 | if (instr.fmul.postfactor != 0) { |
| 60 | auto postfactor = static_cast<s32>(instr.fmul.postfactor); | 67 | op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, |
| 61 | 68 | Immediate(FmulPostFactor[instr.fmul.postfactor])); | |
| 62 | // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below | ||
| 63 | // logic. | ||
| 64 | if (postfactor >= 4) { | ||
| 65 | postfactor = 7 - postfactor; | ||
| 66 | } else { | ||
| 67 | postfactor = 0 - postfactor; | ||
| 68 | } | ||
| 69 | |||
| 70 | if (postfactor > 0) { | ||
| 71 | value = Operation(OperationCode::FMul, NO_PRECISE, value, | ||
| 72 | Immediate(static_cast<f32>(1 << postfactor))); | ||
| 73 | } else { | ||
| 74 | value = Operation(OperationCode::FDiv, NO_PRECISE, value, | ||
| 75 | Immediate(static_cast<f32>(1 << -postfactor))); | ||
| 76 | } | ||
| 77 | } | 69 | } |
| 78 | 70 | ||
| 71 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 72 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 73 | |||
| 79 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); |
| 80 | 75 | ||
| 81 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |