diff options
| author | 2021-02-16 20:52:12 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:22 -0400 | |
| commit | c67d64365a712830fe140dd36e24e2efd9b8a812 (patch) | |
| tree | 9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/shader/decode | |
| parent | shader: Add XMAD multiplication folding optimization (diff) | |
| download | yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip | |
shader: Remove old shader management
Diffstat (limited to 'src/video_core/shader/decode')
28 files changed, 0 insertions, 4919 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null | |||
| @@ -1,166 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::SubOp; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | |||
| 24 | Node op_b = [&] { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return GetImmediate19(instr); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::MOV_C: | ||
| 36 | case OpCode::Id::MOV_R: { | ||
| 37 | // MOV does not have neither 'abs' nor 'neg' bits. | ||
| 38 | SetRegister(bb, instr.gpr0, op_b); | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | case OpCode::Id::FMUL_C: | ||
| 42 | case OpCode::Id::FMUL_R: | ||
| 43 | case OpCode::Id::FMUL_IMM: { | ||
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 45 | if (instr.fmul.tab5cb8_2 != 0) { | ||
| 46 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", | ||
| 47 | instr.fmul.tab5cb8_2.Value()); | ||
| 48 | } | ||
| 49 | if (instr.fmul.tab5c68_0 != 1) { | ||
| 50 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 53 | |||
| 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 55 | |||
| 56 | static constexpr std::array FmulPostFactor = { | ||
| 57 | 1.000f, // None | ||
| 58 | 0.500f, // Divide 2 | ||
| 59 | 0.250f, // Divide 4 | ||
| 60 | 0.125f, // Divide 8 | ||
| 61 | 8.000f, // Mul 8 | ||
| 62 | 4.000f, // Mul 4 | ||
| 63 | 2.000f, // Mul 2 | ||
| 64 | }; | ||
| 65 | |||
| 66 | if (instr.fmul.postfactor != 0) { | ||
| 67 | op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, | ||
| 68 | Immediate(FmulPostFactor[instr.fmul.postfactor])); | ||
| 69 | } | ||
| 70 | |||
| 71 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 72 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); | ||
| 114 | return Immediate(0); | ||
| 115 | } | ||
| 116 | }(); | ||
| 117 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 118 | |||
| 119 | SetRegister(bb, instr.gpr0, value); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::FMNMX_C: | ||
| 123 | case OpCode::Id::FMNMX_R: | ||
| 124 | case OpCode::Id::FMNMX_IMM: { | ||
| 125 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 126 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 127 | |||
| 128 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 129 | |||
| 130 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 131 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 133 | |||
| 134 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | case OpCode::Id::FCMP_RR: | ||
| 139 | case OpCode::Id::FCMP_RC: | ||
| 140 | case OpCode::Id::FCMP_IMMR: { | ||
| 141 | UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); | ||
| 142 | Node op_c = GetRegister(instr.gpr39); | ||
| 143 | Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); | ||
| 144 | SetRegister( | ||
| 145 | bb, instr.gpr0, | ||
| 146 | Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | case OpCode::Id::RRO_C: | ||
| 150 | case OpCode::Id::RRO_R: | ||
| 151 | case OpCode::Id::RRO_IMM: { | ||
| 152 | LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); | ||
| 153 | |||
| 154 | // Currently RRO is only implemented as a register move. | ||
| 155 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 156 | SetRegister(bb, instr.gpr0, op_b); | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | default: | ||
| 160 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 161 | } | ||
| 162 | |||
| 163 | return pc; | ||
| 164 | } | ||
| 165 | |||
| 166 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null | |||
| @@ -1,101 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfType; | ||
| 15 | using Tegra::Shader::Instruction; | ||
| 16 | using Tegra::Shader::OpCode; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | bool negate_a = false; | ||
| 23 | bool negate_b = false; | ||
| 24 | bool absolute_a = false; | ||
| 25 | bool absolute_b = false; | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::HADD2_R: | ||
| 29 | if (instr.alu_half.ftz == 0) { | ||
| 30 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 31 | } | ||
| 32 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 33 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 34 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 35 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 36 | break; | ||
| 37 | case OpCode::Id::HADD2_C: | ||
| 38 | if (instr.alu_half.ftz == 0) { | ||
| 39 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 40 | } | ||
| 41 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 42 | negate_b = ((instr.value >> 56) & 1) != 0; | ||
| 43 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 44 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HMUL2_R: | ||
| 47 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 48 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 49 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 50 | break; | ||
| 51 | case OpCode::Id::HMUL2_C: | ||
| 52 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 53 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 54 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 55 | break; | ||
| 56 | default: | ||
| 57 | UNREACHABLE(); | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | |||
| 61 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); | ||
| 62 | op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); | ||
| 63 | |||
| 64 | auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { | ||
| 65 | switch (opcode->get().GetId()) { | ||
| 66 | case OpCode::Id::HADD2_C: | ||
| 67 | case OpCode::Id::HMUL2_C: | ||
| 68 | return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 69 | case OpCode::Id::HADD2_R: | ||
| 70 | case OpCode::Id::HMUL2_R: | ||
| 71 | return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; | ||
| 72 | default: | ||
| 73 | UNREACHABLE(); | ||
| 74 | return {HalfType::F32, Immediate(0)}; | ||
| 75 | } | ||
| 76 | }(); | ||
| 77 | op_b = UnpackHalfFloat(op_b, type_b); | ||
| 78 | op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); | ||
| 79 | |||
| 80 | Node value = [this, opcode, op_a, op_b = op_b] { | ||
| 81 | switch (opcode->get().GetId()) { | ||
| 82 | case OpCode::Id::HADD2_C: | ||
| 83 | case OpCode::Id::HADD2_R: | ||
| 84 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 85 | case OpCode::Id::HMUL2_C: | ||
| 86 | case OpCode::Id::HMUL2_R: | ||
| 87 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 88 | default: | ||
| 89 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 90 | return Immediate(0); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); | ||
| 94 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 95 | |||
| 96 | SetRegister(bb, instr.gpr0, value); | ||
| 97 | |||
| 98 | return pc; | ||
| 99 | } | ||
| 100 | |||
| 101 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null | |||
| @@ -1,54 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 22 | if (instr.alu_half_imm.ftz == 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | } else { | ||
| 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { | ||
| 27 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | ||
| 32 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 33 | |||
| 34 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 35 | |||
| 36 | Node value = [&]() { | ||
| 37 | switch (opcode->get().GetId()) { | ||
| 38 | case OpCode::Id::HADD2_IMM: | ||
| 39 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 40 | case OpCode::Id::HMUL2_IMM: | ||
| 41 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); | ||
| 49 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 50 | SetRegister(bb, instr.gpr0, value); | ||
| 51 | return pc; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::MOV32_IMM: { | ||
| 22 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 23 | break; | ||
| 24 | } | ||
| 25 | case OpCode::Id::FMUL32_IMM: { | ||
| 26 | Node value = | ||
| 27 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 28 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 29 | |||
| 30 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 31 | SetRegister(bb, instr.gpr0, value); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | case OpCode::Id::FADD32I: { | ||
| 35 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 36 | instr.fadd32i.negate_a); | ||
| 37 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 38 | instr.fadd32i.negate_b); | ||
| 39 | |||
| 40 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 41 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 42 | SetRegister(bb, instr.gpr0, value); | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | default: | ||
| 46 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 47 | opcode->get().GetName()); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null | |||
| @@ -1,375 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::IAdd3Height; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = [&]() { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::IADD_C: | ||
| 36 | case OpCode::Id::IADD_R: | ||
| 37 | case OpCode::Id::IADD_IMM: { | ||
| 38 | UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); | ||
| 39 | UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); | ||
| 40 | |||
| 41 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 42 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 43 | |||
| 44 | Node value = Operation(OperationCode::UAdd, op_a, op_b); | ||
| 45 | |||
| 46 | if (instr.iadd.x) { | ||
| 47 | Node carry = GetInternalFlag(InternalFlag::Carry); | ||
| 48 | Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); | ||
| 49 | value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); | ||
| 50 | } | ||
| 51 | |||
| 52 | if (instr.generates_cc) { | ||
| 53 | const Node i0 = Immediate(0); | ||
| 54 | |||
| 55 | Node zero = Operation(OperationCode::LogicalIEqual, value, i0); | ||
| 56 | Node sign = Operation(OperationCode::LogicalILessThan, value, i0); | ||
| 57 | Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); | ||
| 58 | |||
| 59 | Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); | ||
| 60 | Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); | ||
| 61 | Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); | ||
| 62 | Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); | ||
| 63 | |||
| 64 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); | ||
| 65 | SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); | ||
| 66 | SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); | ||
| 67 | SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); | ||
| 68 | } | ||
| 69 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::IADD3_C: | ||
| 73 | case OpCode::Id::IADD3_R: | ||
| 74 | case OpCode::Id::IADD3_IMM: { | ||
| 75 | Node op_c = GetRegister(instr.gpr39); | ||
| 76 | |||
| 77 | const auto ApplyHeight = [&](IAdd3Height height, Node value) { | ||
| 78 | switch (height) { | ||
| 79 | case IAdd3Height::None: | ||
| 80 | return value; | ||
| 81 | case IAdd3Height::LowerHalfWord: | ||
| 82 | return BitfieldExtract(value, 0, 16); | ||
| 83 | case IAdd3Height::UpperHalfWord: | ||
| 84 | return BitfieldExtract(value, 16, 16); | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); | ||
| 87 | return Immediate(0); | ||
| 88 | } | ||
| 89 | }; | ||
| 90 | |||
| 91 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 92 | op_a = ApplyHeight(instr.iadd3.height_a, op_a); | ||
| 93 | op_b = ApplyHeight(instr.iadd3.height_b, op_b); | ||
| 94 | op_c = ApplyHeight(instr.iadd3.height_c, op_c); | ||
| 95 | } | ||
| 96 | |||
| 97 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); | ||
| 98 | op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); | ||
| 99 | op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); | ||
| 100 | |||
| 101 | const Node value = [&] { | ||
| 102 | Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); | ||
| 103 | if (opcode->get().GetId() != OpCode::Id::IADD3_R) { | ||
| 104 | return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); | ||
| 105 | } | ||
| 106 | const Node shifted = [&] { | ||
| 107 | switch (instr.iadd3.mode) { | ||
| 108 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 109 | // TODO(tech4me): According to | ||
| 110 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 111 | // The addition between op_a and op_b should be done in uint33, more | ||
| 112 | // investigation required | ||
| 113 | return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, | ||
| 114 | Immediate(16)); | ||
| 115 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 116 | return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, | ||
| 117 | Immediate(16)); | ||
| 118 | default: | ||
| 119 | return add_ab; | ||
| 120 | } | ||
| 121 | }(); | ||
| 122 | return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); | ||
| 123 | }(); | ||
| 124 | |||
| 125 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 126 | SetRegister(bb, instr.gpr0, value); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case OpCode::Id::ISCADD_C: | ||
| 130 | case OpCode::Id::ISCADD_R: | ||
| 131 | case OpCode::Id::ISCADD_IMM: { | ||
| 132 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 133 | "Condition codes generation in ISCADD is not implemented"); | ||
| 134 | |||
| 135 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 136 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 137 | |||
| 138 | const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); | ||
| 139 | const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); | ||
| 140 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); | ||
| 141 | |||
| 142 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 143 | SetRegister(bb, instr.gpr0, value); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case OpCode::Id::POPC_C: | ||
| 147 | case OpCode::Id::POPC_R: | ||
| 148 | case OpCode::Id::POPC_IMM: { | ||
| 149 | if (instr.popc.invert) { | ||
| 150 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 151 | } | ||
| 152 | const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); | ||
| 153 | SetRegister(bb, instr.gpr0, value); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case OpCode::Id::FLO_R: | ||
| 157 | case OpCode::Id::FLO_C: | ||
| 158 | case OpCode::Id::FLO_IMM: { | ||
| 159 | Node value; | ||
| 160 | if (instr.flo.invert) { | ||
| 161 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 162 | } | ||
| 163 | if (instr.flo.is_signed) { | ||
| 164 | value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 165 | } else { | ||
| 166 | value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 167 | } | ||
| 168 | if (instr.flo.sh) { | ||
| 169 | value = | ||
| 170 | Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); | ||
| 171 | } | ||
| 172 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 173 | break; | ||
| 174 | } | ||
| 175 | case OpCode::Id::SEL_C: | ||
| 176 | case OpCode::Id::SEL_R: | ||
| 177 | case OpCode::Id::SEL_IMM: { | ||
| 178 | const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 179 | const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); | ||
| 180 | SetRegister(bb, instr.gpr0, value); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | case OpCode::Id::ICMP_CR: | ||
| 184 | case OpCode::Id::ICMP_R: | ||
| 185 | case OpCode::Id::ICMP_RC: | ||
| 186 | case OpCode::Id::ICMP_IMM: { | ||
| 187 | const Node zero = Immediate(0); | ||
| 188 | |||
| 189 | const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { | ||
| 190 | switch (opcode->get().GetId()) { | ||
| 191 | case OpCode::Id::ICMP_CR: | ||
| 192 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 193 | GetRegister(instr.gpr39)}; | ||
| 194 | case OpCode::Id::ICMP_R: | ||
| 195 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 196 | case OpCode::Id::ICMP_RC: | ||
| 197 | return {GetRegister(instr.gpr39), | ||
| 198 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 199 | case OpCode::Id::ICMP_IMM: | ||
| 200 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 201 | default: | ||
| 202 | UNREACHABLE(); | ||
| 203 | return {zero, zero}; | ||
| 204 | } | ||
| 205 | }(); | ||
| 206 | const Node op_lhs = GetRegister(instr.gpr8); | ||
| 207 | const Node comparison = | ||
| 208 | GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); | ||
| 209 | SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | case OpCode::Id::LOP_C: | ||
| 213 | case OpCode::Id::LOP_R: | ||
| 214 | case OpCode::Id::LOP_IMM: { | ||
| 215 | if (instr.alu.lop.invert_a) | ||
| 216 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 217 | if (instr.alu.lop.invert_b) | ||
| 218 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 219 | |||
| 220 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 221 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 222 | instr.generates_cc); | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case OpCode::Id::LOP3_C: | ||
| 226 | case OpCode::Id::LOP3_R: | ||
| 227 | case OpCode::Id::LOP3_IMM: { | ||
| 228 | const Node op_c = GetRegister(instr.gpr39); | ||
| 229 | const Node lut = [&]() { | ||
| 230 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 231 | return Immediate(instr.alu.lop3.GetImmLut28()); | ||
| 232 | } else { | ||
| 233 | return Immediate(instr.alu.lop3.GetImmLut48()); | ||
| 234 | } | ||
| 235 | }(); | ||
| 236 | |||
| 237 | WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IMNMX_C: | ||
| 241 | case OpCode::Id::IMNMX_R: | ||
| 242 | case OpCode::Id::IMNMX_IMM: { | ||
| 243 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 244 | |||
| 245 | const bool is_signed = instr.imnmx.is_signed; | ||
| 246 | |||
| 247 | const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 248 | const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); | ||
| 249 | const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); | ||
| 250 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 251 | |||
| 252 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 253 | SetRegister(bb, instr.gpr0, value); | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | case OpCode::Id::LEA_R2: | ||
| 257 | case OpCode::Id::LEA_R1: | ||
| 258 | case OpCode::Id::LEA_IMM: | ||
| 259 | case OpCode::Id::LEA_RZ: | ||
| 260 | case OpCode::Id::LEA_HI: { | ||
| 261 | auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { | ||
| 262 | switch (opcode->get().GetId()) { | ||
| 263 | case OpCode::Id::LEA_R2: { | ||
| 264 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), | ||
| 265 | Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; | ||
| 266 | } | ||
| 267 | case OpCode::Id::LEA_R1: { | ||
| 268 | const bool neg = instr.lea.r1.neg != 0; | ||
| 269 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 270 | GetRegister(instr.gpr20), | ||
| 271 | Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; | ||
| 272 | } | ||
| 273 | case OpCode::Id::LEA_IMM: { | ||
| 274 | const bool neg = instr.lea.imm.neg != 0; | ||
| 275 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 276 | Immediate(static_cast<u32>(instr.lea.imm.entry_a)), | ||
| 277 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 278 | } | ||
| 279 | case OpCode::Id::LEA_RZ: { | ||
| 280 | const bool neg = instr.lea.rz.neg != 0; | ||
| 281 | return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), | ||
| 282 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 283 | Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; | ||
| 284 | } | ||
| 285 | case OpCode::Id::LEA_HI: | ||
| 286 | default: | ||
| 287 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 288 | |||
| 289 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), | ||
| 290 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 291 | } | ||
| 292 | }(); | ||
| 293 | |||
| 294 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 295 | "Unhandled LEA Predicate"); | ||
| 296 | |||
| 297 | Node value = | ||
| 298 | Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); | ||
| 299 | value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); | ||
| 300 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 301 | |||
| 302 | break; | ||
| 303 | } | ||
| 304 | default: | ||
| 305 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); | ||
| 306 | } | ||
| 307 | |||
| 308 | return pc; | ||
| 309 | } | ||
| 310 | |||
| 311 | void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 312 | Node imm_lut, bool sets_cc) { | ||
| 313 | const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { | ||
| 314 | Node value = Immediate(0); | ||
| 315 | const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); | ||
| 316 | if (imm.GetValue() & 0x01) { | ||
| 317 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 318 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 319 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 320 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 321 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 322 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 323 | } | ||
| 324 | if (imm.GetValue() & 0x02) { | ||
| 325 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 326 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 327 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 328 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 329 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 330 | } | ||
| 331 | if (imm.GetValue() & 0x04) { | ||
| 332 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 333 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 334 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 335 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 336 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 337 | } | ||
| 338 | if (imm.GetValue() & 0x08) { | ||
| 339 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 340 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 341 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 342 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 343 | } | ||
| 344 | if (imm.GetValue() & 0x10) { | ||
| 345 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 346 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 347 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 348 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 349 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 350 | } | ||
| 351 | if (imm.GetValue() & 0x20) { | ||
| 352 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 353 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 354 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 355 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 356 | } | ||
| 357 | if (imm.GetValue() & 0x40) { | ||
| 358 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 359 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 360 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 361 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 362 | } | ||
| 363 | if (imm.GetValue() & 0x80) { | ||
| 364 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 365 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 366 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 367 | } | ||
| 368 | return value; | ||
| 369 | }(op_a, op_b, op_c, imm_lut); | ||
| 370 | |||
| 371 | SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); | ||
| 372 | SetRegister(bb, dest, lop3_fast); | ||
| 373 | } | ||
| 374 | |||
| 375 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::LogicOperation; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::PredicateResultMode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | Node op_a = GetRegister(instr.gpr8); | ||
| 25 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::IADD32I: { | ||
| 29 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 30 | |||
| 31 | op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); | ||
| 32 | |||
| 33 | Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); | ||
| 36 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::LOP32I: { | ||
| 40 | if (instr.alu.lop32i.invert_a) { | ||
| 41 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); | ||
| 42 | } | ||
| 43 | |||
| 44 | if (instr.alu.lop32i.invert_b) { | ||
| 45 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 46 | } | ||
| 47 | |||
| 48 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), | ||
| 49 | std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, | ||
| 50 | instr.op_32.generates_cc != 0); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | default: | ||
| 54 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 55 | opcode->get().GetName()); | ||
| 56 | } | ||
| 57 | |||
| 58 | return pc; | ||
| 59 | } | ||
| 60 | |||
| 61 | void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, | ||
| 62 | Node op_b, PredicateResultMode predicate_mode, Pred predicate, | ||
| 63 | bool sets_cc) { | ||
| 64 | Node result = [&] { | ||
| 65 | switch (logic_op) { | ||
| 66 | case LogicOperation::And: | ||
| 67 | return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 68 | case LogicOperation::Or: | ||
| 69 | return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 70 | case LogicOperation::Xor: | ||
| 71 | return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 72 | case LogicOperation::PassB: | ||
| 73 | return op_b; | ||
| 74 | default: | ||
| 75 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); | ||
| 76 | return Immediate(0); | ||
| 77 | } | ||
| 78 | }(); | ||
| 79 | |||
| 80 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 81 | SetRegister(bb, dest, result); | ||
| 82 | |||
| 83 | // Write the predicate value depending on the predicate mode. | ||
| 84 | switch (predicate_mode) { | ||
| 85 | case PredicateResultMode::None: | ||
| 86 | // Do nothing. | ||
| 87 | return; | ||
| 88 | case PredicateResultMode::NotZero: { | ||
| 89 | // Set the predicate to true if the result is not zero. | ||
| 90 | Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); | ||
| 91 | SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | default: | ||
| 95 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null | |||
| @@ -1,77 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | Node op_b = [&] { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::BFE_R: | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | case OpCode::Id::BFE_C: | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 34 | |||
| 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented"); | ||
| 36 | |||
| 37 | const bool is_signed = instr.bfe.is_signed; | ||
| 38 | |||
| 39 | // using reverse parallel method in | ||
| 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel | ||
| 41 | // note for later if possible to implement faster method. | ||
| 42 | if (instr.bfe.brev) { | ||
| 43 | const auto swap = [&](u32 s, u32 mask) { | ||
| 44 | Node v1 = | ||
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 65 | } | ||
| 66 | |||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 74 | return pc; | ||
| 75 | } | ||
| 76 | |||
| 77 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null | |||
| @@ -1,45 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { | ||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::BFI_RC: | ||
| 23 | return {GetRegister(instr.gpr39), | ||
| 24 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 25 | case OpCode::Id::BFI_IMM_R: | ||
| 26 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 27 | default: | ||
| 28 | UNREACHABLE(); | ||
| 29 | return {Immediate(0), Immediate(0)}; | ||
| 30 | } | ||
| 31 | }(); | ||
| 32 | const Node insert = GetRegister(instr.gpr8); | ||
| 33 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 34 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 35 | |||
| 36 | const Node value = | ||
| 37 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 38 | |||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | SetRegister(bb, instr.gpr0, value); | ||
| 41 | |||
| 42 | return pc; | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null | |||
| @@ -1,321 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/node_helper.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::Register; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 24 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 25 | } | ||
| 26 | |||
| 27 | constexpr u32 SizeInBits(Register::Size size) { | ||
| 28 | switch (size) { | ||
| 29 | case Register::Size::Byte: | ||
| 30 | return 8; | ||
| 31 | case Register::Size::Short: | ||
| 32 | return 16; | ||
| 33 | case Register::Size::Word: | ||
| 34 | return 32; | ||
| 35 | case Register::Size::Long: | ||
| 36 | return 64; | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, | ||
| 42 | Register::Size dst_size, | ||
| 43 | bool src_signed, | ||
| 44 | bool dst_signed) { | ||
| 45 | const u32 dst_bits = SizeInBits(dst_size); | ||
| 46 | if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { | ||
| 47 | if (src_signed == dst_signed) { | ||
| 48 | return std::nullopt; | ||
| 49 | } | ||
| 50 | return std::make_pair(0, std::numeric_limits<s32>::max()); | ||
| 51 | } | ||
| 52 | if (dst_signed) { | ||
| 53 | // Signed destination, clamp to [-128, 127] for instance | ||
| 54 | return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); | ||
| 55 | } else { | ||
| 56 | // Unsigned destination | ||
| 57 | if (dst_bits == 32) { | ||
| 58 | // Avoid shifting by 32, that is undefined behavior | ||
| 59 | return std::make_pair(0, s32(std::numeric_limits<u32>::max())); | ||
| 60 | } | ||
| 61 | return std::make_pair(0, (1 << dst_bits) - 1); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
| 67 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||
| 68 | const Instruction instr = {program_code[pc]}; | ||
| 69 | const auto opcode = OpCode::Decode(instr); | ||
| 70 | |||
| 71 | switch (opcode->get().GetId()) { | ||
| 72 | case OpCode::Id::I2I_R: | ||
| 73 | case OpCode::Id::I2I_C: | ||
| 74 | case OpCode::Id::I2I_IMM: { | ||
| 75 | const bool src_signed = instr.conversion.is_input_signed; | ||
| 76 | const bool dst_signed = instr.conversion.is_output_signed; | ||
| 77 | const Register::Size src_size = instr.conversion.src_size; | ||
| 78 | const Register::Size dst_size = instr.conversion.dst_size; | ||
| 79 | const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); | ||
| 80 | |||
| 81 | Node value = [this, instr, opcode] { | ||
| 82 | switch (opcode->get().GetId()) { | ||
| 83 | case OpCode::Id::I2I_R: | ||
| 84 | return GetRegister(instr.gpr20); | ||
| 85 | case OpCode::Id::I2I_C: | ||
| 86 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 87 | case OpCode::Id::I2I_IMM: | ||
| 88 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 89 | default: | ||
| 90 | UNREACHABLE(); | ||
| 91 | return Immediate(0); | ||
| 92 | } | ||
| 93 | }(); | ||
| 94 | |||
| 95 | // Ensure the source selector is valid | ||
| 96 | switch (instr.conversion.src_size) { | ||
| 97 | case Register::Size::Byte: | ||
| 98 | break; | ||
| 99 | case Register::Size::Short: | ||
| 100 | ASSERT(selector == 0 || selector == 2); | ||
| 101 | break; | ||
| 102 | default: | ||
| 103 | ASSERT(selector == 0); | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | if (src_size != Register::Size::Word || selector != 0) { | ||
| 108 | value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), | ||
| 109 | Immediate(selector * 8), Immediate(SizeInBits(src_size))); | ||
| 110 | } | ||
| 111 | |||
| 112 | value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, | ||
| 113 | instr.conversion.negate_a, src_signed); | ||
| 114 | |||
| 115 | if (instr.alu.saturate_d) { | ||
| 116 | if (src_signed && !dst_signed) { | ||
| 117 | Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, | ||
| 118 | Immediate(1 << (SizeInBits(src_size) - 1))); | ||
| 119 | value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), | ||
| 120 | std::move(value)); | ||
| 121 | |||
| 122 | // Simplify generated expressions, this can be removed without semantic impact | ||
| 123 | SetTemporary(bb, 0, std::move(value)); | ||
| 124 | value = GetTemporary(0); | ||
| 125 | |||
| 126 | if (dst_size != Register::Size::Word) { | ||
| 127 | const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 128 | Node is_large = | ||
| 129 | Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); | ||
| 130 | value = Operation(OperationCode::Select, std::move(is_large), limit, | ||
| 131 | std::move(value)); | ||
| 132 | } | ||
| 133 | } else if (const std::optional bounds = | ||
| 134 | IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { | ||
| 135 | value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), | ||
| 136 | Immediate(bounds->first)); | ||
| 137 | value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), | ||
| 138 | Immediate(bounds->second)); | ||
| 139 | } | ||
| 140 | } else if (dst_size != Register::Size::Word) { | ||
| 141 | // No saturation, we only have to mask the result | ||
| 142 | Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 143 | value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); | ||
| 144 | } | ||
| 145 | |||
| 146 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 147 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | case OpCode::Id::I2F_R: | ||
| 151 | case OpCode::Id::I2F_C: | ||
| 152 | case OpCode::Id::I2F_IMM: { | ||
| 153 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 154 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 155 | "Condition codes generation in I2F is not implemented"); | ||
| 156 | |||
| 157 | Node value = [&] { | ||
| 158 | switch (opcode->get().GetId()) { | ||
| 159 | case OpCode::Id::I2F_R: | ||
| 160 | return GetRegister(instr.gpr20); | ||
| 161 | case OpCode::Id::I2F_C: | ||
| 162 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 163 | case OpCode::Id::I2F_IMM: | ||
| 164 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 165 | default: | ||
| 166 | UNREACHABLE(); | ||
| 167 | return Immediate(0); | ||
| 168 | } | ||
| 169 | }(); | ||
| 170 | |||
| 171 | const bool input_signed = instr.conversion.is_input_signed; | ||
| 172 | |||
| 173 | if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { | ||
| 174 | ASSERT(instr.conversion.src_size == Register::Size::Byte || | ||
| 175 | instr.conversion.src_size == Register::Size::Short); | ||
| 176 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 177 | ASSERT(offset == 0 || offset == 2); | ||
| 178 | } | ||
| 179 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 180 | std::move(value), Immediate(offset * 8)); | ||
| 181 | } | ||
| 182 | |||
| 183 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 184 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | ||
| 185 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | ||
| 186 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||
| 187 | |||
| 188 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 189 | |||
| 190 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 191 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | SetRegister(bb, instr.gpr0, value); | ||
| 195 | break; | ||
| 196 | } | ||
| 197 | case OpCode::Id::F2F_R: | ||
| 198 | case OpCode::Id::F2F_C: | ||
| 199 | case OpCode::Id::F2F_IMM: { | ||
| 200 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 201 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 202 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 203 | "Condition codes generation in F2F is not implemented"); | ||
| 204 | |||
| 205 | Node value = [&]() { | ||
| 206 | switch (opcode->get().GetId()) { | ||
| 207 | case OpCode::Id::F2F_R: | ||
| 208 | return GetRegister(instr.gpr20); | ||
| 209 | case OpCode::Id::F2F_C: | ||
| 210 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 211 | case OpCode::Id::F2F_IMM: | ||
| 212 | return GetImmediate19(instr); | ||
| 213 | default: | ||
| 214 | UNREACHABLE(); | ||
| 215 | return Immediate(0); | ||
| 216 | } | ||
| 217 | }(); | ||
| 218 | |||
| 219 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 220 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 221 | std::move(value)); | ||
| 222 | } else { | ||
| 223 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 227 | |||
| 228 | value = [&] { | ||
| 229 | if (instr.conversion.src_size != instr.conversion.dst_size) { | ||
| 230 | // Rounding operations only matter when the source and destination conversion size | ||
| 231 | // is the same. | ||
| 232 | return value; | ||
| 233 | } | ||
| 234 | switch (instr.conversion.f2f.GetRoundingMode()) { | ||
| 235 | case Tegra::Shader::F2fRoundingOp::None: | ||
| 236 | return value; | ||
| 237 | case Tegra::Shader::F2fRoundingOp::Round: | ||
| 238 | return Operation(OperationCode::FRoundEven, value); | ||
| 239 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 240 | return Operation(OperationCode::FFloor, value); | ||
| 241 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 242 | return Operation(OperationCode::FCeil, value); | ||
| 243 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 244 | return Operation(OperationCode::FTrunc, value); | ||
| 245 | default: | ||
| 246 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 247 | instr.conversion.f2f.rounding.Value()); | ||
| 248 | return value; | ||
| 249 | } | ||
| 250 | }(); | ||
| 251 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 252 | |||
| 253 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 254 | |||
| 255 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 256 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 257 | } | ||
| 258 | |||
| 259 | SetRegister(bb, instr.gpr0, value); | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::F2I_R: | ||
| 263 | case OpCode::Id::F2I_C: | ||
| 264 | case OpCode::Id::F2I_IMM: { | ||
| 265 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 267 | "Condition codes generation in F2I is not implemented"); | ||
| 268 | Node value = [&]() { | ||
| 269 | switch (opcode->get().GetId()) { | ||
| 270 | case OpCode::Id::F2I_R: | ||
| 271 | return GetRegister(instr.gpr20); | ||
| 272 | case OpCode::Id::F2I_C: | ||
| 273 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 274 | case OpCode::Id::F2I_IMM: | ||
| 275 | return GetImmediate19(instr); | ||
| 276 | default: | ||
| 277 | UNREACHABLE(); | ||
| 278 | return Immediate(0); | ||
| 279 | } | ||
| 280 | }(); | ||
| 281 | |||
| 282 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 283 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 284 | std::move(value)); | ||
| 285 | } else { | ||
| 286 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 287 | } | ||
| 288 | |||
| 289 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 290 | |||
| 291 | value = [&]() { | ||
| 292 | switch (instr.conversion.f2i.rounding) { | ||
| 293 | case Tegra::Shader::F2iRoundingOp::RoundEven: | ||
| 294 | return Operation(OperationCode::FRoundEven, PRECISE, value); | ||
| 295 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 296 | return Operation(OperationCode::FFloor, PRECISE, value); | ||
| 297 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 298 | return Operation(OperationCode::FCeil, PRECISE, value); | ||
| 299 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 300 | return Operation(OperationCode::FTrunc, PRECISE, value); | ||
| 301 | default: | ||
| 302 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 303 | instr.conversion.f2i.rounding.Value()); | ||
| 304 | return Immediate(0); | ||
| 305 | } | ||
| 306 | }(); | ||
| 307 | const bool is_signed = instr.conversion.is_output_signed; | ||
| 308 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); | ||
| 309 | value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); | ||
| 310 | |||
| 311 | SetRegister(bb, instr.gpr0, value); | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | default: | ||
| 315 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 316 | } | ||
| 317 | |||
| 318 | return pc; | ||
| 319 | } | ||
| 320 | |||
| 321 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null | |||
| @@ -1,62 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 21 | if (instr.ffma.tab5980_0 != 1) { | ||
| 22 | LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); | ||
| 23 | } | ||
| 24 | if (instr.ffma.tab5980_1 != 0) { | ||
| 25 | LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 27 | |||
| 28 | const Node op_a = GetRegister(instr.gpr8); | ||
| 29 | |||
| 30 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 31 | switch (opcode->get().GetId()) { | ||
| 32 | case OpCode::Id::FFMA_CR: { | ||
| 33 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 34 | GetRegister(instr.gpr39)}; | ||
| 35 | } | ||
| 36 | case OpCode::Id::FFMA_RR: | ||
| 37 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 38 | case OpCode::Id::FFMA_RC: { | ||
| 39 | return {GetRegister(instr.gpr39), | ||
| 40 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 41 | } | ||
| 42 | case OpCode::Id::FFMA_IMM: | ||
| 43 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 46 | return {Immediate(0), Immediate(0)}; | ||
| 47 | } | ||
| 48 | }(); | ||
| 49 | |||
| 50 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 51 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 52 | |||
| 53 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 54 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 55 | |||
| 56 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 57 | SetRegister(bb, instr.gpr0, value); | ||
| 58 | |||
| 59 | return pc; | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 35 | // condition is true, and to 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null | |||
| @@ -1,57 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 21 | instr.fsetp.neg_a != 0); | ||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); | ||
| 32 | |||
| 33 | // We can't use the constant predicate as destination. | ||
| 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 35 | |||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 39 | |||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | |||
| 43 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 44 | SetPredicate(bb, instr.fsetp.pred3, value); | ||
| 45 | |||
| 46 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 47 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 48 | // if enabled | ||
| 49 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 50 | const Node second_value = Operation(combiner, negated_pred, second_pred); | ||
| 51 | SetPredicate(bb, instr.fsetp.pred0, second_value); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/logging/log.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using std::move; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PredCondition; | ||
| 20 | |||
| 21 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | ||
| 22 | const Instruction instr = {program_code[pc]}; | ||
| 23 | const auto opcode = OpCode::Decode(instr); | ||
| 24 | |||
| 25 | PredCondition cond{}; | ||
| 26 | bool bf = false; | ||
| 27 | bool ftz = false; | ||
| 28 | bool neg_a = false; | ||
| 29 | bool abs_a = false; | ||
| 30 | bool neg_b = false; | ||
| 31 | bool abs_b = false; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSET2_C: | ||
| 34 | case OpCode::Id::HSET2_IMM: | ||
| 35 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 36 | bf = instr.Bit(53); | ||
| 37 | ftz = instr.Bit(54); | ||
| 38 | neg_a = instr.Bit(43); | ||
| 39 | abs_a = instr.Bit(44); | ||
| 40 | neg_b = instr.Bit(56); | ||
| 41 | abs_b = instr.Bit(54); | ||
| 42 | break; | ||
| 43 | case OpCode::Id::HSET2_R: | ||
| 44 | cond = instr.hsetp2.reg.cond; | ||
| 45 | bf = instr.Bit(49); | ||
| 46 | ftz = instr.Bit(50); | ||
| 47 | neg_a = instr.Bit(43); | ||
| 48 | abs_a = instr.Bit(44); | ||
| 49 | neg_b = instr.Bit(31); | ||
| 50 | abs_b = instr.Bit(30); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNREACHABLE(); | ||
| 54 | } | ||
| 55 | |||
| 56 | Node op_b = [this, instr, opcode] { | ||
| 57 | switch (opcode->get().GetId()) { | ||
| 58 | case OpCode::Id::HSET2_C: | ||
| 59 | // Inform as unimplemented as this is not tested. | ||
| 60 | UNIMPLEMENTED_MSG("HSET2_C is not implemented"); | ||
| 61 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 62 | case OpCode::Id::HSET2_R: | ||
| 63 | return GetRegister(instr.gpr20); | ||
| 64 | case OpCode::Id::HSET2_IMM: | ||
| 65 | return UnpackHalfImmediate(instr, true); | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return Node{}; | ||
| 69 | } | ||
| 70 | }(); | ||
| 71 | |||
| 72 | if (!ftz) { | ||
| 73 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 77 | op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); | ||
| 78 | |||
| 79 | switch (opcode->get().GetId()) { | ||
| 80 | case OpCode::Id::HSET2_R: | ||
| 81 | op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); | ||
| 82 | [[fallthrough]]; | ||
| 83 | case OpCode::Id::HSET2_C: | ||
| 84 | op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); | ||
| 85 | break; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | |||
| 90 | Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | ||
| 91 | |||
| 92 | Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 93 | |||
| 94 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 95 | |||
| 96 | // HSET2 operates on each half float in the pack. | ||
| 97 | std::array<Node, 2> values; | ||
| 98 | for (u32 i = 0; i < 2; ++i) { | ||
| 99 | const u32 raw_value = bf ? 0x3c00 : 0xffff; | ||
| 100 | Node true_value = Immediate(raw_value << (i * 16)); | ||
| 101 | Node false_value = Immediate(0); | ||
| 102 | |||
| 103 | Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 104 | Node predicate = Operation(combiner, comparison, second_pred); | ||
| 105 | values[i] = | ||
| 106 | Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); | ||
| 110 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 111 | |||
| 112 | return pc; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null | |||
| @@ -1,80 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | if (instr.hsetp2.ftz != 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | ||
| 27 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 28 | |||
| 29 | Tegra::Shader::PredCondition cond{}; | ||
| 30 | bool h_and{}; | ||
| 31 | Node op_b{}; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSETP2_C: | ||
| 34 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 35 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 36 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 37 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); | ||
| 38 | // F32 is hardcoded in hardware | ||
| 39 | op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); | ||
| 40 | break; | ||
| 41 | case OpCode::Id::HSETP2_IMM: | ||
| 42 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 43 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 44 | op_b = UnpackHalfImmediate(instr, true); | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HSETP2_R: | ||
| 47 | cond = instr.hsetp2.reg.cond; | ||
| 48 | h_and = instr.hsetp2.reg.h_and; | ||
| 49 | op_b = | ||
| 50 | GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), | ||
| 51 | instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); | ||
| 52 | break; | ||
| 53 | default: | ||
| 54 | UNREACHABLE(); | ||
| 55 | op_b = Immediate(0); | ||
| 56 | } | ||
| 57 | |||
| 58 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 59 | const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); | ||
| 60 | |||
| 61 | const auto Write = [&](u64 dest, Node src) { | ||
| 62 | SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); | ||
| 63 | }; | ||
| 64 | |||
| 65 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 66 | const u64 first = instr.hsetp2.pred3; | ||
| 67 | const u64 second = instr.hsetp2.pred0; | ||
| 68 | if (h_and) { | ||
| 69 | Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 70 | Write(first, joined); | ||
| 71 | Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); | ||
| 72 | } else { | ||
| 73 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); | ||
| 74 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); | ||
| 75 | } | ||
| 76 | |||
| 77 | return pc; | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null | |||
| @@ -1,73 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Shader::HalfPrecision; | ||
| 16 | using Tegra::Shader::HalfType; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); | ||
| 26 | } else { | ||
| 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); | ||
| 28 | } | ||
| 29 | |||
| 30 | constexpr auto identity = HalfType::H0_H1; | ||
| 31 | bool neg_b{}, neg_c{}; | ||
| 32 | auto [saturate, type_b, op_b, type_c, | ||
| 33 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HFMA2_CR: | ||
| 36 | neg_b = instr.hfma2.negate_b; | ||
| 37 | neg_c = instr.hfma2.negate_c; | ||
| 38 | return {instr.hfma2.saturate, HalfType::F32, | ||
| 39 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 40 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 41 | case OpCode::Id::HFMA2_RC: | ||
| 42 | neg_b = instr.hfma2.negate_b; | ||
| 43 | neg_c = instr.hfma2.negate_c; | ||
| 44 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||
| 45 | HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 46 | case OpCode::Id::HFMA2_RR: | ||
| 47 | neg_b = instr.hfma2.rr.negate_b; | ||
| 48 | neg_c = instr.hfma2.rr.negate_c; | ||
| 49 | return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), | ||
| 50 | instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; | ||
| 51 | case OpCode::Id::HFMA2_IMM_R: | ||
| 52 | neg_c = instr.hfma2.negate_c; | ||
| 53 | return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), | ||
| 54 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 55 | default: | ||
| 56 | return {false, identity, Immediate(0), identity, Immediate(0)}; | ||
| 57 | } | ||
| 58 | }(); | ||
| 59 | |||
| 60 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); | ||
| 61 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); | ||
| 62 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||
| 63 | |||
| 64 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); | ||
| 65 | value = GetSaturatedHalfFloat(value, saturate); | ||
| 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||
| 67 | |||
| 68 | SetRegister(bb, instr.gpr0, value); | ||
| 69 | |||
| 70 | return pc; | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null | |||
| @@ -1,536 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/textures/texture.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::PredCondition; | ||
| 23 | using Tegra::Shader::StoreType; | ||
| 24 | using Tegra::Texture::ComponentType; | ||
| 25 | using Tegra::Texture::TextureFormat; | ||
| 26 | using Tegra::Texture::TICEntry; | ||
| 27 | |||
| 28 | namespace { | ||
| 29 | |||
| 30 | ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | ||
| 31 | std::size_t component) { | ||
| 32 | const TextureFormat format{descriptor.format}; | ||
| 33 | switch (format) { | ||
| 34 | case TextureFormat::R16G16B16A16: | ||
| 35 | case TextureFormat::R32G32B32A32: | ||
| 36 | case TextureFormat::R32G32B32: | ||
| 37 | case TextureFormat::R32G32: | ||
| 38 | case TextureFormat::R16G16: | ||
| 39 | case TextureFormat::R32: | ||
| 40 | case TextureFormat::R16: | ||
| 41 | case TextureFormat::R8: | ||
| 42 | case TextureFormat::R1: | ||
| 43 | if (component == 0) { | ||
| 44 | return descriptor.r_type; | ||
| 45 | } | ||
| 46 | if (component == 1) { | ||
| 47 | return descriptor.g_type; | ||
| 48 | } | ||
| 49 | if (component == 2) { | ||
| 50 | return descriptor.b_type; | ||
| 51 | } | ||
| 52 | if (component == 3) { | ||
| 53 | return descriptor.a_type; | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | case TextureFormat::A8R8G8B8: | ||
| 57 | if (component == 0) { | ||
| 58 | return descriptor.a_type; | ||
| 59 | } | ||
| 60 | if (component == 1) { | ||
| 61 | return descriptor.r_type; | ||
| 62 | } | ||
| 63 | if (component == 2) { | ||
| 64 | return descriptor.g_type; | ||
| 65 | } | ||
| 66 | if (component == 3) { | ||
| 67 | return descriptor.b_type; | ||
| 68 | } | ||
| 69 | break; | ||
| 70 | case TextureFormat::A2B10G10R10: | ||
| 71 | case TextureFormat::A4B4G4R4: | ||
| 72 | case TextureFormat::A5B5G5R1: | ||
| 73 | case TextureFormat::A1B5G5R5: | ||
| 74 | if (component == 0) { | ||
| 75 | return descriptor.a_type; | ||
| 76 | } | ||
| 77 | if (component == 1) { | ||
| 78 | return descriptor.b_type; | ||
| 79 | } | ||
| 80 | if (component == 2) { | ||
| 81 | return descriptor.g_type; | ||
| 82 | } | ||
| 83 | if (component == 3) { | ||
| 84 | return descriptor.r_type; | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | case TextureFormat::R32_B24G8: | ||
| 88 | if (component == 0) { | ||
| 89 | return descriptor.r_type; | ||
| 90 | } | ||
| 91 | if (component == 1) { | ||
| 92 | return descriptor.b_type; | ||
| 93 | } | ||
| 94 | if (component == 2) { | ||
| 95 | return descriptor.g_type; | ||
| 96 | } | ||
| 97 | break; | ||
| 98 | case TextureFormat::B5G6R5: | ||
| 99 | case TextureFormat::B6G5R5: | ||
| 100 | case TextureFormat::B10G11R11: | ||
| 101 | if (component == 0) { | ||
| 102 | return descriptor.b_type; | ||
| 103 | } | ||
| 104 | if (component == 1) { | ||
| 105 | return descriptor.g_type; | ||
| 106 | } | ||
| 107 | if (component == 2) { | ||
| 108 | return descriptor.r_type; | ||
| 109 | } | ||
| 110 | break; | ||
| 111 | case TextureFormat::R24G8: | ||
| 112 | case TextureFormat::R8G24: | ||
| 113 | case TextureFormat::R8G8: | ||
| 114 | case TextureFormat::G4R4: | ||
| 115 | if (component == 0) { | ||
| 116 | return descriptor.g_type; | ||
| 117 | } | ||
| 118 | if (component == 1) { | ||
| 119 | return descriptor.r_type; | ||
| 120 | } | ||
| 121 | break; | ||
| 122 | default: | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 126 | return ComponentType::FLOAT; | ||
| 127 | } | ||
| 128 | |||
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    // Per-component enable bits (bit0=R, bit1=G, bit2=B, bit3=A), indexed by
    // the instruction's 4-bit RGBA component mask.
    constexpr std::uint8_t R = 0b0001;
    constexpr std::uint8_t G = 0b0010;
    constexpr std::uint8_t B = 0b0100;
    constexpr std::uint8_t A = 0b1000;
    constexpr std::array<std::uint8_t, 16> enabled_components = {
        0,     R,         G,         R | G,
        B,     R | B,     G | B,     R | G | B,
        A,     R | A,     G | A,     R | G | A,
        B | A, R | B | A, G | B | A, R | G | B | A,
    };
    // .at() keeps the original bounds check on the mask index.
    const std::bitset<4> enabled{enabled_components.at(component_mask)};
    return enabled.test(component);
}
| 139 | |||
| 140 | u32 GetComponentSize(TextureFormat format, std::size_t component) { | ||
| 141 | switch (format) { | ||
| 142 | case TextureFormat::R32G32B32A32: | ||
| 143 | return 32; | ||
| 144 | case TextureFormat::R16G16B16A16: | ||
| 145 | return 16; | ||
| 146 | case TextureFormat::R32G32B32: | ||
| 147 | return component <= 2 ? 32 : 0; | ||
| 148 | case TextureFormat::R32G32: | ||
| 149 | return component <= 1 ? 32 : 0; | ||
| 150 | case TextureFormat::R16G16: | ||
| 151 | return component <= 1 ? 16 : 0; | ||
| 152 | case TextureFormat::R32: | ||
| 153 | return component == 0 ? 32 : 0; | ||
| 154 | case TextureFormat::R16: | ||
| 155 | return component == 0 ? 16 : 0; | ||
| 156 | case TextureFormat::R8: | ||
| 157 | return component == 0 ? 8 : 0; | ||
| 158 | case TextureFormat::R1: | ||
| 159 | return component == 0 ? 1 : 0; | ||
| 160 | case TextureFormat::A8R8G8B8: | ||
| 161 | return 8; | ||
| 162 | case TextureFormat::A2B10G10R10: | ||
| 163 | return (component == 3 || component == 2 || component == 1) ? 10 : 2; | ||
| 164 | case TextureFormat::A4B4G4R4: | ||
| 165 | return 4; | ||
| 166 | case TextureFormat::A5B5G5R1: | ||
| 167 | return (component == 0 || component == 1 || component == 2) ? 5 : 1; | ||
| 168 | case TextureFormat::A1B5G5R5: | ||
| 169 | return (component == 1 || component == 2 || component == 3) ? 5 : 1; | ||
| 170 | case TextureFormat::R32_B24G8: | ||
| 171 | if (component == 0) { | ||
| 172 | return 32; | ||
| 173 | } | ||
| 174 | if (component == 1) { | ||
| 175 | return 24; | ||
| 176 | } | ||
| 177 | if (component == 2) { | ||
| 178 | return 8; | ||
| 179 | } | ||
| 180 | return 0; | ||
| 181 | case TextureFormat::B5G6R5: | ||
| 182 | if (component == 0 || component == 2) { | ||
| 183 | return 5; | ||
| 184 | } | ||
| 185 | if (component == 1) { | ||
| 186 | return 6; | ||
| 187 | } | ||
| 188 | return 0; | ||
| 189 | case TextureFormat::B6G5R5: | ||
| 190 | if (component == 1 || component == 2) { | ||
| 191 | return 5; | ||
| 192 | } | ||
| 193 | if (component == 0) { | ||
| 194 | return 6; | ||
| 195 | } | ||
| 196 | return 0; | ||
| 197 | case TextureFormat::B10G11R11: | ||
| 198 | if (component == 1 || component == 2) { | ||
| 199 | return 11; | ||
| 200 | } | ||
| 201 | if (component == 0) { | ||
| 202 | return 10; | ||
| 203 | } | ||
| 204 | return 0; | ||
| 205 | case TextureFormat::R24G8: | ||
| 206 | if (component == 0) { | ||
| 207 | return 8; | ||
| 208 | } | ||
| 209 | if (component == 1) { | ||
| 210 | return 24; | ||
| 211 | } | ||
| 212 | return 0; | ||
| 213 | case TextureFormat::R8G24: | ||
| 214 | if (component == 0) { | ||
| 215 | return 24; | ||
| 216 | } | ||
| 217 | if (component == 1) { | ||
| 218 | return 8; | ||
| 219 | } | ||
| 220 | return 0; | ||
| 221 | case TextureFormat::R8G8: | ||
| 222 | return (component == 0 || component == 1) ? 8 : 0; | ||
| 223 | case TextureFormat::G4R4: | ||
| 224 | return (component == 0 || component == 1) ? 4 : 0; | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | std::size_t GetImageComponentMask(TextureFormat format) { | ||
| 232 | constexpr u8 R = 0b0001; | ||
| 233 | constexpr u8 G = 0b0010; | ||
| 234 | constexpr u8 B = 0b0100; | ||
| 235 | constexpr u8 A = 0b1000; | ||
| 236 | switch (format) { | ||
| 237 | case TextureFormat::R32G32B32A32: | ||
| 238 | case TextureFormat::R16G16B16A16: | ||
| 239 | case TextureFormat::A8R8G8B8: | ||
| 240 | case TextureFormat::A2B10G10R10: | ||
| 241 | case TextureFormat::A4B4G4R4: | ||
| 242 | case TextureFormat::A5B5G5R1: | ||
| 243 | case TextureFormat::A1B5G5R5: | ||
| 244 | return std::size_t{R | G | B | A}; | ||
| 245 | case TextureFormat::R32G32B32: | ||
| 246 | case TextureFormat::R32_B24G8: | ||
| 247 | case TextureFormat::B5G6R5: | ||
| 248 | case TextureFormat::B6G5R5: | ||
| 249 | case TextureFormat::B10G11R11: | ||
| 250 | return std::size_t{R | G | B}; | ||
| 251 | case TextureFormat::R32G32: | ||
| 252 | case TextureFormat::R16G16: | ||
| 253 | case TextureFormat::R24G8: | ||
| 254 | case TextureFormat::R8G24: | ||
| 255 | case TextureFormat::R8G8: | ||
| 256 | case TextureFormat::G4R4: | ||
| 257 | return std::size_t{R | G}; | ||
| 258 | case TextureFormat::R32: | ||
| 259 | case TextureFormat::R16: | ||
| 260 | case TextureFormat::R8: | ||
| 261 | case TextureFormat::R1: | ||
| 262 | return std::size_t{R}; | ||
| 263 | default: | ||
| 264 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 265 | return std::size_t{R | G | B | A}; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||
| 270 | switch (image_type) { | ||
| 271 | case Tegra::Shader::ImageType::Texture1D: | ||
| 272 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 273 | return 1; | ||
| 274 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 275 | case Tegra::Shader::ImageType::Texture2D: | ||
| 276 | return 2; | ||
| 277 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 278 | case Tegra::Shader::ImageType::Texture3D: | ||
| 279 | return 3; | ||
| 280 | } | ||
| 281 | UNREACHABLE(); | ||
| 282 | return 1; | ||
| 283 | } | ||
| 284 | } // Anonymous namespace | ||
| 285 | |||
| 286 | std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, | ||
| 287 | Node original_value) { | ||
| 288 | switch (component_type) { | ||
| 289 | case ComponentType::SNORM: { | ||
| 290 | // range [-1.0, 1.0] | ||
| 291 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 292 | Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); | ||
| 293 | cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); | ||
| 294 | return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; | ||
| 295 | } | ||
| 296 | case ComponentType::SINT: | ||
| 297 | case ComponentType::UNORM: { | ||
| 298 | bool is_signed = component_type == ComponentType::SINT; | ||
| 299 | // range [0.0, 1.0] | ||
| 300 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 301 | Immediate(static_cast<float>(1 << component_size) - 1.f)); | ||
| 302 | return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), | ||
| 303 | is_signed}; | ||
| 304 | } | ||
| 305 | case ComponentType::UINT: // range [0, (1 << component_size) - 1] | ||
| 306 | return {std::move(original_value), false}; | ||
| 307 | case ComponentType::FLOAT: | ||
| 308 | if (component_size == 16) { | ||
| 309 | return {Operation(OperationCode::HCastFloat, original_value), true}; | ||
| 310 | } else { | ||
| 311 | return {std::move(original_value), true}; | ||
| 312 | } | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); | ||
| 315 | return {std::move(original_value), true}; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||
| 320 | const Instruction instr = {program_code[pc]}; | ||
| 321 | const auto opcode = OpCode::Decode(instr); | ||
| 322 | |||
| 323 | const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { | ||
| 324 | std::vector<Node> coords; | ||
| 325 | const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; | ||
| 326 | coords.reserve(num_coords); | ||
| 327 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 328 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 329 | } | ||
| 330 | return coords; | ||
| 331 | }; | ||
| 332 | |||
| 333 | switch (opcode->get().GetId()) { | ||
| 334 | case OpCode::Id::SULD: { | ||
| 335 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 336 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 337 | |||
| 338 | const auto type{instr.suldst.image_type}; | ||
| 339 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 340 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 341 | image.MarkRead(); | ||
| 342 | |||
| 343 | if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { | ||
| 344 | u32 indexer = 0; | ||
| 345 | for (u32 element = 0; element < 4; ++element) { | ||
| 346 | if (!instr.suldst.IsComponentEnabled(element)) { | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | MetaImage meta{image, {}, element}; | ||
| 350 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 351 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 352 | } | ||
| 353 | for (u32 i = 0; i < indexer; ++i) { | ||
| 354 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 355 | } | ||
| 356 | } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { | ||
| 357 | UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && | ||
| 358 | instr.suldst.GetStoreDataLayout() != StoreType::Bits64); | ||
| 359 | |||
| 360 | auto descriptor = [this, instr] { | ||
| 361 | std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; | ||
| 362 | if (instr.suldst.is_immediate) { | ||
| 363 | sampler_descriptor = | ||
| 364 | registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); | ||
| 365 | } else { | ||
| 366 | const Node image_register = GetRegister(instr.gpr39); | ||
| 367 | const auto result = TrackCbuf(image_register, global_code, | ||
| 368 | static_cast<s64>(global_code.size())); | ||
| 369 | const auto buffer = std::get<1>(result); | ||
| 370 | const auto offset = std::get<2>(result); | ||
| 371 | sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); | ||
| 372 | } | ||
| 373 | if (!sampler_descriptor) { | ||
| 374 | UNREACHABLE_MSG("Failed to obtain image descriptor"); | ||
| 375 | } | ||
| 376 | return *sampler_descriptor; | ||
| 377 | }(); | ||
| 378 | |||
| 379 | const auto comp_mask = GetImageComponentMask(descriptor.format); | ||
| 380 | |||
| 381 | switch (instr.suldst.GetStoreDataLayout()) { | ||
| 382 | case StoreType::Bits32: | ||
| 383 | case StoreType::Bits64: { | ||
| 384 | u32 indexer = 0; | ||
| 385 | u32 shifted_counter = 0; | ||
| 386 | Node value = Immediate(0); | ||
| 387 | for (u32 element = 0; element < 4; ++element) { | ||
| 388 | if (!IsComponentEnabled(comp_mask, element)) { | ||
| 389 | continue; | ||
| 390 | } | ||
| 391 | const auto component_type = GetComponentType(descriptor, element); | ||
| 392 | const auto component_size = GetComponentSize(descriptor.format, element); | ||
| 393 | MetaImage meta{image, {}, element}; | ||
| 394 | |||
| 395 | auto [converted_value, is_signed] = GetComponentValue( | ||
| 396 | component_type, component_size, | ||
| 397 | Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); | ||
| 398 | |||
| 399 | // shift element to correct position | ||
| 400 | const auto shifted = shifted_counter; | ||
| 401 | if (shifted > 0) { | ||
| 402 | converted_value = | ||
| 403 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, | ||
| 404 | std::move(converted_value), Immediate(shifted)); | ||
| 405 | } | ||
| 406 | shifted_counter += component_size; | ||
| 407 | |||
| 408 | // add value into result | ||
| 409 | value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); | ||
| 410 | |||
| 411 | // if we shifted enough for 1 byte -> we save it into temp | ||
| 412 | if (shifted_counter >= 32) { | ||
| 413 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 414 | // reset counter and value to prepare pack next byte | ||
| 415 | value = Immediate(0); | ||
| 416 | shifted_counter = 0; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (u32 i = 0; i < indexer; ++i) { | ||
| 420 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 421 | } | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | default: | ||
| 425 | UNREACHABLE(); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | } | ||
| 429 | break; | ||
| 430 | } | ||
| 431 | case OpCode::Id::SUST: { | ||
| 432 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 433 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 434 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 435 | UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA | ||
| 436 | |||
| 437 | std::vector<Node> values; | ||
| 438 | constexpr std::size_t hardcoded_size{4}; | ||
| 439 | for (std::size_t i = 0; i < hardcoded_size; ++i) { | ||
| 440 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | ||
| 441 | } | ||
| 442 | |||
| 443 | const auto type{instr.suldst.image_type}; | ||
| 444 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 445 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 446 | image.MarkWrite(); | ||
| 447 | |||
| 448 | MetaImage meta{image, std::move(values)}; | ||
| 449 | bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | case OpCode::Id::SUATOM: { | ||
| 453 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | ||
| 454 | |||
| 455 | const OperationCode operation_code = [instr] { | ||
| 456 | switch (instr.suatom_d.operation_type) { | ||
| 457 | case Tegra::Shader::ImageAtomicOperationType::S32: | ||
| 458 | case Tegra::Shader::ImageAtomicOperationType::U32: | ||
| 459 | switch (instr.suatom_d.operation) { | ||
| 460 | case Tegra::Shader::ImageAtomicOperation::Add: | ||
| 461 | return OperationCode::AtomicImageAdd; | ||
| 462 | case Tegra::Shader::ImageAtomicOperation::And: | ||
| 463 | return OperationCode::AtomicImageAnd; | ||
| 464 | case Tegra::Shader::ImageAtomicOperation::Or: | ||
| 465 | return OperationCode::AtomicImageOr; | ||
| 466 | case Tegra::Shader::ImageAtomicOperation::Xor: | ||
| 467 | return OperationCode::AtomicImageXor; | ||
| 468 | case Tegra::Shader::ImageAtomicOperation::Exch: | ||
| 469 | return OperationCode::AtomicImageExchange; | ||
| 470 | default: | ||
| 471 | break; | ||
| 472 | } | ||
| 473 | break; | ||
| 474 | default: | ||
| 475 | break; | ||
| 476 | } | ||
| 477 | UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", | ||
| 478 | static_cast<u64>(instr.suatom_d.operation.Value()), | ||
| 479 | static_cast<u64>(instr.suatom_d.operation_type.Value())); | ||
| 480 | return OperationCode::AtomicImageAdd; | ||
| 481 | }(); | ||
| 482 | |||
| 483 | Node value = GetRegister(instr.gpr0); | ||
| 484 | |||
| 485 | const auto type = instr.suatom_d.image_type; | ||
| 486 | auto& image = GetImage(instr.image, type); | ||
| 487 | image.MarkAtomic(); | ||
| 488 | |||
| 489 | MetaImage meta{image, {std::move(value)}}; | ||
| 490 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); | ||
| 491 | break; | ||
| 492 | } | ||
| 493 | default: | ||
| 494 | UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); | ||
| 495 | } | ||
| 496 | |||
| 497 | return pc; | ||
| 498 | } | ||
| 499 | |||
| 500 | ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | ||
| 501 | const auto offset = static_cast<u32>(image.index.Value()); | ||
| 502 | |||
| 503 | const auto it = | ||
| 504 | std::find_if(std::begin(used_images), std::end(used_images), | ||
| 505 | [offset](const ImageEntry& entry) { return entry.offset == offset; }); | ||
| 506 | if (it != std::end(used_images)) { | ||
| 507 | ASSERT(!it->is_bindless && it->type == type); | ||
| 508 | return *it; | ||
| 509 | } | ||
| 510 | |||
| 511 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 512 | return used_images.emplace_back(next_index, offset, type); | ||
| 513 | } | ||
| 514 | |||
| 515 | ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | ||
| 516 | const Node image_register = GetRegister(reg); | ||
| 517 | const auto result = | ||
| 518 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); | ||
| 519 | |||
| 520 | const auto buffer = std::get<1>(result); | ||
| 521 | const auto offset = std::get<2>(result); | ||
| 522 | |||
| 523 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | ||
| 524 | [buffer, offset](const ImageEntry& entry) { | ||
| 525 | return entry.buffer == buffer && entry.offset == offset; | ||
| 526 | }); | ||
| 527 | if (it != std::end(used_images)) { | ||
| 528 | ASSERT(it->is_bindless && it->type == type); | ||
| 529 | return *it; | ||
| 530 | } | ||
| 531 | |||
| 532 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 533 | return used_images.emplace_back(next_index, offset, buffer, type); | ||
| 534 | } | ||
| 535 | |||
| 536 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "video_core/engines/shader_bytecode.h" | ||
| 7 | #include "video_core/shader/node_helper.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | |||
| 18 | const Node op_a = GetRegister(instr.gpr8); | ||
| 19 | const Node op_b = [&]() { | ||
| 20 | if (instr.is_b_imm) { | ||
| 21 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 22 | } else if (instr.is_b_gpr) { | ||
| 23 | return GetRegister(instr.gpr20); | ||
| 24 | } else { | ||
| 25 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | |||
| 29 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition | ||
| 30 | // is true, and to 0 otherwise. | ||
| 31 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 32 | const Node first_pred = | ||
| 33 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 34 | |||
| 35 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 36 | |||
| 37 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 38 | |||
| 39 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 40 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 41 | const Node value = | ||
| 42 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 43 | |||
| 44 | SetRegister(bb, instr.gpr0, value); | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // We can't use the constant predicate as destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null | |||
| @@ -1,493 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/alignment.h" | ||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "video_core/engines/shader_bytecode.h" | ||
| 16 | #include "video_core/shader/node_helper.h" | ||
| 17 | #include "video_core/shader/shader_ir.h" | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | ||
| 20 | |||
| 21 | using std::move; | ||
| 22 | using Tegra::Shader::AtomicOp; | ||
| 23 | using Tegra::Shader::AtomicType; | ||
| 24 | using Tegra::Shader::Attribute; | ||
| 25 | using Tegra::Shader::GlobalAtomicType; | ||
| 26 | using Tegra::Shader::Instruction; | ||
| 27 | using Tegra::Shader::OpCode; | ||
| 28 | using Tegra::Shader::Register; | ||
| 29 | using Tegra::Shader::StoreType; | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | OperationCode GetAtomOperation(AtomicOp op) { | ||
| 34 | switch (op) { | ||
| 35 | case AtomicOp::Add: | ||
| 36 | return OperationCode::AtomicIAdd; | ||
| 37 | case AtomicOp::Min: | ||
| 38 | return OperationCode::AtomicIMin; | ||
| 39 | case AtomicOp::Max: | ||
| 40 | return OperationCode::AtomicIMax; | ||
| 41 | case AtomicOp::And: | ||
| 42 | return OperationCode::AtomicIAnd; | ||
| 43 | case AtomicOp::Or: | ||
| 44 | return OperationCode::AtomicIOr; | ||
| 45 | case AtomicOp::Xor: | ||
| 46 | return OperationCode::AtomicIXor; | ||
| 47 | case AtomicOp::Exch: | ||
| 48 | return OperationCode::AtomicIExchange; | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("op={}", op); | ||
| 51 | return OperationCode::AtomicIAdd; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { | ||
| 56 | return uniform_type == Tegra::Shader::UniformType::UnsignedByte || | ||
| 57 | uniform_type == Tegra::Shader::UniformType::UnsignedShort; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { | ||
| 61 | switch (uniform_type) { | ||
| 62 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 63 | return 0b11; | ||
| 64 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 65 | return 0b10; | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return 0; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 73 | switch (uniform_type) { | ||
| 74 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 75 | return 8; | ||
| 76 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 77 | return 16; | ||
| 78 | case Tegra::Shader::UniformType::Single: | ||
| 79 | return 32; | ||
| 80 | case Tegra::Shader::UniformType::Double: | ||
| 81 | return 64; | ||
| 82 | case Tegra::Shader::UniformType::Quad: | ||
| 83 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 84 | return 128; | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); | ||
| 87 | return 32; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | ||
| 92 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | ||
| 93 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 94 | return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); | ||
| 95 | } | ||
| 96 | |||
| 97 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | ||
| 98 | Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); | ||
| 99 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 100 | return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), | ||
| 101 | Immediate(size)); | ||
| 102 | } | ||
| 103 | |||
| 104 | Node Sign16Extend(Node value) { | ||
| 105 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | ||
| 106 | Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); | ||
| 107 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | ||
| 108 | return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); | ||
| 109 | } | ||
| 110 | |||
| 111 | } // Anonymous namespace | ||
| 112 | |||
| 113 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | ||
| 114 | const Instruction instr = {program_code[pc]}; | ||
| 115 | const auto opcode = OpCode::Decode(instr); | ||
| 116 | |||
| 117 | switch (opcode->get().GetId()) { | ||
| 118 | case OpCode::Id::LD_A: { | ||
| 119 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 120 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 121 | "Indirect attribute loads are not supported"); | ||
| 122 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 123 | "Unaligned attribute loads are not supported"); | ||
| 124 | UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && | ||
| 125 | instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, | ||
| 126 | "Non-32 bits PHYS reads are not implemented"); | ||
| 127 | |||
| 128 | const Node buffer{GetRegister(instr.gpr39)}; | ||
| 129 | |||
| 130 | u64 next_element = instr.attribute.fmt20.element; | ||
| 131 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 132 | |||
| 133 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 134 | const Node attribute{instr.attribute.fmt20.IsPhysical() | ||
| 135 | ? GetPhysicalInputAttribute(instr.gpr8, buffer) | ||
| 136 | : GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 137 | next_element, buffer)}; | ||
| 138 | |||
| 139 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 140 | |||
| 141 | // Load the next attribute element into the following register. If the element | ||
| 142 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 143 | // attribute. | ||
| 144 | next_element = (next_element + 1) % 4; | ||
| 145 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 146 | }; | ||
| 147 | |||
| 148 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 149 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 150 | LoadNextElement(reg_offset); | ||
| 151 | } | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | case OpCode::Id::LD_C: { | ||
| 155 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 156 | |||
| 157 | Node index = GetRegister(instr.gpr8); | ||
| 158 | |||
| 159 | const Node op_a = | ||
| 160 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 161 | |||
| 162 | switch (instr.ld_c.type.Value()) { | ||
| 163 | case Tegra::Shader::UniformType::Single: | ||
| 164 | SetRegister(bb, instr.gpr0, op_a); | ||
| 165 | break; | ||
| 166 | |||
| 167 | case Tegra::Shader::UniformType::Double: { | ||
| 168 | const Node op_b = | ||
| 169 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | ||
| 170 | |||
| 171 | SetTemporary(bb, 0, op_a); | ||
| 172 | SetTemporary(bb, 1, op_b); | ||
| 173 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 174 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | default: | ||
| 178 | UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); | ||
| 179 | } | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case OpCode::Id::LD_L: | ||
| 183 | LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case OpCode::Id::LD_S: { | ||
| 186 | const auto GetAddress = [&](s32 offset) { | ||
| 187 | ASSERT(offset % 4 == 0); | ||
| 188 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 189 | return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); | ||
| 190 | }; | ||
| 191 | const auto GetMemory = [&](s32 offset) { | ||
| 192 | return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) | ||
| 193 | : GetLocalMemory(GetAddress(offset)); | ||
| 194 | }; | ||
| 195 | |||
| 196 | switch (instr.ldst_sl.type.Value()) { | ||
| 197 | case StoreType::Signed16: | ||
| 198 | SetRegister(bb, instr.gpr0, | ||
| 199 | Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); | ||
| 200 | break; | ||
| 201 | case StoreType::Bits32: | ||
| 202 | case StoreType::Bits64: | ||
| 203 | case StoreType::Bits128: { | ||
| 204 | const u32 count = [&] { | ||
| 205 | switch (instr.ldst_sl.type.Value()) { | ||
| 206 | case StoreType::Bits32: | ||
| 207 | return 1; | ||
| 208 | case StoreType::Bits64: | ||
| 209 | return 2; | ||
| 210 | case StoreType::Bits128: | ||
| 211 | return 4; | ||
| 212 | default: | ||
| 213 | UNREACHABLE(); | ||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | }(); | ||
| 217 | for (u32 i = 0; i < count; ++i) { | ||
| 218 | SetTemporary(bb, i, GetMemory(i * 4)); | ||
| 219 | } | ||
| 220 | for (u32 i = 0; i < count; ++i) { | ||
| 221 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 222 | } | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), | ||
| 227 | instr.ldst_sl.type.Value()); | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | case OpCode::Id::LD: | ||
| 232 | case OpCode::Id::LDG: { | ||
| 233 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 234 | switch (opcode->get().GetId()) { | ||
| 235 | case OpCode::Id::LD: | ||
| 236 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); | ||
| 237 | return instr.generic.type; | ||
| 238 | case OpCode::Id::LDG: | ||
| 239 | return instr.ldg.type; | ||
| 240 | default: | ||
| 241 | UNREACHABLE(); | ||
| 242 | return {}; | ||
| 243 | } | ||
| 244 | }(); | ||
| 245 | |||
| 246 | const auto [real_address_base, base_address, descriptor] = | ||
| 247 | TrackGlobalMemory(bb, instr, true, false); | ||
| 248 | |||
| 249 | const u32 size = GetMemorySize(type); | ||
| 250 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 251 | if (!real_address_base || !base_address) { | ||
| 252 | // Tracking failed, load zeroes. | ||
| 253 | for (u32 i = 0; i < count; ++i) { | ||
| 254 | SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); | ||
| 255 | } | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | for (u32 i = 0; i < count; ++i) { | ||
| 260 | const Node it_offset = Immediate(i * 4); | ||
| 261 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 262 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 263 | |||
| 264 | // To handle unaligned loads get the bytes used to dereference global memory and extract | ||
| 265 | // those bytes from the loaded u32. | ||
| 266 | if (IsUnaligned(type)) { | ||
| 267 | gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); | ||
| 268 | } | ||
| 269 | |||
| 270 | SetTemporary(bb, i, gmem); | ||
| 271 | } | ||
| 272 | |||
| 273 | for (u32 i = 0; i < count; ++i) { | ||
| 274 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 275 | } | ||
| 276 | break; | ||
| 277 | } | ||
| 278 | case OpCode::Id::ST_A: { | ||
| 279 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 280 | "Indirect attribute loads are not supported"); | ||
| 281 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 282 | "Unaligned attribute loads are not supported"); | ||
| 283 | |||
| 284 | u64 element = instr.attribute.fmt20.element; | ||
| 285 | auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 286 | |||
| 287 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 288 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 289 | Node dest; | ||
| 290 | if (instr.attribute.fmt20.patch) { | ||
| 291 | const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); | ||
| 292 | dest = MakeNode<PatchNode>(offset); | ||
| 293 | } else { | ||
| 294 | dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, | ||
| 295 | GetRegister(instr.gpr39)); | ||
| 296 | } | ||
| 297 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 298 | |||
| 299 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 300 | |||
| 301 | // Load the next attribute element into the following register. If the element to load | ||
| 302 | // goes beyond the vec4 size, load the first element of the next attribute. | ||
| 303 | element = (element + 1) % 4; | ||
| 304 | index = index + (element == 0 ? 1 : 0); | ||
| 305 | } | ||
| 306 | break; | ||
| 307 | } | ||
| 308 | case OpCode::Id::ST_L: | ||
| 309 | LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); | ||
| 310 | [[fallthrough]]; | ||
| 311 | case OpCode::Id::ST_S: { | ||
| 312 | const auto GetAddress = [&](s32 offset) { | ||
| 313 | ASSERT(offset % 4 == 0); | ||
| 314 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 315 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | ||
| 316 | }; | ||
| 317 | |||
| 318 | const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; | ||
| 319 | const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; | ||
| 320 | const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; | ||
| 321 | |||
| 322 | switch (instr.ldst_sl.type.Value()) { | ||
| 323 | case StoreType::Bits128: | ||
| 324 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); | ||
| 325 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); | ||
| 326 | [[fallthrough]]; | ||
| 327 | case StoreType::Bits64: | ||
| 328 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); | ||
| 329 | [[fallthrough]]; | ||
| 330 | case StoreType::Bits32: | ||
| 331 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); | ||
| 332 | break; | ||
| 333 | case StoreType::Unsigned16: | ||
| 334 | case StoreType::Signed16: { | ||
| 335 | Node address = GetAddress(0); | ||
| 336 | Node memory = (this->*get_memory)(address); | ||
| 337 | (this->*set_memory)( | ||
| 338 | bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); | ||
| 339 | break; | ||
| 340 | } | ||
| 341 | default: | ||
| 342 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), | ||
| 343 | instr.ldst_sl.type.Value()); | ||
| 344 | } | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | case OpCode::Id::ST: | ||
| 348 | case OpCode::Id::STG: { | ||
| 349 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 350 | switch (opcode->get().GetId()) { | ||
| 351 | case OpCode::Id::ST: | ||
| 352 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); | ||
| 353 | return instr.generic.type; | ||
| 354 | case OpCode::Id::STG: | ||
| 355 | return instr.stg.type; | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | return {}; | ||
| 359 | } | ||
| 360 | }(); | ||
| 361 | |||
| 362 | // For unaligned reads we have to read memory too. | ||
| 363 | const bool is_read = IsUnaligned(type); | ||
| 364 | const auto [real_address_base, base_address, descriptor] = | ||
| 365 | TrackGlobalMemory(bb, instr, is_read, true); | ||
| 366 | if (!real_address_base || !base_address) { | ||
| 367 | // Tracking failed, skip the store. | ||
| 368 | break; | ||
| 369 | } | ||
| 370 | |||
| 371 | const u32 size = GetMemorySize(type); | ||
| 372 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 373 | for (u32 i = 0; i < count; ++i) { | ||
| 374 | const Node it_offset = Immediate(i * 4); | ||
| 375 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 376 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 377 | Node value = GetRegister(instr.gpr0.Value() + i); | ||
| 378 | |||
| 379 | if (IsUnaligned(type)) { | ||
| 380 | const u32 mask = GetUnalignedMask(type); | ||
| 381 | value = InsertUnaligned(gmem, move(value), real_address, mask, size); | ||
| 382 | } | ||
| 383 | |||
| 384 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | ||
| 385 | } | ||
| 386 | break; | ||
| 387 | } | ||
| 388 | case OpCode::Id::RED: { | ||
| 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", | ||
| 390 | instr.red.type.Value()); | ||
| 391 | const auto [real_address, base_address, descriptor] = | ||
| 392 | TrackGlobalMemory(bb, instr, true, true); | ||
| 393 | if (!real_address || !base_address) { | ||
| 394 | // Tracking failed, skip atomic. | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 398 | Node value = GetRegister(instr.gpr0); | ||
| 399 | bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); | ||
| 400 | break; | ||
| 401 | } | ||
| 402 | case OpCode::Id::ATOM: { | ||
| 403 | UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || | ||
| 404 | instr.atom.operation == AtomicOp::Dec || | ||
| 405 | instr.atom.operation == AtomicOp::SafeAdd, | ||
| 406 | "operation={}", instr.atom.operation.Value()); | ||
| 407 | UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || | ||
| 408 | instr.atom.type == GlobalAtomicType::U64 || | ||
| 409 | instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || | ||
| 410 | instr.atom.type == GlobalAtomicType::F32_FTZ_RN, | ||
| 411 | "type={}", instr.atom.type.Value()); | ||
| 412 | |||
| 413 | const auto [real_address, base_address, descriptor] = | ||
| 414 | TrackGlobalMemory(bb, instr, true, true); | ||
| 415 | if (!real_address || !base_address) { | ||
| 416 | // Tracking failed, skip atomic. | ||
| 417 | break; | ||
| 418 | } | ||
| 419 | |||
| 420 | const bool is_signed = | ||
| 421 | instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; | ||
| 422 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 423 | SetRegister(bb, instr.gpr0, | ||
| 424 | SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, | ||
| 425 | GetRegister(instr.gpr20))); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | case OpCode::Id::ATOMS: { | ||
| 429 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || | ||
| 430 | instr.atoms.operation == AtomicOp::Dec, | ||
| 431 | "operation={}", instr.atoms.operation.Value()); | ||
| 432 | UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || | ||
| 433 | instr.atoms.type == AtomicType::U64, | ||
| 434 | "type={}", instr.atoms.type.Value()); | ||
| 435 | const bool is_signed = | ||
| 436 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | ||
| 437 | const s32 offset = instr.atoms.GetImmediateOffset(); | ||
| 438 | Node address = GetRegister(instr.gpr8); | ||
| 439 | address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); | ||
| 440 | SetRegister(bb, instr.gpr0, | ||
| 441 | SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, | ||
| 442 | GetSharedMemory(move(address)), GetRegister(instr.gpr20))); | ||
| 443 | break; | ||
| 444 | } | ||
| 445 | case OpCode::Id::AL2P: { | ||
| 446 | // Ignore al2p.direction since we don't care about it. | ||
| 447 | |||
| 448 | // Calculate emulation fake physical address. | ||
| 449 | const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; | ||
| 450 | const Node reg{GetRegister(instr.gpr8)}; | ||
| 451 | const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; | ||
| 452 | |||
| 453 | // Set the fake address to target register. | ||
| 454 | SetRegister(bb, instr.gpr0, fake_address); | ||
| 455 | |||
| 456 | // Signal the shader IR to declare all possible attributes and varyings | ||
| 457 | uses_physical_attributes = true; | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | default: | ||
| 461 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 462 | } | ||
| 463 | |||
| 464 | return pc; | ||
| 465 | } | ||
| 466 | |||
| 467 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, | ||
| 468 | Instruction instr, | ||
| 469 | bool is_read, bool is_write) { | ||
| 470 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | ||
| 471 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | ||
| 472 | |||
| 473 | const auto [base_address, index, offset] = | ||
| 474 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | ||
| 475 | ASSERT_OR_EXECUTE_MSG( | ||
| 476 | base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, | ||
| 477 | "Global memory tracking failed"); | ||
| 478 | |||
| 479 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); | ||
| 480 | |||
| 481 | const GlobalMemoryBase descriptor{index, offset}; | ||
| 482 | const auto& entry = used_global_memory.try_emplace(descriptor).first; | ||
| 483 | auto& usage = entry->second; | ||
| 484 | usage.is_written |= is_write; | ||
| 485 | usage.is_read |= is_read; | ||
| 486 | |||
| 487 | const auto real_address = | ||
| 488 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | ||
| 489 | |||
| 490 | return {real_address, base_address, descriptor}; | ||
| 491 | } | ||
| 492 | |||
| 493 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null | |||
| @@ -1,322 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using std::move; | ||
| 15 | using Tegra::Shader::ConditionCode; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::IpaInterpMode; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PixelImap; | ||
| 20 | using Tegra::Shader::Register; | ||
| 21 | using Tegra::Shader::SystemVariable; | ||
| 22 | |||
| 23 | using Index = Tegra::Shader::Attribute::Index; | ||
| 24 | |||
| 25 | u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||
| 26 | const Instruction instr = {program_code[pc]}; | ||
| 27 | const auto opcode = OpCode::Decode(instr); | ||
| 28 | |||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::NOP: { | ||
| 31 | UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); | ||
| 32 | UNIMPLEMENTED_IF(instr.nop.trigger != 0); | ||
| 33 | // With the previous preconditions, this instruction is a no-operation. | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | case OpCode::Id::EXIT: { | ||
| 37 | const ConditionCode cc = instr.flow_condition_code; | ||
| 38 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); | ||
| 39 | |||
| 40 | switch (instr.flow.cond) { | ||
| 41 | case Tegra::Shader::FlowCondition::Always: | ||
| 42 | bb.push_back(Operation(OperationCode::Exit)); | ||
| 43 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 44 | // If this is an unconditional exit then just end processing here, | ||
| 45 | // otherwise we have to account for the possibility of the condition | ||
| 46 | // not being met, so continue processing the next instruction. | ||
| 47 | pc = MAX_PROGRAM_LENGTH - 1; | ||
| 48 | } | ||
| 49 | break; | ||
| 50 | |||
| 51 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 52 | // TODO(bunnei): What is this used for? If we assume this conditon is not | ||
| 53 | // satisifed, dual vertex shaders in Farming Simulator make more sense | ||
| 54 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 55 | break; | ||
| 56 | |||
| 57 | default: | ||
| 58 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); | ||
| 59 | } | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | case OpCode::Id::KIL: { | ||
| 63 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 64 | |||
| 65 | const ConditionCode cc = instr.flow_condition_code; | ||
| 66 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); | ||
| 67 | |||
| 68 | bb.push_back(Operation(OperationCode::Discard)); | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | case OpCode::Id::S2R: { | ||
| 72 | const Node value = [this, instr] { | ||
| 73 | switch (instr.sys20) { | ||
| 74 | case SystemVariable::LaneId: | ||
| 75 | return Operation(OperationCode::ThreadId); | ||
| 76 | case SystemVariable::InvocationId: | ||
| 77 | return Operation(OperationCode::InvocationId); | ||
| 78 | case SystemVariable::Ydirection: | ||
| 79 | uses_y_negate = true; | ||
| 80 | return Operation(OperationCode::YNegate); | ||
| 81 | case SystemVariable::InvocationInfo: | ||
| 82 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | ||
| 83 | return Immediate(0x00ff'0000U); | ||
| 84 | case SystemVariable::WscaleFactorXY: | ||
| 85 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); | ||
| 86 | return Immediate(0U); | ||
| 87 | case SystemVariable::WscaleFactorZ: | ||
| 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); | ||
| 89 | return Immediate(0U); | ||
| 90 | case SystemVariable::Tid: { | ||
| 91 | Node val = Immediate(0); | ||
| 92 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 93 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 94 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 95 | return val; | ||
| 96 | } | ||
| 97 | case SystemVariable::TidX: | ||
| 98 | return Operation(OperationCode::LocalInvocationIdX); | ||
| 99 | case SystemVariable::TidY: | ||
| 100 | return Operation(OperationCode::LocalInvocationIdY); | ||
| 101 | case SystemVariable::TidZ: | ||
| 102 | return Operation(OperationCode::LocalInvocationIdZ); | ||
| 103 | case SystemVariable::CtaIdX: | ||
| 104 | return Operation(OperationCode::WorkGroupIdX); | ||
| 105 | case SystemVariable::CtaIdY: | ||
| 106 | return Operation(OperationCode::WorkGroupIdY); | ||
| 107 | case SystemVariable::CtaIdZ: | ||
| 108 | return Operation(OperationCode::WorkGroupIdZ); | ||
| 109 | case SystemVariable::EqMask: | ||
| 110 | case SystemVariable::LtMask: | ||
| 111 | case SystemVariable::LeMask: | ||
| 112 | case SystemVariable::GtMask: | ||
| 113 | case SystemVariable::GeMask: | ||
| 114 | uses_warps = true; | ||
| 115 | switch (instr.sys20) { | ||
| 116 | case SystemVariable::EqMask: | ||
| 117 | return Operation(OperationCode::ThreadEqMask); | ||
| 118 | case SystemVariable::LtMask: | ||
| 119 | return Operation(OperationCode::ThreadLtMask); | ||
| 120 | case SystemVariable::LeMask: | ||
| 121 | return Operation(OperationCode::ThreadLeMask); | ||
| 122 | case SystemVariable::GtMask: | ||
| 123 | return Operation(OperationCode::ThreadGtMask); | ||
| 124 | case SystemVariable::GeMask: | ||
| 125 | return Operation(OperationCode::ThreadGeMask); | ||
| 126 | default: | ||
| 127 | UNREACHABLE(); | ||
| 128 | return Immediate(0u); | ||
| 129 | } | ||
| 130 | default: | ||
| 131 | UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); | ||
| 132 | return Immediate(0u); | ||
| 133 | } | ||
| 134 | }(); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | |||
| 137 | break; | ||
| 138 | } | ||
| 139 | case OpCode::Id::BRA: { | ||
| 140 | Node branch; | ||
| 141 | if (instr.bra.constant_buffer == 0) { | ||
| 142 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 143 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 144 | } else { | ||
| 145 | const u32 target = pc + 1; | ||
| 146 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 147 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 148 | PRECISE, op_a, Immediate(3)); | ||
| 149 | const Node operand = | ||
| 150 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 151 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 155 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 156 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 157 | } else { | ||
| 158 | bb.push_back(branch); | ||
| 159 | } | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case OpCode::Id::BRX: { | ||
| 163 | Node operand; | ||
| 164 | if (instr.brx.constant_buffer != 0) { | ||
| 165 | const s32 target = pc + 1; | ||
| 166 | const Node index = GetRegister(instr.gpr8); | ||
| 167 | const Node op_a = | ||
| 168 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 169 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 170 | PRECISE, op_a, Immediate(3)); | ||
| 171 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 172 | } else { | ||
| 173 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 174 | const Node op_a = GetRegister(instr.gpr8); | ||
| 175 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 176 | PRECISE, op_a, Immediate(3)); | ||
| 177 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 178 | } | ||
| 179 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 180 | |||
| 181 | const ConditionCode cc = instr.flow_condition_code; | ||
| 182 | if (cc != ConditionCode::T) { | ||
| 183 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 184 | } else { | ||
| 185 | bb.push_back(branch); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | case OpCode::Id::SSY: { | ||
| 190 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 191 | "Constant buffer flow is not supported"); | ||
| 192 | |||
| 193 | if (disable_flow_stack) { | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | |||
| 197 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | ||
| 198 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 199 | bb.push_back( | ||
| 200 | Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case OpCode::Id::PBK: { | ||
| 204 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 205 | "Constant buffer PBK is not supported"); | ||
| 206 | |||
| 207 | if (disable_flow_stack) { | ||
| 208 | break; | ||
| 209 | } | ||
| 210 | |||
| 211 | // PBK pushes to a stack the address where BRK will jump to. | ||
| 212 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 213 | bb.push_back( | ||
| 214 | Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const ConditionCode cc = instr.flow_condition_code; | ||
| 219 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); | ||
| 220 | |||
| 221 | if (decompiled) { | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | |||
| 225 | // The SYNC opcode jumps to the address previously set by the SSY opcode | ||
| 226 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | case OpCode::Id::BRK: { | ||
| 230 | const ConditionCode cc = instr.flow_condition_code; | ||
| 231 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); | ||
| 232 | if (decompiled) { | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | |||
| 236 | // The BRK opcode jumps to the address previously set by the PBK opcode | ||
| 237 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IPA: { | ||
| 241 | const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; | ||
| 242 | const auto attribute = instr.attribute.fmt28; | ||
| 243 | const Index index = attribute.index; | ||
| 244 | |||
| 245 | Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) | ||
| 246 | : GetInputAttribute(index, attribute.element); | ||
| 247 | |||
| 248 | // Code taken from Ryujinx. | ||
| 249 | if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { | ||
| 250 | const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); | ||
| 251 | if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { | ||
| 252 | Node position_w = GetInputAttribute(Index::Position, 3); | ||
| 253 | value = Operation(OperationCode::FMul, move(value), move(position_w)); | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { | ||
| 258 | value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); | ||
| 259 | } | ||
| 260 | |||
| 261 | value = GetSaturatedFloat(move(value), instr.ipa.saturate); | ||
| 262 | |||
| 263 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 264 | break; | ||
| 265 | } | ||
| 266 | case OpCode::Id::OUT_R: { | ||
| 267 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 268 | "Stream buffer is not supported"); | ||
| 269 | |||
| 270 | if (instr.out.emit) { | ||
| 271 | // gpr0 is used to store the next address and gpr8 contains the address to emit. | ||
| 272 | // Hardware uses pointers here but we just ignore it | ||
| 273 | bb.push_back(Operation(OperationCode::EmitVertex)); | ||
| 274 | SetRegister(bb, instr.gpr0, Immediate(0)); | ||
| 275 | } | ||
| 276 | if (instr.out.cut) { | ||
| 277 | bb.push_back(Operation(OperationCode::EndPrimitive)); | ||
| 278 | } | ||
| 279 | break; | ||
| 280 | } | ||
| 281 | case OpCode::Id::ISBERD: { | ||
| 282 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 283 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 284 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 285 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 286 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 287 | SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); | ||
| 288 | break; | ||
| 289 | } | ||
| 290 | case OpCode::Id::BAR: { | ||
| 291 | UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); | ||
| 292 | bb.push_back(Operation(OperationCode::Barrier)); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | case OpCode::Id::MEMBAR: { | ||
| 296 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | ||
| 297 | const OperationCode type = [instr] { | ||
| 298 | switch (instr.membar.type) { | ||
| 299 | case Tegra::Shader::MembarType::CTA: | ||
| 300 | return OperationCode::MemoryBarrierGroup; | ||
| 301 | case Tegra::Shader::MembarType::GL: | ||
| 302 | return OperationCode::MemoryBarrierGlobal; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); | ||
| 305 | return OperationCode::MemoryBarrierGlobal; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | bb.push_back(Operation(type)); | ||
| 309 | break; | ||
| 310 | } | ||
| 311 | case OpCode::Id::DEPBAR: { | ||
| 312 | LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | default: | ||
| 316 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 317 | } | ||
| 318 | |||
| 319 | return pc; | ||
| 320 | } | ||
| 321 | |||
| 322 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::PSETP: { | ||
| 23 | const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 24 | const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 25 | |||
| 26 | // We can't use the constant predicate as destination. | ||
| 27 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 28 | |||
| 29 | const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 30 | |||
| 31 | const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 32 | const Node predicate = Operation(combiner, op_a, op_b); | ||
| 33 | |||
| 34 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 35 | SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); | ||
| 36 | |||
| 37 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 38 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if | ||
| 39 | // enabled | ||
| 40 | SetPredicate(bb, instr.psetp.pred0, | ||
| 41 | Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), | ||
| 42 | second_pred)); | ||
| 43 | } | ||
| 44 | break; | ||
| 45 | } | ||
| 46 | case OpCode::Id::CSETP: { | ||
| 47 | const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 48 | const Node condition_code = GetConditionCode(instr.csetp.cc); | ||
| 49 | |||
| 50 | const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 51 | |||
| 52 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 53 | SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); | ||
| 54 | } | ||
| 55 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 56 | const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); | ||
| 57 | SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); | ||
| 58 | } | ||
| 59 | break; | ||
| 60 | } | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 63 | } | ||
| 64 | |||
| 65 | return pc; | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null | |||
| @@ -1,46 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null | |||
| @@ -1,86 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using std::move; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | constexpr u64 NUM_CONDITION_CODES = 4; | ||
| 21 | constexpr u64 NUM_PREDICATES = 7; | ||
| 22 | } // namespace | ||
| 23 | |||
| 24 | u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 25 | const Instruction instr = {program_code[pc]}; | ||
| 26 | const auto opcode = OpCode::Decode(instr); | ||
| 27 | |||
| 28 | Node apply_mask = [this, opcode, instr] { | ||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::R2P_IMM: | ||
| 31 | case OpCode::Id::P2R_IMM: | ||
| 32 | return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); | ||
| 33 | default: | ||
| 34 | UNREACHABLE(); | ||
| 35 | return Immediate(0); | ||
| 36 | } | ||
| 37 | }(); | ||
| 38 | |||
| 39 | const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; | ||
| 40 | |||
| 41 | const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; | ||
| 42 | const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; | ||
| 43 | const auto get_entry = [this, cc](u64 entry) { | ||
| 44 | return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); | ||
| 45 | }; | ||
| 46 | |||
| 47 | switch (opcode->get().GetId()) { | ||
| 48 | case OpCode::Id::R2P_IMM: { | ||
| 49 | Node mask = GetRegister(instr.gpr8); | ||
| 50 | |||
| 51 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 52 | const u32 shift = static_cast<u32>(entry); | ||
| 53 | |||
| 54 | Node apply = BitfieldExtract(apply_mask, shift, 1); | ||
| 55 | Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); | ||
| 56 | |||
| 57 | Node compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 58 | Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); | ||
| 59 | |||
| 60 | Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); | ||
| 61 | bb.push_back(Conditional(condition, {move(code)})); | ||
| 62 | } | ||
| 63 | break; | ||
| 64 | } | ||
| 65 | case OpCode::Id::P2R_IMM: { | ||
| 66 | Node value = Immediate(0); | ||
| 67 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 68 | Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), | ||
| 69 | Immediate(0)); | ||
| 70 | value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); | ||
| 71 | } | ||
| 72 | value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); | ||
| 73 | value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); | ||
| 74 | |||
| 75 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | default: | ||
| 79 | UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); | ||
| 80 | break; | ||
| 81 | } | ||
| 82 | |||
| 83 | return pc; | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null | |||
| @@ -1,153 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::ShfType; | ||
| 17 | using Tegra::Shader::ShfXmode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | Node IsFull(Node shift) { | ||
| 22 | return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); | ||
| 23 | } | ||
| 24 | |||
| 25 | Node Shift(OperationCode opcode, Node value, Node shift) { | ||
| 26 | Node shifted = Operation(opcode, move(value), shift); | ||
| 27 | return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node ClampShift(Node shift, s32 size = 32) { | ||
| 31 | shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); | ||
| 32 | return Operation(OperationCode::IMin, move(shift), Immediate(size)); | ||
| 33 | } | ||
| 34 | |||
| 35 | Node WrapShift(Node shift, s32 size = 32) { | ||
| 36 | return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); | ||
| 37 | } | ||
| 38 | |||
| 39 | Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 40 | // These values are used when the shift value is less than 32 | ||
| 41 | Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); | ||
| 42 | Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); | ||
| 43 | Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); | ||
| 44 | |||
| 45 | if (type == ShfType::Bits32) { | ||
| 46 | // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 47 | return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); | ||
| 48 | } | ||
| 49 | |||
| 50 | // And these when it's larger than or 32 | ||
| 51 | const bool is_signed = type == ShfType::S64; | ||
| 52 | const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); | ||
| 53 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 54 | Node greater = Shift(opcode, high, move(reduced)); | ||
| 55 | |||
| 56 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 57 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 58 | |||
| 59 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 60 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 61 | } | ||
| 62 | |||
| 63 | Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 64 | // These values are used when the shift value is less than 32 | ||
| 65 | Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); | ||
| 66 | Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); | ||
| 67 | Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); | ||
| 68 | |||
| 69 | if (type == ShfType::Bits32) { | ||
| 70 | // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 71 | return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); | ||
| 72 | } | ||
| 73 | |||
| 74 | // And these when it's larger than or 32 | ||
| 75 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 76 | Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); | ||
| 77 | |||
| 78 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 79 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 80 | |||
| 81 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 82 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 83 | } | ||
| 84 | |||
| 85 | } // Anonymous namespace | ||
| 86 | |||
| 87 | u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | ||
| 88 | const Instruction instr = {program_code[pc]}; | ||
| 89 | const auto opcode = OpCode::Decode(instr); | ||
| 90 | |||
| 91 | Node op_a = GetRegister(instr.gpr8); | ||
| 92 | Node op_b = [this, instr] { | ||
| 93 | if (instr.is_b_imm) { | ||
| 94 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 95 | } else if (instr.is_b_gpr) { | ||
| 96 | return GetRegister(instr.gpr20); | ||
| 97 | } else { | ||
| 98 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 99 | } | ||
| 100 | }(); | ||
| 101 | |||
| 102 | switch (const auto opid = opcode->get().GetId(); opid) { | ||
| 103 | case OpCode::Id::SHR_C: | ||
| 104 | case OpCode::Id::SHR_R: | ||
| 105 | case OpCode::Id::SHR_IMM: { | ||
| 106 | op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); | ||
| 107 | |||
| 108 | Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, | ||
| 109 | move(op_a), move(op_b)); | ||
| 110 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 111 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | case OpCode::Id::SHL_C: | ||
| 115 | case OpCode::Id::SHL_R: | ||
| 116 | case OpCode::Id::SHL_IMM: { | ||
| 117 | Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); | ||
| 118 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 119 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::SHF_RIGHT_R: | ||
| 123 | case OpCode::Id::SHF_RIGHT_IMM: | ||
| 124 | case OpCode::Id::SHF_LEFT_R: | ||
| 125 | case OpCode::Id::SHF_LEFT_IMM: { | ||
| 126 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 127 | UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", | ||
| 128 | instr.shf.xmode.Value()); | ||
| 129 | |||
| 130 | if (instr.is_b_imm) { | ||
| 131 | op_b = Immediate(static_cast<u32>(instr.shf.immediate)); | ||
| 132 | } | ||
| 133 | const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; | ||
| 134 | Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); | ||
| 135 | |||
| 136 | Node negated_shift = Operation(OperationCode::INegate, shift); | ||
| 137 | Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); | ||
| 138 | |||
| 139 | const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; | ||
| 140 | Node value = (is_right ? ShiftRight : ShiftLeft)( | ||
| 141 | move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); | ||
| 142 | |||
| 143 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | default: | ||
| 147 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 148 | } | ||
| 149 | |||
| 150 | return pc; | ||
| 151 | } | ||
| 152 | |||
| 153 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null | |||
| @@ -1,935 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | using Tegra::Shader::TextureMiscMode; | ||
| 24 | using Tegra::Shader::TextureProcessMode; | ||
| 25 | using Tegra::Shader::TextureType; | ||
| 26 | |||
| 27 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 28 | switch (texture_type) { | ||
| 29 | case TextureType::Texture1D: | ||
| 30 | return 1; | ||
| 31 | case TextureType::Texture2D: | ||
| 32 | return 2; | ||
| 33 | case TextureType::Texture3D: | ||
| 34 | case TextureType::TextureCube: | ||
| 35 | return 3; | ||
| 36 | default: | ||
| 37 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
// Decodes one texture-family instruction (TEX, TEXS, TLD, TLD4, TLD4S, TXD,
// TXQ, TMML and their bindless *_B variants) at 'pc' and emits the equivalent
// IR into 'bb'. Returns the consumed pc so the caller continues sequentially.
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // *_B opcodes take the sampler handle from a register (bindless) instead
    // of a statically bound sampler slot; set by the fallthrough cases below.
    bool is_bindless = false;
    switch (opcode->get().GetId()) {
    case OpCode::Id::TEX: {
        const TextureType texture_type{instr.tex.texture_type};
        const bool is_array = instr.tex.array != 0;
        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex.GetTextureProcessMode();
        WriteTexInstructionFloat(
            bb, instr,
            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
        break;
    }
    case OpCode::Id::TEX_B: {
        // NOTE(review): both this check and 'is_aoffi' below read instr.tex
        // rather than instr.tex_b, unlike every other field in this case —
        // looks inherited from the TEX case; confirm against the tex_b encoding.
        UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        const TextureType texture_type{instr.tex_b.texture_type};
        const bool is_array = instr.tex_b.array != 0;
        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex_b.GetTextureProcessMode();
        // gpr20 carries the bindless sampler handle.
        WriteTexInstructionFloat(bb, instr,
                                 GetTexCode(instr, texture_type, process_mode, depth_compare,
                                            is_array, is_aoffi, {instr.gpr20}));
        break;
    }
    case OpCode::Id::TEXS: {
        const TextureType texture_type{instr.texs.GetTextureType()};
        const bool is_array{instr.texs.IsArrayTexture()};
        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.texs.GetTextureProcessMode();

        const Node4 components =
            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);

        // fp32_flag selects full-float destinations; otherwise results are
        // packed as pairs of half floats.
        if (instr.texs.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    case OpCode::Id::TLD4_B: {
        is_bindless = true;
        [[fallthrough]];
    }
    case OpCode::Id::TLD4: {
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        const auto texture_type = instr.tld4.texture_type.Value();
        // Misc modes live in different bitfields for the bindless encoding.
        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
                                        : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
        WriteTexInstructionFloat(bb, instr,
                                 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
                                             is_ptp, is_bindless));
        break;
    }
    case OpCode::Id::TLD4S: {
        // Scalar gather: always a 2D, two-coordinate fetch.
        constexpr std::size_t num_coords = 2;
        const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);

        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        std::vector<Node> coords;
        std::vector<Node> aoffi;
        Node depth_compare;
        if (is_depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
            // With AOFFI present, gpr20 holds the offsets and the compare
            // reference moves to gpr20+1.
            if (is_aoffi) {
                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
                depth_compare = GetRegister(instr.gpr20.Value() + 1);
            } else {
                depth_compare = op_b;
            }
        } else {
            // There's no depth compare
            coords.push_back(op_a);
            if (is_aoffi) {
                coords.push_back(GetRegister(instr.gpr8.Value() + 1));
                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
            } else {
                coords.push_back(op_b);
            }
        }
        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));

        SamplerInfo info;
        info.is_shadow = is_depth_compare;
        const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);

        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
                             {},       {}, component,     element, {}};
            values[element] = Operation(OperationCode::TextureGather, meta, coords);
        }

        if (instr.tld4s.fp16_flag) {
            WriteTexsInstructionHalfFloat(bb, instr, values, true);
        } else {
            WriteTexsInstructionFloat(bb, instr, values, true);
        }
        break;
    }
    case OpCode::Id::TXD_B:
        is_bindless = true;
        [[fallthrough]];
    case OpCode::Id::TXD: {
        // Texture fetch with explicit derivatives.
        UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");

        const bool is_array = instr.txd.is_array != 0;
        const auto derivate_reg = instr.gpr20.Value();
        const auto texture_type = instr.txd.texture_type.Value();
        const auto coord_count = GetCoordCount(texture_type);
        u64 base_reg = instr.gpr8.Value();
        Node index_var;
        SamplerInfo info;
        info.type = texture_type;
        info.is_array = is_array;
        const std::optional<SamplerEntry> sampler =
            is_bindless ? GetBindlessSampler(base_reg, info, index_var)
                        : GetSampler(instr.sampler, info);
        Node4 values;
        // Unresolvable sampler: write zeros so register state stays defined.
        if (!sampler) {
            std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
            WriteTexInstructionFloat(bb, instr, values);
            break;
        }

        // For bindless fetches gpr8 holds the handle; coordinates start one
        // register later.
        if (is_bindless) {
            base_reg++;
        }

        std::vector<Node> coords;
        std::vector<Node> derivates;
        // Two derivative registers (ddx, ddy) per coordinate.
        for (std::size_t i = 0; i < coord_count; ++i) {
            coords.push_back(GetRegister(base_reg + i));
            const std::size_t derivate = i * 2;
            derivates.push_back(GetRegister(derivate_reg + derivate));
            derivates.push_back(GetRegister(derivate_reg + derivate + 1));
        }

        Node array_node = {};
        if (is_array) {
            // The array layer is packed in the low 16 bits of the register
            // following the coordinates.
            const Node info_reg = GetRegister(base_reg + coord_count);
            array_node = BitfieldExtract(info_reg, 0, 16);
        }

        for (u32 element = 0; element < values.size(); ++element) {
            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
                             {},       {},         {}, element, index_var};
            values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
        }

        WriteTexInstructionFloat(bb, instr, values);

        break;
    }
    case OpCode::Id::TXQ_B:
        is_bindless = true;
        [[fallthrough]];
    case OpCode::Id::TXQ: {
        // Texture query (e.g. dimensions).
        Node index_var;
        const std::optional<SamplerEntry> sampler =
            is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
                        : GetSampler(instr.sampler, {});

        // Unresolvable sampler: write zeros to every enabled component.
        if (!sampler) {
            u32 indexer = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
                const Node value = Immediate(0);
                SetTemporary(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }

        u32 indexer = 0;
        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
                // gpr8 (or gpr8+1 when bindless) holds the requested LOD.
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta,
                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
                SetTemporary(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
        }
        break;
    }
    case OpCode::Id::TMML_B:
        is_bindless = true;
        [[fallthrough]];
    case OpCode::Id::TMML: {
        // Texture mip-map level query; returns (lod, clamped lod) pairs.
        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                             "NDV is not implemented");

        const auto texture_type = instr.tmml.texture_type.Value();
        const bool is_array = instr.tmml.array != 0;
        SamplerInfo info;
        info.type = texture_type;
        info.is_array = is_array;
        Node index_var;
        const std::optional<SamplerEntry> sampler =
            is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
                        : GetSampler(instr.sampler, info);

        // Unresolvable sampler: write zeros to the enabled components.
        if (!sampler) {
            u32 indexer = 0;
            for (u32 element = 0; element < 2; ++element) {
                if (!instr.tmml.IsComponentEnabled(element)) {
                    continue;
                }
                const Node value = Immediate(0);
                SetTemporary(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }

        // When arrayed, the layer index occupies gpr8 and coordinates follow.
        const u64 base_index = is_array ? 1 : 0;
        const u64 num_components = [texture_type] {
            switch (texture_type) {
            case TextureType::Texture1D:
                return 1;
            case TextureType::Texture2D:
                return 2;
            case TextureType::TextureCube:
                return 3;
            default:
                UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
                return 2;
            }
        }();
        // TODO: What's the array component used for?

        std::vector<Node> coords;
        coords.reserve(num_components);
        for (u64 component = 0; component < num_components; ++component) {
            coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
        }

        u32 indexer = 0;
        for (u32 element = 0; element < 2; ++element) {
            if (!instr.tmml.IsComponentEnabled(element)) {
                continue;
            }
            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
            Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
            SetTemporary(bb, indexer++, std::move(value));
        }
        for (u32 i = 0; i < indexer; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
    case OpCode::Id::TLD: {
        // Texel fetch with integer coordinates.
        UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");

        WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
        break;
    }
    case OpCode::Id::TLDS: {
        const TextureType texture_type{instr.tlds.GetTextureType()};
        const bool is_array{instr.tlds.IsArrayTexture()};

        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");

        const Node4 components = GetTldsCode(instr, texture_type, is_array);

        if (instr.tlds.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 361 | |||
| 362 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( | ||
| 363 | SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { | ||
| 364 | if (info.IsComplete()) { | ||
| 365 | return info; | ||
| 366 | } | ||
| 367 | if (!sampler) { | ||
| 368 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | ||
| 369 | info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); | ||
| 370 | info.is_array = info.is_array.value_or(false); | ||
| 371 | info.is_shadow = info.is_shadow.value_or(false); | ||
| 372 | info.is_buffer = info.is_buffer.value_or(false); | ||
| 373 | return info; | ||
| 374 | } | ||
| 375 | info.type = info.type.value_or(sampler->texture_type); | ||
| 376 | info.is_array = info.is_array.value_or(sampler->is_array != 0); | ||
| 377 | info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); | ||
| 378 | info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); | ||
| 379 | return info; | ||
| 380 | } | ||
| 381 | |||
| 382 | std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | ||
| 383 | SamplerInfo sampler_info) { | ||
| 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); | ||
| 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); | ||
| 386 | |||
| 387 | // If this sampler has already been used, return the existing mapping. | ||
| 388 | const auto it = | ||
| 389 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 390 | [offset](const SamplerEntry& entry) { return entry.offset == offset; }); | ||
| 391 | if (it != used_samplers.end()) { | ||
| 392 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 393 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 394 | return *it; | ||
| 395 | } | ||
| 396 | |||
| 397 | // Otherwise create a new mapping for this sampler | ||
| 398 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 399 | return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, | ||
| 400 | *info.is_shadow, *info.is_buffer, false); | ||
| 401 | } | ||
| 402 | |||
// Resolves a bindless sampler whose handle lives in register 'reg'. The
// handle's origin is tracked back through the IR: it may come from a bindless
// constant buffer slot, a pair of separate texture/sampler slots, or an
// indexed (array) access. 'index_var' receives the runtime index variable for
// the array case. Returns nullopt when the handle cannot be tracked.
std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
                                                         SamplerInfo info, Node& index_var) {
    const Node sampler_register = GetRegister(reg);
    // Walk global_code backwards from the end to find where the handle was produced.
    const auto [base_node, tracked_sampler_info] =
        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
    if (!base_node) {
        UNREACHABLE();
        return std::nullopt;
    }

    // Note: each branch shadows 'sampler_info' with the variant alternative it extracted.
    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
        // Handle read from constant buffer 'buffer' at 'offset'.
        const u32 buffer = sampler_info->index;
        const u32 offset = sampler_info->offset;
        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));

        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
                                     [buffer, offset](const SamplerEntry& entry) {
                                         return entry.buffer == buffer && entry.offset == offset;
                                     });
        if (it != used_samplers.end()) {
            ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow);
            return *it;
        }

        // Otherwise create a new mapping for this sampler
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, false);
    }
    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
        // Handle built from separate texture and sampler constant-buffer slots.
        const std::pair indices = sampler_info->indices;
        const std::pair offsets = sampler_info->offsets;
        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));

        // Try to use an already created sampler if it exists
        const auto it =
            std::find_if(used_samplers.begin(), used_samplers.end(),
                         [indices, offsets](const SamplerEntry& entry) {
                             return offsets == std::pair{entry.offset, entry.secondary_offset} &&
                                    indices == std::pair{entry.buffer, entry.secondary_buffer};
                         });
        if (it != used_samplers.end()) {
            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
            return *it;
        }

        // Otherwise create a new mapping for this sampler
        const u32 next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer);
    }
    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
        // Dynamically indexed array of bound samplers; the runtime index is
        // exposed to the caller through 'index_var'.
        const u32 base_offset = sampler_info->base_offset / 4;
        index_var = GetCustomVariable(sampler_info->bindless_var);
        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));

        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(
            used_samplers.begin(), used_samplers.end(),
            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
        if (it != used_samplers.end()) {
            ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
                   it->is_indexed);
            return *it;
        }

        uses_indexed_samplers = true;
        // Otherwise create a new mapping for this sampler
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, true);
    }
    // Tracked to an unknown node kind.
    return std::nullopt;
}
| 481 | |||
| 482 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 483 | u32 dest_elem = 0; | ||
| 484 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 485 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 486 | // Skip disabled components | ||
| 487 | continue; | ||
| 488 | } | ||
| 489 | SetTemporary(bb, dest_elem++, components[elem]); | ||
| 490 | } | ||
| 491 | // After writing values in temporals, move them to the real registers | ||
| 492 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 493 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 494 | } | ||
| 495 | } | ||
| 496 | |||
| 497 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, | ||
| 498 | bool ignore_mask) { | ||
| 499 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 500 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 501 | |||
| 502 | u32 dest_elem = 0; | ||
| 503 | for (u32 component = 0; component < 4; ++component) { | ||
| 504 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 505 | continue; | ||
| 506 | SetTemporary(bb, dest_elem++, components[component]); | ||
| 507 | } | ||
| 508 | |||
| 509 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 510 | if (i < 2) { | ||
| 511 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 512 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); | ||
| 513 | } else { | ||
| 514 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 515 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 516 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); | ||
| 517 | } | ||
| 518 | } | ||
| 519 | } | ||
| 520 | |||
| 521 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 522 | const Node4& components, bool ignore_mask) { | ||
| 523 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 524 | // float instruction). | ||
| 525 | |||
| 526 | Node4 values; | ||
| 527 | u32 dest_elem = 0; | ||
| 528 | for (u32 component = 0; component < 4; ++component) { | ||
| 529 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 530 | continue; | ||
| 531 | values[dest_elem++] = components[component]; | ||
| 532 | } | ||
| 533 | if (dest_elem == 0) | ||
| 534 | return; | ||
| 535 | |||
| 536 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 537 | |||
| 538 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 539 | if (dest_elem <= 2) { | ||
| 540 | SetRegister(bb, instr.gpr0, first_value); | ||
| 541 | return; | ||
| 542 | } | ||
| 543 | |||
| 544 | SetTemporary(bb, 0, first_value); | ||
| 545 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 546 | |||
| 547 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 548 | SetRegister(bb, instr.gpr28, GetTemporary(1)); | ||
| 549 | } | ||
| 550 | |||
// Builds the IR for a generic texture sample. 'array' and 'depth_compare' are
// null when unused; 'bias_offset' is the register offset (relative to gpr20)
// where the lod/bias operand lives; 'bindless_reg' selects bindless resolution
// of the sampler when present. Returns one node per destination component.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset,
                               std::vector<Node> aoffi,
                               std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool is_array = array != nullptr;
    const bool is_shadow = depth_compare != nullptr;
    const bool is_bindless = bindless_reg.has_value();

    // 3D textures cannot be arrayed or shadowed on this hardware.
    ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
               "Illegal texture type");

    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = is_shadow;
    info.is_buffer = false;

    Node index_var;
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
                    : GetSampler(instr.sampler, info);
    // Unresolvable sampler: return zeros so destination registers stay defined.
    if (!sampler) {
        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
    }

    // LZ/LL/LLA supply an explicit level of detail; everything else samples
    // with implicit (possibly biased) LOD.
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;
    const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;

    Node bias;
    Node lod;
    switch (process_mode) {
    case TextureProcessMode::None:
        break;
    case TextureProcessMode::LZ:
        // "Level zero": sample the base mip explicitly.
        lod = Immediate(0.0f);
        break;
    case TextureProcessMode::LB:
        // If present, lod or bias are always stored in the register indexed by the gpr20 field with
        // an offset depending on the usage of the other registers.
        bias = GetRegister(instr.gpr20.Value() + bias_offset);
        break;
    case TextureProcessMode::LL:
        lod = GetRegister(instr.gpr20.Value() + bias_offset);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
        break;
    }

    // Emit one sample operation per destination component; 'element' selects
    // which component of the result each node extracts.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
                         lod,      {},    element,       index_var};
        values[element] = Operation(opcode, meta, coords);
    }

    return values;
}
| 612 | |||
// Decodes the TEX operand layout: gathers coordinates from gpr8 (after the
// optional array index), and walks the gpr20-based parameter registers in
// their fixed order — bindless handle, lod/bias, AOFFI offsets, depth-compare
// reference — before delegating to GetTextureCode.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
                           bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool lod_bias_enabled{
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};

    const bool is_bindless = bindless_reg.has_value();

    // 'parameter_register' tracks the next unconsumed gpr20-relative operand.
    u64 parameter_register = instr.gpr20.Value();
    if (is_bindless) {
        // The bindless handle occupies the first parameter register.
        ++parameter_register;
    }

    // Offset (relative to gpr20) that GetTextureCode uses to read the
    // lod/bias operand when one is present.
    const u32 bias_lod_offset = (is_bindless ? 1 : 0);
    if (lod_bias_enabled) {
        ++parameter_register;
    }

    // 4 coordinate registers allowed without lod/bias, 5 with it.
    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
                                                              lod_bias_enabled, 4, 5);
    const auto coord_count = std::get<0>(coord_counts);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    std::vector<Node> aoffi;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
    }

    Node dc;
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        dc = GetRegister(parameter_register++);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
                          aoffi, bindless_reg);
}
| 665 | |||
| 666 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 667 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 668 | const bool lod_bias_enabled = | ||
| 669 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 670 | |||
| 671 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 672 | lod_bias_enabled, 4, 4); | ||
| 673 | const auto coord_count = std::get<0>(coord_counts); | ||
| 674 | |||
| 675 | // If enabled arrays index is always stored in the gpr8 field | ||
| 676 | const u64 array_register = instr.gpr8.Value(); | ||
| 677 | // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used | ||
| 678 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 679 | const u64 last_coord_register = | ||
| 680 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 681 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 682 | : coord_register + 1; | ||
| 683 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 684 | |||
| 685 | std::vector<Node> coords; | ||
| 686 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 687 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 688 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 689 | } | ||
| 690 | |||
| 691 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 692 | |||
| 693 | Node dc; | ||
| 694 | if (depth_compare) { | ||
| 695 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||
| 696 | // or bias are used | ||
| 697 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 698 | dc = GetRegister(depth_register); | ||
| 699 | } | ||
| 700 | |||
| 701 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, | ||
| 702 | {}); | ||
| 703 | } | ||
| 704 | |||
| 705 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 706 | bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { | ||
| 707 | ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); | ||
| 708 | |||
| 709 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 710 | |||
| 711 | // If enabled arrays index is always stored in the gpr8 field | ||
| 712 | const u64 array_register = instr.gpr8.Value(); | ||
| 713 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 714 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 715 | |||
| 716 | std::vector<Node> coords; | ||
| 717 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 718 | coords.push_back(GetRegister(coord_register + i)); | ||
| 719 | } | ||
| 720 | |||
| 721 | u64 parameter_register = instr.gpr20.Value(); | ||
| 722 | |||
| 723 | SamplerInfo info; | ||
| 724 | info.type = texture_type; | ||
| 725 | info.is_array = is_array; | ||
| 726 | info.is_shadow = depth_compare; | ||
| 727 | |||
| 728 | Node index_var; | ||
| 729 | const std::optional<SamplerEntry> sampler = | ||
| 730 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) | ||
| 731 | : GetSampler(instr.sampler, info); | ||
| 732 | Node4 values; | ||
| 733 | if (!sampler) { | ||
| 734 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 735 | values[element] = Immediate(0); | ||
| 736 | } | ||
| 737 | return values; | ||
| 738 | } | ||
| 739 | |||
| 740 | std::vector<Node> aoffi, ptp; | ||
| 741 | if (is_aoffi) { | ||
| 742 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||
| 743 | } else if (is_ptp) { | ||
| 744 | ptp = GetPtpCoordinates( | ||
| 745 | {GetRegister(parameter_register++), GetRegister(parameter_register++)}); | ||
| 746 | } | ||
| 747 | |||
| 748 | Node dc; | ||
| 749 | if (depth_compare) { | ||
| 750 | dc = GetRegister(parameter_register++); | ||
| 751 | } | ||
| 752 | |||
| 753 | const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) | ||
| 754 | : Immediate(static_cast<u32>(instr.tld4.component)); | ||
| 755 | |||
| 756 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 757 | auto coords_copy = coords; | ||
| 758 | MetaTexture meta{ | ||
| 759 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, | ||
| 760 | index_var}; | ||
| 761 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 762 | } | ||
| 763 | |||
| 764 | return values; | ||
| 765 | } | ||
| 766 | |||
| 767 | Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||
| 768 | const auto texture_type{instr.tld.texture_type}; | ||
| 769 | const bool is_array{instr.tld.is_array != 0}; | ||
| 770 | const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; | ||
| 771 | const std::size_t coord_count{GetCoordCount(texture_type)}; | ||
| 772 | |||
| 773 | u64 gpr8_cursor{instr.gpr8.Value()}; | ||
| 774 | const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; | ||
| 775 | |||
| 776 | std::vector<Node> coords; | ||
| 777 | coords.reserve(coord_count); | ||
| 778 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 779 | coords.push_back(GetRegister(gpr8_cursor++)); | ||
| 780 | } | ||
| 781 | |||
| 782 | u64 gpr20_cursor{instr.gpr20.Value()}; | ||
| 783 | // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 784 | const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; | ||
| 785 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 786 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 787 | |||
| 788 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); | ||
| 789 | |||
| 790 | Node4 values; | ||
| 791 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 792 | auto coords_copy = coords; | ||
| 793 | MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; | ||
| 794 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 795 | } | ||
| 796 | |||
| 797 | return values; | ||
| 798 | } | ||
| 799 | |||
| 800 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 801 | SamplerInfo info; | ||
| 802 | info.type = texture_type; | ||
| 803 | info.is_array = is_array; | ||
| 804 | info.is_shadow = false; | ||
| 805 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 806 | |||
| 807 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 808 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 809 | const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 810 | |||
| 811 | // If enabled arrays index is always stored in the gpr8 field | ||
| 812 | const u64 array_register = instr.gpr8.Value(); | ||
| 813 | // if is array gpr20 is used | ||
| 814 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 815 | |||
| 816 | const u64 last_coord_register = | ||
| 817 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 818 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 819 | : coord_register + 1; | ||
| 820 | |||
| 821 | std::vector<Node> coords; | ||
| 822 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 823 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 824 | coords.push_back( | ||
| 825 | GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); | ||
| 826 | } | ||
| 827 | |||
| 828 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 829 | // When lod is used always is in gpr20 | ||
| 830 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 831 | |||
| 832 | std::vector<Node> aoffi; | ||
| 833 | if (aoffi_enabled) { | ||
| 834 | aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); | ||
| 835 | } | ||
| 836 | |||
| 837 | Node4 values; | ||
| 838 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 839 | auto coords_copy = coords; | ||
| 840 | MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; | ||
| 841 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 842 | } | ||
| 843 | return values; | ||
| 844 | } | ||
| 845 | |||
| 846 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 847 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 848 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 849 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 850 | |||
| 851 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 852 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 853 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 854 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 855 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 856 | } | ||
| 857 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 858 | total_coord_count += | ||
| 859 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 860 | |||
| 861 | return {coord_count, total_coord_count}; | ||
| 862 | } | ||
| 863 | |||
| 864 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | ||
| 865 | bool is_tld4) { | ||
| 866 | const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; | ||
| 867 | const u32 size = is_tld4 ? 6 : 4; | ||
| 868 | const s32 wrap_value = is_tld4 ? 32 : 8; | ||
| 869 | const s32 diff_value = is_tld4 ? 64 : 16; | ||
| 870 | const u32 mask = (1U << size) - 1; | ||
| 871 | |||
| 872 | std::vector<Node> aoffi; | ||
| 873 | aoffi.reserve(coord_count); | ||
| 874 | |||
| 875 | const auto aoffi_immediate{ | ||
| 876 | TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||
| 877 | if (!aoffi_immediate) { | ||
| 878 | // Variable access, not supported on AMD. | ||
| 879 | LOG_WARNING(HW_GPU, | ||
| 880 | "AOFFI constant folding failed, some hardware might have graphical issues"); | ||
| 881 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 882 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); | ||
| 883 | const Node condition = | ||
| 884 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||
| 885 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||
| 886 | aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 887 | } | ||
| 888 | return aoffi; | ||
| 889 | } | ||
| 890 | |||
| 891 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 892 | s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; | ||
| 893 | if (value >= wrap_value) { | ||
| 894 | value -= diff_value; | ||
| 895 | } | ||
| 896 | aoffi.push_back(Immediate(value)); | ||
| 897 | } | ||
| 898 | return aoffi; | ||
| 899 | } | ||
| 900 | |||
| 901 | std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { | ||
| 902 | static constexpr u32 num_entries = 8; | ||
| 903 | |||
| 904 | std::vector<Node> ptp; | ||
| 905 | ptp.reserve(num_entries); | ||
| 906 | |||
| 907 | const auto global_size = static_cast<s64>(global_code.size()); | ||
| 908 | const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); | ||
| 909 | const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); | ||
| 910 | if (!low || !high) { | ||
| 911 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 912 | const u32 reg = entry / 4; | ||
| 913 | const u32 offset = entry % 4; | ||
| 914 | const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); | ||
| 915 | const Node condition = | ||
| 916 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); | ||
| 917 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); | ||
| 918 | ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 919 | } | ||
| 920 | return ptp; | ||
| 921 | } | ||
| 922 | |||
| 923 | const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); | ||
| 924 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 925 | s32 value = (immediate >> (entry * 8)) & 0b111111; | ||
| 926 | if (value >= 32) { | ||
| 927 | value -= 64; | ||
| 928 | } | ||
| 929 | ptp.push_back(Immediate(value)); | ||
| 930 | } | ||
| 931 | |||
| 932 | return ptp; | ||
| 933 | } | ||
| 934 | |||
| 935 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null | |||
| @@ -1,169 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::VideoType; | ||
| 18 | using Tegra::Shader::VmadShr; | ||
| 19 | using Tegra::Shader::VmnmxOperation; | ||
| 20 | using Tegra::Shader::VmnmxType; | ||
| 21 | |||
| 22 | u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { | ||
| 23 | const Instruction instr = {program_code[pc]}; | ||
| 24 | const auto opcode = OpCode::Decode(instr); | ||
| 25 | |||
| 26 | if (opcode->get().GetId() == OpCode::Id::VMNMX) { | ||
| 27 | DecodeVMNMX(bb, instr); | ||
| 28 | return pc; | ||
| 29 | } | ||
| 30 | |||
| 31 | const Node op_a = | ||
| 32 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, | ||
| 33 | instr.video.type_a, instr.video.byte_height_a); | ||
| 34 | const Node op_b = [this, instr] { | ||
| 35 | if (instr.video.use_register_b) { | ||
| 36 | return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, | ||
| 37 | instr.video.signed_b, instr.video.type_b, | ||
| 38 | instr.video.byte_height_b); | ||
| 39 | } | ||
| 40 | if (instr.video.signed_b) { | ||
| 41 | const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); | ||
| 42 | return Immediate(static_cast<u32>(imm)); | ||
| 43 | } else { | ||
| 44 | return Immediate(instr.alu.GetImm20_16()); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | switch (opcode->get().GetId()) { | ||
| 49 | case OpCode::Id::VMAD: { | ||
| 50 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 51 | const Node op_c = GetRegister(instr.gpr39); | ||
| 52 | |||
| 53 | Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); | ||
| 54 | value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); | ||
| 55 | |||
| 56 | if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { | ||
| 57 | const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); | ||
| 58 | value = | ||
| 59 | SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); | ||
| 60 | } | ||
| 61 | |||
| 62 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 63 | SetRegister(bb, instr.gpr0, value); | ||
| 64 | break; | ||
| 65 | } | ||
| 66 | case OpCode::Id::VSETP: { | ||
| 67 | // We can't use the constant predicate as destination. | ||
| 68 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 69 | |||
| 70 | const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 71 | const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); | ||
| 72 | const Node second_pred = GetPredicate(instr.vsetp.pred39, false); | ||
| 73 | |||
| 74 | const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 75 | |||
| 76 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 77 | SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); | ||
| 78 | |||
| 79 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 80 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 81 | // if enabled | ||
| 82 | const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); | ||
| 83 | SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); | ||
| 84 | } | ||
| 85 | break; | ||
| 86 | } | ||
| 87 | default: | ||
| 88 | UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); | ||
| 89 | } | ||
| 90 | |||
| 91 | return pc; | ||
| 92 | } | ||
| 93 | |||
| 94 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, | ||
| 95 | u64 byte_height) { | ||
| 96 | if (!is_chunk) { | ||
| 97 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 98 | } | ||
| 99 | |||
| 100 | switch (type) { | ||
| 101 | case VideoType::Size16_Low: | ||
| 102 | return BitfieldExtract(op, 0, 16); | ||
| 103 | case VideoType::Size16_High: | ||
| 104 | return BitfieldExtract(op, 16, 16); | ||
| 105 | case VideoType::Size32: | ||
| 106 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 107 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 108 | UNIMPLEMENTED(); | ||
| 109 | return Immediate(0); | ||
| 110 | case VideoType::Invalid: | ||
| 111 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 112 | return Immediate(0); | ||
| 113 | default: | ||
| 114 | UNREACHABLE(); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 119 | void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { | ||
| 120 | UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); | ||
| 121 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); | ||
| 122 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); | ||
| 123 | UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); | ||
| 124 | UNIMPLEMENTED_IF(instr.vmnmx.sat); | ||
| 125 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 126 | |||
| 127 | Node op_a = GetRegister(instr.gpr8); | ||
| 128 | Node op_b = GetRegister(instr.gpr20); | ||
| 129 | Node op_c = GetRegister(instr.gpr39); | ||
| 130 | |||
| 131 | const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed | ||
| 132 | const bool is_oper2_signed = instr.vmnmx.is_dest_signed; | ||
| 133 | |||
| 134 | const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; | ||
| 135 | Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); | ||
| 136 | |||
| 137 | switch (instr.vmnmx.operation) { | ||
| 138 | case VmnmxOperation::Mrg_16H: | ||
| 139 | value = BitfieldInsert(move(op_c), move(value), 16, 16); | ||
| 140 | break; | ||
| 141 | case VmnmxOperation::Mrg_16L: | ||
| 142 | value = BitfieldInsert(move(op_c), move(value), 0, 16); | ||
| 143 | break; | ||
| 144 | case VmnmxOperation::Mrg_8B0: | ||
| 145 | value = BitfieldInsert(move(op_c), move(value), 0, 8); | ||
| 146 | break; | ||
| 147 | case VmnmxOperation::Mrg_8B2: | ||
| 148 | value = BitfieldInsert(move(op_c), move(value), 16, 8); | ||
| 149 | break; | ||
| 150 | case VmnmxOperation::Acc: | ||
| 151 | value = Operation(OperationCode::IAdd, move(value), move(op_c)); | ||
| 152 | break; | ||
| 153 | case VmnmxOperation::Min: | ||
| 154 | value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); | ||
| 155 | break; | ||
| 156 | case VmnmxOperation::Max: | ||
| 157 | value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); | ||
| 158 | break; | ||
| 159 | case VmnmxOperation::Nop: | ||
| 160 | break; | ||
| 161 | default: | ||
| 162 | UNREACHABLE(); | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | |||
| 166 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::ShuffleOperation; | ||
| 17 | using Tegra::Shader::VoteOperation; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 22 | switch (vote_op) { | ||
| 23 | case VoteOperation::All: | ||
| 24 | return OperationCode::VoteAll; | ||
| 25 | case VoteOperation::Any: | ||
| 26 | return OperationCode::VoteAny; | ||
| 27 | case VoteOperation::Eq: | ||
| 28 | return OperationCode::VoteEqual; | ||
| 29 | default: | ||
| 30 | UNREACHABLE_MSG("Invalid vote operation={}", vote_op); | ||
| 31 | return OperationCode::VoteAll; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | } // Anonymous namespace | ||
| 36 | |||
| 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 38 | const Instruction instr = {program_code[pc]}; | ||
| 39 | const auto opcode = OpCode::Decode(instr); | ||
| 40 | |||
| 41 | // Signal the backend that this shader uses warp instructions. | ||
| 42 | uses_warps = true; | ||
| 43 | |||
| 44 | switch (opcode->get().GetId()) { | ||
| 45 | case OpCode::Id::VOTE: { | ||
| 46 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 47 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 48 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 49 | SetRegister(bb, instr.gpr0, active); | ||
| 50 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | case OpCode::Id::SHFL: { | ||
| 54 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | ||
| 55 | : GetRegister(instr.gpr39); | ||
| 56 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | ||
| 57 | : GetRegister(instr.gpr20); | ||
| 58 | |||
| 59 | Node thread_id = Operation(OperationCode::ThreadId); | ||
| 60 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); | ||
| 61 | Node seg_mask = BitfieldExtract(mask, 8, 16); | ||
| 62 | |||
| 63 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); | ||
| 64 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); | ||
| 65 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, | ||
| 66 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); | ||
| 67 | |||
| 68 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { | ||
| 69 | switch (instr.shfl.operation) { | ||
| 70 | case ShuffleOperation::Idx: | ||
| 71 | return Operation(OperationCode::IBitwiseOr, | ||
| 72 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), | ||
| 73 | min_thread_id); | ||
| 74 | case ShuffleOperation::Down: | ||
| 75 | return Operation(OperationCode::IAdd, thread_id, index); | ||
| 76 | case ShuffleOperation::Up: | ||
| 77 | return Operation(OperationCode::IAdd, thread_id, | ||
| 78 | Operation(OperationCode::INegate, index)); | ||
| 79 | case ShuffleOperation::Bfly: | ||
| 80 | return Operation(OperationCode::IBitwiseXor, thread_id, index); | ||
| 81 | } | ||
| 82 | UNREACHABLE(); | ||
| 83 | return Immediate(0U); | ||
| 84 | }(); | ||
| 85 | |||
| 86 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { | ||
| 87 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 88 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); | ||
| 89 | } else { | ||
| 90 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | |||
| 94 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 95 | SetRegister( | ||
| 96 | bb, instr.gpr0, | ||
| 97 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case OpCode::Id::FSWZADD: { | ||
| 101 | UNIMPLEMENTED_IF(instr.fswzadd.ndv); | ||
| 102 | |||
| 103 | Node op_a = GetRegister(instr.gpr8); | ||
| 104 | Node op_b = GetRegister(instr.gpr20); | ||
| 105 | Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); | ||
| 106 | SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); | ||
| 107 | break; | ||
| 108 | } | ||
| 109 | default: | ||
| 110 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | |||
| 114 | return pc; | ||
| 115 | } | ||
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::PredCondition; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | ||
| 22 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 24 | "Condition codes generation in XMAD is not implemented"); | ||
| 25 | |||
| 26 | Node op_a = GetRegister(instr.gpr8); | ||
| 27 | |||
| 28 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 29 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 30 | const bool is_signed_a = instr.xmad.sign_a == 1; | ||
| 31 | const bool is_signed_b = instr.xmad.sign_b == 1; | ||
| 32 | const bool is_signed_c = is_signed_a; | ||
| 33 | |||
| 34 | auto [is_merge, is_psl, is_high_b, mode, op_b_binding, | ||
| 35 | op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { | ||
| 36 | switch (opcode->get().GetId()) { | ||
| 37 | case OpCode::Id::XMAD_CR: | ||
| 38 | return {instr.xmad.merge_56, | ||
| 39 | instr.xmad.product_shift_left_second, | ||
| 40 | instr.xmad.high_b, | ||
| 41 | instr.xmad.mode_cbf, | ||
| 42 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 43 | GetRegister(instr.gpr39)}; | ||
| 44 | case OpCode::Id::XMAD_RR: | ||
| 45 | return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, | ||
| 46 | instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 47 | case OpCode::Id::XMAD_RC: | ||
| 48 | return {false, | ||
| 49 | false, | ||
| 50 | instr.xmad.high_b, | ||
| 51 | instr.xmad.mode_cbf, | ||
| 52 | GetRegister(instr.gpr39), | ||
| 53 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 54 | case OpCode::Id::XMAD_IMM: | ||
| 55 | return {instr.xmad.merge_37, | ||
| 56 | instr.xmad.product_shift_left, | ||
| 57 | false, | ||
| 58 | instr.xmad.mode, | ||
| 59 | Immediate(static_cast<u32>(instr.xmad.imm20_16)), | ||
| 60 | GetRegister(instr.gpr39)}; | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 63 | return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; | ||
| 64 | } | ||
| 65 | }(); | ||
| 66 | |||
| 67 | op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), | ||
| 68 | instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 69 | |||
| 70 | const Node original_b = op_b_binding; | ||
| 71 | const Node op_b = | ||
| 72 | SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), | ||
| 73 | is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 74 | |||
| 75 | // we already check sign_a and sign_b is difference or not before so just use one in here. | ||
| 76 | Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); | ||
| 77 | if (is_psl) { | ||
| 78 | product = | ||
| 79 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); | ||
| 80 | } | ||
| 81 | SetTemporary(bb, 0, product); | ||
| 82 | product = GetTemporary(0); | ||
| 83 | |||
| 84 | Node original_c = op_c; | ||
| 85 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error | ||
| 86 | op_c = [&] { | ||
| 87 | switch (set_mode) { | ||
| 88 | case Tegra::Shader::XmadMode::None: | ||
| 89 | return original_c; | ||
| 90 | case Tegra::Shader::XmadMode::CLo: | ||
| 91 | return BitfieldExtract(std::move(original_c), 0, 16); | ||
| 92 | case Tegra::Shader::XmadMode::CHi: | ||
| 93 | return BitfieldExtract(std::move(original_c), 16, 16); | ||
| 94 | case Tegra::Shader::XmadMode::CBcc: { | ||
| 95 | Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, | ||
| 96 | original_b, Immediate(16)); | ||
| 97 | return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), | ||
| 98 | std::move(shifted_b)); | ||
| 99 | } | ||
| 100 | case Tegra::Shader::XmadMode::CSfu: { | ||
| 101 | const Node comp_a = | ||
| 102 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); | ||
| 103 | const Node comp_b = | ||
| 104 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); | ||
| 105 | const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); | ||
| 106 | |||
| 107 | const Node comp_minus_a = GetPredicateComparisonInteger( | ||
| 108 | PredCondition::NE, is_signed_a, | ||
| 109 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, | ||
| 110 | Immediate(0x80000000)), | ||
| 111 | Immediate(0)); | ||
| 112 | const Node comp_minus_b = GetPredicateComparisonInteger( | ||
| 113 | PredCondition::NE, is_signed_b, | ||
| 114 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, | ||
| 115 | Immediate(0x80000000)), | ||
| 116 | Immediate(0)); | ||
| 117 | |||
| 118 | Node new_c = Operation( | ||
| 119 | OperationCode::Select, comp_minus_a, | ||
| 120 | SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), | ||
| 121 | original_c); | ||
| 122 | new_c = Operation( | ||
| 123 | OperationCode::Select, comp_minus_b, | ||
| 124 | SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), | ||
| 125 | std::move(new_c)); | ||
| 126 | |||
| 127 | return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); | ||
| 128 | } | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return Immediate(0); | ||
| 132 | } | ||
| 133 | }(); | ||
| 134 | |||
| 135 | SetTemporary(bb, 1, op_c); | ||
| 136 | op_c = GetTemporary(1); | ||
| 137 | |||
| 138 | // TODO(Rodrigo): Use an appropiate sign for this operation | ||
| 139 | Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); | ||
| 140 | SetTemporary(bb, 2, sum); | ||
| 141 | sum = GetTemporary(2); | ||
| 142 | if (is_merge) { | ||
| 143 | const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), | ||
| 144 | Immediate(0), Immediate(16)); | ||
| 145 | const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, | ||
| 146 | Immediate(16)); | ||
| 147 | sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); | ||
| 148 | } | ||
| 149 | |||
| 150 | SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); | ||
| 151 | SetRegister(bb, instr.gpr0, std::move(sum)); | ||
| 152 | |||
| 153 | return pc; | ||
| 154 | } | ||
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||