diff options
| author | 2019-01-25 23:42:14 -0500 | |
|---|---|---|
| committer | 2019-01-25 23:42:14 -0500 | |
| commit | 1f4ca1e841cd0b0427218d787efe10a3fa62df33 (patch) | |
| tree | 00cc1743c6a6ba593e3b56897b13c2272a71d779 /src/video_core/shader | |
| parent | Merge pull request #2054 from bunnei/scope-context-refactor (diff) | |
| parent | shader_ir: Fixup clang build (diff) | |
| download | yuzu-1f4ca1e841cd0b0427218d787efe10a3fa62df33.tar.gz yuzu-1f4ca1e841cd0b0427218d787efe10a3fa62df33.tar.xz yuzu-1f4ca1e841cd0b0427218d787efe10a3fa62df33.zip | |
Merge pull request #1927 from ReinUsesLisp/shader-ir
video_core: Replace gl_shader_decompiler with an IR based decompiler
Diffstat (limited to 'src/video_core/shader')
29 files changed, 4167 insertions, 0 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp new file mode 100644 index 000000000..6fdcac784 --- /dev/null +++ b/src/video_core/shader/decode.cpp | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <set> | ||
| 7 | |||
| 8 | #include <fmt/format.h> | ||
| 9 | |||
| 10 | #include "common/assert.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/engines/shader_header.h" | ||
| 14 | #include "video_core/shader/shader_ir.h" | ||
| 15 | |||
| 16 | namespace VideoCommon::Shader { | ||
| 17 | |||
| 18 | using Tegra::Shader::Instruction; | ||
| 19 | using Tegra::Shader::OpCode; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | /// Merges exit method of two parallel branches. | ||
| 24 | constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) { | ||
| 25 | if (a == ExitMethod::Undetermined) { | ||
| 26 | return b; | ||
| 27 | } | ||
| 28 | if (b == ExitMethod::Undetermined) { | ||
| 29 | return a; | ||
| 30 | } | ||
| 31 | if (a == b) { | ||
| 32 | return a; | ||
| 33 | } | ||
| 34 | return ExitMethod::Conditional; | ||
| 35 | } | ||
| 36 | |||
| 37 | /** | ||
| 38 | * Returns whether the instruction at the specified offset is a 'sched' instruction. | ||
| 39 | * Sched instructions always appear before a sequence of 3 instructions. | ||
| 40 | */ | ||
| 41 | constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | ||
| 42 | constexpr u32 SchedPeriod = 4; | ||
| 43 | u32 absolute_offset = offset - main_offset; | ||
| 44 | |||
| 45 | return (absolute_offset % SchedPeriod) == 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace | ||
| 49 | |||
| 50 | void ShaderIR::Decode() { | ||
| 51 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 52 | |||
| 53 | std::set<u32> labels; | ||
| 54 | const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); | ||
| 55 | if (exit_method != ExitMethod::AlwaysEnd) { | ||
| 56 | UNREACHABLE_MSG("Program does not always end"); | ||
| 57 | } | ||
| 58 | |||
| 59 | if (labels.empty()) { | ||
| 60 | basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); | ||
| 61 | return; | ||
| 62 | } | ||
| 63 | |||
| 64 | labels.insert(main_offset); | ||
| 65 | |||
| 66 | for (const u32 label : labels) { | ||
| 67 | const auto next_it = labels.lower_bound(label + 1); | ||
| 68 | const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; | ||
| 69 | |||
| 70 | basic_blocks.insert({label, DecodeRange(label, next_label)}); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) { | ||
| 75 | const auto [iter, inserted] = | ||
| 76 | exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined); | ||
| 77 | ExitMethod& exit_method = iter->second; | ||
| 78 | if (!inserted) | ||
| 79 | return exit_method; | ||
| 80 | |||
| 81 | for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) { | ||
| 82 | coverage_begin = std::min(coverage_begin, offset); | ||
| 83 | coverage_end = std::max(coverage_end, offset + 1); | ||
| 84 | |||
| 85 | const Instruction instr = {program_code[offset]}; | ||
| 86 | const auto opcode = OpCode::Decode(instr); | ||
| 87 | if (!opcode) | ||
| 88 | continue; | ||
| 89 | switch (opcode->get().GetId()) { | ||
| 90 | case OpCode::Id::EXIT: { | ||
| 91 | // The EXIT instruction can be predicated, which means that the shader can conditionally | ||
| 92 | // end on this instruction. We have to consider the case where the condition is not met | ||
| 93 | // and check the exit method of that other basic block. | ||
| 94 | using Tegra::Shader::Pred; | ||
| 95 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 96 | return exit_method = ExitMethod::AlwaysEnd; | ||
| 97 | } else { | ||
| 98 | const ExitMethod not_met = Scan(offset + 1, end, labels); | ||
| 99 | return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met); | ||
| 100 | } | ||
| 101 | } | ||
| 102 | case OpCode::Id::BRA: { | ||
| 103 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 104 | labels.insert(target); | ||
| 105 | const ExitMethod no_jmp = Scan(offset + 1, end, labels); | ||
| 106 | const ExitMethod jmp = Scan(target, end, labels); | ||
| 107 | return exit_method = ParallelExit(no_jmp, jmp); | ||
| 108 | } | ||
| 109 | case OpCode::Id::SSY: | ||
| 110 | case OpCode::Id::PBK: { | ||
| 111 | // The SSY and PBK use a similar encoding as the BRA instruction. | ||
| 112 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 113 | "Constant buffer branching is not supported"); | ||
| 114 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 115 | labels.insert(target); | ||
| 116 | // Continue scanning for an exit method. | ||
| 117 | break; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | } | ||
| 121 | return exit_method = ExitMethod::AlwaysReturn; | ||
| 122 | } | ||
| 123 | |||
| 124 | BasicBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 125 | BasicBlock basic_block; | ||
| 126 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 127 | pc = DecodeInstr(basic_block, pc); | ||
| 128 | } | ||
| 129 | return std::move(basic_block); | ||
| 130 | } | ||
| 131 | |||
/// Decodes the single instruction at `pc` into `bb` and returns the next pc.
/// Dispatches to a per-type decoder member function; unhandled opcodes fall
/// through to DecodeOther. Predicated instructions are wrapped in a
/// Conditional node instead of being emitted directly.
u32 ShaderIR::DecodeInstr(BasicBlock& bb, u32 pc) {
    // Ignore sched instructions when generating code.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }

    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Decoding failure
    if (!opcode) {
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        return pc + 1;
    }

    // Emit a human-readable comment node with the pc, mnemonic and raw encoding.
    bb.push_back(
        Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));

    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");

    // Table mapping each opcode type to its decoder member function.
    // Built once on first call (function-local static).
    static const std::map<OpCode::Type, u32 (ShaderIR::*)(BasicBlock&, const BasicBlock&, u32)>
        decoders = {
            {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
            {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
            {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
            {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
            {OpCode::Type::Shift, &ShaderIR::DecodeShift},
            {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
            {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
            {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
            {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
            {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
            {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
            {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
            {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
            {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
            {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
            {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
            {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
            {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
            {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
            {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
            {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
            {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
            {OpCode::Type::Video, &ShaderIR::DecodeVideo},
            {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
        };

    // Decode into a temporary block first so the result can optionally be
    // wrapped in a Conditional node below.
    std::vector<Node> tmp_block;
    if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) {
        pc = (this->*decoder->second)(tmp_block, bb, pc);
    } else {
        pc = DecodeOther(tmp_block, bb, pc);
    }

    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
    // executed.
    const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId());
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);

    if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) {
        // Guard the decoded nodes behind the instruction's predicate.
        bb.push_back(
            Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)));
    } else {
        // Unpredicated: splice the decoded nodes straight into the block.
        for (auto& node : tmp_block) {
            bb.push_back(std::move(node));
        }
    }

    return pc + 1;
}
| 205 | |||
| 206 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp new file mode 100644 index 000000000..e7847f614 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic.cpp | |||
| @@ -0,0 +1,155 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::SubOp; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | Node op_b = [&]() -> Node { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::MOV_C: | ||
| 34 | case OpCode::Id::MOV_R: { | ||
| 35 | // MOV does not have neither 'abs' nor 'neg' bits. | ||
| 36 | SetRegister(bb, instr.gpr0, op_b); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::FMUL_C: | ||
| 40 | case OpCode::Id::FMUL_R: | ||
| 41 | case OpCode::Id::FMUL_IMM: { | ||
| 42 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 43 | UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0, "FMUL tab5cb8_2({}) is not implemented", | ||
| 44 | instr.fmul.tab5cb8_2.Value()); | ||
| 45 | UNIMPLEMENTED_IF_MSG( | ||
| 46 | instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", | ||
| 47 | instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default | ||
| 48 | |||
| 49 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 50 | |||
| 51 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 52 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 53 | |||
| 54 | if (instr.fmul.postfactor != 0) { | ||
| 55 | auto postfactor = static_cast<s32>(instr.fmul.postfactor); | ||
| 56 | |||
| 57 | // Postfactor encoded as 3-bit 1's complement in instruction, interpreted with below | ||
| 58 | // logic. | ||
| 59 | if (postfactor >= 4) { | ||
| 60 | postfactor = 7 - postfactor; | ||
| 61 | } else { | ||
| 62 | postfactor = 0 - postfactor; | ||
| 63 | } | ||
| 64 | |||
| 65 | if (postfactor > 0) { | ||
| 66 | value = Operation(OperationCode::FMul, NO_PRECISE, value, | ||
| 67 | Immediate(static_cast<f32>(1 << postfactor))); | ||
| 68 | } else { | ||
| 69 | value = Operation(OperationCode::FDiv, NO_PRECISE, value, | ||
| 70 | Immediate(static_cast<f32>(1 << -postfactor))); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", | ||
| 114 | static_cast<unsigned>(instr.sub_op.Value())); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | }(); | ||
| 118 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 119 | |||
| 120 | SetRegister(bb, instr.gpr0, value); | ||
| 121 | break; | ||
| 122 | } | ||
| 123 | case OpCode::Id::FMNMX_C: | ||
| 124 | case OpCode::Id::FMNMX_R: | ||
| 125 | case OpCode::Id::FMNMX_IMM: { | ||
| 126 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 127 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 128 | |||
| 129 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 130 | |||
| 131 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 133 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 134 | |||
| 135 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 136 | SetRegister(bb, instr.gpr0, value); | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | case OpCode::Id::RRO_C: | ||
| 140 | case OpCode::Id::RRO_R: | ||
| 141 | case OpCode::Id::RRO_IMM: { | ||
| 142 | // Currently RRO is only implemented as a register move. | ||
| 143 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 144 | SetRegister(bb, instr.gpr0, op_b); | ||
| 145 | LOG_WARNING(HW_GPU, "RRO instruction is incomplete"); | ||
| 146 | break; | ||
| 147 | } | ||
| 148 | default: | ||
| 149 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 150 | } | ||
| 151 | |||
| 152 | return pc; | ||
| 153 | } | ||
| 154 | |||
| 155 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp new file mode 100644 index 000000000..a237dcb92 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half.cpp | |||
| @@ -0,0 +1,70 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_C || | ||
| 20 | opcode->get().GetId() == OpCode::Id::HADD2_R) { | ||
| 21 | UNIMPLEMENTED_IF(instr.alu_half.ftz != 0); | ||
| 22 | } | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.alu_half.saturate != 0, "Half float saturation not implemented"); | ||
| 24 | |||
| 25 | const bool negate_a = | ||
| 26 | opcode->get().GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; | ||
| 27 | const bool negate_b = | ||
| 28 | opcode->get().GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; | ||
| 29 | |||
| 30 | const Node op_a = GetOperandAbsNegHalf(GetRegister(instr.gpr8), instr.alu_half.abs_a, negate_a); | ||
| 31 | |||
| 32 | // instr.alu_half.type_a | ||
| 33 | |||
| 34 | Node op_b = [&]() { | ||
| 35 | switch (opcode->get().GetId()) { | ||
| 36 | case OpCode::Id::HADD2_C: | ||
| 37 | case OpCode::Id::HMUL2_C: | ||
| 38 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 39 | case OpCode::Id::HADD2_R: | ||
| 40 | case OpCode::Id::HMUL2_R: | ||
| 41 | return GetRegister(instr.gpr20); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | op_b = GetOperandAbsNegHalf(op_b, instr.alu_half.abs_b, negate_b); | ||
| 48 | |||
| 49 | Node value = [&]() { | ||
| 50 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a, instr.alu_half.type_b}}; | ||
| 51 | switch (opcode->get().GetId()) { | ||
| 52 | case OpCode::Id::HADD2_C: | ||
| 53 | case OpCode::Id::HADD2_R: | ||
| 54 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | ||
| 55 | case OpCode::Id::HMUL2_C: | ||
| 56 | case OpCode::Id::HMUL2_R: | ||
| 57 | return Operation(OperationCode::HMul, meta, op_a, op_b); | ||
| 58 | default: | ||
| 59 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 60 | return Immediate(0); | ||
| 61 | } | ||
| 62 | }(); | ||
| 63 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 64 | |||
| 65 | SetRegister(bb, instr.gpr0, value); | ||
| 66 | |||
| 67 | return pc; | ||
| 68 | } | ||
| 69 | |||
| 70 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp new file mode 100644 index 000000000..7b4f7d284 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 20 | UNIMPLEMENTED_IF(instr.alu_half_imm.ftz != 0); | ||
| 21 | } else { | ||
| 22 | UNIMPLEMENTED_IF(instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None); | ||
| 23 | } | ||
| 24 | UNIMPLEMENTED_IF_MSG(instr.alu_half_imm.saturate != 0, | ||
| 25 | "Half float immediate saturation not implemented"); | ||
| 26 | |||
| 27 | Node op_a = GetRegister(instr.gpr8); | ||
| 28 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 29 | |||
| 30 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 31 | |||
| 32 | Node value = [&]() { | ||
| 33 | MetaHalfArithmetic meta{true, {instr.alu_half_imm.type_a}}; | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HADD2_IMM: | ||
| 36 | return Operation(OperationCode::HAdd, meta, op_a, op_b); | ||
| 37 | case OpCode::Id::HMUL2_IMM: | ||
| 38 | return Operation(OperationCode::HMul, meta, op_a, op_b); | ||
| 39 | default: | ||
| 40 | UNREACHABLE(); | ||
| 41 | return Immediate(0); | ||
| 42 | } | ||
| 43 | }(); | ||
| 44 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 45 | |||
| 46 | SetRegister(bb, instr.gpr0, value); | ||
| 47 | |||
| 48 | return pc; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp new file mode 100644 index 000000000..4fd3db54e --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_immediate.cpp | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | switch (opcode->get().GetId()) { | ||
| 20 | case OpCode::Id::MOV32_IMM: { | ||
| 21 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 22 | break; | ||
| 23 | } | ||
| 24 | case OpCode::Id::FMUL32_IMM: { | ||
| 25 | Node value = | ||
| 26 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 27 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 28 | |||
| 29 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 30 | SetRegister(bb, instr.gpr0, value); | ||
| 31 | break; | ||
| 32 | } | ||
| 33 | case OpCode::Id::FADD32I: { | ||
| 34 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 35 | instr.fadd32i.negate_a); | ||
| 36 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 37 | instr.fadd32i.negate_b); | ||
| 38 | |||
| 39 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 40 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 46 | opcode->get().GetName()); | ||
| 47 | } | ||
| 48 | |||
| 49 | return pc; | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp new file mode 100644 index 000000000..4a8cc1a1c --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer.cpp | |||
| @@ -0,0 +1,287 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::IAdd3Height; | ||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::Register; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | Node op_b = [&]() { | ||
| 24 | if (instr.is_b_imm) { | ||
| 25 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 26 | } else if (instr.is_b_gpr) { | ||
| 27 | return GetRegister(instr.gpr20); | ||
| 28 | } else { | ||
| 29 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 30 | } | ||
| 31 | }(); | ||
| 32 | |||
| 33 | switch (opcode->get().GetId()) { | ||
| 34 | case OpCode::Id::IADD_C: | ||
| 35 | case OpCode::Id::IADD_R: | ||
| 36 | case OpCode::Id::IADD_IMM: { | ||
| 37 | UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); | ||
| 38 | |||
| 39 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 40 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 41 | |||
| 42 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | ||
| 43 | |||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | ||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | case OpCode::Id::IADD3_C: | ||
| 49 | case OpCode::Id::IADD3_R: | ||
| 50 | case OpCode::Id::IADD3_IMM: { | ||
| 51 | Node op_c = GetRegister(instr.gpr39); | ||
| 52 | |||
| 53 | const auto ApplyHeight = [&](IAdd3Height height, Node value) { | ||
| 54 | switch (height) { | ||
| 55 | case IAdd3Height::None: | ||
| 56 | return value; | ||
| 57 | case IAdd3Height::LowerHalfWord: | ||
| 58 | return BitfieldExtract(value, 0, 16); | ||
| 59 | case IAdd3Height::UpperHalfWord: | ||
| 60 | return BitfieldExtract(value, 16, 16); | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", static_cast<u32>(height)); | ||
| 63 | return Immediate(0); | ||
| 64 | } | ||
| 65 | }; | ||
| 66 | |||
| 67 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 68 | op_a = ApplyHeight(instr.iadd3.height_a, op_a); | ||
| 69 | op_b = ApplyHeight(instr.iadd3.height_b, op_b); | ||
| 70 | op_c = ApplyHeight(instr.iadd3.height_c, op_c); | ||
| 71 | } | ||
| 72 | |||
| 73 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); | ||
| 74 | op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); | ||
| 75 | op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); | ||
| 76 | |||
| 77 | const Node value = [&]() { | ||
| 78 | const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); | ||
| 79 | if (opcode->get().GetId() != OpCode::Id::IADD3_R) { | ||
| 80 | return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); | ||
| 81 | } | ||
| 82 | const Node shifted = [&]() { | ||
| 83 | switch (instr.iadd3.mode) { | ||
| 84 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 85 | // TODO(tech4me): According to | ||
| 86 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 87 | // The addition between op_a and op_b should be done in uint33, more | ||
| 88 | // investigation required | ||
| 89 | return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, | ||
| 90 | Immediate(16)); | ||
| 91 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 92 | return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, | ||
| 93 | Immediate(16)); | ||
| 94 | default: | ||
| 95 | return add_ab; | ||
| 96 | } | ||
| 97 | }(); | ||
| 98 | return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); | ||
| 99 | }(); | ||
| 100 | |||
| 101 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 102 | SetRegister(bb, instr.gpr0, value); | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | case OpCode::Id::ISCADD_C: | ||
| 106 | case OpCode::Id::ISCADD_R: | ||
| 107 | case OpCode::Id::ISCADD_IMM: { | ||
| 108 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 109 | "Condition codes generation in ISCADD is not implemented"); | ||
| 110 | |||
| 111 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 112 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 113 | |||
| 114 | const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); | ||
| 115 | const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); | ||
| 116 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); | ||
| 117 | |||
| 118 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 119 | SetRegister(bb, instr.gpr0, value); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::POPC_C: | ||
| 123 | case OpCode::Id::POPC_R: | ||
| 124 | case OpCode::Id::POPC_IMM: { | ||
| 125 | if (instr.popc.invert) { | ||
| 126 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 127 | } | ||
| 128 | const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); | ||
| 129 | SetRegister(bb, instr.gpr0, value); | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | case OpCode::Id::SEL_C: | ||
| 133 | case OpCode::Id::SEL_R: | ||
| 134 | case OpCode::Id::SEL_IMM: { | ||
| 135 | const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 136 | const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); | ||
| 137 | SetRegister(bb, instr.gpr0, value); | ||
| 138 | break; | ||
| 139 | } | ||
| 140 | case OpCode::Id::LOP_C: | ||
| 141 | case OpCode::Id::LOP_R: | ||
| 142 | case OpCode::Id::LOP_IMM: { | ||
| 143 | if (instr.alu.lop.invert_a) | ||
| 144 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 145 | if (instr.alu.lop.invert_b) | ||
| 146 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 147 | |||
| 148 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 149 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 150 | instr.generates_cc); | ||
| 151 | break; | ||
| 152 | } | ||
| 153 | case OpCode::Id::LOP3_C: | ||
| 154 | case OpCode::Id::LOP3_R: | ||
| 155 | case OpCode::Id::LOP3_IMM: { | ||
| 156 | const Node op_c = GetRegister(instr.gpr39); | ||
| 157 | const Node lut = [&]() { | ||
| 158 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 159 | return Immediate(instr.alu.lop3.GetImmLut28()); | ||
| 160 | } else { | ||
| 161 | return Immediate(instr.alu.lop3.GetImmLut48()); | ||
| 162 | } | ||
| 163 | }(); | ||
| 164 | |||
| 165 | WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | case OpCode::Id::IMNMX_C: | ||
| 169 | case OpCode::Id::IMNMX_R: | ||
| 170 | case OpCode::Id::IMNMX_IMM: { | ||
| 171 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 172 | |||
| 173 | const bool is_signed = instr.imnmx.is_signed; | ||
| 174 | |||
| 175 | const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 176 | const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); | ||
| 177 | const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); | ||
| 178 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 179 | |||
| 180 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 181 | SetRegister(bb, instr.gpr0, value); | ||
| 182 | break; | ||
| 183 | } | ||
| 184 | case OpCode::Id::LEA_R2: | ||
| 185 | case OpCode::Id::LEA_R1: | ||
| 186 | case OpCode::Id::LEA_IMM: | ||
| 187 | case OpCode::Id::LEA_RZ: | ||
| 188 | case OpCode::Id::LEA_HI: { | ||
| 189 | const auto [op_a, op_b, op_c] = [&]() -> std::tuple<Node, Node, Node> { | ||
| 190 | switch (opcode->get().GetId()) { | ||
| 191 | case OpCode::Id::LEA_R2: { | ||
| 192 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), | ||
| 193 | Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; | ||
| 194 | } | ||
| 195 | |||
| 196 | case OpCode::Id::LEA_R1: { | ||
| 197 | const bool neg = instr.lea.r1.neg != 0; | ||
| 198 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 199 | GetRegister(instr.gpr20), | ||
| 200 | Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; | ||
| 201 | } | ||
| 202 | |||
| 203 | case OpCode::Id::LEA_IMM: { | ||
| 204 | const bool neg = instr.lea.imm.neg != 0; | ||
| 205 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), | ||
| 206 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 207 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 208 | } | ||
| 209 | |||
| 210 | case OpCode::Id::LEA_RZ: { | ||
| 211 | const bool neg = instr.lea.rz.neg != 0; | ||
| 212 | return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), | ||
| 213 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 214 | Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; | ||
| 215 | } | ||
| 216 | |||
| 217 | case OpCode::Id::LEA_HI: | ||
| 218 | default: | ||
| 219 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 220 | |||
| 221 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), | ||
| 222 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 223 | } | ||
| 224 | }(); | ||
| 225 | |||
| 226 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 227 | "Unhandled LEA Predicate"); | ||
| 228 | |||
| 229 | const Node shifted_c = | ||
| 230 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, Immediate(1), op_c); | ||
| 231 | const Node mul_bc = Operation(OperationCode::IMul, NO_PRECISE, op_b, shifted_c); | ||
| 232 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, op_a, mul_bc); | ||
| 233 | |||
| 234 | SetRegister(bb, instr.gpr0, value); | ||
| 235 | |||
| 236 | break; | ||
| 237 | } | ||
| 238 | default: | ||
| 239 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); | ||
| 240 | } | ||
| 241 | |||
| 242 | return pc; | ||
| 243 | } | ||
| 244 | |||
| 245 | void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 246 | Node imm_lut, bool sets_cc) { | ||
| 247 | constexpr u32 lop_iterations = 32; | ||
| 248 | const Node one = Immediate(1); | ||
| 249 | const Node two = Immediate(2); | ||
| 250 | |||
| 251 | Node value{}; | ||
| 252 | for (u32 i = 0; i < lop_iterations; ++i) { | ||
| 253 | const Node shift_amount = Immediate(i); | ||
| 254 | |||
| 255 | const Node a = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_c, shift_amount); | ||
| 256 | const Node pack_0 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, one); | ||
| 257 | |||
| 258 | const Node b = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_b, shift_amount); | ||
| 259 | const Node c = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, b, one); | ||
| 260 | const Node pack_1 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, c, one); | ||
| 261 | |||
| 262 | const Node d = Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, op_a, shift_amount); | ||
| 263 | const Node e = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, d, one); | ||
| 264 | const Node pack_2 = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, e, two); | ||
| 265 | |||
| 266 | const Node pack_01 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_0, pack_1); | ||
| 267 | const Node pack_012 = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, pack_01, pack_2); | ||
| 268 | |||
| 269 | const Node shifted_bit = | ||
| 270 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, imm_lut, pack_012); | ||
| 271 | const Node bit = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, shifted_bit, one); | ||
| 272 | |||
| 273 | const Node right = | ||
| 274 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, bit, shift_amount); | ||
| 275 | |||
| 276 | if (i > 0) { | ||
| 277 | value = Operation(OperationCode::IBitwiseOr, NO_PRECISE, value, right); | ||
| 278 | } else { | ||
| 279 | value = right; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | |||
| 283 | SetInternalFlagsFromInteger(bb, value, sets_cc); | ||
| 284 | SetRegister(bb, dest, value); | ||
| 285 | } | ||
| 286 | |||
| 287 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp new file mode 100644 index 000000000..b26a6e473 --- /dev/null +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::LogicOperation; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::PredicateResultMode; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 25 | |||
| 26 | switch (opcode->get().GetId()) { | ||
| 27 | case OpCode::Id::IADD32I: { | ||
| 28 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 29 | |||
| 30 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); | ||
| 31 | |||
| 32 | const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); | ||
| 33 | |||
| 34 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); | ||
| 35 | SetRegister(bb, instr.gpr0, value); | ||
| 36 | break; | ||
| 37 | } | ||
| 38 | case OpCode::Id::LOP32I: { | ||
| 39 | if (instr.alu.lop32i.invert_a) | ||
| 40 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 41 | |||
| 42 | if (instr.alu.lop32i.invert_b) | ||
| 43 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 44 | |||
| 45 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, | ||
| 46 | PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); | ||
| 47 | break; | ||
| 48 | } | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 51 | opcode->get().GetName()); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, | ||
| 58 | Node op_a, Node op_b, PredicateResultMode predicate_mode, | ||
| 59 | Pred predicate, bool sets_cc) { | ||
| 60 | const Node result = [&]() { | ||
| 61 | switch (logic_op) { | ||
| 62 | case LogicOperation::And: | ||
| 63 | return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); | ||
| 64 | case LogicOperation::Or: | ||
| 65 | return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); | ||
| 66 | case LogicOperation::Xor: | ||
| 67 | return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); | ||
| 68 | case LogicOperation::PassB: | ||
| 69 | return op_b; | ||
| 70 | default: | ||
| 71 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", static_cast<u32>(logic_op)); | ||
| 72 | return Immediate(0); | ||
| 73 | } | ||
| 74 | }(); | ||
| 75 | |||
| 76 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 77 | SetRegister(bb, dest, result); | ||
| 78 | |||
| 79 | // Write the predicate value depending on the predicate mode. | ||
| 80 | switch (predicate_mode) { | ||
| 81 | case PredicateResultMode::None: | ||
| 82 | // Do nothing. | ||
| 83 | return; | ||
| 84 | case PredicateResultMode::NotZero: { | ||
| 85 | // Set the predicate to true if the result is not zero. | ||
| 86 | const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); | ||
| 87 | SetPredicate(bb, static_cast<u64>(predicate), compare); | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | default: | ||
| 91 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", | ||
| 92 | static_cast<u32>(predicate_mode)); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp new file mode 100644 index 000000000..0734141b0 --- /dev/null +++ b/src/video_core/shader/decode/bfe.cpp | |||
| @@ -0,0 +1,49 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF(instr.bfe.negate_b); | ||
| 20 | |||
| 21 | Node op_a = GetRegister(instr.gpr8); | ||
| 22 | op_a = GetOperandAbsNegInteger(op_a, false, instr.bfe.negate_a, false); | ||
| 23 | |||
| 24 | switch (opcode->get().GetId()) { | ||
| 25 | case OpCode::Id::BFE_IMM: { | ||
| 26 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 27 | "Condition codes generation in BFE is not implemented"); | ||
| 28 | |||
| 29 | const Node inner_shift_imm = Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue())); | ||
| 30 | const Node outer_shift_imm = | ||
| 31 | Immediate(static_cast<u32>(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position)); | ||
| 32 | |||
| 33 | const Node inner_shift = | ||
| 34 | Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, inner_shift_imm); | ||
| 35 | const Node outer_shift = | ||
| 36 | Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); | ||
| 37 | |||
| 38 | SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc); | ||
| 39 | SetRegister(bb, instr.gpr0, outer_shift); | ||
| 40 | break; | ||
| 41 | } | ||
| 42 | default: | ||
| 43 | UNIMPLEMENTED_MSG("Unhandled BFE instruction: {}", opcode->get().GetName()); | ||
| 44 | } | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp new file mode 100644 index 000000000..942d6729d --- /dev/null +++ b/src/video_core/shader/decode/bfi.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { | ||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::BFI_IMM_R: | ||
| 22 | return {GetRegister(instr.gpr39), Immediate(instr.alu.GetSignedImm20_20())}; | ||
| 23 | default: | ||
| 24 | UNREACHABLE(); | ||
| 25 | return {Immediate(0), Immediate(0)}; | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | const Node insert = GetRegister(instr.gpr8); | ||
| 29 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 30 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 31 | |||
| 32 | const Node value = | ||
| 33 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 36 | SetRegister(bb, instr.gpr0, value); | ||
| 37 | |||
| 38 | return pc; | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp new file mode 100644 index 000000000..ee18d3a99 --- /dev/null +++ b/src/video_core/shader/decode/conversion.cpp | |||
| @@ -0,0 +1,149 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Register; | ||
| 15 | |||
/// Decodes the conversion instructions I2I, I2F, F2F and F2I, emitting the
/// equivalent IR into @p bb. Returns the (unmodified) program counter.
u32 ShaderIR::DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::I2I_R: {
        // Integer-to-integer conversion (register source only).
        UNIMPLEMENTED_IF(instr.conversion.selector);

        const bool input_signed = instr.conversion.is_input_signed;
        const bool output_signed = instr.conversion.is_output_signed;

        // Resize the source integer, then apply abs/negate modifiers.
        Node value = GetRegister(instr.gpr20);
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);

        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, instr.conversion.negate_a,
                                        input_signed);
        if (input_signed != output_signed) {
            // Reinterpret the value with the destination's signedness.
            value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C: {
        // Integer-to-float conversion; only word-sized destinations are handled.
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.selector);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");

        // Operand B comes from a register or a constant buffer entry.
        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
            }
        }();
        const bool input_signed = instr.conversion.is_input_signed;
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
        // abs is applied on the integer input; negate is applied after the
        // float cast, on the floating-point result.
        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C: {
        // Float-to-float conversion (rounding); only word-to-word is handled.
        UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");

        // Operand B comes from a register or a constant buffer entry.
        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
            }
        }();

        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        // Apply the rounding mode encoded in the instruction.
        value = [&]() {
            switch (instr.conversion.f2f.rounding) {
            case Tegra::Shader::F2fRoundingOp::None:
                return value;
            case Tegra::Shader::F2fRoundingOp::Round:
                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2fRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            }
            UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                              static_cast<u32>(instr.conversion.f2f.rounding.Value()));
            return Immediate(0);
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);

        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C: {
        // Float-to-integer conversion; only word-sized sources are handled.
        UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");
        // Operand B comes from a register or a constant buffer entry.
        Node value = [&]() {
            if (instr.is_b_gpr) {
                return GetRegister(instr.gpr20);
            } else {
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset);
            }
        }();

        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);

        // Round before casting; 'None' leaves the value as-is.
        value = [&]() {
            switch (instr.conversion.f2i.rounding) {
            case Tegra::Shader::F2iRoundingOp::None:
                return value;
            case Tegra::Shader::F2iRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            default:
                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                                  static_cast<u32>(instr.conversion.f2i.rounding.Value()));
                return Immediate(0);
            }
        }();
        // Cast to integer with the destination signedness, then resize.
        const bool is_signed = instr.conversion.is_output_signed;
        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
        value = ConvertIntegerSize(value, instr.conversion.dest_size, is_signed);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 148 | |||
| 149 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/decode_integer_set.cpp b/src/video_core/shader/decode/decode_integer_set.cpp new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/src/video_core/shader/decode/decode_integer_set.cpp | |||
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp new file mode 100644 index 000000000..be8dc2230 --- /dev/null +++ b/src/video_core/shader/decode/ffma.cpp | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_0 != 1, "FFMA tab5980_0({}) not implemented", | ||
| 21 | instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO | ||
| 22 | UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", | ||
| 23 | instr.ffma.tab5980_1.Value()); | ||
| 24 | |||
| 25 | const Node op_a = GetRegister(instr.gpr8); | ||
| 26 | |||
| 27 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 28 | switch (opcode->get().GetId()) { | ||
| 29 | case OpCode::Id::FFMA_CR: { | ||
| 30 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), | ||
| 31 | GetRegister(instr.gpr39)}; | ||
| 32 | } | ||
| 33 | case OpCode::Id::FFMA_RR: | ||
| 34 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 35 | case OpCode::Id::FFMA_RC: { | ||
| 36 | return {GetRegister(instr.gpr39), | ||
| 37 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)}; | ||
| 38 | } | ||
| 39 | case OpCode::Id::FFMA_IMM: | ||
| 40 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 41 | default: | ||
| 42 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 43 | return {Immediate(0), Immediate(0)}; | ||
| 44 | } | ||
| 45 | }(); | ||
| 46 | |||
| 47 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 48 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 49 | |||
| 50 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 51 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 52 | |||
| 53 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 54 | SetRegister(bb, instr.gpr0, value); | ||
| 55 | |||
| 56 | return pc; | ||
| 57 | } | ||
| 58 | |||
| 59 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp new file mode 100644 index 000000000..ba846f1bd --- /dev/null +++ b/src/video_core/shader/decode/float_set.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the | ||
| 35 | // condition is true, and to 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp new file mode 100644 index 000000000..e88b04d18 --- /dev/null +++ b/src/video_core/shader/decode/float_set_predicate.cpp | |||
| @@ -0,0 +1,56 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 21 | instr.fsetp.neg_a != 0); | ||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false); | ||
| 32 | |||
| 33 | // We can't use the constant predicate as destination. | ||
| 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 35 | |||
| 36 | const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b); | ||
| 37 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 38 | |||
| 39 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 40 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 41 | |||
| 42 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 43 | SetPredicate(bb, instr.fsetp.pred3, value); | ||
| 44 | |||
| 45 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 46 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 47 | // if enabled | ||
| 48 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 49 | const Node second_value = Operation(combiner, negated_pred, second_pred); | ||
| 50 | SetPredicate(bb, instr.fsetp.pred0, second_value); | ||
| 51 | } | ||
| 52 | |||
| 53 | return pc; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp new file mode 100644 index 000000000..dfd7cb98f --- /dev/null +++ b/src/video_core/shader/decode/half_set.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | UNIMPLEMENTED_IF(instr.hset2.ftz != 0); | ||
| 22 | |||
| 23 | // instr.hset2.type_a | ||
| 24 | // instr.hset2.type_b | ||
| 25 | Node op_a = GetRegister(instr.gpr8); | ||
| 26 | Node op_b = [&]() { | ||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::HSET2_R: | ||
| 29 | return GetRegister(instr.gpr20); | ||
| 30 | default: | ||
| 31 | UNREACHABLE(); | ||
| 32 | return Immediate(0); | ||
| 33 | } | ||
| 34 | }(); | ||
| 35 | |||
| 36 | op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); | ||
| 37 | op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); | ||
| 38 | |||
| 39 | const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | ||
| 40 | |||
| 41 | MetaHalfArithmetic meta{false, {instr.hset2.type_a, instr.hset2.type_b}}; | ||
| 42 | const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, meta, op_a, op_b); | ||
| 43 | |||
| 44 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 45 | |||
| 46 | // HSET2 operates on each half float in the pack. | ||
| 47 | std::array<Node, 2> values; | ||
| 48 | for (u32 i = 0; i < 2; ++i) { | ||
| 49 | const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; | ||
| 50 | const Node true_value = Immediate(raw_value << (i * 16)); | ||
| 51 | const Node false_value = Immediate(0); | ||
| 52 | |||
| 53 | const Node comparison = | ||
| 54 | Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 55 | const Node predicate = Operation(combiner, comparison, second_pred); | ||
| 56 | |||
| 57 | values[i] = | ||
| 58 | Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); | ||
| 59 | } | ||
| 60 | |||
| 61 | const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); | ||
| 62 | SetRegister(bb, instr.gpr0, value); | ||
| 63 | |||
| 64 | return pc; | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp new file mode 100644 index 000000000..53c44ae5a --- /dev/null +++ b/src/video_core/shader/decode/half_set_predicate.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
u32 ShaderIR::DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Flush-to-zero mode is not implemented.
    UNIMPLEMENTED_IF(instr.hsetp2.ftz != 0);

    Node op_a = GetRegister(instr.gpr8);
    op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);

    const Node op_b = [&]() {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HSETP2_R:
            // NOTE(review): abs_a is passed as operand B's absolute-value flag; the sibling
            // set-predicate decoders (e.g. FSETP) use a dedicated abs_b bit here. This looks
            // like a copy-paste slip -- confirm against the HSETP2 encoding in
            // shader_bytecode.h before changing it.
            return GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.abs_a,
                                        instr.hsetp2.negate_b);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // We can't use the constant predicate as destination.
    ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));

    const Node second_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);

    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    // h_and selects whether both halves (All2) or either half (Any2) must pass the comparison.
    const OperationCode pair_combiner =
        instr.hsetp2.h_and ? OperationCode::LogicalAll2 : OperationCode::LogicalAny2;

    MetaHalfArithmetic meta = {false, {instr.hsetp2.type_a, instr.hsetp2.type_b}};
    const Node comparison = GetPredicateComparisonHalf(instr.hsetp2.cond, meta, op_a, op_b);
    const Node first_pred = Operation(pair_combiner, comparison);

    // Set the primary predicate to the result of Predicate OP SecondPredicate
    const Node value = Operation(combiner, first_pred, second_pred);
    SetPredicate(bb, instr.hsetp2.pred3, value);

    if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
        // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
        const Node negated_pred = Operation(OperationCode::LogicalNegate, first_pred);
        SetPredicate(bb, instr.hsetp2.pred0, Operation(combiner, negated_pred, second_pred));
    }

    return pc;
}
| 61 | |||
| 62 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp new file mode 100644 index 000000000..4a6b945f9 --- /dev/null +++ b/src/video_core/shader/decode/hfma2.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfPrecision; | ||
| 15 | using Tegra::Shader::HalfType; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
u32 ShaderIR::DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Only the default precision mode is implemented; RR keeps its flags in a sub-struct.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        UNIMPLEMENTED_IF(instr.hfma2.rr.precision != HalfPrecision::None);
    } else {
        UNIMPLEMENTED_IF(instr.hfma2.precision != HalfPrecision::None);
    }

    // Half-pair swizzle used when an operand has no explicit type field in the encoding.
    constexpr auto identity = HalfType::H0_H1;

    const HalfType type_a = instr.hfma2.type_a;
    const Node op_a = GetRegister(instr.gpr8);

    bool neg_b{}, neg_c{};
    // Select operands B and C (plus their swizzles and the saturate flag) per encoding:
    //   CR:    B from const buffer, C from gpr39.
    //   RC:    B from gpr39,        C from const buffer.
    //   RR:    B from gpr20,        C from gpr39.
    //   IMM_R: B is a packed half immediate, C from gpr39.
    auto [saturate, type_b, op_b, type_c,
          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HFMA2_CR:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_b,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset), instr.hfma2.type_reg39,
                    GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
                    instr.hfma2.type_b, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
        case OpCode::Id::HFMA2_RR:
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_IMM_R:
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        default:
            // NOTE(review): unknown encodings fall through silently with zero operands; the
            // sibling decoders call UNREACHABLE() here -- consider doing the same.
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();
    UNIMPLEMENTED_IF_MSG(saturate, "HFMA2 saturation is not implemented");

    // Structured bindings are reassignable: fold the negate flags into B and C.
    op_b = GetOperandAbsNegHalf(op_b, false, neg_b);
    op_c = GetOperandAbsNegHalf(op_c, false, neg_c);

    // value = a * b + c, then merged into gpr0 according to the instruction's merge mode.
    MetaHalfArithmetic meta{true, {type_a, type_b, type_c}};
    Node value = Operation(OperationCode::HFma, meta, op_a, op_b, op_c);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
| 75 | |||
| 76 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp new file mode 100644 index 000000000..85e67b03b --- /dev/null +++ b/src/video_core/shader/decode/integer_set.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetRegister(instr.gpr8); | ||
| 20 | const Node op_b = [&]() { | ||
| 21 | if (instr.is_b_imm) { | ||
| 22 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 23 | } else if (instr.is_b_gpr) { | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | } else { | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 27 | } | ||
| 28 | }(); | ||
| 29 | |||
| 30 | // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition | ||
| 31 | // is true, and to 0 otherwise. | ||
| 32 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 33 | const Node first_pred = | ||
| 34 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 35 | |||
| 36 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 37 | |||
| 38 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 39 | |||
| 40 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 41 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 42 | const Node value = | ||
| 43 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 44 | |||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | |||
| 47 | return pc; | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp new file mode 100644 index 000000000..c8b105a08 --- /dev/null +++ b/src/video_core/shader/decode/integer_set_predicate.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // We can't use the constant predicate as destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp new file mode 100644 index 000000000..ae71672d6 --- /dev/null +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -0,0 +1,688 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Shader::Attribute; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | using Tegra::Shader::TextureMiscMode; | ||
| 20 | using Tegra::Shader::TextureProcessMode; | ||
| 21 | using Tegra::Shader::TextureType; | ||
| 22 | |||
| 23 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 24 | switch (texture_type) { | ||
| 25 | case TextureType::Texture1D: | ||
| 26 | return 1; | ||
| 27 | case TextureType::Texture2D: | ||
| 28 | return 2; | ||
| 29 | case TextureType::Texture3D: | ||
| 30 | case TextureType::TextureCube: | ||
| 31 | return 3; | ||
| 32 | default: | ||
| 33 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type)); | ||
| 34 | return 0; | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 39 | const Instruction instr = {program_code[pc]}; | ||
| 40 | const auto opcode = OpCode::Decode(instr); | ||
| 41 | |||
| 42 | switch (opcode->get().GetId()) { | ||
| 43 | case OpCode::Id::LD_A: { | ||
| 44 | // Note: Shouldn't this be interp mode flat? As in no interpolation made. | ||
| 45 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 46 | "Indirect attribute loads are not supported"); | ||
| 47 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 48 | "Unaligned attribute loads are not supported"); | ||
| 49 | |||
| 50 | Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective, | ||
| 51 | Tegra::Shader::IpaSampleMode::Default}; | ||
| 52 | |||
| 53 | u64 next_element = instr.attribute.fmt20.element; | ||
| 54 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 55 | |||
| 56 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 57 | const Node buffer = GetRegister(instr.gpr39); | ||
| 58 | const Node attribute = GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 59 | next_element, input_mode, buffer); | ||
| 60 | |||
| 61 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 62 | |||
| 63 | // Load the next attribute element into the following register. If the element | ||
| 64 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 65 | // attribute. | ||
| 66 | next_element = (next_element + 1) % 4; | ||
| 67 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 68 | }; | ||
| 69 | |||
| 70 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 71 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 72 | LoadNextElement(reg_offset); | ||
| 73 | } | ||
| 74 | break; | ||
| 75 | } | ||
| 76 | case OpCode::Id::LD_C: { | ||
| 77 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 78 | |||
| 79 | Node index = GetRegister(instr.gpr8); | ||
| 80 | |||
| 81 | const Node op_a = | ||
| 82 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, index); | ||
| 83 | |||
| 84 | switch (instr.ld_c.type.Value()) { | ||
| 85 | case Tegra::Shader::UniformType::Single: | ||
| 86 | SetRegister(bb, instr.gpr0, op_a); | ||
| 87 | break; | ||
| 88 | |||
| 89 | case Tegra::Shader::UniformType::Double: { | ||
| 90 | const Node op_b = | ||
| 91 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); | ||
| 92 | |||
| 93 | SetTemporal(bb, 0, op_a); | ||
| 94 | SetTemporal(bb, 1, op_b); | ||
| 95 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 96 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | ||
| 97 | break; | ||
| 98 | } | ||
| 99 | default: | ||
| 100 | UNIMPLEMENTED_MSG("Unhandled type: {}", static_cast<unsigned>(instr.ld_c.type.Value())); | ||
| 101 | } | ||
| 102 | break; | ||
| 103 | } | ||
| 104 | case OpCode::Id::LD_L: { | ||
| 105 | UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}", | ||
| 106 | static_cast<unsigned>(instr.ld_l.unknown.Value())); | ||
| 107 | |||
| 108 | const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8), | ||
| 109 | Immediate(static_cast<s32>(instr.smem_imm))); | ||
| 110 | const Node lmem = GetLocalMemory(index); | ||
| 111 | |||
| 112 | switch (instr.ldst_sl.type.Value()) { | ||
| 113 | case Tegra::Shader::StoreType::Bytes32: | ||
| 114 | SetRegister(bb, instr.gpr0, lmem); | ||
| 115 | break; | ||
| 116 | default: | ||
| 117 | UNIMPLEMENTED_MSG("LD_L Unhandled type: {}", | ||
| 118 | static_cast<unsigned>(instr.ldst_sl.type.Value())); | ||
| 119 | } | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::ST_A: { | ||
| 123 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 124 | "Indirect attribute loads are not supported"); | ||
| 125 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 126 | "Unaligned attribute loads are not supported"); | ||
| 127 | |||
| 128 | u64 next_element = instr.attribute.fmt20.element; | ||
| 129 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 130 | |||
| 131 | const auto StoreNextElement = [&](u32 reg_offset) { | ||
| 132 | const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 133 | next_element, GetRegister(instr.gpr39)); | ||
| 134 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 135 | |||
| 136 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 137 | |||
| 138 | // Load the next attribute element into the following register. If the element | ||
| 139 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 140 | // attribute. | ||
| 141 | next_element = (next_element + 1) % 4; | ||
| 142 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 143 | }; | ||
| 144 | |||
| 145 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 146 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 147 | StoreNextElement(reg_offset); | ||
| 148 | } | ||
| 149 | |||
| 150 | break; | ||
| 151 | } | ||
| 152 | case OpCode::Id::ST_L: { | ||
| 153 | UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}", | ||
| 154 | static_cast<u32>(instr.st_l.unknown.Value())); | ||
| 155 | |||
| 156 | const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), | ||
| 157 | Immediate(static_cast<s32>(instr.smem_imm))); | ||
| 158 | |||
| 159 | switch (instr.ldst_sl.type.Value()) { | ||
| 160 | case Tegra::Shader::StoreType::Bytes32: | ||
| 161 | SetLocalMemory(bb, index, GetRegister(instr.gpr0)); | ||
| 162 | break; | ||
| 163 | default: | ||
| 164 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | ||
| 165 | static_cast<u32>(instr.ldst_sl.type.Value())); | ||
| 166 | } | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | case OpCode::Id::TEX: { | ||
| 170 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 171 | "AOFFI is not implemented"); | ||
| 172 | |||
| 173 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 174 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||
| 175 | } | ||
| 176 | |||
| 177 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 178 | const bool is_array = instr.tex.array != 0; | ||
| 179 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 180 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 181 | WriteTexInstructionFloat( | ||
| 182 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | case OpCode::Id::TEXS: { | ||
| 186 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 187 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 188 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 189 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 190 | |||
| 191 | if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 192 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | ||
| 193 | } | ||
| 194 | |||
| 195 | const Node4 components = | ||
| 196 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 197 | |||
| 198 | if (instr.texs.fp32_flag) { | ||
| 199 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 200 | } else { | ||
| 201 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 202 | } | ||
| 203 | break; | ||
| 204 | } | ||
| 205 | case OpCode::Id::TLD4: { | ||
| 206 | ASSERT(instr.tld4.array == 0); | ||
| 207 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 208 | "AOFFI is not implemented"); | ||
| 209 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 210 | "NDV is not implemented"); | ||
| 211 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||
| 212 | "PTP is not implemented"); | ||
| 213 | |||
| 214 | if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 215 | LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete"); | ||
| 216 | } | ||
| 217 | |||
| 218 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 219 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 220 | const bool is_array = instr.tld4.array != 0; | ||
| 221 | WriteTexInstructionFloat(bb, instr, | ||
| 222 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case OpCode::Id::TLD4S: { | ||
| 226 | UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 227 | "AOFFI is not implemented"); | ||
| 228 | |||
| 229 | if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 230 | LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete"); | ||
| 231 | } | ||
| 232 | |||
| 233 | const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 234 | const Node op_a = GetRegister(instr.gpr8); | ||
| 235 | const Node op_b = GetRegister(instr.gpr20); | ||
| 236 | |||
| 237 | std::vector<Node> coords; | ||
| 238 | |||
| 239 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 240 | if (depth_compare) { | ||
| 241 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 242 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 243 | coords.push_back(op_a); | ||
| 244 | coords.push_back(op_y); | ||
| 245 | coords.push_back(op_b); | ||
| 246 | } else { | ||
| 247 | coords.push_back(op_a); | ||
| 248 | coords.push_back(op_b); | ||
| 249 | } | ||
| 250 | const auto num_coords = static_cast<u32>(coords.size()); | ||
| 251 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | ||
| 252 | |||
| 253 | const auto& sampler = | ||
| 254 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | ||
| 255 | |||
| 256 | Node4 values; | ||
| 257 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 258 | auto params = coords; | ||
| 259 | MetaTexture meta{sampler, element, num_coords}; | ||
| 260 | values[element] = | ||
| 261 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 262 | } | ||
| 263 | |||
| 264 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 265 | break; | ||
| 266 | } | ||
| 267 | case OpCode::Id::TXQ: { | ||
| 268 | if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 269 | LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete"); | ||
| 270 | } | ||
| 271 | |||
| 272 | // TODO: The new commits on the texture refactor, change the way samplers work. | ||
| 273 | // Sadly, not all texture instructions specify the type of texture their sampler | ||
| 274 | // uses. This must be fixed at a later instance. | ||
| 275 | const auto& sampler = | ||
| 276 | GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 277 | |||
| 278 | switch (instr.txq.query_type) { | ||
| 279 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 280 | for (u32 element = 0; element < 4; ++element) { | ||
| 281 | MetaTexture meta{sampler, element}; | ||
| 282 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | ||
| 283 | std::move(meta), GetRegister(instr.gpr8)); | ||
| 284 | SetTemporal(bb, element, value); | ||
| 285 | } | ||
| 286 | for (u32 i = 0; i < 4; ++i) { | ||
| 287 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 288 | } | ||
| 289 | break; | ||
| 290 | } | ||
| 291 | default: | ||
| 292 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", | ||
| 293 | static_cast<u32>(instr.txq.query_type.Value())); | ||
| 294 | } | ||
| 295 | break; | ||
| 296 | } | ||
| 297 | case OpCode::Id::TMML: { | ||
| 298 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 299 | "NDV is not implemented"); | ||
| 300 | |||
| 301 | if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 302 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 303 | } | ||
| 304 | |||
| 305 | auto texture_type = instr.tmml.texture_type.Value(); | ||
| 306 | const bool is_array = instr.tmml.array != 0; | ||
| 307 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 308 | |||
| 309 | std::vector<Node> coords; | ||
| 310 | |||
| 311 | // TODO: Add coordinates for different samplers once other texture types are implemented. | ||
| 312 | switch (texture_type) { | ||
| 313 | case TextureType::Texture1D: | ||
| 314 | coords.push_back(GetRegister(instr.gpr8)); | ||
| 315 | break; | ||
| 316 | case TextureType::Texture2D: | ||
| 317 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 318 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 319 | break; | ||
| 320 | default: | ||
| 321 | UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type)); | ||
| 322 | |||
| 323 | // Fallback to interpreting as a 2D texture for now | ||
| 324 | coords.push_back(GetRegister(instr.gpr8.Value() + 0)); | ||
| 325 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 326 | texture_type = TextureType::Texture2D; | ||
| 327 | } | ||
| 328 | |||
| 329 | for (u32 element = 0; element < 2; ++element) { | ||
| 330 | auto params = coords; | ||
| 331 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; | ||
| 332 | const Node value = | ||
| 333 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 334 | SetTemporal(bb, element, value); | ||
| 335 | } | ||
| 336 | for (u32 element = 0; element < 2; ++element) { | ||
| 337 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 338 | } | ||
| 339 | |||
| 340 | break; | ||
| 341 | } | ||
| 342 | case OpCode::Id::TLDS: { | ||
| 343 | const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 344 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 345 | |||
| 346 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 347 | "AOFFI is not implemented"); | ||
| 348 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 349 | |||
| 350 | if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) { | ||
| 351 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | ||
| 352 | } | ||
| 353 | |||
| 354 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); | ||
| 355 | break; | ||
| 356 | } | ||
| 357 | default: | ||
| 358 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 359 | } | ||
| 360 | |||
| 361 | return pc; | ||
| 362 | } | ||
| 363 | |||
| 364 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | ||
| 365 | bool is_array, bool is_shadow) { | ||
| 366 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | ||
| 367 | |||
| 368 | // If this sampler has already been used, return the existing mapping. | ||
| 369 | const auto itr = | ||
| 370 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 371 | [&](const Sampler& entry) { return entry.GetOffset() == offset; }); | ||
| 372 | if (itr != used_samplers.end()) { | ||
| 373 | ASSERT(itr->GetType() == type && itr->IsArray() == is_array && | ||
| 374 | itr->IsShadow() == is_shadow); | ||
| 375 | return *itr; | ||
| 376 | } | ||
| 377 | |||
| 378 | // Otherwise create a new mapping for this sampler | ||
| 379 | const std::size_t next_index = used_samplers.size(); | ||
| 380 | const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||
| 381 | return *used_samplers.emplace(entry).first; | ||
| 382 | } | ||
| 383 | |||
| 384 | void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 385 | const Node4& components) { | ||
| 386 | u32 dest_elem = 0; | ||
| 387 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 388 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 389 | // Skip disabled components | ||
| 390 | continue; | ||
| 391 | } | ||
| 392 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 393 | } | ||
| 394 | // After writing values in temporals, move them to the real registers | ||
| 395 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 396 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | |||
| 400 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 401 | const Node4& components) { | ||
| 402 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 403 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | ||
| 404 | |||
| 405 | u32 dest_elem = 0; | ||
| 406 | for (u32 component = 0; component < 4; ++component) { | ||
| 407 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 408 | continue; | ||
| 409 | SetTemporal(bb, dest_elem++, components[component]); | ||
| 410 | } | ||
| 411 | |||
| 412 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 413 | if (i < 2) { | ||
| 414 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 415 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); | ||
| 416 | } else { | ||
| 417 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 418 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 419 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); | ||
| 420 | } | ||
| 421 | } | ||
| 422 | } | ||
| 423 | |||
| 424 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, | ||
| 425 | const Node4& components) { | ||
| 426 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | ||
| 427 | // float instruction). | ||
| 428 | |||
| 429 | Node4 values; | ||
| 430 | u32 dest_elem = 0; | ||
| 431 | for (u32 component = 0; component < 4; ++component) { | ||
| 432 | if (!instr.texs.IsComponentEnabled(component)) | ||
| 433 | continue; | ||
| 434 | values[dest_elem++] = components[component]; | ||
| 435 | } | ||
| 436 | if (dest_elem == 0) | ||
| 437 | return; | ||
| 438 | |||
| 439 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 440 | |||
| 441 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 442 | if (dest_elem <= 2) { | ||
| 443 | SetRegister(bb, instr.gpr0, first_value); | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | SetTemporal(bb, 0, first_value); | ||
| 448 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 449 | |||
| 450 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 451 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 452 | } | ||
| 453 | |||
/// Emits the generic texture sampling code shared by TEX/TEXS, returning one
/// node per RGBA element.
/// @param array_offset Index within coords of the array layer (meaningful only
///                     when is_array is set).
/// @param bias_offset  Offset from gpr20 at which the lod/bias register lives.
/// @param coords       Coordinate nodes; extended in place with lod/bias.
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, bool depth_compare, bool is_array,
                               std::size_t array_offset, std::size_t bias_offset,
                               std::vector<Node>&& coords) {
    // 3D array/shadow and shadow cube-array accesses are not handled here.
    UNIMPLEMENTED_IF_MSG(
        (texture_type == TextureType::Texture3D && (is_array || depth_compare)) ||
            (texture_type == TextureType::TextureCube && is_array && depth_compare),
        "This method is not supported.");

    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);

    // These process modes carry an explicit lod (or a forced lod of zero for LZ).
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;

    // LOD selection (either via bias or explicit textureLod) not supported in GL for
    // sampler2DArrayShadow and samplerCubeArrayShadow.
    const bool gl_lod_supported =
        !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
          (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));

    const OperationCode read_method =
        lod_needed && gl_lod_supported ? OperationCode::F4TextureLod : OperationCode::F4Texture;

    UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);

    std::optional<u32> array_offset_value;
    if (is_array)
        array_offset_value = static_cast<u32>(array_offset);

    // Count of real coordinates, captured before lod/bias is appended below.
    const auto coords_count = static_cast<u32>(coords.size());

    if (process_mode != TextureProcessMode::None && gl_lod_supported) {
        if (process_mode == TextureProcessMode::LZ) {
            // LZ samples at lod zero.
            coords.push_back(Immediate(0.0f));
        } else {
            // If present, lod or bias are always stored in the register indexed by the gpr20
            // field with an offset depending on the usage of the other registers
            coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
        }
    }

    // Emit one operation per RGBA element; each gets its own copy of the params.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto params = coords;
        MetaTexture meta{sampler, element, coords_count, array_offset_value};
        values[element] = Operation(read_method, std::move(meta), std::move(params));
    }

    return values;
}
| 505 | |||
| 506 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 507 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 508 | const bool lod_bias_enabled = | ||
| 509 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 510 | |||
| 511 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||
| 512 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||
| 513 | // If enabled arrays index is always stored in the gpr8 field | ||
| 514 | const u64 array_register = instr.gpr8.Value(); | ||
| 515 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 516 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 517 | |||
| 518 | std::vector<Node> coords; | ||
| 519 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 520 | coords.push_back(GetRegister(coord_register + i)); | ||
| 521 | } | ||
| 522 | // 1D.DC in opengl the 2nd component is ignored. | ||
| 523 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 524 | coords.push_back(Immediate(0.0f)); | ||
| 525 | } | ||
| 526 | std::size_t array_offset{}; | ||
| 527 | if (is_array) { | ||
| 528 | array_offset = coords.size(); | ||
| 529 | coords.push_back(GetRegister(array_register)); | ||
| 530 | } | ||
| 531 | if (depth_compare) { | ||
| 532 | // Depth is always stored in the register signaled by gpr20 | ||
| 533 | // or in the next register if lod or bias are used | ||
| 534 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 535 | coords.push_back(GetRegister(depth_register)); | ||
| 536 | } | ||
| 537 | // Fill ignored coordinates | ||
| 538 | while (coords.size() < total_coord_count) { | ||
| 539 | coords.push_back(Immediate(0)); | ||
| 540 | } | ||
| 541 | |||
| 542 | return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset, | ||
| 543 | 0, std::move(coords)); | ||
| 544 | } | ||
| 545 | |||
/// Decodes the coordinate registers of a TEXS instruction and emits its
/// sampling code. TEXS splits its inputs between gpr8 and gpr20, so the last
/// coordinate may be read from gpr20 when it does not fit next to the others.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // None and LZ are the only modes that do not consume a lod/bias input register.
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    // TEXS accepts at most 4 coordinate components over at most 4 inputs.
    const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The final coordinate comes from gpr20 unless gpr20 is already taken by
    // lod/bias or depth and the coordinates are few enough to stay contiguous.
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    std::size_t array_offset{};
    if (is_array) {
        array_offset = coords.size();
        coords.push_back(GetRegister(array_register));
    }
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20
        // or in the next register if lod or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        coords.push_back(GetRegister(depth_register));
    }
    // Fill ignored coordinates
    while (coords.size() < total_coord_count) {
        coords.push_back(Immediate(0));
    }

    // Bias offset is 1 when three or more coordinates push lod/bias past gpr20.
    return GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, array_offset,
                          (coord_count > 2 ? 1 : 0), std::move(coords));
}
| 587 | |||
| 588 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 589 | bool is_array) { | ||
| 590 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 591 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||
| 592 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||
| 593 | |||
| 594 | // If enabled arrays index is always stored in the gpr8 field | ||
| 595 | const u64 array_register = instr.gpr8.Value(); | ||
| 596 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | ||
| 597 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 598 | |||
| 599 | std::vector<Node> coords; | ||
| 600 | |||
| 601 | for (size_t i = 0; i < coord_count; ++i) { | ||
| 602 | coords.push_back(GetRegister(coord_register + i)); | ||
| 603 | } | ||
| 604 | std::optional<u32> array_offset; | ||
| 605 | if (is_array) { | ||
| 606 | array_offset = static_cast<u32>(coords.size()); | ||
| 607 | coords.push_back(GetRegister(array_register)); | ||
| 608 | } | ||
| 609 | |||
| 610 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||
| 611 | |||
| 612 | Node4 values; | ||
| 613 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 614 | auto params = coords; | ||
| 615 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | ||
| 616 | values[element] = | ||
| 617 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 618 | } | ||
| 619 | |||
| 620 | return values; | ||
| 621 | } | ||
| 622 | |||
| 623 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 624 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 625 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | ||
| 626 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 627 | |||
| 628 | // If enabled arrays index is always stored in the gpr8 field | ||
| 629 | const u64 array_register = instr.gpr8.Value(); | ||
| 630 | // if is array gpr20 is used | ||
| 631 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 632 | |||
| 633 | const u64 last_coord_register = | ||
| 634 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 635 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 636 | : coord_register + 1; | ||
| 637 | |||
| 638 | std::vector<Node> coords; | ||
| 639 | |||
| 640 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 641 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 642 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 643 | } | ||
| 644 | std::optional<u32> array_offset; | ||
| 645 | if (is_array) { | ||
| 646 | array_offset = static_cast<u32>(coords.size()); | ||
| 647 | coords.push_back(GetRegister(array_register)); | ||
| 648 | } | ||
| 649 | const auto coords_count = static_cast<u32>(coords.size()); | ||
| 650 | |||
| 651 | if (lod_enabled) { | ||
| 652 | // When lod is used always is in grp20 | ||
| 653 | coords.push_back(GetRegister(instr.gpr20)); | ||
| 654 | } else { | ||
| 655 | coords.push_back(Immediate(0)); | ||
| 656 | } | ||
| 657 | |||
| 658 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | ||
| 659 | |||
| 660 | Node4 values; | ||
| 661 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 662 | auto params = coords; | ||
| 663 | MetaTexture meta{sampler, element, coords_count, array_offset}; | ||
| 664 | values[element] = | ||
| 665 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 666 | } | ||
| 667 | return values; | ||
| 668 | } | ||
| 669 | |||
| 670 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 671 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 672 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 673 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 674 | |||
| 675 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 676 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 677 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 678 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 679 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 680 | } | ||
| 681 | // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later. | ||
| 682 | total_coord_count += | ||
| 683 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 684 | |||
| 685 | return {coord_count, total_coord_count}; | ||
| 686 | } | ||
| 687 | |||
| 688 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp new file mode 100644 index 000000000..c1e5f4efb --- /dev/null +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -0,0 +1,178 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::ConditionCode; | ||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Register; | ||
| 16 | |||
/// Decodes flow-control and miscellaneous instructions (EXIT, KIL, branches,
/// flow-stack operations, IPA, OUT, ...). Returns the program counter after the
/// instruction; an unconditional EXIT fast-forwards it to end decoding.
u32 ShaderIR::DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::EXIT: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "EXIT condition code used: {}",
                             static_cast<u32>(cc));

        switch (instr.flow.cond) {
        case Tegra::Shader::FlowCondition::Always:
            bb.push_back(Operation(OperationCode::Exit));
            if (instr.pred.pred_index == static_cast<u64>(Tegra::Shader::Pred::UnusedIndex)) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
            break;

        case Tegra::Shader::FlowCondition::Fcsm_Tr:
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
            break;

        default:
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}",
                              static_cast<u32>(instr.flow.cond.Value()));
        }
        break;
    }
    case OpCode::Id::KIL: {
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);

        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "KIL condition code used: {}",
                             static_cast<u32>(cc));

        // KIL discards the current fragment.
        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::MOV_SYS: {
        // Moves a system variable into gpr0.
        switch (instr.sys20) {
        case Tegra::Shader::SystemVariable::InvocationInfo: {
            // Stubbed: always reads zero.
            LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
            SetRegister(bb, instr.gpr0, Immediate(0u));
            break;
        }
        case Tegra::Shader::SystemVariable::Ydirection: {
            // Config pack's third value is Y_NEGATE's state.
            SetRegister(bb, instr.gpr0, Operation(OperationCode::YNegate));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled system move: {}", static_cast<u32>(instr.sys20.Value()));
        }
        break;
    }
    case OpCode::Id::BRA: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "BRA with constant buffers are not implemented");

        // The branch target is encoded relative to the current program counter.
        const u32 target = pc + instr.bra.GetBranchTarget();
        const Node branch = Operation(OperationCode::Branch, Immediate(target));

        // Wrap the branch in the condition code when one is used.
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        if (cc != Tegra::Shader::ConditionCode::T) {
            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
        } else {
            bb.push_back(branch);
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");

        // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
        // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
        // structure to the BRA opcode.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");

        // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
        // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
        // it's very unlikely a driver will emit such invalid shader.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target)));
        break;
    }
    case OpCode::Id::SYNC: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
                             static_cast<u32>(cc));

        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::BRK: {
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
                             static_cast<u32>(cc));

        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack));
        break;
    }
    case OpCode::Id::IPA: {
        // Interpolates an input attribute and stores the result in gpr0,
        // optionally saturating it.
        const auto& attribute = instr.attribute.fmt28;
        const Tegra::Shader::IpaMode input_mode{instr.ipa.interp_mode.Value(),
                                                instr.ipa.sample_mode.Value()};

        const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);

        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");

        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        // Stubbed as a plain register move.
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::DEPBAR: {
        // Dependency barrier: no-op in this IR.
        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 177 | |||
| 178 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp new file mode 100644 index 000000000..1717f0653 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::PSETP: { | ||
| 22 | const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 24 | |||
| 25 | // We can't use the constant predicate as destination. | ||
| 26 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 27 | |||
| 28 | const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 29 | |||
| 30 | const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 31 | const Node predicate = Operation(combiner, op_a, op_b); | ||
| 32 | |||
| 33 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 34 | SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); | ||
| 35 | |||
| 36 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 37 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if | ||
| 38 | // enabled | ||
| 39 | SetPredicate(bb, instr.psetp.pred0, | ||
| 40 | Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), | ||
| 41 | second_pred)); | ||
| 42 | } | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | case OpCode::Id::CSETP: { | ||
| 46 | const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 47 | const Node condition_code = GetConditionCode(instr.csetp.cc); | ||
| 48 | |||
| 49 | const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 50 | |||
| 51 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 52 | SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); | ||
| 53 | } | ||
| 54 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 55 | const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); | ||
| 56 | SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); | ||
| 57 | } | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | default: | ||
| 61 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 62 | } | ||
| 63 | |||
| 64 | return pc; | ||
| 65 | } | ||
| 66 | |||
| 67 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp new file mode 100644 index 000000000..8bd15fb00 --- /dev/null +++ b/src/video_core/shader/decode/predicate_set_register.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp new file mode 100644 index 000000000..bdb4424a6 --- /dev/null +++ b/src/video_core/shader/decode/register_set_predicate.cpp | |||
| @@ -0,0 +1,51 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); | ||
| 20 | |||
| 21 | const Node apply_mask = [&]() { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::R2P_IMM: | ||
| 24 | return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); | ||
| 25 | default: | ||
| 26 | UNREACHABLE(); | ||
| 27 | return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); | ||
| 28 | } | ||
| 29 | }(); | ||
| 30 | const Node mask = GetRegister(instr.gpr8); | ||
| 31 | const auto offset = static_cast<u32>(instr.r2p.byte) * 8; | ||
| 32 | |||
| 33 | constexpr u32 programmable_preds = 7; | ||
| 34 | for (u64 pred = 0; pred < programmable_preds; ++pred) { | ||
| 35 | const auto shift = static_cast<u32>(pred); | ||
| 36 | |||
| 37 | const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); | ||
| 38 | const Node condition = | ||
| 39 | Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); | ||
| 40 | |||
| 41 | const Node value_compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 42 | const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); | ||
| 43 | |||
| 44 | const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); | ||
| 45 | bb.push_back(Conditional(condition, {code})); | ||
| 46 | } | ||
| 47 | |||
| 48 | return pc; | ||
| 49 | } | ||
| 50 | |||
| 51 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp new file mode 100644 index 000000000..85026bb37 --- /dev/null +++ b/src/video_core/shader/decode/shift.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | const auto opcode = OpCode::Decode(instr); | ||
| 18 | |||
| 19 | const Node op_a = GetRegister(instr.gpr8); | ||
| 20 | const Node op_b = [&]() { | ||
| 21 | if (instr.is_b_imm) { | ||
| 22 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 23 | } else if (instr.is_b_gpr) { | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | } else { | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset); | ||
| 27 | } | ||
| 28 | }(); | ||
| 29 | |||
| 30 | switch (opcode->get().GetId()) { | ||
| 31 | case OpCode::Id::SHR_C: | ||
| 32 | case OpCode::Id::SHR_R: | ||
| 33 | case OpCode::Id::SHR_IMM: { | ||
| 34 | const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, | ||
| 35 | instr.shift.is_signed, PRECISE, op_a, op_b); | ||
| 36 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 37 | SetRegister(bb, instr.gpr0, value); | ||
| 38 | break; | ||
| 39 | } | ||
| 40 | case OpCode::Id::SHL_C: | ||
| 41 | case OpCode::Id::SHL_R: | ||
| 42 | case OpCode::Id::SHL_IMM: { | ||
| 43 | const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b); | ||
| 44 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 45 | SetRegister(bb, instr.gpr0, value); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | default: | ||
| 49 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 50 | } | ||
| 51 | |||
| 52 | return pc; | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp new file mode 100644 index 000000000..c3432356d --- /dev/null +++ b/src/video_core/shader/decode/video.cpp | |||
| @@ -0,0 +1,111 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | using Tegra::Shader::Pred; | ||
| 15 | using Tegra::Shader::VideoType; | ||
| 16 | using Tegra::Shader::VmadShr; | ||
| 17 | |||
u32 ShaderIR::DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // First operand is always a register, reinterpreted per the video
    // sub-encoding (byte chunk / 16-bit half selection).
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
    // Second operand is either a register (same chunk handling) or a 16-bit
    // immediate, sign-extended when signed_b is set.
    const Node op_b = [&]() {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (instr.video.signed_b) {
            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(imm));
        } else {
            return Immediate(instr.alu.GetImm20_16());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // The multiply-add is treated as signed if either input is signed.
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);

        // Optional post-shift of the result by 7 or 15 bits.
        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            // NOTE(review): unlike the operations above, no precise flag is
            // passed to this SignedOperation call — confirm this is intentional.
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));

        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
| 84 | |||
| 85 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, | ||
| 86 | Tegra::Shader::VideoType type, u64 byte_height) { | ||
| 87 | if (!is_chunk) { | ||
| 88 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 89 | } | ||
| 90 | const Node zero = Immediate(0); | ||
| 91 | |||
| 92 | switch (type) { | ||
| 93 | case Tegra::Shader::VideoType::Size16_Low: | ||
| 94 | return BitfieldExtract(op, 0, 16); | ||
| 95 | case Tegra::Shader::VideoType::Size16_High: | ||
| 96 | return BitfieldExtract(op, 16, 16); | ||
| 97 | case Tegra::Shader::VideoType::Size32: | ||
| 98 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 99 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 100 | UNIMPLEMENTED(); | ||
| 101 | return zero; | ||
| 102 | case Tegra::Shader::VideoType::Invalid: | ||
| 103 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 104 | return zero; | ||
| 105 | default: | ||
| 106 | UNREACHABLE(); | ||
| 107 | return zero; | ||
| 108 | } | ||
| 109 | } | ||
| 110 | |||
| 111 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp new file mode 100644 index 000000000..0cd9cd1cc --- /dev/null +++ b/src/video_core/shader/decode/xmad.cpp | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
u32 ShaderIR::DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Operand sources and the merge flag depend on the encoding variant
    // (const-buffer / register / immediate).
    auto [is_merge, op_b, op_c] = [&]() -> std::tuple<bool, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false, GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset)};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37, Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        }
        UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
        return {false, Immediate(0), Immediate(0)};
    }();

    // XMAD multiplies 16-bit halves: select high or low half of each input.
    op_a = BitfieldExtract(op_a, instr.xmad.high_a ? 16 : 0, 16);

    // Keep the unextracted b operand around: CBcc and the merge step use it.
    const Node original_b = op_b;
    op_b = BitfieldExtract(op_b, instr.xmad.high_b ? 16 : 0, 16);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node product = Operation(OperationCode::IMul, NO_PRECISE, op_a, op_b);
    if (instr.xmad.product_shift_left) {
        product = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, product, Immediate(16));
    }

    // The third operand is transformed according to the XMAD mode before the add.
    const Node original_c = op_c;
    op_c = [&]() {
        switch (instr.xmad.mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
            return BitfieldExtract(original_c, 0, 16);
        case Tegra::Shader::XmadMode::CHi:
            return BitfieldExtract(original_c, 16, 16);
        case Tegra::Shader::XmadMode::CBcc: {
            // c + (b << 16), using the unextracted b operand.
            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                                   NO_PRECISE, original_b, Immediate(16));
            return SignedOperation(OperationCode::IAdd, is_signed_c, NO_PRECISE, original_c,
                                   shifted_b);
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled XMAD mode: {}", static_cast<u32>(instr.xmad.mode.Value()));
            return Immediate(0);
        }
    }();

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node sum = Operation(OperationCode::IAdd, product, op_c);
    if (is_merge) {
        // Merge: low 16 bits of the sum OR'd with original_b shifted into the
        // high 16 bits.
        const Node a = BitfieldExtract(sum, 0, 16);
        const Node b =
            Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, original_b, Immediate(16));
        sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, sum);

    return pc;
}
| 96 | |||
| 97 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp new file mode 100644 index 000000000..d7747103e --- /dev/null +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -0,0 +1,444 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cmath> | ||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/logging/log.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using Tegra::Shader::Attribute; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::IpaMode; | ||
| 19 | using Tegra::Shader::Pred; | ||
| 20 | using Tegra::Shader::PredCondition; | ||
| 21 | using Tegra::Shader::PredOperation; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | |||
| 24 | Node ShaderIR::StoreNode(NodeData&& node_data) { | ||
| 25 | auto store = std::make_unique<NodeData>(node_data); | ||
| 26 | const Node node = store.get(); | ||
| 27 | stored_nodes.push_back(std::move(store)); | ||
| 28 | return node; | ||
| 29 | } | ||
| 30 | |||
/// Wraps a list of IR nodes so they are only executed when `condition` holds.
Node ShaderIR::Conditional(Node condition, std::vector<Node>&& code) {
    return StoreNode(ConditionalNode(condition, std::move(code)));
}

/// Creates a free-form comment node for the decompiled output.
Node ShaderIR::Comment(const std::string& text) {
    return StoreNode(CommentNode(text));
}

/// Creates a 32-bit immediate value node.
Node ShaderIR::Immediate(u32 value) {
    return StoreNode(ImmediateNode(value));
}
| 42 | |||
| 43 | Node ShaderIR::GetRegister(Register reg) { | ||
| 44 | if (reg != Register::ZeroIndex) { | ||
| 45 | used_registers.insert(static_cast<u32>(reg)); | ||
| 46 | } | ||
| 47 | return StoreNode(GprNode(reg)); | ||
| 48 | } | ||
| 49 | |||
/// Builds an immediate node from the instruction's 19-bit immediate encoding.
Node ShaderIR::GetImmediate19(Instruction instr) {
    return Immediate(instr.alu.GetImm20_19());
}

/// Builds an immediate node from the instruction's 32-bit immediate encoding.
Node ShaderIR::GetImmediate32(Instruction instr) {
    return Immediate(instr.alu.GetImm20_32());
}
| 57 | |||
Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);

    // Track the access so the decompiler knows this cbuf (and offset) is used.
    const auto [entry, is_new] = used_cbufs.try_emplace(index);
    entry->second.MarkAsUsed(offset);

    return StoreNode(CbufNode(index, Immediate(offset)));
}

Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);

    // Indirect access: the concrete offsets are unknown at compile time, so
    // mark the whole buffer as indirectly used.
    const auto [entry, is_new] = used_cbufs.try_emplace(index);
    entry->second.MarkAsUsedIndirect();

    // Effective offset is the dynamic index plus the static offset.
    const Node final_offset = Operation(OperationCode::UAdd, NO_PRECISE, node, Immediate(offset));
    return StoreNode(CbufNode(index, final_offset));
}
| 78 | |||
Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const auto pred = static_cast<Pred>(pred_);
    // UnusedIndex and NeverExecute are special predicates, so they are not
    // tracked in the used-predicate set.
    if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) {
        used_predicates.insert(pred);
    }

    return StoreNode(PredicateNode(pred, negated));
}

// Maps a boolean to a constant predicate: UnusedIndex for true, NeverExecute
// for false (presumably always-true / always-false predicates — confirm in
// the predicate handling of the decompiler backend).
Node ShaderIR::GetPredicate(bool immediate) {
    return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute));
}
| 91 | |||
Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element,
                                 const Tegra::Shader::IpaMode& input_mode, Node buffer) {
    // Record the attribute along with every interpolation mode it is read with.
    const auto [entry, is_new] =
        used_input_attributes.emplace(std::make_pair(index, std::set<Tegra::Shader::IpaMode>{}));
    entry->second.insert(input_mode);

    return StoreNode(AbufNode(index, static_cast<u32>(element), input_mode, buffer));
}
| 100 | |||
| 101 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 102 | if (index == Attribute::Index::ClipDistances0123 || | ||
| 103 | index == Attribute::Index::ClipDistances4567) { | ||
| 104 | const auto clip_index = | ||
| 105 | static_cast<u32>((index == Attribute::Index::ClipDistances4567 ? 1 : 0) + element); | ||
| 106 | used_clip_distances.at(clip_index) = true; | ||
| 107 | } | ||
| 108 | used_output_attributes.insert(index); | ||
| 109 | |||
| 110 | return StoreNode(AbufNode(index, static_cast<u32>(element), buffer)); | ||
| 111 | } | ||
| 112 | |||
Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
    const Node node = StoreNode(InternalFlagNode(flag));
    // Optionally wrap the flag read in a logical negation.
    if (negated) {
        return Operation(OperationCode::LogicalNegate, node);
    }
    return node;
}

/// Returns a node addressing local memory at the given address node.
Node ShaderIR::GetLocalMemory(Node address) {
    return StoreNode(LmemNode(address));
}

/// Temporaries are aliased to the registers immediately after the zero register.
Node ShaderIR::GetTemporal(u32 id) {
    return GetRegister(Register::ZeroIndex + 1 + id);
}
| 128 | |||
| 129 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | ||
| 130 | if (absolute) { | ||
| 131 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | ||
| 132 | } | ||
| 133 | if (negate) { | ||
| 134 | value = Operation(OperationCode::FNegate, NO_PRECISE, value); | ||
| 135 | } | ||
| 136 | return value; | ||
| 137 | } | ||
| 138 | |||
| 139 | Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | ||
| 140 | if (!saturate) { | ||
| 141 | return value; | ||
| 142 | } | ||
| 143 | const Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 144 | const Node positive_one = Immediate(1.0f); | ||
| 145 | return Operation(OperationCode::FClamp, NO_PRECISE, value, positive_zero, positive_one); | ||
| 146 | } | ||
| 147 | |||
| 148 | Node ShaderIR::ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed) { | ||
| 149 | switch (size) { | ||
| 150 | case Register::Size::Byte: | ||
| 151 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | ||
| 152 | Immediate(24)); | ||
| 153 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | ||
| 154 | Immediate(24)); | ||
| 155 | return value; | ||
| 156 | case Register::Size::Short: | ||
| 157 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, value, | ||
| 158 | Immediate(16)); | ||
| 159 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, value, | ||
| 160 | Immediate(16)); | ||
| 161 | case Register::Size::Word: | ||
| 162 | // Default - do nothing | ||
| 163 | return value; | ||
| 164 | default: | ||
| 165 | UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size)); | ||
| 166 | return value; | ||
| 167 | } | ||
| 168 | } | ||
| 169 | |||
| 170 | Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { | ||
| 171 | if (!is_signed) { | ||
| 172 | // Absolute or negate on an unsigned is pointless | ||
| 173 | return value; | ||
| 174 | } | ||
| 175 | if (absolute) { | ||
| 176 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, value); | ||
| 177 | } | ||
| 178 | if (negate) { | ||
| 179 | value = Operation(OperationCode::INegate, NO_PRECISE, value); | ||
| 180 | } | ||
| 181 | return value; | ||
| 182 | } | ||
| 183 | |||
Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    // Pack the instruction's two half-float immediates into one 32-bit value.
    const Node value = Immediate(instr.half_imm.PackImmediates());
    if (!has_negation) {
        return value;
    }
    // Apply the per-half negation bits encoded in the instruction.
    const Node first_negate = GetPredicate(instr.half_imm.first_negate != 0);
    const Node second_negate = GetPredicate(instr.half_imm.second_negate != 0);

    return Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, first_negate, second_negate);
}
| 194 | |||
// Combines `src` into `dest` according to the instruction's half-merge mode.
Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    switch (merge) {
    case Tegra::Shader::HalfMerge::H0_H1:
        // Both halves come straight from the source.
        return src;
    case Tegra::Shader::HalfMerge::F32:
        return Operation(OperationCode::HMergeF32, src);
    case Tegra::Shader::HalfMerge::Mrg_H0:
        // Presumably keeps dest and replaces one half with src's — confirm in
        // the HMergeH0/HMergeH1 backend implementation.
        return Operation(OperationCode::HMergeH0, dest, src);
    case Tegra::Shader::HalfMerge::Mrg_H1:
        return Operation(OperationCode::HMergeH1, dest, src);
    }
    UNREACHABLE();
    return src;
}
| 209 | |||
// Applies |x| and/or -x modifiers to a packed half-float operand.
Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    if (absolute) {
        value = Operation(OperationCode::HAbsolute, HALF_NO_PRECISE, value);
    }
    if (negate) {
        // HNegate takes one negation predicate per half; negate both halves.
        value = Operation(OperationCode::HNegate, HALF_NO_PRECISE, value, GetPredicate(true),
                          GetPredicate(true));
    }
    return value;
}
| 220 | |||
Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    // Maps hardware predicate conditions to float comparison opcodes. The
    // *WithNan conditions reuse the plain comparison; the NaN handling is
    // OR'd on afterwards below.
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
        {PredCondition::Equal, OperationCode::LogicalFEqual},
        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);

    // *WithNan conditions also hold when either operand is NaN.
    if (condition == PredCondition::LessThanWithNan ||
        condition == PredCondition::NotEqualWithNan ||
        condition == PredCondition::LessEqualWithNan ||
        condition == PredCondition::GreaterThanWithNan ||
        condition == PredCondition::GreaterEqualWithNan) {

        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_a));
        predicate = Operation(OperationCode::LogicalOr, predicate,
                              Operation(OperationCode::LogicalFIsNan, op_b));
    }

    return predicate;
}
| 255 | |||
Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                             Node op_b) {
    // Maps hardware predicate conditions to integer comparison opcodes. The
    // *WithNan variants map to the plain comparison; true NaN semantics are
    // rejected below.
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::LogicalILessThan},
        {PredCondition::Equal, OperationCode::LogicalIEqual},
        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    // The signed/unsigned opcode variant is selected by is_signed.
    Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, op_a, op_b);

    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "NaN comparisons for integers are not implemented");
    return predicate;
}
| 285 | |||
Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition,
                                          const MetaHalfArithmetic& meta, Node op_a, Node op_b) {

    // NaN-aware comparisons are not implemented for half floats; the *WithNan
    // entries below intentionally alias the plain comparisons.
    UNIMPLEMENTED_IF_MSG(condition == PredCondition::LessThanWithNan ||
                             condition == PredCondition::NotEqualWithNan ||
                             condition == PredCondition::LessEqualWithNan ||
                             condition == PredCondition::GreaterThanWithNan ||
                             condition == PredCondition::GreaterEqualWithNan,
                         "Unimplemented NaN comparison for half floats");

    // Maps hardware predicate conditions to two-component half comparisons.
    static const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
        {PredCondition::Equal, OperationCode::Logical2HEqual},
        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThan},
        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqual},
        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqual},
        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThan},
        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqual}};

    const auto comparison{PredicateComparisonTable.find(condition)};
    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
                         "Unknown predicate comparison operation");

    const Node predicate = Operation(comparison->second, meta, op_a, op_b);

    return predicate;
}
| 317 | |||
| 318 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | ||
| 319 | static const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = { | ||
| 320 | {PredOperation::And, OperationCode::LogicalAnd}, | ||
| 321 | {PredOperation::Or, OperationCode::LogicalOr}, | ||
| 322 | {PredOperation::Xor, OperationCode::LogicalXor}, | ||
| 323 | }; | ||
| 324 | |||
| 325 | const auto op = PredicateOperationTable.find(operation); | ||
| 326 | UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation"); | ||
| 327 | return op->second; | ||
| 328 | } | ||
| 329 | |||
Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
    switch (cc) {
    case Tegra::Shader::ConditionCode::NEU:
        // "Not equal" is modeled as the negated zero flag.
        return GetInternalFlag(InternalFlag::Zero, true);
    default:
        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
        // Fallback: NeverExecute, presumably a never-true predicate — confirm
        // against the predicate handling in the backend.
        return GetPredicate(static_cast<u64>(Pred::NeverExecute));
    }
}
| 339 | |||
/// Emits an assignment of `src` into the register `dest`.
void ShaderIR::SetRegister(BasicBlock& bb, Register dest, Node src) {
    bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), src));
}

/// Emits a logical assignment of `src` into the predicate `dest`.
void ShaderIR::SetPredicate(BasicBlock& bb, u64 dest, Node src) {
    bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), src));
}

/// Emits a logical assignment of `value` into the internal flag `flag`.
void ShaderIR::SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value) {
    bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), value));
}

/// Emits a store of `value` into local memory at `address`.
void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) {
    bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value));
}

/// Emits an assignment of `value` into the temporary register `id`.
void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
    SetRegister(bb, Register::ZeroIndex + 1 + id, value);
}
| 359 | |||
| 360 | void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) { | ||
| 361 | if (!sets_cc) { | ||
| 362 | return; | ||
| 363 | } | ||
| 364 | const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); | ||
| 365 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | ||
| 366 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 367 | } | ||
| 368 | |||
| 369 | void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) { | ||
| 370 | if (!sets_cc) { | ||
| 371 | return; | ||
| 372 | } | ||
| 373 | const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0)); | ||
| 374 | SetInternalFlag(bb, InternalFlag::Zero, zerop); | ||
| 375 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 376 | } | ||
| 377 | |||
/// Extracts `bits` bits from `value` starting at bit `offset` (unsigned extract).
Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
                     Immediate(bits));
}
| 382 | |||
// Translates a signed integer opcode to its unsigned counterpart when
// is_signed is false; when is_signed is true the opcode passes through
// unchanged. Opcodes with no unsigned counterpart are unreachable.
/*static*/ OperationCode ShaderIR::SignedToUnsignedCode(OperationCode operation_code,
                                                        bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::INegate:
        UNREACHABLE_MSG("Can't negate an unsigned integer");
        // NOTE(review): when asserts are compiled out, this falls through to
        // the IAbsolute case and then out to the final UNREACHABLE_MSG below.
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
    }
    UNREACHABLE_MSG("Unknown signed operation with code={}", static_cast<u32>(operation_code));
    return {};
}
| 443 | |||
| 444 | } // namespace VideoCommon::Shader \ No newline at end of file | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h new file mode 100644 index 000000000..96e7df6b6 --- /dev/null +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -0,0 +1,793 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
#include <array>
#include <cstring>
#include <map>
#include <optional>
#include <set>
#include <string>
#include <tuple>
#include <variant>
#include <vector>
| 15 | |||
| 16 | #include "common/common_types.h" | ||
| 17 | #include "video_core/engines/maxwell_3d.h" | ||
| 18 | #include "video_core/engines/shader_bytecode.h" | ||
| 19 | #include "video_core/engines/shader_header.h" | ||
| 20 | |||
| 21 | namespace VideoCommon::Shader { | ||
| 22 | |||
// Forward declarations of the node kinds stored inside NodeData below.
class OperationNode;
class ConditionalNode;
class GprNode;
class ImmediateNode;
class InternalFlagNode;
class PredicateNode;
class AbufNode; ///< Attribute buffer
class CbufNode; ///< Constant buffer
class LmemNode; ///< Local memory
class GmemNode; ///< Global memory
class CommentNode;

using ProgramCode = std::vector<u64>;

// Node data is owned by the ShaderIR instance (see StoreNode); nodes are
// passed around as non-owning const pointers.
using NodeData =
    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
                 PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>;
using Node = const NodeData*;
using Node4 = std::array<Node, 4>;
/// Ordered sequence of IR nodes forming a basic block
using BasicBlock = std::vector<Node>;

// Maximum program length scanned by the decoder, presumably in u64
// instruction words (cf. GetLength's coverage_end * sizeof(u64)).
constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
| 45 | |||
/// Operations an OperationNode can perform. Each entry's comment documents its
/// expected operand signature as (meta, operands...) -> result.
enum class OperationCode {
    Assign, /// (float& dest, float src) -> void

    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float

    FAdd,          /// (MetaArithmetic, float a, float b) -> float
    FMul,          /// (MetaArithmetic, float a, float b) -> float
    FDiv,          /// (MetaArithmetic, float a, float b) -> float
    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
    FSin,          /// (MetaArithmetic, float a) -> float
    FExp2,         /// (MetaArithmetic, float a) -> float
    FLog2,         /// (MetaArithmetic, float a) -> float
    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
    FSqrt,         /// (MetaArithmetic, float a) -> float
    FRoundEven,    /// (MetaArithmetic, float a) -> float
    FFloor,        /// (MetaArithmetic, float a) -> float
    FCeil,         /// (MetaArithmetic, float a) -> float
    FTrunc,        /// (MetaArithmetic, float a) -> float
    FCastInteger,  /// (MetaArithmetic, int a) -> float
    FCastUInteger, /// (MetaArithmetic, uint a) -> float

    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
    IMul,                  /// (MetaArithmetic, int a, int b) -> int
    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
    INegate,               /// (MetaArithmetic, int a) -> int
    IAbsolute,             /// (MetaArithmetic, int a) -> int
    IMin,                  /// (MetaArithmetic, int a, int b) -> int
    IMax,                  /// (MetaArithmetic, int a, int b) -> int
    ICastFloat,            /// (MetaArithmetic, float a) -> int
    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int

    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            /// (MetaArithmetic, float a) -> uint
    UCastSigned,           /// (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,       /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract,      /// (MetaArithmetic, uint value, int offset, int bits) -> uint
    UBitCount,             /// (MetaArithmetic, uint) -> uint

    HAdd,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,      /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute, /// (f16vec2 a) -> f16vec2
    HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
    HMergeF32, /// (f16vec2 src) -> float
    HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,    /// (float a, float b) -> f16vec2

    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
    LogicalOr,     /// (bool a, bool b) -> bool
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
    LogicalAll2,   /// (bool2 a) -> bool
    LogicalAny2,   /// (bool2 a) -> bool

    LogicalFLessThan,     /// (float a, float b) -> bool
    LogicalFEqual,        /// (float a, float b) -> bool
    LogicalFLessEqual,    /// (float a, float b) -> bool
    LogicalFGreaterThan,  /// (float a, float b) -> bool
    LogicalFNotEqual,     /// (float a, float b) -> bool
    LogicalFGreaterEqual, /// (float a, float b) -> bool
    LogicalFIsNan,        /// (float a) -> bool

    LogicalILessThan,     /// (int a, int b) -> bool
    LogicalIEqual,        /// (int a, int b) -> bool
    LogicalILessEqual,    /// (int a, int b) -> bool
    LogicalIGreaterThan,  /// (int a, int b) -> bool
    LogicalINotEqual,     /// (int a, int b) -> bool
    LogicalIGreaterEqual, /// (int a, int b) -> bool

    LogicalULessThan,     /// (uint a, uint b) -> bool
    LogicalUEqual,        /// (uint a, uint b) -> bool
    LogicalULessEqual,    /// (uint a, uint b) -> bool
    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
    LogicalUNotEqual,     /// (uint a, uint b) -> bool
    LogicalUGreaterEqual, /// (uint a, uint b) -> bool

    Logical2HLessThan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HLessEqual,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterThan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HNotEqual,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2

    F4Texture,                /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureLod,             /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureGather,          /// (MetaTexture, float[N] coords, float[M] params) -> float4
    F4TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    F4TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    F4TexelFetch,             /// (MetaTexture, int[N], int) -> float4

    Branch,        /// (uint branch_target) -> void
    PushFlowStack, /// (uint branch_target) -> void
    PopFlowStack,  /// () -> void
    Exit,          /// () -> void
    Discard,       /// () -> void

    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void

    YNegate, /// () -> float

    Amount, /// Number of operation codes; not a real operation
};
| 179 | |||
/// Hardware condition-code style flags tracked by the IR
/// (only Zero is currently written — see SetInternalFlagsFrom*).
enum class InternalFlag {
    Zero = 0,
    Sign = 1,
    Carry = 2,
    Overflow = 3,
    Amount = 4, // Number of flags; used for sizing, not a real flag
};
| 187 | |||
/// Describes the behaviour of code path of a given entry point and a return point.
enum class ExitMethod {
    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
    AlwaysReturn, ///< All code paths reach the return point.
    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
    AlwaysEnd,    ///< All code paths reach a END instruction.
};
| 195 | |||
/// Describes a texture sampler used by the shader, identified by its TSC offset
/// and the slot it was assigned in the generated sampler array.
class Sampler {
public:
    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
                     bool is_array, bool is_shadow)
        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}

    /// Returns the offset in TSC memory this sampler was read from
    std::size_t GetOffset() const {
        return offset;
    }

    /// Returns the index used to address the generated sampler array
    std::size_t GetIndex() const {
        return index;
    }

    /// Returns the texture type used to sample this texture (Texture2D, etc)
    Tegra::Shader::TextureType GetType() const {
        return type;
    }

    /// Returns true when the texture is sampled as an array texture
    bool IsArray() const {
        return is_array;
    }

    /// Returns true when the texture is sampled as a depth (shadow) texture
    bool IsShadow() const {
        return is_shadow;
    }

    /// Lexicographic ordering over all fields; enables storage in std::set
    bool operator<(const Sampler& rhs) const {
        return std::tie(offset, index, type, is_array, is_shadow) <
               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
    }

private:
    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
    /// instruction.
    std::size_t offset{};
    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
    Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
    bool is_array{};  ///< Whether the texture is being sampled as an array texture or not.
    bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
};
| 236 | |||
/// Tracks how a constant buffer is used so the decompiler can size its declaration.
class ConstBuffer {
public:
    /// Records an access at the given offset, growing the tracked size if needed.
    /// NOTE(review): the unit of `offset` (bytes vs. words) is not visible here —
    /// confirm against the GetConstBuffer call sites.
    void MarkAsUsed(u64 offset) {
        max_offset = std::max(max_offset, static_cast<u32>(offset));
    }

    /// Records an access through a computed (indirect) offset; the whole buffer
    /// must then be assumed reachable.
    void MarkAsUsedIndirect() {
        is_indirect = true;
    }

    /// Returns true when the buffer was ever accessed with a computed offset
    bool IsIndirect() const {
        return is_indirect;
    }

    /// Returns the size implied by the highest directly-accessed offset
    u32 GetSize() const {
        return max_offset + 1;
    }

private:
    u32 max_offset{};   ///< Highest offset passed to MarkAsUsed
    bool is_indirect{}; ///< Whether an indirect access was recorded
};
| 259 | |||
/// Metadata for arithmetic operations
struct MetaArithmetic {
    bool precise{}; ///< Whether the operation must be emitted with precise semantics
};
| 263 | |||
/// Metadata for half-float (f16vec2) arithmetic operations
struct MetaHalfArithmetic {
    bool precise{}; ///< Whether the operation must be emitted with precise semantics
    /// Per-operand half selection; defaults to using both halves (H0_H1)
    std::array<Tegra::Shader::HalfType, 3> types = {Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1,
                                                    Tegra::Shader::HalfType::H0_H1};
};
| 270 | |||
/// Metadata for texture operations
struct MetaTexture {
    const Sampler& sampler; ///< Sampler to perform the operation with
    u32 element{};          ///< Result component — presumably selects x/y/z/w; confirm at use sites
    u32 coords_count{};     ///< Number of coordinate operands
    std::optional<u32> array_index; ///< Position of the array-layer operand, when sampling an array
};
| 277 | |||
/// Preset metadata for precise arithmetic operations
constexpr MetaArithmetic PRECISE = {true};
/// Preset metadata for non-precise arithmetic operations
constexpr MetaArithmetic NO_PRECISE = {false};
/// Preset metadata for non-precise half-float arithmetic operations
constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false};

/// Metadata variant attached to an OperationNode
using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>;
| 283 | |||
| 284 | /// Holds any kind of operation that can be done in the IR | ||
| 285 | class OperationNode final { | ||
| 286 | public: | ||
| 287 | template <typename... T> | ||
| 288 | explicit constexpr OperationNode(OperationCode code) : code{code}, meta{} {} | ||
| 289 | |||
| 290 | template <typename... T> | ||
| 291 | explicit constexpr OperationNode(OperationCode code, Meta&& meta) | ||
| 292 | : code{code}, meta{std::move(meta)} {} | ||
| 293 | |||
| 294 | template <typename... T> | ||
| 295 | explicit constexpr OperationNode(OperationCode code, const T*... operands) | ||
| 296 | : OperationNode(code, {}, operands...) {} | ||
| 297 | |||
| 298 | template <typename... T> | ||
| 299 | explicit constexpr OperationNode(OperationCode code, Meta&& meta, const T*... operands_) | ||
| 300 | : code{code}, meta{std::move(meta)} { | ||
| 301 | |||
| 302 | auto operands_list = {operands_...}; | ||
| 303 | for (auto& operand : operands_list) { | ||
| 304 | operands.push_back(operand); | ||
| 305 | } | ||
| 306 | } | ||
| 307 | |||
| 308 | explicit OperationNode(OperationCode code, Meta&& meta, std::vector<Node>&& operands) | ||
| 309 | : code{code}, meta{meta}, operands{std::move(operands)} {} | ||
| 310 | |||
| 311 | explicit OperationNode(OperationCode code, std::vector<Node>&& operands) | ||
| 312 | : code{code}, meta{}, operands{std::move(operands)} {} | ||
| 313 | |||
| 314 | OperationCode GetCode() const { | ||
| 315 | return code; | ||
| 316 | } | ||
| 317 | |||
| 318 | const Meta& GetMeta() const { | ||
| 319 | return meta; | ||
| 320 | } | ||
| 321 | |||
| 322 | std::size_t GetOperandsCount() const { | ||
| 323 | return operands.size(); | ||
| 324 | } | ||
| 325 | |||
| 326 | Node operator[](std::size_t operand_index) const { | ||
| 327 | return operands.at(operand_index); | ||
| 328 | } | ||
| 329 | |||
| 330 | private: | ||
| 331 | const OperationCode code; | ||
| 332 | const Meta meta; | ||
| 333 | std::vector<Node> operands; | ||
| 334 | }; | ||
| 335 | |||
/// Encloses inside any kind of node that returns a boolean conditionally-executed code
class ConditionalNode final {
public:
    explicit ConditionalNode(Node condition, std::vector<Node>&& code)
        : condition{condition}, code{std::move(code)} {}

    /// Returns the boolean-evaluated node gating execution
    Node GetCondition() const {
        return condition;
    }

    /// Returns the nodes executed when the condition holds
    const std::vector<Node>& GetCode() const {
        return code;
    }

private:
    const Node condition;   ///< Condition to be satisfied
    std::vector<Node> code; ///< Code to execute
};
| 354 | |||
/// A general purpose register
class GprNode final {
public:
    explicit constexpr GprNode(Tegra::Shader::Register index) : index{index} {}

    /// Returns the register number as a plain u32
    u32 GetIndex() const {
        return static_cast<u32>(index);
    }

private:
    const Tegra::Shader::Register index; ///< Register being referenced
};
| 367 | |||
/// A 32-bits value that represents an immediate value
class ImmediateNode final {
public:
    explicit constexpr ImmediateNode(u32 value) : value{value} {}

    /// Returns the raw 32-bit immediate (may encode int, uint or float bits)
    u32 GetValue() const {
        return value;
    }

private:
    const u32 value; ///< Immediate value, stored as raw bits
};
| 380 | |||
/// One of Maxwell's internal flags
class InternalFlagNode final {
public:
    explicit constexpr InternalFlagNode(InternalFlag flag) : flag{flag} {}

    /// Returns which internal flag this node reads or writes
    InternalFlag GetFlag() const {
        return flag;
    }

private:
    const InternalFlag flag; ///< Flag being referenced
};
| 393 | |||
/// A predicate register, it can be negated without additional nodes
class PredicateNode final {
public:
    explicit constexpr PredicateNode(Tegra::Shader::Pred index, bool negated)
        : index{index}, negated{negated} {}

    /// Returns the predicate register being referenced
    Tegra::Shader::Pred GetIndex() const {
        return index;
    }

    /// Returns true when the predicate's value is to be read inverted
    bool IsNegated() const {
        return negated;
    }

private:
    const Tegra::Shader::Pred index; ///< Predicate register
    const bool negated;              ///< Whether the value is logically inverted
};
| 412 | |||
| 413 | /// Attribute buffer memory (known as attributes or varyings in GLSL terms) | ||
| 414 | class AbufNode final { | ||
| 415 | public: | ||
| 416 | explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, | ||
| 417 | const Tegra::Shader::IpaMode& input_mode, Node buffer = {}) | ||
| 418 | : input_mode{input_mode}, index{index}, element{element}, buffer{buffer} {} | ||
| 419 | |||
| 420 | explicit constexpr AbufNode(Tegra::Shader::Attribute::Index index, u32 element, | ||
| 421 | Node buffer = {}) | ||
| 422 | : input_mode{}, index{index}, element{element}, buffer{buffer} {} | ||
| 423 | |||
| 424 | Tegra::Shader::IpaMode GetInputMode() const { | ||
| 425 | return input_mode; | ||
| 426 | } | ||
| 427 | |||
| 428 | Tegra::Shader::Attribute::Index GetIndex() const { | ||
| 429 | return index; | ||
| 430 | } | ||
| 431 | |||
| 432 | u32 GetElement() const { | ||
| 433 | return element; | ||
| 434 | } | ||
| 435 | |||
| 436 | Node GetBuffer() const { | ||
| 437 | return buffer; | ||
| 438 | } | ||
| 439 | |||
| 440 | private: | ||
| 441 | const Tegra::Shader::IpaMode input_mode; | ||
| 442 | const Node buffer; | ||
| 443 | const Tegra::Shader::Attribute::Index index; | ||
| 444 | const u32 element; | ||
| 445 | }; | ||
| 446 | |||
/// Constant buffer node, usually mapped to uniform buffers in GLSL
class CbufNode final {
public:
    explicit constexpr CbufNode(u32 index, Node offset) : index{index}, offset{offset} {}

    /// Returns which constant buffer is addressed
    u32 GetIndex() const {
        return index;
    }

    /// Returns the node computing the offset inside the buffer
    Node GetOffset() const {
        return offset;
    }

private:
    const u32 index;   ///< Constant buffer slot
    const Node offset; ///< Offset expression within the buffer
};
| 464 | |||
/// Local memory node
class LmemNode final {
public:
    explicit constexpr LmemNode(Node address) : address{address} {}

    /// Returns the node computing the local memory address
    Node GetAddress() const {
        return address;
    }

private:
    const Node address; ///< Address expression
};
| 477 | |||
/// Global memory node
class GmemNode final {
public:
    explicit constexpr GmemNode(Node address) : address{address} {}

    /// Returns the node computing the global memory address
    Node GetAddress() const {
        return address;
    }

private:
    const Node address; ///< Address expression
};
| 490 | |||
/// Commentary, can be dropped
class CommentNode final {
public:
    explicit CommentNode(std::string text) : text{std::move(text)} {}

    /// Returns the comment's text
    const std::string& GetText() const {
        return text;
    }

private:
    std::string text; ///< Free-form comment emitted into the generated code
};
| 503 | |||
| 504 | class ShaderIR final { | ||
| 505 | public: | ||
| 506 | explicit ShaderIR(const ProgramCode& program_code, u32 main_offset) | ||
| 507 | : program_code{program_code}, main_offset{main_offset} { | ||
| 508 | |||
| 509 | Decode(); | ||
| 510 | } | ||
| 511 | |||
| 512 | const std::map<u32, BasicBlock>& GetBasicBlocks() const { | ||
| 513 | return basic_blocks; | ||
| 514 | } | ||
| 515 | |||
| 516 | const std::set<u32>& GetRegisters() const { | ||
| 517 | return used_registers; | ||
| 518 | } | ||
| 519 | |||
| 520 | const std::set<Tegra::Shader::Pred>& GetPredicates() const { | ||
| 521 | return used_predicates; | ||
| 522 | } | ||
| 523 | |||
| 524 | const std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>>& | ||
| 525 | GetInputAttributes() const { | ||
| 526 | return used_input_attributes; | ||
| 527 | } | ||
| 528 | |||
| 529 | const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { | ||
| 530 | return used_output_attributes; | ||
| 531 | } | ||
| 532 | |||
| 533 | const std::map<u32, ConstBuffer>& GetConstantBuffers() const { | ||
| 534 | return used_cbufs; | ||
| 535 | } | ||
| 536 | |||
| 537 | const std::set<Sampler>& GetSamplers() const { | ||
| 538 | return used_samplers; | ||
| 539 | } | ||
| 540 | |||
| 541 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() | ||
| 542 | const { | ||
| 543 | return used_clip_distances; | ||
| 544 | } | ||
| 545 | |||
| 546 | std::size_t GetLength() const { | ||
| 547 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | ||
| 548 | } | ||
| 549 | |||
| 550 | const Tegra::Shader::Header& GetHeader() const { | ||
| 551 | return header; | ||
| 552 | } | ||
| 553 | |||
| 554 | private: | ||
| 555 | void Decode(); | ||
| 556 | |||
| 557 | ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels); | ||
| 558 | |||
| 559 | BasicBlock DecodeRange(u32 begin, u32 end); | ||
| 560 | |||
| 561 | /** | ||
| 562 | * Decodes a single instruction from Tegra to IR. | ||
| 563 | * @param bb Basic block where the nodes will be written to. | ||
| 564 | * @param pc Program counter. Offset to decode. | ||
| 565 | * @return Next address to decode. | ||
| 566 | */ | ||
| 567 | u32 DecodeInstr(BasicBlock& bb, u32 pc); | ||
| 568 | |||
| 569 | u32 DecodeArithmetic(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 570 | u32 DecodeArithmeticImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 571 | u32 DecodeBfe(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 572 | u32 DecodeBfi(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 573 | u32 DecodeShift(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 574 | u32 DecodeArithmeticInteger(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 575 | u32 DecodeArithmeticIntegerImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 576 | u32 DecodeArithmeticHalf(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 577 | u32 DecodeArithmeticHalfImmediate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 578 | u32 DecodeFfma(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 579 | u32 DecodeHfma2(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 580 | u32 DecodeConversion(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 581 | u32 DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 582 | u32 DecodeFloatSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 583 | u32 DecodeIntegerSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 584 | u32 DecodeHalfSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 585 | u32 DecodePredicateSetRegister(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 586 | u32 DecodePredicateSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 587 | u32 DecodeRegisterSetPredicate(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 588 | u32 DecodeFloatSet(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 589 | u32 DecodeIntegerSet(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 590 | u32 DecodeHalfSet(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 591 | u32 DecodeVideo(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 592 | u32 DecodeXmad(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 593 | u32 DecodeOther(BasicBlock& bb, const BasicBlock& code, u32 pc); | ||
| 594 | |||
| 595 | /// Internalizes node's data and returns a managed pointer to a clone of that node | ||
| 596 | Node StoreNode(NodeData&& node_data); | ||
| 597 | |||
| 598 | /// Creates a conditional node | ||
| 599 | Node Conditional(Node condition, std::vector<Node>&& code); | ||
| 600 | /// Creates a commentary | ||
| 601 | Node Comment(const std::string& text); | ||
| 602 | /// Creates an u32 immediate | ||
| 603 | Node Immediate(u32 value); | ||
| 604 | /// Creates a s32 immediate | ||
| 605 | Node Immediate(s32 value) { | ||
| 606 | return Immediate(static_cast<u32>(value)); | ||
| 607 | } | ||
| 608 | /// Creates a f32 immediate | ||
| 609 | Node Immediate(f32 value) { | ||
| 610 | u32 integral; | ||
| 611 | std::memcpy(&integral, &value, sizeof(u32)); | ||
| 612 | return Immediate(integral); | ||
| 613 | } | ||
| 614 | |||
| 615 | /// Generates a node for a passed register. | ||
| 616 | Node GetRegister(Tegra::Shader::Register reg); | ||
| 617 | /// Generates a node representing a 19-bit immediate value | ||
| 618 | Node GetImmediate19(Tegra::Shader::Instruction instr); | ||
| 619 | /// Generates a node representing a 32-bit immediate value | ||
| 620 | Node GetImmediate32(Tegra::Shader::Instruction instr); | ||
| 621 | /// Generates a node representing a constant buffer | ||
| 622 | Node GetConstBuffer(u64 index, u64 offset); | ||
| 623 | /// Generates a node representing a constant buffer with a variadic offset | ||
| 624 | Node GetConstBufferIndirect(u64 index, u64 offset, Node node); | ||
| 625 | /// Generates a node for a passed predicate. It can be optionally negated | ||
| 626 | Node GetPredicate(u64 pred, bool negated = false); | ||
| 627 | /// Generates a predicate node for an immediate true or false value | ||
| 628 | Node GetPredicate(bool immediate); | ||
| 629 | /// Generates a node representing an input atttribute. Keeps track of used attributes. | ||
| 630 | Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, | ||
| 631 | const Tegra::Shader::IpaMode& input_mode, Node buffer = {}); | ||
| 632 | /// Generates a node representing an output atttribute. Keeps track of used attributes. | ||
| 633 | Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); | ||
| 634 | /// Generates a node representing an internal flag | ||
| 635 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | ||
| 636 | /// Generates a node representing a local memory address | ||
| 637 | Node GetLocalMemory(Node address); | ||
| 638 | /// Generates a temporal, internally it uses a post-RZ register | ||
| 639 | Node GetTemporal(u32 id); | ||
| 640 | |||
| 641 | /// Sets a register. src value must be a number-evaluated node. | ||
| 642 | void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); | ||
| 643 | /// Sets a predicate. src value must be a bool-evaluated node | ||
| 644 | void SetPredicate(BasicBlock& bb, u64 dest, Node src); | ||
| 645 | /// Sets an internal flag. src value must be a bool-evaluated node | ||
| 646 | void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); | ||
| 647 | /// Sets a local memory address. address and value must be a number-evaluated node | ||
| 648 | void SetLocalMemory(BasicBlock& bb, Node address, Node value); | ||
| 649 | /// Sets a temporal. Internally it uses a post-RZ register | ||
| 650 | void SetTemporal(BasicBlock& bb, u32 id, Node value); | ||
| 651 | |||
| 652 | /// Sets internal flags from a float | ||
| 653 | void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true); | ||
| 654 | /// Sets internal flags from an integer | ||
| 655 | void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true); | ||
| 656 | |||
| 657 | /// Conditionally absolute/negated float. Absolute is applied first | ||
| 658 | Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); | ||
| 659 | /// Conditionally saturates a float | ||
| 660 | Node GetSaturatedFloat(Node value, bool saturate = true); | ||
| 661 | |||
| 662 | /// Converts an integer to different sizes. | ||
| 663 | Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); | ||
| 664 | /// Conditionally absolute/negated integer. Absolute is applied first | ||
| 665 | Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); | ||
| 666 | |||
| 667 | /// Unpacks a half immediate from an instruction | ||
| 668 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); | ||
| 669 | /// Merges a half pair into another value | ||
| 670 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); | ||
| 671 | /// Conditionally absolute/negated half float pair. Absolute is applied first | ||
| 672 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); | ||
| 673 | |||
| 674 | /// Returns a predicate comparing two floats | ||
| 675 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 676 | /// Returns a predicate comparing two integers | ||
| 677 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, | ||
| 678 | Node op_a, Node op_b); | ||
| 679 | /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared | ||
| 680 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, | ||
| 681 | const MetaHalfArithmetic& meta, Node op_a, Node op_b); | ||
| 682 | |||
| 683 | /// Returns a predicate combiner operation | ||
| 684 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | ||
| 685 | |||
| 686 | /// Returns a condition code evaluated from internal flags | ||
| 687 | Node GetConditionCode(Tegra::Shader::ConditionCode cc); | ||
| 688 | |||
| 689 | /// Accesses a texture sampler | ||
| 690 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, | ||
| 691 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow); | ||
| 692 | |||
| 693 | /// Extracts a sequence of bits from a node | ||
| 694 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | ||
| 695 | |||
| 696 | void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, | ||
| 697 | const Node4& components); | ||
| 698 | |||
| 699 | void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, | ||
| 700 | const Node4& components); | ||
| 701 | void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, | ||
| 702 | const Node4& components); | ||
| 703 | |||
| 704 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 705 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 706 | bool is_array); | ||
| 707 | |||
| 708 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 709 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 710 | bool is_array); | ||
| 711 | |||
| 712 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 713 | bool depth_compare, bool is_array); | ||
| 714 | |||
| 715 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 716 | bool is_array); | ||
| 717 | |||
| 718 | std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( | ||
| 719 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | ||
| 720 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | ||
| 721 | |||
| 722 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 723 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 724 | bool is_array, std::size_t array_offset, std::size_t bias_offset, | ||
| 725 | std::vector<Node>&& coords); | ||
| 726 | |||
| 727 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | ||
| 728 | u64 byte_height); | ||
| 729 | |||
| 730 | void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, | ||
| 731 | Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, | ||
| 732 | Tegra::Shader::PredicateResultMode predicate_mode, | ||
| 733 | Tegra::Shader::Pred predicate, bool sets_cc); | ||
| 734 | void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | ||
| 735 | Node op_c, Node imm_lut, bool sets_cc); | ||
| 736 | |||
| 737 | template <typename... T> | ||
| 738 | Node Operation(OperationCode code, const T*... operands) { | ||
| 739 | return StoreNode(OperationNode(code, operands...)); | ||
| 740 | } | ||
| 741 | |||
| 742 | template <typename... T> | ||
| 743 | Node Operation(OperationCode code, Meta&& meta, const T*... operands) { | ||
| 744 | return StoreNode(OperationNode(code, std::move(meta), operands...)); | ||
| 745 | } | ||
| 746 | |||
| 747 | template <typename... T> | ||
| 748 | Node Operation(OperationCode code, std::vector<Node>&& operands) { | ||
| 749 | return StoreNode(OperationNode(code, std::move(operands))); | ||
| 750 | } | ||
| 751 | |||
| 752 | template <typename... T> | ||
| 753 | Node Operation(OperationCode code, Meta&& meta, std::vector<Node>&& operands) { | ||
| 754 | return StoreNode(OperationNode(code, std::move(meta), std::move(operands))); | ||
| 755 | } | ||
| 756 | |||
| 757 | template <typename... T> | ||
| 758 | Node SignedOperation(OperationCode code, bool is_signed, const T*... operands) { | ||
| 759 | return StoreNode(OperationNode(SignedToUnsignedCode(code, is_signed), operands...)); | ||
| 760 | } | ||
| 761 | |||
| 762 | template <typename... T> | ||
| 763 | Node SignedOperation(OperationCode code, bool is_signed, Meta&& meta, const T*... operands) { | ||
| 764 | return StoreNode( | ||
| 765 | OperationNode(SignedToUnsignedCode(code, is_signed), std::move(meta), operands...)); | ||
| 766 | } | ||
| 767 | |||
| 768 | static OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); | ||
| 769 | |||
| 770 | const ProgramCode& program_code; | ||
| 771 | const u32 main_offset; | ||
| 772 | |||
| 773 | u32 coverage_begin{}; | ||
| 774 | u32 coverage_end{}; | ||
| 775 | std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; | ||
| 776 | |||
| 777 | std::map<u32, BasicBlock> basic_blocks; | ||
| 778 | |||
| 779 | std::vector<std::unique_ptr<NodeData>> stored_nodes; | ||
| 780 | |||
| 781 | std::set<u32> used_registers; | ||
| 782 | std::set<Tegra::Shader::Pred> used_predicates; | ||
| 783 | std::map<Tegra::Shader::Attribute::Index, std::set<Tegra::Shader::IpaMode>> | ||
| 784 | used_input_attributes; | ||
| 785 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | ||
| 786 | std::map<u32, ConstBuffer> used_cbufs; | ||
| 787 | std::set<Sampler> used_samplers; | ||
| 788 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | ||
| 789 | |||
| 790 | Tegra::Shader::Header header; | ||
| 791 | }; | ||
| 792 | |||
| 793 | } // namespace VideoCommon::Shader | ||