summaryrefslogtreecommitdiff
path: root/src/video_core/shader/decode
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-02-16 20:52:12 -0300
committerGravatar ameerj2021-07-22 21:51:22 -0400
commitc67d64365a712830fe140dd36e24e2efd9b8a812 (patch)
tree9287589f2b72d1cbd0cb113c2024b2bc531408c3 /src/video_core/shader/decode
parentshader: Add XMAD multiplication folding optimization (diff)
downloadyuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.gz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.tar.xz
yuzu-c67d64365a712830fe140dd36e24e2efd9b8a812.zip
shader: Remove old shader management
Diffstat (limited to 'src/video_core/shader/decode')
-rw-r--r--src/video_core/shader/decode/arithmetic.cpp166
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp101
-rw-r--r--src/video_core/shader/decode/arithmetic_half_immediate.cpp54
-rw-r--r--src/video_core/shader/decode/arithmetic_immediate.cpp53
-rw-r--r--src/video_core/shader/decode/arithmetic_integer.cpp375
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp99
-rw-r--r--src/video_core/shader/decode/bfe.cpp77
-rw-r--r--src/video_core/shader/decode/bfi.cpp45
-rw-r--r--src/video_core/shader/decode/conversion.cpp321
-rw-r--r--src/video_core/shader/decode/ffma.cpp62
-rw-r--r--src/video_core/shader/decode/float_set.cpp58
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp57
-rw-r--r--src/video_core/shader/decode/half_set.cpp115
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp80
-rw-r--r--src/video_core/shader/decode/hfma2.cpp73
-rw-r--r--src/video_core/shader/decode/image.cpp536
-rw-r--r--src/video_core/shader/decode/integer_set.cpp49
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp53
-rw-r--r--src/video_core/shader/decode/memory.cpp493
-rw-r--r--src/video_core/shader/decode/other.cpp322
-rw-r--r--src/video_core/shader/decode/predicate_set_predicate.cpp68
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp46
-rw-r--r--src/video_core/shader/decode/register_set_predicate.cpp86
-rw-r--r--src/video_core/shader/decode/shift.cpp153
-rw-r--r--src/video_core/shader/decode/texture.cpp935
-rw-r--r--src/video_core/shader/decode/video.cpp169
-rw-r--r--src/video_core/shader/decode/warp.cpp117
-rw-r--r--src/video_core/shader/decode/xmad.cpp156
28 files changed, 0 insertions, 4919 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
deleted file mode 100644
index 15eb700e7..000000000
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::SubOp;
17
18u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 Node op_a = GetRegister(instr.gpr8);
23
24 Node op_b = [&] {
25 if (instr.is_b_imm) {
26 return GetImmediate19(instr);
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::MOV_C:
36 case OpCode::Id::MOV_R: {
37 // MOV does not have neither 'abs' nor 'neg' bits.
38 SetRegister(bb, instr.gpr0, op_b);
39 break;
40 }
41 case OpCode::Id::FMUL_C:
42 case OpCode::Id::FMUL_R:
43 case OpCode::Id::FMUL_IMM: {
44 // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
45 if (instr.fmul.tab5cb8_2 != 0) {
46 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
47 instr.fmul.tab5cb8_2.Value());
48 }
49 if (instr.fmul.tab5c68_0 != 1) {
50 LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
51 instr.fmul.tab5c68_0.Value());
52 }
53
54 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
55
56 static constexpr std::array FmulPostFactor = {
57 1.000f, // None
58 0.500f, // Divide 2
59 0.250f, // Divide 4
60 0.125f, // Divide 8
61 8.000f, // Mul 8
62 4.000f, // Mul 4
63 2.000f, // Mul 2
64 };
65
66 if (instr.fmul.postfactor != 0) {
67 op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
68 Immediate(FmulPostFactor[instr.fmul.postfactor]));
69 }
70
71 // TODO(Rodrigo): Should precise be used when there's a postfactor?
72 Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
73
74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
77 SetRegister(bb, instr.gpr0, value);
78 break;
79 }
80 case OpCode::Id::FADD_C:
81 case OpCode::Id::FADD_R:
82 case OpCode::Id::FADD_IMM: {
83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
85
86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
90 SetRegister(bb, instr.gpr0, value);
91 break;
92 }
93 case OpCode::Id::MUFU: {
94 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
95
96 Node value = [&]() {
97 switch (instr.sub_op) {
98 case SubOp::Cos:
99 return Operation(OperationCode::FCos, PRECISE, op_a);
100 case SubOp::Sin:
101 return Operation(OperationCode::FSin, PRECISE, op_a);
102 case SubOp::Ex2:
103 return Operation(OperationCode::FExp2, PRECISE, op_a);
104 case SubOp::Lg2:
105 return Operation(OperationCode::FLog2, PRECISE, op_a);
106 case SubOp::Rcp:
107 return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
108 case SubOp::Rsq:
109 return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
110 case SubOp::Sqrt:
111 return Operation(OperationCode::FSqrt, PRECISE, op_a);
112 default:
113 UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
114 return Immediate(0);
115 }
116 }();
117 value = GetSaturatedFloat(value, instr.alu.saturate_d);
118
119 SetRegister(bb, instr.gpr0, value);
120 break;
121 }
122 case OpCode::Id::FMNMX_C:
123 case OpCode::Id::FMNMX_R:
124 case OpCode::Id::FMNMX_IMM: {
125 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
126 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
127
128 const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
129
130 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
131 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
132 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
133
134 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
135 SetRegister(bb, instr.gpr0, value);
136 break;
137 }
138 case OpCode::Id::FCMP_RR:
139 case OpCode::Id::FCMP_RC:
140 case OpCode::Id::FCMP_IMMR: {
141 UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
142 Node op_c = GetRegister(instr.gpr39);
143 Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
144 SetRegister(
145 bb, instr.gpr0,
146 Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
147 break;
148 }
149 case OpCode::Id::RRO_C:
150 case OpCode::Id::RRO_R:
151 case OpCode::Id::RRO_IMM: {
152 LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
153
154 // Currently RRO is only implemented as a register move.
155 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
156 SetRegister(bb, instr.gpr0, op_b);
157 break;
158 }
159 default:
160 UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
161 }
162
163 return pc;
164}
165
166} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
deleted file mode 100644
index 88103fede..000000000
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::HalfType;
15using Tegra::Shader::Instruction;
16using Tegra::Shader::OpCode;
17
18u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 bool negate_a = false;
23 bool negate_b = false;
24 bool absolute_a = false;
25 bool absolute_b = false;
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::HADD2_R:
29 if (instr.alu_half.ftz == 0) {
30 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
31 }
32 negate_a = ((instr.value >> 43) & 1) != 0;
33 negate_b = ((instr.value >> 31) & 1) != 0;
34 absolute_a = ((instr.value >> 44) & 1) != 0;
35 absolute_b = ((instr.value >> 30) & 1) != 0;
36 break;
37 case OpCode::Id::HADD2_C:
38 if (instr.alu_half.ftz == 0) {
39 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
40 }
41 negate_a = ((instr.value >> 43) & 1) != 0;
42 negate_b = ((instr.value >> 56) & 1) != 0;
43 absolute_a = ((instr.value >> 44) & 1) != 0;
44 absolute_b = ((instr.value >> 54) & 1) != 0;
45 break;
46 case OpCode::Id::HMUL2_R:
47 negate_a = ((instr.value >> 43) & 1) != 0;
48 absolute_a = ((instr.value >> 44) & 1) != 0;
49 absolute_b = ((instr.value >> 30) & 1) != 0;
50 break;
51 case OpCode::Id::HMUL2_C:
52 negate_b = ((instr.value >> 31) & 1) != 0;
53 absolute_a = ((instr.value >> 44) & 1) != 0;
54 absolute_b = ((instr.value >> 54) & 1) != 0;
55 break;
56 default:
57 UNREACHABLE();
58 break;
59 }
60
61 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
62 op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
63
64 auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> {
65 switch (opcode->get().GetId()) {
66 case OpCode::Id::HADD2_C:
67 case OpCode::Id::HMUL2_C:
68 return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
69 case OpCode::Id::HADD2_R:
70 case OpCode::Id::HMUL2_R:
71 return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
72 default:
73 UNREACHABLE();
74 return {HalfType::F32, Immediate(0)};
75 }
76 }();
77 op_b = UnpackHalfFloat(op_b, type_b);
78 op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
79
80 Node value = [this, opcode, op_a, op_b = op_b] {
81 switch (opcode->get().GetId()) {
82 case OpCode::Id::HADD2_C:
83 case OpCode::Id::HADD2_R:
84 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
85 case OpCode::Id::HMUL2_C:
86 case OpCode::Id::HMUL2_R:
87 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
88 default:
89 UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
90 return Immediate(0);
91 }
92 }();
93 value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
94 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
95
96 SetRegister(bb, instr.gpr0, value);
97
98 return pc;
99}
100
101} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
deleted file mode 100644
index d179b9873..000000000
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16
17u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
22 if (instr.alu_half_imm.ftz == 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25 } else {
26 if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
27 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
28 }
29 }
30
31 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
32 op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
33
34 const Node op_b = UnpackHalfImmediate(instr, true);
35
36 Node value = [&]() {
37 switch (opcode->get().GetId()) {
38 case OpCode::Id::HADD2_IMM:
39 return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
40 case OpCode::Id::HMUL2_IMM:
41 return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
42 default:
43 UNREACHABLE();
44 return Immediate(0);
45 }
46 }();
47
48 value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
49 value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
50 SetRegister(bb, instr.gpr0, value);
51 return pc;
52}
53
54} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
deleted file mode 100644
index f1875967c..000000000
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 switch (opcode->get().GetId()) {
21 case OpCode::Id::MOV32_IMM: {
22 SetRegister(bb, instr.gpr0, GetImmediate32(instr));
23 break;
24 }
25 case OpCode::Id::FMUL32_IMM: {
26 Node value =
27 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
28 value = GetSaturatedFloat(value, instr.fmul32.saturate);
29
30 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
31 SetRegister(bb, instr.gpr0, value);
32 break;
33 }
34 case OpCode::Id::FADD32I: {
35 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
36 instr.fadd32i.negate_a);
37 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
38 instr.fadd32i.negate_b);
39
40 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
41 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
42 SetRegister(bb, instr.gpr0, value);
43 break;
44 }
45 default:
46 UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
47 opcode->get().GetName());
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
deleted file mode 100644
index 7b5bb7003..000000000
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ /dev/null
@@ -1,375 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::IAdd3Height;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::Register;
18
19u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
20 const Instruction instr = {program_code[pc]};
21 const auto opcode = OpCode::Decode(instr);
22
23 Node op_a = GetRegister(instr.gpr8);
24 Node op_b = [&]() {
25 if (instr.is_b_imm) {
26 return Immediate(instr.alu.GetSignedImm20_20());
27 } else if (instr.is_b_gpr) {
28 return GetRegister(instr.gpr20);
29 } else {
30 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
31 }
32 }();
33
34 switch (opcode->get().GetId()) {
35 case OpCode::Id::IADD_C:
36 case OpCode::Id::IADD_R:
37 case OpCode::Id::IADD_IMM: {
38 UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
39 UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
40
41 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
42 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
43
44 Node value = Operation(OperationCode::UAdd, op_a, op_b);
45
46 if (instr.iadd.x) {
47 Node carry = GetInternalFlag(InternalFlag::Carry);
48 Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
49 value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
50 }
51
52 if (instr.generates_cc) {
53 const Node i0 = Immediate(0);
54
55 Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
56 Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
57 Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
58
59 Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
60 Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
61 Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
62 Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
63
64 SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
65 SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
66 SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
67 SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
68 }
69 SetRegister(bb, instr.gpr0, std::move(value));
70 break;
71 }
72 case OpCode::Id::IADD3_C:
73 case OpCode::Id::IADD3_R:
74 case OpCode::Id::IADD3_IMM: {
75 Node op_c = GetRegister(instr.gpr39);
76
77 const auto ApplyHeight = [&](IAdd3Height height, Node value) {
78 switch (height) {
79 case IAdd3Height::None:
80 return value;
81 case IAdd3Height::LowerHalfWord:
82 return BitfieldExtract(value, 0, 16);
83 case IAdd3Height::UpperHalfWord:
84 return BitfieldExtract(value, 16, 16);
85 default:
86 UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
87 return Immediate(0);
88 }
89 };
90
91 if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
92 op_a = ApplyHeight(instr.iadd3.height_a, op_a);
93 op_b = ApplyHeight(instr.iadd3.height_b, op_b);
94 op_c = ApplyHeight(instr.iadd3.height_c, op_c);
95 }
96
97 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
98 op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
99 op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
100
101 const Node value = [&] {
102 Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
103 if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
104 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
105 }
106 const Node shifted = [&] {
107 switch (instr.iadd3.mode) {
108 case Tegra::Shader::IAdd3Mode::RightShift:
109 // TODO(tech4me): According to
110 // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
111 // The addition between op_a and op_b should be done in uint33, more
112 // investigation required
113 return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
114 Immediate(16));
115 case Tegra::Shader::IAdd3Mode::LeftShift:
116 return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
117 Immediate(16));
118 default:
119 return add_ab;
120 }
121 }();
122 return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
123 }();
124
125 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
126 SetRegister(bb, instr.gpr0, value);
127 break;
128 }
129 case OpCode::Id::ISCADD_C:
130 case OpCode::Id::ISCADD_R:
131 case OpCode::Id::ISCADD_IMM: {
132 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
133 "Condition codes generation in ISCADD is not implemented");
134
135 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
136 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
137
138 const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
139 const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
140 const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
141
142 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
143 SetRegister(bb, instr.gpr0, value);
144 break;
145 }
146 case OpCode::Id::POPC_C:
147 case OpCode::Id::POPC_R:
148 case OpCode::Id::POPC_IMM: {
149 if (instr.popc.invert) {
150 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
151 }
152 const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
153 SetRegister(bb, instr.gpr0, value);
154 break;
155 }
156 case OpCode::Id::FLO_R:
157 case OpCode::Id::FLO_C:
158 case OpCode::Id::FLO_IMM: {
159 Node value;
160 if (instr.flo.invert) {
161 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
162 }
163 if (instr.flo.is_signed) {
164 value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
165 } else {
166 value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
167 }
168 if (instr.flo.sh) {
169 value =
170 Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
171 }
172 SetRegister(bb, instr.gpr0, std::move(value));
173 break;
174 }
175 case OpCode::Id::SEL_C:
176 case OpCode::Id::SEL_R:
177 case OpCode::Id::SEL_IMM: {
178 const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
179 const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
180 SetRegister(bb, instr.gpr0, value);
181 break;
182 }
183 case OpCode::Id::ICMP_CR:
184 case OpCode::Id::ICMP_R:
185 case OpCode::Id::ICMP_RC:
186 case OpCode::Id::ICMP_IMM: {
187 const Node zero = Immediate(0);
188
189 const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
190 switch (opcode->get().GetId()) {
191 case OpCode::Id::ICMP_CR:
192 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
193 GetRegister(instr.gpr39)};
194 case OpCode::Id::ICMP_R:
195 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
196 case OpCode::Id::ICMP_RC:
197 return {GetRegister(instr.gpr39),
198 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
199 case OpCode::Id::ICMP_IMM:
200 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
201 default:
202 UNREACHABLE();
203 return {zero, zero};
204 }
205 }();
206 const Node op_lhs = GetRegister(instr.gpr8);
207 const Node comparison =
208 GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
209 SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
210 break;
211 }
212 case OpCode::Id::LOP_C:
213 case OpCode::Id::LOP_R:
214 case OpCode::Id::LOP_IMM: {
215 if (instr.alu.lop.invert_a)
216 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
217 if (instr.alu.lop.invert_b)
218 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
219
220 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
221 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
222 instr.generates_cc);
223 break;
224 }
225 case OpCode::Id::LOP3_C:
226 case OpCode::Id::LOP3_R:
227 case OpCode::Id::LOP3_IMM: {
228 const Node op_c = GetRegister(instr.gpr39);
229 const Node lut = [&]() {
230 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
231 return Immediate(instr.alu.lop3.GetImmLut28());
232 } else {
233 return Immediate(instr.alu.lop3.GetImmLut48());
234 }
235 }();
236
237 WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
238 break;
239 }
240 case OpCode::Id::IMNMX_C:
241 case OpCode::Id::IMNMX_R:
242 case OpCode::Id::IMNMX_IMM: {
243 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
244
245 const bool is_signed = instr.imnmx.is_signed;
246
247 const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
248 const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
249 const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
250 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
251
252 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
253 SetRegister(bb, instr.gpr0, value);
254 break;
255 }
256 case OpCode::Id::LEA_R2:
257 case OpCode::Id::LEA_R1:
258 case OpCode::Id::LEA_IMM:
259 case OpCode::Id::LEA_RZ:
260 case OpCode::Id::LEA_HI: {
261 auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
262 switch (opcode->get().GetId()) {
263 case OpCode::Id::LEA_R2: {
264 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
265 Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
266 }
267 case OpCode::Id::LEA_R1: {
268 const bool neg = instr.lea.r1.neg != 0;
269 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
270 GetRegister(instr.gpr20),
271 Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
272 }
273 case OpCode::Id::LEA_IMM: {
274 const bool neg = instr.lea.imm.neg != 0;
275 return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
276 Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
277 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
278 }
279 case OpCode::Id::LEA_RZ: {
280 const bool neg = instr.lea.rz.neg != 0;
281 return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
282 GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
283 Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
284 }
285 case OpCode::Id::LEA_HI:
286 default:
287 UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
288
289 return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
290 Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
291 }
292 }();
293
294 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
295 "Unhandled LEA Predicate");
296
297 Node value =
298 Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
299 value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
300 SetRegister(bb, instr.gpr0, std::move(value));
301
302 break;
303 }
304 default:
305 UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
306 }
307
308 return pc;
309}
310
311void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
312 Node imm_lut, bool sets_cc) {
313 const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) {
314 Node value = Immediate(0);
315 const ImmediateNode imm = std::get<ImmediateNode>(*ttbl);
316 if (imm.GetValue() & 0x01) {
317 const Node a = Operation(OperationCode::IBitwiseNot, na);
318 const Node b = Operation(OperationCode::IBitwiseNot, nb);
319 const Node c = Operation(OperationCode::IBitwiseNot, nc);
320 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
321 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
322 value = Operation(OperationCode::IBitwiseOr, value, r);
323 }
324 if (imm.GetValue() & 0x02) {
325 const Node a = Operation(OperationCode::IBitwiseNot, na);
326 const Node b = Operation(OperationCode::IBitwiseNot, nb);
327 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b);
328 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
329 value = Operation(OperationCode::IBitwiseOr, value, r);
330 }
331 if (imm.GetValue() & 0x04) {
332 const Node a = Operation(OperationCode::IBitwiseNot, na);
333 const Node c = Operation(OperationCode::IBitwiseNot, nc);
334 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
335 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
336 value = Operation(OperationCode::IBitwiseOr, value, r);
337 }
338 if (imm.GetValue() & 0x08) {
339 const Node a = Operation(OperationCode::IBitwiseNot, na);
340 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb);
341 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
342 value = Operation(OperationCode::IBitwiseOr, value, r);
343 }
344 if (imm.GetValue() & 0x10) {
345 const Node b = Operation(OperationCode::IBitwiseNot, nb);
346 const Node c = Operation(OperationCode::IBitwiseNot, nc);
347 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
348 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
349 value = Operation(OperationCode::IBitwiseOr, value, r);
350 }
351 if (imm.GetValue() & 0x20) {
352 const Node b = Operation(OperationCode::IBitwiseNot, nb);
353 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b);
354 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
355 value = Operation(OperationCode::IBitwiseOr, value, r);
356 }
357 if (imm.GetValue() & 0x40) {
358 const Node c = Operation(OperationCode::IBitwiseNot, nc);
359 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
360 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c);
361 value = Operation(OperationCode::IBitwiseOr, value, r);
362 }
363 if (imm.GetValue() & 0x80) {
364 Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb);
365 r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc);
366 value = Operation(OperationCode::IBitwiseOr, value, r);
367 }
368 return value;
369 }(op_a, op_b, op_c, imm_lut);
370
371 SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc);
372 SetRegister(bb, dest, lop3_fast);
373}
374
375} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
deleted file mode 100644
index 73580277a..000000000
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::LogicOperation;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::PredicateResultMode;
18using Tegra::Shader::Register;
19
20u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
21 const Instruction instr = {program_code[pc]};
22 const auto opcode = OpCode::Decode(instr);
23
24 Node op_a = GetRegister(instr.gpr8);
25 Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32));
26
27 switch (opcode->get().GetId()) {
28 case OpCode::Id::IADD32I: {
29 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
30
31 op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
32
33 Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
34
35 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
36 SetRegister(bb, instr.gpr0, std::move(value));
37 break;
38 }
39 case OpCode::Id::LOP32I: {
40 if (instr.alu.lop32i.invert_a) {
41 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
42 }
43
44 if (instr.alu.lop32i.invert_b) {
45 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
46 }
47
48 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
49 std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
50 instr.op_32.generates_cc != 0);
51 break;
52 }
53 default:
54 UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
55 opcode->get().GetName());
56 }
57
58 return pc;
59}
60
61void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
62 Node op_b, PredicateResultMode predicate_mode, Pred predicate,
63 bool sets_cc) {
64 Node result = [&] {
65 switch (logic_op) {
66 case LogicOperation::And:
67 return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
68 case LogicOperation::Or:
69 return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
70 case LogicOperation::Xor:
71 return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
72 case LogicOperation::PassB:
73 return op_b;
74 default:
75 UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
76 return Immediate(0);
77 }
78 }();
79
80 SetInternalFlagsFromInteger(bb, result, sets_cc);
81 SetRegister(bb, dest, result);
82
83 // Write the predicate value depending on the predicate mode.
84 switch (predicate_mode) {
85 case PredicateResultMode::None:
86 // Do nothing.
87 return;
88 case PredicateResultMode::NotZero: {
89 // Set the predicate to true if the result is not zero.
90 Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
91 SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
92 break;
93 }
94 default:
95 UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
96 }
97}
98
99} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
deleted file mode 100644
index 8e3b46e8e..000000000
--- a/src/video_core/shader/decode/bfe.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 Node op_a = GetRegister(instr.gpr8);
21 Node op_b = [&] {
22 switch (opcode->get().GetId()) {
23 case OpCode::Id::BFE_R:
24 return GetRegister(instr.gpr20);
25 case OpCode::Id::BFE_C:
26 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
27 case OpCode::Id::BFE_IMM:
28 return Immediate(instr.alu.GetSignedImm20_20());
29 default:
30 UNREACHABLE();
31 return Immediate(0);
32 }
33 }();
34
35 UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");
36
37 const bool is_signed = instr.bfe.is_signed;
38
39 // using reverse parallel method in
40 // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
41 // note for later if possible to implement faster method.
42 if (instr.bfe.brev) {
43 const auto swap = [&](u32 s, u32 mask) {
44 Node v1 =
45 SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s));
46 if (mask != 0) {
47 v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1),
48 Immediate(mask));
49 }
50 Node v2 = op_a;
51 if (mask != 0) {
52 v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2),
53 Immediate(mask));
54 }
55 v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2),
56 Immediate(s));
57 return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1),
58 std::move(v2));
59 };
60 op_a = swap(1, 0x55555555U);
61 op_a = swap(2, 0x33333333U);
62 op_a = swap(4, 0x0F0F0F0FU);
63 op_a = swap(8, 0x00FF00FFU);
64 op_a = swap(16, 0);
65 }
66
67 const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
68 Immediate(0), Immediate(8));
69 const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b,
70 Immediate(8), Immediate(8));
71 auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits);
72 SetRegister(bb, instr.gpr0, std::move(result));
73
74 return pc;
75}
76
77} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
deleted file mode 100644
index 70d1c055b..000000000
--- a/src/video_core/shader/decode/bfi.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> {
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::BFI_RC:
23 return {GetRegister(instr.gpr39),
24 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
25 case OpCode::Id::BFI_IMM_R:
26 return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
27 default:
28 UNREACHABLE();
29 return {Immediate(0), Immediate(0)};
30 }
31 }();
32 const Node insert = GetRegister(instr.gpr8);
33 const Node offset = BitfieldExtract(packed_shift, 0, 8);
34 const Node bits = BitfieldExtract(packed_shift, 8, 8);
35
36 const Node value =
37 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
38
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 SetRegister(bb, instr.gpr0, value);
41
42 return pc;
43}
44
45} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
deleted file mode 100644
index fea7a54df..000000000
--- a/src/video_core/shader/decode/conversion.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6#include <optional>
7#include <utility>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/node_helper.h"
13#include "video_core/shader/shader_ir.h"
14
15namespace VideoCommon::Shader {
16
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register;
20
21namespace {
22
23constexpr OperationCode GetFloatSelector(u64 selector) {
24 return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
25}
26
27constexpr u32 SizeInBits(Register::Size size) {
28 switch (size) {
29 case Register::Size::Byte:
30 return 8;
31 case Register::Size::Short:
32 return 16;
33 case Register::Size::Word:
34 return 32;
35 case Register::Size::Long:
36 return 64;
37 }
38 return 0;
39}
40
41constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
42 Register::Size dst_size,
43 bool src_signed,
44 bool dst_signed) {
45 const u32 dst_bits = SizeInBits(dst_size);
46 if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
47 if (src_signed == dst_signed) {
48 return std::nullopt;
49 }
50 return std::make_pair(0, std::numeric_limits<s32>::max());
51 }
52 if (dst_signed) {
53 // Signed destination, clamp to [-128, 127] for instance
54 return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
55 } else {
56 // Unsigned destination
57 if (dst_bits == 32) {
58 // Avoid shifting by 32, that is undefined behavior
59 return std::make_pair(0, s32(std::numeric_limits<u32>::max()));
60 }
61 return std::make_pair(0, (1 << dst_bits) - 1);
62 }
63}
64
65} // Anonymous namespace
66
67u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
68 const Instruction instr = {program_code[pc]};
69 const auto opcode = OpCode::Decode(instr);
70
71 switch (opcode->get().GetId()) {
72 case OpCode::Id::I2I_R:
73 case OpCode::Id::I2I_C:
74 case OpCode::Id::I2I_IMM: {
75 const bool src_signed = instr.conversion.is_input_signed;
76 const bool dst_signed = instr.conversion.is_output_signed;
77 const Register::Size src_size = instr.conversion.src_size;
78 const Register::Size dst_size = instr.conversion.dst_size;
79 const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
80
81 Node value = [this, instr, opcode] {
82 switch (opcode->get().GetId()) {
83 case OpCode::Id::I2I_R:
84 return GetRegister(instr.gpr20);
85 case OpCode::Id::I2I_C:
86 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
87 case OpCode::Id::I2I_IMM:
88 return Immediate(instr.alu.GetSignedImm20_20());
89 default:
90 UNREACHABLE();
91 return Immediate(0);
92 }
93 }();
94
95 // Ensure the source selector is valid
96 switch (instr.conversion.src_size) {
97 case Register::Size::Byte:
98 break;
99 case Register::Size::Short:
100 ASSERT(selector == 0 || selector == 2);
101 break;
102 default:
103 ASSERT(selector == 0);
104 break;
105 }
106
107 if (src_size != Register::Size::Word || selector != 0) {
108 value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
109 Immediate(selector * 8), Immediate(SizeInBits(src_size)));
110 }
111
112 value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
113 instr.conversion.negate_a, src_signed);
114
115 if (instr.alu.saturate_d) {
116 if (src_signed && !dst_signed) {
117 Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
118 Immediate(1 << (SizeInBits(src_size) - 1)));
119 value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
120 std::move(value));
121
122 // Simplify generated expressions, this can be removed without semantic impact
123 SetTemporary(bb, 0, std::move(value));
124 value = GetTemporary(0);
125
126 if (dst_size != Register::Size::Word) {
127 const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
128 Node is_large =
129 Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
130 value = Operation(OperationCode::Select, std::move(is_large), limit,
131 std::move(value));
132 }
133 } else if (const std::optional bounds =
134 IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
135 value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
136 Immediate(bounds->first));
137 value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
138 Immediate(bounds->second));
139 }
140 } else if (dst_size != Register::Size::Word) {
141 // No saturation, we only have to mask the result
142 Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
143 value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
144 }
145
146 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
147 SetRegister(bb, instr.gpr0, std::move(value));
148 break;
149 }
150 case OpCode::Id::I2F_R:
151 case OpCode::Id::I2F_C:
152 case OpCode::Id::I2F_IMM: {
153 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
154 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
155 "Condition codes generation in I2F is not implemented");
156
157 Node value = [&] {
158 switch (opcode->get().GetId()) {
159 case OpCode::Id::I2F_R:
160 return GetRegister(instr.gpr20);
161 case OpCode::Id::I2F_C:
162 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
163 case OpCode::Id::I2F_IMM:
164 return Immediate(instr.alu.GetSignedImm20_20());
165 default:
166 UNREACHABLE();
167 return Immediate(0);
168 }
169 }();
170
171 const bool input_signed = instr.conversion.is_input_signed;
172
173 if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
174 ASSERT(instr.conversion.src_size == Register::Size::Byte ||
175 instr.conversion.src_size == Register::Size::Short);
176 if (instr.conversion.src_size == Register::Size::Short) {
177 ASSERT(offset == 0 || offset == 2);
178 }
179 value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
180 std::move(value), Immediate(offset * 8));
181 }
182
183 value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
184 value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
185 value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
186 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
187
188 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
189
190 if (instr.conversion.dst_size == Register::Size::Short) {
191 value = Operation(OperationCode::HCastFloat, PRECISE, value);
192 }
193
194 SetRegister(bb, instr.gpr0, value);
195 break;
196 }
197 case OpCode::Id::F2F_R:
198 case OpCode::Id::F2F_C:
199 case OpCode::Id::F2F_IMM: {
200 UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
201 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
202 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
203 "Condition codes generation in F2F is not implemented");
204
205 Node value = [&]() {
206 switch (opcode->get().GetId()) {
207 case OpCode::Id::F2F_R:
208 return GetRegister(instr.gpr20);
209 case OpCode::Id::F2F_C:
210 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
211 case OpCode::Id::F2F_IMM:
212 return GetImmediate19(instr);
213 default:
214 UNREACHABLE();
215 return Immediate(0);
216 }
217 }();
218
219 if (instr.conversion.src_size == Register::Size::Short) {
220 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
221 std::move(value));
222 } else {
223 ASSERT(instr.conversion.float_src.selector == 0);
224 }
225
226 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
227
228 value = [&] {
229 if (instr.conversion.src_size != instr.conversion.dst_size) {
230 // Rounding operations only matter when the source and destination conversion size
231 // is the same.
232 return value;
233 }
234 switch (instr.conversion.f2f.GetRoundingMode()) {
235 case Tegra::Shader::F2fRoundingOp::None:
236 return value;
237 case Tegra::Shader::F2fRoundingOp::Round:
238 return Operation(OperationCode::FRoundEven, value);
239 case Tegra::Shader::F2fRoundingOp::Floor:
240 return Operation(OperationCode::FFloor, value);
241 case Tegra::Shader::F2fRoundingOp::Ceil:
242 return Operation(OperationCode::FCeil, value);
243 case Tegra::Shader::F2fRoundingOp::Trunc:
244 return Operation(OperationCode::FTrunc, value);
245 default:
246 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
247 instr.conversion.f2f.rounding.Value());
248 return value;
249 }
250 }();
251 value = GetSaturatedFloat(value, instr.alu.saturate_d);
252
253 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
254
255 if (instr.conversion.dst_size == Register::Size::Short) {
256 value = Operation(OperationCode::HCastFloat, PRECISE, value);
257 }
258
259 SetRegister(bb, instr.gpr0, value);
260 break;
261 }
262 case OpCode::Id::F2I_R:
263 case OpCode::Id::F2I_C:
264 case OpCode::Id::F2I_IMM: {
265 UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
266 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
267 "Condition codes generation in F2I is not implemented");
268 Node value = [&]() {
269 switch (opcode->get().GetId()) {
270 case OpCode::Id::F2I_R:
271 return GetRegister(instr.gpr20);
272 case OpCode::Id::F2I_C:
273 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
274 case OpCode::Id::F2I_IMM:
275 return GetImmediate19(instr);
276 default:
277 UNREACHABLE();
278 return Immediate(0);
279 }
280 }();
281
282 if (instr.conversion.src_size == Register::Size::Short) {
283 value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
284 std::move(value));
285 } else {
286 ASSERT(instr.conversion.float_src.selector == 0);
287 }
288
289 value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
290
291 value = [&]() {
292 switch (instr.conversion.f2i.rounding) {
293 case Tegra::Shader::F2iRoundingOp::RoundEven:
294 return Operation(OperationCode::FRoundEven, PRECISE, value);
295 case Tegra::Shader::F2iRoundingOp::Floor:
296 return Operation(OperationCode::FFloor, PRECISE, value);
297 case Tegra::Shader::F2iRoundingOp::Ceil:
298 return Operation(OperationCode::FCeil, PRECISE, value);
299 case Tegra::Shader::F2iRoundingOp::Trunc:
300 return Operation(OperationCode::FTrunc, PRECISE, value);
301 default:
302 UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
303 instr.conversion.f2i.rounding.Value());
304 return Immediate(0);
305 }
306 }();
307 const bool is_signed = instr.conversion.is_output_signed;
308 value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
309 value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
310
311 SetRegister(bb, instr.gpr0, value);
312 break;
313 }
314 default:
315 UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
316 }
317
318 return pc;
319}
320
321} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
deleted file mode 100644
index 5973588d6..000000000
--- a/src/video_core/shader/decode/ffma.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18 const auto opcode = OpCode::Decode(instr);
19
20 UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
21 if (instr.ffma.tab5980_0 != 1) {
22 LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
23 }
24 if (instr.ffma.tab5980_1 != 0) {
25 LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
26 }
27
28 const Node op_a = GetRegister(instr.gpr8);
29
30 auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> {
31 switch (opcode->get().GetId()) {
32 case OpCode::Id::FFMA_CR: {
33 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
34 GetRegister(instr.gpr39)};
35 }
36 case OpCode::Id::FFMA_RR:
37 return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
38 case OpCode::Id::FFMA_RC: {
39 return {GetRegister(instr.gpr39),
40 GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
41 }
42 case OpCode::Id::FFMA_IMM:
43 return {GetImmediate19(instr), GetRegister(instr.gpr39)};
44 default:
45 UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
46 return {Immediate(0), Immediate(0)};
47 }
48 }();
49
50 op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b);
51 op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c);
52
53 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
54 value = GetSaturatedFloat(value, instr.alu.saturate_d);
55
56 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
57 SetRegister(bb, instr.gpr0, value);
58
59 return pc;
60}
61
62} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
deleted file mode 100644
index 5614e8a0d..000000000
--- a/src/video_core/shader/decode/float_set.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
20 instr.fset.neg_a != 0);
21
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
33
34 // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
35 // condition is true, and to 0 otherwise.
36 const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
37
38 const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
39 const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
40
41 const Node predicate = Operation(combiner, first_pred, second_pred);
42
43 const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1);
44 const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0);
45 const Node value =
46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
47
48 if (instr.fset.bf) {
49 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
50 } else {
51 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
52 }
53 SetRegister(bb, instr.gpr0, value);
54
55 return pc;
56}
57
58} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
deleted file mode 100644
index 200c2c983..000000000
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
21 instr.fsetp.neg_a != 0);
22 Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return GetImmediate19(instr);
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31 op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
32
33 // We can't use the constant predicate as destination.
34 ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
35
36 const Node predicate =
37 GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
38 const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
39
40 const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42
43 // Set the primary predicate to the result of Predicate OP SecondPredicate
44 SetPredicate(bb, instr.fsetp.pred3, value);
45
46 if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
47 // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
48 // if enabled
49 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
50 const Node second_value = Operation(combiner, negated_pred, second_pred);
51 SetPredicate(bb, instr.fsetp.pred0, second_value);
52 }
53
54 return pc;
55}
56
57} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
deleted file mode 100644
index fa83108cd..000000000
--- a/src/video_core/shader/decode/half_set.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "common/logging/log.h"
10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/shader/node_helper.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using std::move;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PredCondition;
20
21u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr);
24
25 PredCondition cond{};
26 bool bf = false;
27 bool ftz = false;
28 bool neg_a = false;
29 bool abs_a = false;
30 bool neg_b = false;
31 bool abs_b = false;
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSET2_C:
34 case OpCode::Id::HSET2_IMM:
35 cond = instr.hsetp2.cbuf_and_imm.cond;
36 bf = instr.Bit(53);
37 ftz = instr.Bit(54);
38 neg_a = instr.Bit(43);
39 abs_a = instr.Bit(44);
40 neg_b = instr.Bit(56);
41 abs_b = instr.Bit(54);
42 break;
43 case OpCode::Id::HSET2_R:
44 cond = instr.hsetp2.reg.cond;
45 bf = instr.Bit(49);
46 ftz = instr.Bit(50);
47 neg_a = instr.Bit(43);
48 abs_a = instr.Bit(44);
49 neg_b = instr.Bit(31);
50 abs_b = instr.Bit(30);
51 break;
52 default:
53 UNREACHABLE();
54 }
55
56 Node op_b = [this, instr, opcode] {
57 switch (opcode->get().GetId()) {
58 case OpCode::Id::HSET2_C:
59 // Inform as unimplemented as this is not tested.
60 UNIMPLEMENTED_MSG("HSET2_C is not implemented");
61 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
62 case OpCode::Id::HSET2_R:
63 return GetRegister(instr.gpr20);
64 case OpCode::Id::HSET2_IMM:
65 return UnpackHalfImmediate(instr, true);
66 default:
67 UNREACHABLE();
68 return Node{};
69 }
70 }();
71
72 if (!ftz) {
73 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
74 }
75
76 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
77 op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
78
79 switch (opcode->get().GetId()) {
80 case OpCode::Id::HSET2_R:
81 op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
82 [[fallthrough]];
83 case OpCode::Id::HSET2_C:
84 op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
85 break;
86 default:
87 break;
88 }
89
90 Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
91
92 Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
93
94 const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
95
96 // HSET2 operates on each half float in the pack.
97 std::array<Node, 2> values;
98 for (u32 i = 0; i < 2; ++i) {
99 const u32 raw_value = bf ? 0x3c00 : 0xffff;
100 Node true_value = Immediate(raw_value << (i * 16));
101 Node false_value = Immediate(0);
102
103 Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
104 Node predicate = Operation(combiner, comparison, second_pred);
105 values[i] =
106 Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
107 }
108
109 Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
110 SetRegister(bb, instr.gpr0, move(value));
111
112 return pc;
113}
114
115} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
deleted file mode 100644
index 310655619..000000000
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17
18u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
19 const Instruction instr = {program_code[pc]};
20 const auto opcode = OpCode::Decode(instr);
21
22 if (instr.hsetp2.ftz != 0) {
23 LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
24 }
25
26 Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
27 op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
28
29 Tegra::Shader::PredCondition cond{};
30 bool h_and{};
31 Node op_b{};
32 switch (opcode->get().GetId()) {
33 case OpCode::Id::HSETP2_C:
34 cond = instr.hsetp2.cbuf_and_imm.cond;
35 h_and = instr.hsetp2.cbuf_and_imm.h_and;
36 op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
37 instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
38 // F32 is hardcoded in hardware
39 op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
40 break;
41 case OpCode::Id::HSETP2_IMM:
42 cond = instr.hsetp2.cbuf_and_imm.cond;
43 h_and = instr.hsetp2.cbuf_and_imm.h_and;
44 op_b = UnpackHalfImmediate(instr, true);
45 break;
46 case OpCode::Id::HSETP2_R:
47 cond = instr.hsetp2.reg.cond;
48 h_and = instr.hsetp2.reg.h_and;
49 op_b =
50 GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
51 instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
52 break;
53 default:
54 UNREACHABLE();
55 op_b = Immediate(0);
56 }
57
58 const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
59 const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
60
61 const auto Write = [&](u64 dest, Node src) {
62 SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
63 };
64
65 const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
66 const u64 first = instr.hsetp2.pred3;
67 const u64 second = instr.hsetp2.pred0;
68 if (h_and) {
69 Node joined = Operation(OperationCode::LogicalAnd2, comparison);
70 Write(first, joined);
71 Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
72 } else {
73 Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
74 Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
75 }
76
77 return pc;
78}
79
80} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp
deleted file mode 100644
index 5b44cb79c..000000000
--- a/src/video_core/shader/decode/hfma2.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <tuple>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using Tegra::Shader::HalfPrecision;
16using Tegra::Shader::HalfType;
17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode;
19
// Decodes an HFMA2 instruction: packed half-precision fused multiply-add
// gpr0 = merge(gpr0, saturate(op_a * op_b + op_c)).
u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Only the default precision mode is handled; other modes are asserted away.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
    } else {
        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
    }

    constexpr auto identity = HalfType::H0_H1;
    bool neg_b{}, neg_c{};
    // Select operands b and c, their pack types and the saturate flag depending
    // on the encoding variant. neg_b/neg_c are written through the captures.
    auto [saturate, type_b, op_b, type_c,
          op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::HFMA2_CR:
            // b is a constant buffer entry (read as F32), c is a register.
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, HalfType::F32,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_RC:
            // b is a register, c is a constant buffer entry (read as F32).
            neg_b = instr.hfma2.negate_b;
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39),
                    HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HFMA2_RR:
            // Both b and c come from registers; flags live in the rr sub-encoding.
            neg_b = instr.hfma2.rr.negate_b;
            neg_c = instr.hfma2.rr.negate_c;
            return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20),
                    instr.hfma2.rr.type_c, GetRegister(instr.gpr39)};
        case OpCode::Id::HFMA2_IMM_R:
            // b is a packed half immediate (no negate bit for b), c is a register.
            neg_c = instr.hfma2.negate_c;
            return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true),
                    instr.hfma2.type_reg39, GetRegister(instr.gpr39)};
        default:
            return {false, identity, Immediate(0), identity, Immediate(0)};
        }
    }();

    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);

    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
    value = GetSaturatedHalfFloat(value, saturate);
    // Merge with the previous destination so non-written halves are preserved.
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);

    SetRegister(bb, instr.gpr0, value);

    return pc;
}
72
73} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
deleted file mode 100644
index 5470e8cf4..000000000
--- a/src/video_core/shader/decode/image.cpp
+++ /dev/null
@@ -1,536 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/shader_ir.h"
16#include "video_core/textures/texture.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::PredCondition;
23using Tegra::Shader::StoreType;
24using Tegra::Texture::ComponentType;
25using Tegra::Texture::TextureFormat;
26using Tegra::Texture::TICEntry;
27
28namespace {
29
30ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
31 std::size_t component) {
32 const TextureFormat format{descriptor.format};
33 switch (format) {
34 case TextureFormat::R16G16B16A16:
35 case TextureFormat::R32G32B32A32:
36 case TextureFormat::R32G32B32:
37 case TextureFormat::R32G32:
38 case TextureFormat::R16G16:
39 case TextureFormat::R32:
40 case TextureFormat::R16:
41 case TextureFormat::R8:
42 case TextureFormat::R1:
43 if (component == 0) {
44 return descriptor.r_type;
45 }
46 if (component == 1) {
47 return descriptor.g_type;
48 }
49 if (component == 2) {
50 return descriptor.b_type;
51 }
52 if (component == 3) {
53 return descriptor.a_type;
54 }
55 break;
56 case TextureFormat::A8R8G8B8:
57 if (component == 0) {
58 return descriptor.a_type;
59 }
60 if (component == 1) {
61 return descriptor.r_type;
62 }
63 if (component == 2) {
64 return descriptor.g_type;
65 }
66 if (component == 3) {
67 return descriptor.b_type;
68 }
69 break;
70 case TextureFormat::A2B10G10R10:
71 case TextureFormat::A4B4G4R4:
72 case TextureFormat::A5B5G5R1:
73 case TextureFormat::A1B5G5R5:
74 if (component == 0) {
75 return descriptor.a_type;
76 }
77 if (component == 1) {
78 return descriptor.b_type;
79 }
80 if (component == 2) {
81 return descriptor.g_type;
82 }
83 if (component == 3) {
84 return descriptor.r_type;
85 }
86 break;
87 case TextureFormat::R32_B24G8:
88 if (component == 0) {
89 return descriptor.r_type;
90 }
91 if (component == 1) {
92 return descriptor.b_type;
93 }
94 if (component == 2) {
95 return descriptor.g_type;
96 }
97 break;
98 case TextureFormat::B5G6R5:
99 case TextureFormat::B6G5R5:
100 case TextureFormat::B10G11R11:
101 if (component == 0) {
102 return descriptor.b_type;
103 }
104 if (component == 1) {
105 return descriptor.g_type;
106 }
107 if (component == 2) {
108 return descriptor.r_type;
109 }
110 break;
111 case TextureFormat::R24G8:
112 case TextureFormat::R8G24:
113 case TextureFormat::R8G8:
114 case TextureFormat::G4R4:
115 if (component == 0) {
116 return descriptor.g_type;
117 }
118 if (component == 1) {
119 return descriptor.r_type;
120 }
121 break;
122 default:
123 break;
124 }
125 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
126 return ComponentType::FLOAT;
127}
128
// Returns true when the given component index (0 = R, 1 = G, 2 = B, 3 = A) is
// set in a 4-bit RGBA component mask.
//
// The previous implementation routed component_mask through a 16-entry lookup
// table, but that table was the identity map (mask[i] == i for every i in
// 0..15), so the function reduces to testing the bit directly. component_mask
// comes from a 4-bit field, so values above 0b1111 do not occur.
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    return std::bitset<4>{component_mask}.test(component);
}
139
// Returns the width in bits of a single component of the given texture format,
// or 0 when the component index does not exist for that format. Component
// indices follow the same swizzled order used by GetComponentType.
u32 GetComponentSize(TextureFormat format, std::size_t component) {
    switch (format) {
    case TextureFormat::R32G32B32A32:
        return 32;
    case TextureFormat::R16G16B16A16:
        return 16;
    case TextureFormat::R32G32B32:
        return component <= 2 ? 32 : 0;
    case TextureFormat::R32G32:
        return component <= 1 ? 32 : 0;
    case TextureFormat::R16G16:
        return component <= 1 ? 16 : 0;
    case TextureFormat::R32:
        return component == 0 ? 32 : 0;
    case TextureFormat::R16:
        return component == 0 ? 16 : 0;
    case TextureFormat::R8:
        return component == 0 ? 8 : 0;
    case TextureFormat::R1:
        return component == 0 ? 1 : 0;
    case TextureFormat::A8R8G8B8:
        return 8;
    case TextureFormat::A2B10G10R10:
        // 10 bits for R/G/B (components 1-3), 2 bits for alpha (component 0).
        return (component == 3 || component == 2 || component == 1) ? 10 : 2;
    case TextureFormat::A4B4G4R4:
        return 4;
    case TextureFormat::A5B5G5R1:
        return (component == 0 || component == 1 || component == 2) ? 5 : 1;
    case TextureFormat::A1B5G5R5:
        return (component == 1 || component == 2 || component == 3) ? 5 : 1;
    case TextureFormat::R32_B24G8:
        if (component == 0) {
            return 32;
        }
        if (component == 1) {
            return 24;
        }
        if (component == 2) {
            return 8;
        }
        return 0;
    case TextureFormat::B5G6R5:
        if (component == 0 || component == 2) {
            return 5;
        }
        if (component == 1) {
            return 6;
        }
        return 0;
    case TextureFormat::B6G5R5:
        if (component == 1 || component == 2) {
            return 5;
        }
        if (component == 0) {
            return 6;
        }
        return 0;
    case TextureFormat::B10G11R11:
        if (component == 1 || component == 2) {
            return 11;
        }
        if (component == 0) {
            return 10;
        }
        return 0;
    case TextureFormat::R24G8:
        if (component == 0) {
            return 8;
        }
        if (component == 1) {
            return 24;
        }
        return 0;
    case TextureFormat::R8G24:
        if (component == 0) {
            return 24;
        }
        if (component == 1) {
            return 8;
        }
        return 0;
    case TextureFormat::R8G8:
        return (component == 0 || component == 1) ? 8 : 0;
    case TextureFormat::G4R4:
        return (component == 0 || component == 1) ? 4 : 0;
    default:
        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
        return 0;
    }
}
230
231std::size_t GetImageComponentMask(TextureFormat format) {
232 constexpr u8 R = 0b0001;
233 constexpr u8 G = 0b0010;
234 constexpr u8 B = 0b0100;
235 constexpr u8 A = 0b1000;
236 switch (format) {
237 case TextureFormat::R32G32B32A32:
238 case TextureFormat::R16G16B16A16:
239 case TextureFormat::A8R8G8B8:
240 case TextureFormat::A2B10G10R10:
241 case TextureFormat::A4B4G4R4:
242 case TextureFormat::A5B5G5R1:
243 case TextureFormat::A1B5G5R5:
244 return std::size_t{R | G | B | A};
245 case TextureFormat::R32G32B32:
246 case TextureFormat::R32_B24G8:
247 case TextureFormat::B5G6R5:
248 case TextureFormat::B6G5R5:
249 case TextureFormat::B10G11R11:
250 return std::size_t{R | G | B};
251 case TextureFormat::R32G32:
252 case TextureFormat::R16G16:
253 case TextureFormat::R24G8:
254 case TextureFormat::R8G24:
255 case TextureFormat::R8G8:
256 case TextureFormat::G4R4:
257 return std::size_t{R | G};
258 case TextureFormat::R32:
259 case TextureFormat::R16:
260 case TextureFormat::R8:
261 case TextureFormat::R1:
262 return std::size_t{R};
263 default:
264 UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
265 return std::size_t{R | G | B | A};
266 }
267}
268
269std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
270 switch (image_type) {
271 case Tegra::Shader::ImageType::Texture1D:
272 case Tegra::Shader::ImageType::TextureBuffer:
273 return 1;
274 case Tegra::Shader::ImageType::Texture1DArray:
275 case Tegra::Shader::ImageType::Texture2D:
276 return 2;
277 case Tegra::Shader::ImageType::Texture2DArray:
278 case Tegra::Shader::ImageType::Texture3D:
279 return 3;
280 }
281 UNREACHABLE();
282 return 1;
283}
284} // Anonymous namespace
285
// Converts a raw component loaded from an image into the integer bit pattern
// described by its component type, returning the converted node and whether
// the resulting integer is signed.
std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
                                                  Node original_value) {
    switch (component_type) {
    case ComponentType::SNORM: {
        // range [-1.0, 1.0]
        // Scale by 2^(size-1) - 1 (the SNORM maximum) and truncate to size bits.
        auto cnv_value = Operation(OperationCode::FMul, original_value,
                                   Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f));
        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
    }
    case ComponentType::SINT:
    case ComponentType::UNORM: {
        bool is_signed = component_type == ComponentType::SINT;
        // range [0.0, 1.0]
        // UNORM scales by 2^size - 1; SINT shares this path with a signed cast.
        auto cnv_value = Operation(OperationCode::FMul, original_value,
                                   Immediate(static_cast<float>(1 << component_size) - 1.f));
        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
                is_signed};
    }
    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
        // Already an unsigned integer; pass through untouched.
        return {std::move(original_value), false};
    case ComponentType::FLOAT:
        // 16-bit floats need a half cast; 32-bit floats pass through unchanged.
        if (component_size == 16) {
            return {Operation(OperationCode::HCastFloat, original_value), true};
        } else {
            return {std::move(original_value), true};
        }
    default:
        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
        return {std::move(original_value), true};
    }
}
318
// Decodes surface instructions: SULD (image load), SUST (image store) and
// SUATOM (image atomic).
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Collects the coordinate registers (starting at gpr8) for the image type.
    const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
        std::vector<Node> coords;
        const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
        coords.reserve(num_coords);
        for (std::size_t i = 0; i < num_coords; ++i) {
            coords.push_back(GetRegister(instr.gpr8.Value() + i));
        }
        return coords;
    };

    switch (opcode->get().GetId()) {
    case OpCode::Id::SULD: {
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);

        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkRead();

        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
            // .P mode: load each enabled component into consecutive registers.
            u32 indexer = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.suldst.IsComponentEnabled(element)) {
                    continue;
                }
                MetaImage meta{image, {}, element};
                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                SetTemporary(bb, indexer++, std::move(value));
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
            // .D.BA mode: raw load; format components are re-packed into
            // 32-bit words matching the surface's in-memory layout.
            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);

            // Obtain the sampler descriptor to learn the surface's format.
            auto descriptor = [this, instr] {
                std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
                if (instr.suldst.is_immediate) {
                    sampler_descriptor =
                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
                } else {
                    // Bindless: track the handle back to its constant buffer slot.
                    const Node image_register = GetRegister(instr.gpr39);
                    const auto result = TrackCbuf(image_register, global_code,
                                                  static_cast<s64>(global_code.size()));
                    const auto buffer = std::get<1>(result);
                    const auto offset = std::get<2>(result);
                    sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
                }
                if (!sampler_descriptor) {
                    UNREACHABLE_MSG("Failed to obtain image descriptor");
                }
                return *sampler_descriptor;
            }();

            const auto comp_mask = GetImageComponentMask(descriptor.format);

            switch (instr.suldst.GetStoreDataLayout()) {
            case StoreType::Bits32:
            case StoreType::Bits64: {
                u32 indexer = 0;
                u32 shifted_counter = 0; // bits accumulated in the current word
                Node value = Immediate(0);
                for (u32 element = 0; element < 4; ++element) {
                    if (!IsComponentEnabled(comp_mask, element)) {
                        continue;
                    }
                    const auto component_type = GetComponentType(descriptor, element);
                    const auto component_size = GetComponentSize(descriptor.format, element);
                    MetaImage meta{image, {}, element};

                    auto [converted_value, is_signed] = GetComponentValue(
                        component_type, component_size,
                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));

                    // shift element to correct position
                    const auto shifted = shifted_counter;
                    if (shifted > 0) {
                        converted_value =
                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
                                            std::move(converted_value), Immediate(shifted));
                    }
                    shifted_counter += component_size;

                    // add value into result
                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));

                    // if we shifted enough for 1 byte -> we save it into temp
                    if (shifted_counter >= 32) {
                        SetTemporary(bb, indexer++, std::move(value));
                        // reset counter and value to prepare pack next byte
                        value = Immediate(0);
                        shifted_counter = 0;
                    }
                }
                for (u32 i = 0; i < indexer; ++i) {
                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
                }
                break;
            }
            default:
                UNREACHABLE();
                break;
            }
        }
        break;
    }
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA

        // Source values come from four consecutive registers starting at gpr0.
        std::vector<Node> values;
        constexpr std::size_t hardcoded_size{4};
        for (std::size_t i = 0; i < hardcoded_size; ++i) {
            values.push_back(GetRegister(instr.gpr0.Value() + i));
        }

        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkWrite();

        MetaImage meta{image, std::move(values)};
        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
        break;
    }
    case OpCode::Id::SUATOM: {
        UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);

        // Map the 32-bit integer atomic operation; other widths/ops fall back
        // to AtomicImageAdd after logging.
        const OperationCode operation_code = [instr] {
            switch (instr.suatom_d.operation_type) {
            case Tegra::Shader::ImageAtomicOperationType::S32:
            case Tegra::Shader::ImageAtomicOperationType::U32:
                switch (instr.suatom_d.operation) {
                case Tegra::Shader::ImageAtomicOperation::Add:
                    return OperationCode::AtomicImageAdd;
                case Tegra::Shader::ImageAtomicOperation::And:
                    return OperationCode::AtomicImageAnd;
                case Tegra::Shader::ImageAtomicOperation::Or:
                    return OperationCode::AtomicImageOr;
                case Tegra::Shader::ImageAtomicOperation::Xor:
                    return OperationCode::AtomicImageXor;
                case Tegra::Shader::ImageAtomicOperation::Exch:
                    return OperationCode::AtomicImageExchange;
                default:
                    break;
                }
                break;
            default:
                break;
            }
            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
                              static_cast<u64>(instr.suatom_d.operation.Value()),
                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
            return OperationCode::AtomicImageAdd;
        }();

        Node value = GetRegister(instr.gpr0);

        const auto type = instr.suatom_d.image_type;
        auto& image = GetImage(instr.image, type);
        image.MarkAtomic();

        // gpr0 receives the previous value from the atomic operation.
        MetaImage meta{image, {std::move(value)}};
        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }

    return pc;
}
499
500ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
501 const auto offset = static_cast<u32>(image.index.Value());
502
503 const auto it =
504 std::find_if(std::begin(used_images), std::end(used_images),
505 [offset](const ImageEntry& entry) { return entry.offset == offset; });
506 if (it != std::end(used_images)) {
507 ASSERT(!it->is_bindless && it->type == type);
508 return *it;
509 }
510
511 const auto next_index = static_cast<u32>(used_images.size());
512 return used_images.emplace_back(next_index, offset, type);
513}
514
515ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
516 const Node image_register = GetRegister(reg);
517 const auto result =
518 TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
519
520 const auto buffer = std::get<1>(result);
521 const auto offset = std::get<2>(result);
522
523 const auto it = std::find_if(std::begin(used_images), std::end(used_images),
524 [buffer, offset](const ImageEntry& entry) {
525 return entry.buffer == buffer && entry.offset == offset;
526 });
527 if (it != std::end(used_images)) {
528 ASSERT(it->is_bindless && it->type == type);
529 return *it;
530 }
531
532 const auto next_index = static_cast<u32>(used_images.size());
533 return used_images.emplace_back(next_index, offset, buffer, type);
534}
535
536} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
deleted file mode 100644
index 59809bcd8..000000000
--- a/src/video_core/shader/decode/integer_set.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "video_core/engines/shader_bytecode.h"
7#include "video_core/shader/node_helper.h"
8#include "video_core/shader/shader_ir.h"
9
10namespace VideoCommon::Shader {
11
12using Tegra::Shader::Instruction;
13using Tegra::Shader::OpCode;
14
15u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]};
17
18 const Node op_a = GetRegister(instr.gpr8);
19 const Node op_b = [&]() {
20 if (instr.is_b_imm) {
21 return Immediate(instr.alu.GetSignedImm20_20());
22 } else if (instr.is_b_gpr) {
23 return GetRegister(instr.gpr20);
24 } else {
25 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
26 }
27 }();
28
29 // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the condition
30 // is true, and to 0 otherwise.
31 const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
32 const Node first_pred =
33 GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b);
34
35 const OperationCode combiner = GetPredicateCombiner(instr.iset.op);
36
37 const Node predicate = Operation(combiner, first_pred, second_pred);
38
39 const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1);
40 const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0);
41 const Node value =
42 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
43
44 SetRegister(bb, instr.gpr0, value);
45
46 return pc;
47}
48
49} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
deleted file mode 100644
index 25e48fef8..000000000
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19
20 const Node op_a = GetRegister(instr.gpr8);
21
22 const Node op_b = [&]() {
23 if (instr.is_b_imm) {
24 return Immediate(instr.alu.GetSignedImm20_20());
25 } else if (instr.is_b_gpr) {
26 return GetRegister(instr.gpr20);
27 } else {
28 return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
29 }
30 }();
31
32 // We can't use the constant predicate as destination.
33 ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
34
35 const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
36 const Node predicate =
37 GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b);
38
39 // Set the primary predicate to the result of Predicate OP SecondPredicate
40 const OperationCode combiner = GetPredicateCombiner(instr.isetp.op);
41 const Node value = Operation(combiner, predicate, second_pred);
42 SetPredicate(bb, instr.isetp.pred3, value);
43
44 if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
45 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled
46 const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate);
47 SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred));
48 }
49
50 return pc;
51}
52
53} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
deleted file mode 100644
index 7728f600e..000000000
--- a/src/video_core/shader/decode/memory.cpp
+++ /dev/null
@@ -1,493 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <utility>
7#include <vector>
8
9#include <fmt/format.h>
10
11#include "common/alignment.h"
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "common/logging/log.h"
15#include "video_core/engines/shader_bytecode.h"
16#include "video_core/shader/node_helper.h"
17#include "video_core/shader/shader_ir.h"
18
19namespace VideoCommon::Shader {
20
21using std::move;
22using Tegra::Shader::AtomicOp;
23using Tegra::Shader::AtomicType;
24using Tegra::Shader::Attribute;
25using Tegra::Shader::GlobalAtomicType;
26using Tegra::Shader::Instruction;
27using Tegra::Shader::OpCode;
28using Tegra::Shader::Register;
29using Tegra::Shader::StoreType;
30
31namespace {
32
// Maps a hardware atomic operation to the equivalent IR integer atomic
// operation code. Unhandled operations log and fall back to AtomicIAdd.
OperationCode GetAtomOperation(AtomicOp op) {
    switch (op) {
    case AtomicOp::Add:
        return OperationCode::AtomicIAdd;
    case AtomicOp::Min:
        return OperationCode::AtomicIMin;
    case AtomicOp::Max:
        return OperationCode::AtomicIMax;
    case AtomicOp::And:
        return OperationCode::AtomicIAnd;
    case AtomicOp::Or:
        return OperationCode::AtomicIOr;
    case AtomicOp::Xor:
        return OperationCode::AtomicIXor;
    case AtomicOp::Exch:
        return OperationCode::AtomicIExchange;
    default:
        UNIMPLEMENTED_MSG("op={}", op);
        return OperationCode::AtomicIAdd;
    }
}
54
55bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
56 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
57 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
58}
59
// Returns the address bits that select the sub-word element for a narrow
// access: byte accesses use the low two bits, 16-bit accesses only bit 1.
u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
        return 0b11;
    case Tegra::Shader::UniformType::UnsignedShort:
        return 0b10;
    default:
        UNREACHABLE();
        return 0;
    }
}
71
// Returns the access size in bits for a memory access type. Unknown types log
// and fall back to a 32-bit access.
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
        return 8;
    case Tegra::Shader::UniformType::UnsignedShort:
        return 16;
    case Tegra::Shader::UniformType::Single:
        return 32;
    case Tegra::Shader::UniformType::Double:
        return 64;
    case Tegra::Shader::UniformType::Quad:
    case Tegra::Shader::UniformType::UnsignedQuad:
        return 128;
    default:
        UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
        return 32;
    }
}
90
91Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
92 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
93 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
94 return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
95}
96
97Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
98 Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
99 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
100 return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
101 Immediate(size));
102}
103
104Node Sign16Extend(Node value) {
105 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
106 Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
107 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
108 return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
109}
110
111} // Anonymous namespace
112
113u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
114 const Instruction instr = {program_code[pc]};
115 const auto opcode = OpCode::Decode(instr);
116
117 switch (opcode->get().GetId()) {
118 case OpCode::Id::LD_A: {
119 // Note: Shouldn't this be interp mode flat? As in no interpolation made.
120 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
121 "Indirect attribute loads are not supported");
122 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
123 "Unaligned attribute loads are not supported");
124 UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
125 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
126 "Non-32 bits PHYS reads are not implemented");
127
128 const Node buffer{GetRegister(instr.gpr39)};
129
130 u64 next_element = instr.attribute.fmt20.element;
131 auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
132
133 const auto LoadNextElement = [&](u32 reg_offset) {
134 const Node attribute{instr.attribute.fmt20.IsPhysical()
135 ? GetPhysicalInputAttribute(instr.gpr8, buffer)
136 : GetInputAttribute(static_cast<Attribute::Index>(next_index),
137 next_element, buffer)};
138
139 SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
140
141 // Load the next attribute element into the following register. If the element
142 // to load goes beyond the vec4 size, load the first element of the next
143 // attribute.
144 next_element = (next_element + 1) % 4;
145 next_index = next_index + (next_element == 0 ? 1 : 0);
146 };
147
148 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
149 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
150 LoadNextElement(reg_offset);
151 }
152 break;
153 }
154 case OpCode::Id::LD_C: {
155 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
156
157 Node index = GetRegister(instr.gpr8);
158
159 const Node op_a =
160 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
161
162 switch (instr.ld_c.type.Value()) {
163 case Tegra::Shader::UniformType::Single:
164 SetRegister(bb, instr.gpr0, op_a);
165 break;
166
167 case Tegra::Shader::UniformType::Double: {
168 const Node op_b =
169 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
170
171 SetTemporary(bb, 0, op_a);
172 SetTemporary(bb, 1, op_b);
173 SetRegister(bb, instr.gpr0, GetTemporary(0));
174 SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
175 break;
176 }
177 default:
178 UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
179 }
180 break;
181 }
182 case OpCode::Id::LD_L:
183 LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
184 [[fallthrough]];
185 case OpCode::Id::LD_S: {
186 const auto GetAddress = [&](s32 offset) {
187 ASSERT(offset % 4 == 0);
188 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
189 return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
190 };
191 const auto GetMemory = [&](s32 offset) {
192 return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
193 : GetLocalMemory(GetAddress(offset));
194 };
195
196 switch (instr.ldst_sl.type.Value()) {
197 case StoreType::Signed16:
198 SetRegister(bb, instr.gpr0,
199 Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
200 break;
201 case StoreType::Bits32:
202 case StoreType::Bits64:
203 case StoreType::Bits128: {
204 const u32 count = [&] {
205 switch (instr.ldst_sl.type.Value()) {
206 case StoreType::Bits32:
207 return 1;
208 case StoreType::Bits64:
209 return 2;
210 case StoreType::Bits128:
211 return 4;
212 default:
213 UNREACHABLE();
214 return 0;
215 }
216 }();
217 for (u32 i = 0; i < count; ++i) {
218 SetTemporary(bb, i, GetMemory(i * 4));
219 }
220 for (u32 i = 0; i < count; ++i) {
221 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
222 }
223 break;
224 }
225 default:
226 UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
227 instr.ldst_sl.type.Value());
228 }
229 break;
230 }
231 case OpCode::Id::LD:
232 case OpCode::Id::LDG: {
233 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
234 switch (opcode->get().GetId()) {
235 case OpCode::Id::LD:
236 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
237 return instr.generic.type;
238 case OpCode::Id::LDG:
239 return instr.ldg.type;
240 default:
241 UNREACHABLE();
242 return {};
243 }
244 }();
245
246 const auto [real_address_base, base_address, descriptor] =
247 TrackGlobalMemory(bb, instr, true, false);
248
249 const u32 size = GetMemorySize(type);
250 const u32 count = Common::AlignUp(size, 32) / 32;
251 if (!real_address_base || !base_address) {
252 // Tracking failed, load zeroes.
253 for (u32 i = 0; i < count; ++i) {
254 SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
255 }
256 break;
257 }
258
259 for (u32 i = 0; i < count; ++i) {
260 const Node it_offset = Immediate(i * 4);
261 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
262 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
263
264 // To handle unaligned loads get the bytes used to dereference global memory and extract
265 // those bytes from the loaded u32.
266 if (IsUnaligned(type)) {
267 gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
268 }
269
270 SetTemporary(bb, i, gmem);
271 }
272
273 for (u32 i = 0; i < count; ++i) {
274 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
275 }
276 break;
277 }
278 case OpCode::Id::ST_A: {
279 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
280 "Indirect attribute loads are not supported");
281 UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
282 "Unaligned attribute loads are not supported");
283
284 u64 element = instr.attribute.fmt20.element;
285 auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
286
287 const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
288 for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
289 Node dest;
290 if (instr.attribute.fmt20.patch) {
291 const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
292 dest = MakeNode<PatchNode>(offset);
293 } else {
294 dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
295 GetRegister(instr.gpr39));
296 }
297 const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
298
299 bb.push_back(Operation(OperationCode::Assign, dest, src));
300
301 // Load the next attribute element into the following register. If the element to load
302 // goes beyond the vec4 size, load the first element of the next attribute.
303 element = (element + 1) % 4;
304 index = index + (element == 0 ? 1 : 0);
305 }
306 break;
307 }
308 case OpCode::Id::ST_L:
309 LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
310 [[fallthrough]];
311 case OpCode::Id::ST_S: {
312 const auto GetAddress = [&](s32 offset) {
313 ASSERT(offset % 4 == 0);
314 const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
315 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
316 };
317
318 const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
319 const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
320 const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
321
322 switch (instr.ldst_sl.type.Value()) {
323 case StoreType::Bits128:
324 (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
325 (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
326 [[fallthrough]];
327 case StoreType::Bits64:
328 (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
329 [[fallthrough]];
330 case StoreType::Bits32:
331 (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
332 break;
333 case StoreType::Unsigned16:
334 case StoreType::Signed16: {
335 Node address = GetAddress(0);
336 Node memory = (this->*get_memory)(address);
337 (this->*set_memory)(
338 bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
339 break;
340 }
341 default:
342 UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
343 instr.ldst_sl.type.Value());
344 }
345 break;
346 }
347 case OpCode::Id::ST:
348 case OpCode::Id::STG: {
349 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
350 switch (opcode->get().GetId()) {
351 case OpCode::Id::ST:
352 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
353 return instr.generic.type;
354 case OpCode::Id::STG:
355 return instr.stg.type;
356 default:
357 UNREACHABLE();
358 return {};
359 }
360 }();
361
362 // For unaligned reads we have to read memory too.
363 const bool is_read = IsUnaligned(type);
364 const auto [real_address_base, base_address, descriptor] =
365 TrackGlobalMemory(bb, instr, is_read, true);
366 if (!real_address_base || !base_address) {
367 // Tracking failed, skip the store.
368 break;
369 }
370
371 const u32 size = GetMemorySize(type);
372 const u32 count = Common::AlignUp(size, 32) / 32;
373 for (u32 i = 0; i < count; ++i) {
374 const Node it_offset = Immediate(i * 4);
375 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
376 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
377 Node value = GetRegister(instr.gpr0.Value() + i);
378
379 if (IsUnaligned(type)) {
380 const u32 mask = GetUnalignedMask(type);
381 value = InsertUnaligned(gmem, move(value), real_address, mask, size);
382 }
383
384 bb.push_back(Operation(OperationCode::Assign, gmem, value));
385 }
386 break;
387 }
388 case OpCode::Id::RED: {
389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
390 instr.red.type.Value());
391 const auto [real_address, base_address, descriptor] =
392 TrackGlobalMemory(bb, instr, true, true);
393 if (!real_address || !base_address) {
394 // Tracking failed, skip atomic.
395 break;
396 }
397 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
398 Node value = GetRegister(instr.gpr0);
399 bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
400 break;
401 }
402 case OpCode::Id::ATOM: {
403 UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
404 instr.atom.operation == AtomicOp::Dec ||
405 instr.atom.operation == AtomicOp::SafeAdd,
406 "operation={}", instr.atom.operation.Value());
407 UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
408 instr.atom.type == GlobalAtomicType::U64 ||
409 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
410 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
411 "type={}", instr.atom.type.Value());
412
413 const auto [real_address, base_address, descriptor] =
414 TrackGlobalMemory(bb, instr, true, true);
415 if (!real_address || !base_address) {
416 // Tracking failed, skip atomic.
417 break;
418 }
419
420 const bool is_signed =
421 instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
422 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
423 SetRegister(bb, instr.gpr0,
424 SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
425 GetRegister(instr.gpr20)));
426 break;
427 }
428 case OpCode::Id::ATOMS: {
429 UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
430 instr.atoms.operation == AtomicOp::Dec,
431 "operation={}", instr.atoms.operation.Value());
432 UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
433 instr.atoms.type == AtomicType::U64,
434 "type={}", instr.atoms.type.Value());
435 const bool is_signed =
436 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
437 const s32 offset = instr.atoms.GetImmediateOffset();
438 Node address = GetRegister(instr.gpr8);
439 address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
440 SetRegister(bb, instr.gpr0,
441 SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
442 GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
443 break;
444 }
445 case OpCode::Id::AL2P: {
446 // Ignore al2p.direction since we don't care about it.
447
448 // Calculate emulation fake physical address.
449 const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
450 const Node reg{GetRegister(instr.gpr8)};
451 const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
452
453 // Set the fake address to target register.
454 SetRegister(bb, instr.gpr0, fake_address);
455
456 // Signal the shader IR to declare all possible attributes and varyings
457 uses_physical_attributes = true;
458 break;
459 }
460 default:
461 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
462 }
463
464 return pc;
465}
466
467std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
468 Instruction instr,
469 bool is_read, bool is_write) {
470 const auto addr_register{GetRegister(instr.gmem.gpr)};
471 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
472
473 const auto [base_address, index, offset] =
474 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
475 ASSERT_OR_EXECUTE_MSG(
476 base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
477 "Global memory tracking failed");
478
479 bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
480
481 const GlobalMemoryBase descriptor{index, offset};
482 const auto& entry = used_global_memory.try_emplace(descriptor).first;
483 auto& usage = entry->second;
484 usage.is_written |= is_write;
485 usage.is_read |= is_read;
486
487 const auto real_address =
488 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
489
490 return {real_address, base_address, descriptor};
491}
492
493} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
deleted file mode 100644
index 5f88537bc..000000000
--- a/src/video_core/shader/decode/other.cpp
+++ /dev/null
@@ -1,322 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "common/logging/log.h"
8#include "video_core/engines/shader_bytecode.h"
9#include "video_core/shader/node_helper.h"
10#include "video_core/shader/shader_ir.h"
11
12namespace VideoCommon::Shader {
13
14using std::move;
15using Tegra::Shader::ConditionCode;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::IpaInterpMode;
18using Tegra::Shader::OpCode;
19using Tegra::Shader::PixelImap;
20using Tegra::Shader::Register;
21using Tegra::Shader::SystemVariable;
22
23using Index = Tegra::Shader::Attribute::Index;
24
25u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
26 const Instruction instr = {program_code[pc]};
27 const auto opcode = OpCode::Decode(instr);
28
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::NOP: {
31 UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
32 UNIMPLEMENTED_IF(instr.nop.trigger != 0);
33 // With the previous preconditions, this instruction is a no-operation.
34 break;
35 }
36 case OpCode::Id::EXIT: {
37 const ConditionCode cc = instr.flow_condition_code;
38 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
39
40 switch (instr.flow.cond) {
41 case Tegra::Shader::FlowCondition::Always:
42 bb.push_back(Operation(OperationCode::Exit));
43 if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
44 // If this is an unconditional exit then just end processing here,
45 // otherwise we have to account for the possibility of the condition
46 // not being met, so continue processing the next instruction.
47 pc = MAX_PROGRAM_LENGTH - 1;
48 }
49 break;
50
51 case Tegra::Shader::FlowCondition::Fcsm_Tr:
52 // TODO(bunnei): What is this used for? If we assume this conditon is not
53 // satisifed, dual vertex shaders in Farming Simulator make more sense
54 UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
55 break;
56
57 default:
58 UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
59 }
60 break;
61 }
62 case OpCode::Id::KIL: {
63 UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
64
65 const ConditionCode cc = instr.flow_condition_code;
66 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
67
68 bb.push_back(Operation(OperationCode::Discard));
69 break;
70 }
71 case OpCode::Id::S2R: {
72 const Node value = [this, instr] {
73 switch (instr.sys20) {
74 case SystemVariable::LaneId:
75 return Operation(OperationCode::ThreadId);
76 case SystemVariable::InvocationId:
77 return Operation(OperationCode::InvocationId);
78 case SystemVariable::Ydirection:
79 uses_y_negate = true;
80 return Operation(OperationCode::YNegate);
81 case SystemVariable::InvocationInfo:
82 LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
83 return Immediate(0x00ff'0000U);
84 case SystemVariable::WscaleFactorXY:
85 UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
86 return Immediate(0U);
87 case SystemVariable::WscaleFactorZ:
88 UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
89 return Immediate(0U);
90 case SystemVariable::Tid: {
91 Node val = Immediate(0);
92 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
93 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
94 val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
95 return val;
96 }
97 case SystemVariable::TidX:
98 return Operation(OperationCode::LocalInvocationIdX);
99 case SystemVariable::TidY:
100 return Operation(OperationCode::LocalInvocationIdY);
101 case SystemVariable::TidZ:
102 return Operation(OperationCode::LocalInvocationIdZ);
103 case SystemVariable::CtaIdX:
104 return Operation(OperationCode::WorkGroupIdX);
105 case SystemVariable::CtaIdY:
106 return Operation(OperationCode::WorkGroupIdY);
107 case SystemVariable::CtaIdZ:
108 return Operation(OperationCode::WorkGroupIdZ);
109 case SystemVariable::EqMask:
110 case SystemVariable::LtMask:
111 case SystemVariable::LeMask:
112 case SystemVariable::GtMask:
113 case SystemVariable::GeMask:
114 uses_warps = true;
115 switch (instr.sys20) {
116 case SystemVariable::EqMask:
117 return Operation(OperationCode::ThreadEqMask);
118 case SystemVariable::LtMask:
119 return Operation(OperationCode::ThreadLtMask);
120 case SystemVariable::LeMask:
121 return Operation(OperationCode::ThreadLeMask);
122 case SystemVariable::GtMask:
123 return Operation(OperationCode::ThreadGtMask);
124 case SystemVariable::GeMask:
125 return Operation(OperationCode::ThreadGeMask);
126 default:
127 UNREACHABLE();
128 return Immediate(0u);
129 }
130 default:
131 UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
132 return Immediate(0u);
133 }
134 }();
135 SetRegister(bb, instr.gpr0, value);
136
137 break;
138 }
139 case OpCode::Id::BRA: {
140 Node branch;
141 if (instr.bra.constant_buffer == 0) {
142 const u32 target = pc + instr.bra.GetBranchTarget();
143 branch = Operation(OperationCode::Branch, Immediate(target));
144 } else {
145 const u32 target = pc + 1;
146 const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
147 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
148 PRECISE, op_a, Immediate(3));
149 const Node operand =
150 Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
151 branch = Operation(OperationCode::BranchIndirect, operand);
152 }
153
154 const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
155 if (cc != Tegra::Shader::ConditionCode::T) {
156 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
157 } else {
158 bb.push_back(branch);
159 }
160 break;
161 }
162 case OpCode::Id::BRX: {
163 Node operand;
164 if (instr.brx.constant_buffer != 0) {
165 const s32 target = pc + 1;
166 const Node index = GetRegister(instr.gpr8);
167 const Node op_a =
168 GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
169 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
170 PRECISE, op_a, Immediate(3));
171 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
172 } else {
173 const s32 target = pc + instr.brx.GetBranchExtend();
174 const Node op_a = GetRegister(instr.gpr8);
175 const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
176 PRECISE, op_a, Immediate(3));
177 operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
178 }
179 const Node branch = Operation(OperationCode::BranchIndirect, operand);
180
181 const ConditionCode cc = instr.flow_condition_code;
182 if (cc != ConditionCode::T) {
183 bb.push_back(Conditional(GetConditionCode(cc), {branch}));
184 } else {
185 bb.push_back(branch);
186 }
187 break;
188 }
189 case OpCode::Id::SSY: {
190 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
191 "Constant buffer flow is not supported");
192
193 if (disable_flow_stack) {
194 break;
195 }
196
197 // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
198 const u32 target = pc + instr.bra.GetBranchTarget();
199 bb.push_back(
200 Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
201 break;
202 }
203 case OpCode::Id::PBK: {
204 UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
205 "Constant buffer PBK is not supported");
206
207 if (disable_flow_stack) {
208 break;
209 }
210
211 // PBK pushes to a stack the address where BRK will jump to.
212 const u32 target = pc + instr.bra.GetBranchTarget();
213 bb.push_back(
214 Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
215 break;
216 }
217 case OpCode::Id::SYNC: {
218 const ConditionCode cc = instr.flow_condition_code;
219 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
220
221 if (decompiled) {
222 break;
223 }
224
225 // The SYNC opcode jumps to the address previously set by the SSY opcode
226 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
227 break;
228 }
229 case OpCode::Id::BRK: {
230 const ConditionCode cc = instr.flow_condition_code;
231 UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
232 if (decompiled) {
233 break;
234 }
235
236 // The BRK opcode jumps to the address previously set by the PBK opcode
237 bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
238 break;
239 }
240 case OpCode::Id::IPA: {
241 const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
242 const auto attribute = instr.attribute.fmt28;
243 const Index index = attribute.index;
244
245 Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
246 : GetInputAttribute(index, attribute.element);
247
248 // Code taken from Ryujinx.
249 if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
250 const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
251 if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
252 Node position_w = GetInputAttribute(Index::Position, 3);
253 value = Operation(OperationCode::FMul, move(value), move(position_w));
254 }
255 }
256
257 if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
258 value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
259 }
260
261 value = GetSaturatedFloat(move(value), instr.ipa.saturate);
262
263 SetRegister(bb, instr.gpr0, move(value));
264 break;
265 }
266 case OpCode::Id::OUT_R: {
267 UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
268 "Stream buffer is not supported");
269
270 if (instr.out.emit) {
271 // gpr0 is used to store the next address and gpr8 contains the address to emit.
272 // Hardware uses pointers here but we just ignore it
273 bb.push_back(Operation(OperationCode::EmitVertex));
274 SetRegister(bb, instr.gpr0, Immediate(0));
275 }
276 if (instr.out.cut) {
277 bb.push_back(Operation(OperationCode::EndPrimitive));
278 }
279 break;
280 }
281 case OpCode::Id::ISBERD: {
282 UNIMPLEMENTED_IF(instr.isberd.o != 0);
283 UNIMPLEMENTED_IF(instr.isberd.skew != 0);
284 UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
285 UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
286 LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
287 SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
288 break;
289 }
290 case OpCode::Id::BAR: {
291 UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
292 bb.push_back(Operation(OperationCode::Barrier));
293 break;
294 }
295 case OpCode::Id::MEMBAR: {
296 UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
297 const OperationCode type = [instr] {
298 switch (instr.membar.type) {
299 case Tegra::Shader::MembarType::CTA:
300 return OperationCode::MemoryBarrierGroup;
301 case Tegra::Shader::MembarType::GL:
302 return OperationCode::MemoryBarrierGlobal;
303 default:
304 UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
305 return OperationCode::MemoryBarrierGlobal;
306 }
307 }();
308 bb.push_back(Operation(type));
309 break;
310 }
311 case OpCode::Id::DEPBAR: {
312 LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
313 break;
314 }
315 default:
316 UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
317 }
318
319 return pc;
320}
321
322} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp
deleted file mode 100644
index 9290d22eb..000000000
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16
17u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
18 const Instruction instr = {program_code[pc]};
19 const auto opcode = OpCode::Decode(instr);
20
21 switch (opcode->get().GetId()) {
22 case OpCode::Id::PSETP: {
23 const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
24 const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
25
26 // We can't use the constant predicate as destination.
27 ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
28
29 const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
30
31 const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
32 const Node predicate = Operation(combiner, op_a, op_b);
33
34 // Set the primary predicate to the result of Predicate OP SecondPredicate
35 SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred));
36
37 if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
38 // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if
39 // enabled
40 SetPredicate(bb, instr.psetp.pred0,
41 Operation(combiner, Operation(OperationCode::LogicalNegate, predicate),
42 second_pred));
43 }
44 break;
45 }
46 case OpCode::Id::CSETP: {
47 const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
48 const Node condition_code = GetConditionCode(instr.csetp.cc);
49
50 const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
51
52 if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) {
53 SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred));
54 }
55 if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
56 const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code);
57 SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred));
58 }
59 break;
60 }
61 default:
62 UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
63 }
64
65 return pc;
66}
67
68} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
deleted file mode 100644
index 84dbc50fe..000000000
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15
16u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
17 const Instruction instr = {program_code[pc]};
18
19 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
20 "Condition codes generation in PSET is not implemented");
21
22 const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
23 const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
24 const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b);
25
26 const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
27
28 const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
29 const Node predicate = Operation(combiner, first_pred, second_pred);
30
31 const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff);
32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
33 const Node value =
34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
35
36 if (instr.pset.bf) {
37 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
38 } else {
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 }
41 SetRegister(bb, instr.gpr0, value);
42
43 return pc;
44}
45
46} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp
deleted file mode 100644
index 6116c31aa..000000000
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/assert.h"
8#include "common/common_types.h"
9#include "video_core/engines/shader_bytecode.h"
10#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h"
12
13namespace VideoCommon::Shader {
14
15using std::move;
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18
19namespace {
20constexpr u64 NUM_CONDITION_CODES = 4;
21constexpr u64 NUM_PREDICATES = 7;
22} // namespace
23
24u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
25 const Instruction instr = {program_code[pc]};
26 const auto opcode = OpCode::Decode(instr);
27
28 Node apply_mask = [this, opcode, instr] {
29 switch (opcode->get().GetId()) {
30 case OpCode::Id::R2P_IMM:
31 case OpCode::Id::P2R_IMM:
32 return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
33 default:
34 UNREACHABLE();
35 return Immediate(0);
36 }
37 }();
38
39 const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
40
41 const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
42 const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
43 const auto get_entry = [this, cc](u64 entry) {
44 return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
45 };
46
47 switch (opcode->get().GetId()) {
48 case OpCode::Id::R2P_IMM: {
49 Node mask = GetRegister(instr.gpr8);
50
51 for (u64 entry = 0; entry < num_entries; ++entry) {
52 const u32 shift = static_cast<u32>(entry);
53
54 Node apply = BitfieldExtract(apply_mask, shift, 1);
55 Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
56
57 Node compare = BitfieldExtract(mask, offset + shift, 1);
58 Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
59
60 Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
61 bb.push_back(Conditional(condition, {move(code)}));
62 }
63 break;
64 }
65 case OpCode::Id::P2R_IMM: {
66 Node value = Immediate(0);
67 for (u64 entry = 0; entry < num_entries; ++entry) {
68 Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
69 Immediate(0));
70 value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
71 }
72 value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
73 value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
74
75 SetRegister(bb, instr.gpr0, move(value));
76 break;
77 }
78 default:
79 UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName());
80 break;
81 }
82
83 return pc;
84}
85
86} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
deleted file mode 100644
index a53819c15..000000000
--- a/src/video_core/shader/decode/shift.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::ShfType;
17using Tegra::Shader::ShfXmode;
18
19namespace {
20
21Node IsFull(Node shift) {
22 return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32));
23}
24
25Node Shift(OperationCode opcode, Node value, Node shift) {
26 Node shifted = Operation(opcode, move(value), shift);
27 return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted));
28}
29
30Node ClampShift(Node shift, s32 size = 32) {
31 shift = Operation(OperationCode::IMax, move(shift), Immediate(0));
32 return Operation(OperationCode::IMin, move(shift), Immediate(size));
33}
34
35Node WrapShift(Node shift, s32 size = 32) {
36 return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1));
37}
38
// Emits the IR for the funnel shift right used by SHF_RIGHT. 'high:low' form a
// 64-bit value and the returned node is the shifted low word. 'low_shift' is
// the caller-precomputed (32 - shift) used to bring in the bits that cross the
// 32-bit boundary.
Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
    // These values are used when the shift value is less than 32
    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
    Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
    Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));

    if (type == ShfType::Bits32) {
        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
        return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
    }

    // And these when it's larger than or 32
    // S64 keeps the sign with an arithmetic shift; U64 uses a logical shift.
    const bool is_signed = type == ShfType::S64;
    const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
    Node greater = Shift(opcode, high, move(reduced));

    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));

    // Select between the <32 path, the >=32 path, and the untouched input for
    // a zero shift.
    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
    return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
}
62
// Emits the IR for the funnel shift left used by SHF_LEFT. 'high:low' form a
// 64-bit value and the returned node is the shifted high word. 'low_shift' is
// the caller-precomputed (32 - shift) used to bring in the bits that cross the
// 32-bit boundary.
Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
    // These values are used when the shift value is less than 32
    Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift);
    Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
    Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));

    if (type == ShfType::Bits32) {
        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
        return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
    }

    // And these when it's larger than or 32
    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
    Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));

    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));

    // Select between the <32 path, the >=32 path, and the untouched input for
    // a zero shift.
    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
    return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
}
84
85} // Anonymous namespace
86
/// Decodes a shift instruction (SHR, SHL, SHF) at program counter 'pc' and
/// appends the generated IR to 'bb'. Returns the program counter of the
/// decoded instruction.
u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node op_a = GetRegister(instr.gpr8);
    // Operand B comes from an immediate, a register or a constant buffer
    // depending on the encoding.
    Node op_b = [this, instr] {
        if (instr.is_b_imm) {
            return Immediate(instr.alu.GetSignedImm20_20());
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        }
    }();

    switch (const auto opid = opcode->get().GetId(); opid) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM: {
        // The shift amount is either wrapped (mod 32) or clamped to [0, 32].
        op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b));

        Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
                                     move(op_a), move(op_b));
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM: {
        Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b);
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    case OpCode::Id::SHF_RIGHT_R:
    case OpCode::Id::SHF_RIGHT_IMM:
    case OpCode::Id::SHF_LEFT_R:
    case OpCode::Id::SHF_LEFT_IMM: {
        UNIMPLEMENTED_IF(instr.generates_cc);
        UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
                             instr.shf.xmode.Value());

        if (instr.is_b_imm) {
            op_b = Immediate(static_cast<u32>(instr.shf.immediate));
        }
        // 64-bit funnel shifts may legally shift up to 64 bits.
        const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64;
        Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size);

        // Precompute (32 - shift) for the bits crossing the word boundary.
        Node negated_shift = Operation(OperationCode::INegate, shift);
        Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32));

        const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
        Node value = (is_right ? ShiftRight : ShiftLeft)(
            move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type);

        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
    }

    return pc;
}
152
153} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
deleted file mode 100644
index c69681e8d..000000000
--- a/src/video_core/shader/decode/texture.cpp
+++ /dev/null
@@ -1,935 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/bit_field.h"
11#include "common/common_types.h"
12#include "common/logging/log.h"
13#include "video_core/engines/shader_bytecode.h"
14#include "video_core/shader/node_helper.h"
15#include "video_core/shader/registry.h"
16#include "video_core/shader/shader_ir.h"
17
18namespace VideoCommon::Shader {
19
20using Tegra::Shader::Instruction;
21using Tegra::Shader::OpCode;
22using Tegra::Shader::Register;
23using Tegra::Shader::TextureMiscMode;
24using Tegra::Shader::TextureProcessMode;
25using Tegra::Shader::TextureType;
26
27static std::size_t GetCoordCount(TextureType texture_type) {
28 switch (texture_type) {
29 case TextureType::Texture1D:
30 return 1;
31 case TextureType::Texture2D:
32 return 2;
33 case TextureType::Texture3D:
34 case TextureType::TextureCube:
35 return 3;
36 default:
37 UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
38 return 0;
39 }
40}
41
42u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
43 const Instruction instr = {program_code[pc]};
44 const auto opcode = OpCode::Decode(instr);
45 bool is_bindless = false;
46 switch (opcode->get().GetId()) {
47 case OpCode::Id::TEX: {
48 const TextureType texture_type{instr.tex.texture_type};
49 const bool is_array = instr.tex.array != 0;
50 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
51 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
52 const auto process_mode = instr.tex.GetTextureProcessMode();
53 WriteTexInstructionFloat(
54 bb, instr,
55 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
56 break;
57 }
58 case OpCode::Id::TEX_B: {
59 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
60 "AOFFI is not implemented");
61
62 const TextureType texture_type{instr.tex_b.texture_type};
63 const bool is_array = instr.tex_b.array != 0;
64 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
65 const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
66 const auto process_mode = instr.tex_b.GetTextureProcessMode();
67 WriteTexInstructionFloat(bb, instr,
68 GetTexCode(instr, texture_type, process_mode, depth_compare,
69 is_array, is_aoffi, {instr.gpr20}));
70 break;
71 }
72 case OpCode::Id::TEXS: {
73 const TextureType texture_type{instr.texs.GetTextureType()};
74 const bool is_array{instr.texs.IsArrayTexture()};
75 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
76 const auto process_mode = instr.texs.GetTextureProcessMode();
77
78 const Node4 components =
79 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
80
81 if (instr.texs.fp32_flag) {
82 WriteTexsInstructionFloat(bb, instr, components);
83 } else {
84 WriteTexsInstructionHalfFloat(bb, instr, components);
85 }
86 break;
87 }
88 case OpCode::Id::TLD4_B: {
89 is_bindless = true;
90 [[fallthrough]];
91 }
92 case OpCode::Id::TLD4: {
93 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
94 "NDV is not implemented");
95 const auto texture_type = instr.tld4.texture_type.Value();
96 const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
97 : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
98 const bool is_array = instr.tld4.array != 0;
99 const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
100 : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
101 const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
102 : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
103 WriteTexInstructionFloat(bb, instr,
104 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
105 is_ptp, is_bindless));
106 break;
107 }
108 case OpCode::Id::TLD4S: {
109 constexpr std::size_t num_coords = 2;
110 const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
111 const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
112 const Node op_a = GetRegister(instr.gpr8);
113 const Node op_b = GetRegister(instr.gpr20);
114
115 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
116 std::vector<Node> coords;
117 std::vector<Node> aoffi;
118 Node depth_compare;
119 if (is_depth_compare) {
120 // Note: TLD4S coordinate encoding works just like TEXS's
121 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
122 coords.push_back(op_a);
123 coords.push_back(op_y);
124 if (is_aoffi) {
125 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
126 depth_compare = GetRegister(instr.gpr20.Value() + 1);
127 } else {
128 depth_compare = op_b;
129 }
130 } else {
131 // There's no depth compare
132 coords.push_back(op_a);
133 if (is_aoffi) {
134 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
135 aoffi = GetAoffiCoordinates(op_b, num_coords, true);
136 } else {
137 coords.push_back(op_b);
138 }
139 }
140 const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
141
142 SamplerInfo info;
143 info.is_shadow = is_depth_compare;
144 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
145
146 Node4 values;
147 for (u32 element = 0; element < values.size(); ++element) {
148 MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {},
149 {}, {}, component, element, {}};
150 values[element] = Operation(OperationCode::TextureGather, meta, coords);
151 }
152
153 if (instr.tld4s.fp16_flag) {
154 WriteTexsInstructionHalfFloat(bb, instr, values, true);
155 } else {
156 WriteTexsInstructionFloat(bb, instr, values, true);
157 }
158 break;
159 }
160 case OpCode::Id::TXD_B:
161 is_bindless = true;
162 [[fallthrough]];
163 case OpCode::Id::TXD: {
164 UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
165 "AOFFI is not implemented");
166
167 const bool is_array = instr.txd.is_array != 0;
168 const auto derivate_reg = instr.gpr20.Value();
169 const auto texture_type = instr.txd.texture_type.Value();
170 const auto coord_count = GetCoordCount(texture_type);
171 u64 base_reg = instr.gpr8.Value();
172 Node index_var;
173 SamplerInfo info;
174 info.type = texture_type;
175 info.is_array = is_array;
176 const std::optional<SamplerEntry> sampler =
177 is_bindless ? GetBindlessSampler(base_reg, info, index_var)
178 : GetSampler(instr.sampler, info);
179 Node4 values;
180 if (!sampler) {
181 std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
182 WriteTexInstructionFloat(bb, instr, values);
183 break;
184 }
185
186 if (is_bindless) {
187 base_reg++;
188 }
189
190 std::vector<Node> coords;
191 std::vector<Node> derivates;
192 for (std::size_t i = 0; i < coord_count; ++i) {
193 coords.push_back(GetRegister(base_reg + i));
194 const std::size_t derivate = i * 2;
195 derivates.push_back(GetRegister(derivate_reg + derivate));
196 derivates.push_back(GetRegister(derivate_reg + derivate + 1));
197 }
198
199 Node array_node = {};
200 if (is_array) {
201 const Node info_reg = GetRegister(base_reg + coord_count);
202 array_node = BitfieldExtract(info_reg, 0, 16);
203 }
204
205 for (u32 element = 0; element < values.size(); ++element) {
206 MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
207 {}, {}, {}, element, index_var};
208 values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
209 }
210
211 WriteTexInstructionFloat(bb, instr, values);
212
213 break;
214 }
215 case OpCode::Id::TXQ_B:
216 is_bindless = true;
217 [[fallthrough]];
218 case OpCode::Id::TXQ: {
219 Node index_var;
220 const std::optional<SamplerEntry> sampler =
221 is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
222 : GetSampler(instr.sampler, {});
223
224 if (!sampler) {
225 u32 indexer = 0;
226 for (u32 element = 0; element < 4; ++element) {
227 if (!instr.txq.IsComponentEnabled(element)) {
228 continue;
229 }
230 const Node value = Immediate(0);
231 SetTemporary(bb, indexer++, value);
232 }
233 for (u32 i = 0; i < indexer; ++i) {
234 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
235 }
236 break;
237 }
238
239 u32 indexer = 0;
240 switch (instr.txq.query_type) {
241 case Tegra::Shader::TextureQueryType::Dimension: {
242 for (u32 element = 0; element < 4; ++element) {
243 if (!instr.txq.IsComponentEnabled(element)) {
244 continue;
245 }
246 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
247 const Node value =
248 Operation(OperationCode::TextureQueryDimensions, meta,
249 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
250 SetTemporary(bb, indexer++, value);
251 }
252 for (u32 i = 0; i < indexer; ++i) {
253 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
254 }
255 break;
256 }
257 default:
258 UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
259 }
260 break;
261 }
262 case OpCode::Id::TMML_B:
263 is_bindless = true;
264 [[fallthrough]];
265 case OpCode::Id::TMML: {
266 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
267 "NDV is not implemented");
268
269 const auto texture_type = instr.tmml.texture_type.Value();
270 const bool is_array = instr.tmml.array != 0;
271 SamplerInfo info;
272 info.type = texture_type;
273 info.is_array = is_array;
274 Node index_var;
275 const std::optional<SamplerEntry> sampler =
276 is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
277 : GetSampler(instr.sampler, info);
278
279 if (!sampler) {
280 u32 indexer = 0;
281 for (u32 element = 0; element < 2; ++element) {
282 if (!instr.tmml.IsComponentEnabled(element)) {
283 continue;
284 }
285 const Node value = Immediate(0);
286 SetTemporary(bb, indexer++, value);
287 }
288 for (u32 i = 0; i < indexer; ++i) {
289 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
290 }
291 break;
292 }
293
294 const u64 base_index = is_array ? 1 : 0;
295 const u64 num_components = [texture_type] {
296 switch (texture_type) {
297 case TextureType::Texture1D:
298 return 1;
299 case TextureType::Texture2D:
300 return 2;
301 case TextureType::TextureCube:
302 return 3;
303 default:
304 UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
305 return 2;
306 }
307 }();
308 // TODO: What's the array component used for?
309
310 std::vector<Node> coords;
311 coords.reserve(num_components);
312 for (u64 component = 0; component < num_components; ++component) {
313 coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
314 }
315
316 u32 indexer = 0;
317 for (u32 element = 0; element < 2; ++element) {
318 if (!instr.tmml.IsComponentEnabled(element)) {
319 continue;
320 }
321 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
322 Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
323 SetTemporary(bb, indexer++, std::move(value));
324 }
325 for (u32 i = 0; i < indexer; ++i) {
326 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
327 }
328 break;
329 }
330 case OpCode::Id::TLD: {
331 UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
332 UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
333 UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
334
335 WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
336 break;
337 }
338 case OpCode::Id::TLDS: {
339 const TextureType texture_type{instr.tlds.GetTextureType()};
340 const bool is_array{instr.tlds.IsArrayTexture()};
341
342 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
343 "AOFFI is not implemented");
344 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
345
346 const Node4 components = GetTldsCode(instr, texture_type, is_array);
347
348 if (instr.tlds.fp32_flag) {
349 WriteTexsInstructionFloat(bb, instr, components);
350 } else {
351 WriteTexsInstructionHalfFloat(bb, instr, components);
352 }
353 break;
354 }
355 default:
356 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
357 }
358
359 return pc;
360}
361
362ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
363 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
364 if (info.IsComplete()) {
365 return info;
366 }
367 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
370 info.is_array = info.is_array.value_or(false);
371 info.is_shadow = info.is_shadow.value_or(false);
372 info.is_buffer = info.is_buffer.value_or(false);
373 return info;
374 }
375 info.type = info.type.value_or(sampler->texture_type);
376 info.is_array = info.is_array.value_or(sampler->is_array != 0);
377 info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
378 info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
379 return info;
380}
381
382std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) {
384 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386
387 // If this sampler has already been used, return the existing mapping.
388 const auto it =
389 std::find_if(used_samplers.begin(), used_samplers.end(),
390 [offset](const SamplerEntry& entry) { return entry.offset == offset; });
391 if (it != used_samplers.end()) {
392 ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
393 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
394 return *it;
395 }
396
397 // Otherwise create a new mapping for this sampler
398 const auto next_index = static_cast<u32>(used_samplers.size());
399 return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
400 *info.is_shadow, *info.is_buffer, false);
401}
402
/// Resolves a bindless sampler by tracking the register that holds its handle
/// back to its origin. Three origins are supported: a constant-buffer handle
/// (BindlessSamplerNode), a separate texture/sampler pair
/// (SeparateSamplerNode) and an indexed array of samplers (ArraySamplerNode,
/// which also sets 'index_var'). Returns std::nullopt when tracking fails.
std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
                                                         SamplerInfo info, Node& index_var) {
    const Node sampler_register = GetRegister(reg);
    // Walk the IR backwards from the end of the program to find the origin.
    const auto [base_node, tracked_sampler_info] =
        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
    if (!base_node) {
        UNREACHABLE();
        return std::nullopt;
    }

    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
        const u32 buffer = sampler_info->index;
        const u32 offset = sampler_info->offset;
        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));

        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
                                     [buffer, offset](const SamplerEntry& entry) {
                                         return entry.buffer == buffer && entry.offset == offset;
                                     });
        if (it != used_samplers.end()) {
            ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow);
            return *it;
        }

        // Otherwise create a new mapping for this sampler
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, false);
    }
    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
        const std::pair indices = sampler_info->indices;
        const std::pair offsets = sampler_info->offsets;
        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));

        // Try to use an already created sampler if it exists
        const auto it =
            std::find_if(used_samplers.begin(), used_samplers.end(),
                         [indices, offsets](const SamplerEntry& entry) {
                             return offsets == std::pair{entry.offset, entry.secondary_offset} &&
                                    indices == std::pair{entry.buffer, entry.secondary_buffer};
                         });
        if (it != used_samplers.end()) {
            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
            return *it;
        }

        // Otherwise create a new mapping for this sampler
        const u32 next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer);
    }
    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
        // base_offset is in bytes; entries are word (4-byte) indexed.
        const u32 base_offset = sampler_info->base_offset / 4;
        index_var = GetCustomVariable(sampler_info->bindless_var);
        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));

        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(
            used_samplers.begin(), used_samplers.end(),
            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
        if (it != used_samplers.end()) {
            ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
                   it->is_indexed);
            return *it;
        }

        uses_indexed_samplers = true;
        // Otherwise create a new mapping for this sampler
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, true);
    }
    return std::nullopt;
}
481
482void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
483 u32 dest_elem = 0;
484 for (u32 elem = 0; elem < 4; ++elem) {
485 if (!instr.tex.IsComponentEnabled(elem)) {
486 // Skip disabled components
487 continue;
488 }
489 SetTemporary(bb, dest_elem++, components[elem]);
490 }
491 // After writing values in temporals, move them to the real registers
492 for (u32 i = 0; i < dest_elem; ++i) {
493 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
494 }
495}
496
497void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
498 bool ignore_mask) {
499 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
500 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
501
502 u32 dest_elem = 0;
503 for (u32 component = 0; component < 4; ++component) {
504 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
505 continue;
506 SetTemporary(bb, dest_elem++, components[component]);
507 }
508
509 for (u32 i = 0; i < dest_elem; ++i) {
510 if (i < 2) {
511 // Write the first two swizzle components to gpr0 and gpr0+1
512 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i));
513 } else {
514 ASSERT(instr.texs.HasTwoDestinations());
515 // Write the rest of the swizzle components to gpr28 and gpr28+1
516 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i));
517 }
518 }
519}
520
521void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
522 const Node4& components, bool ignore_mask) {
523 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
524 // float instruction).
525
526 Node4 values;
527 u32 dest_elem = 0;
528 for (u32 component = 0; component < 4; ++component) {
529 if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
530 continue;
531 values[dest_elem++] = components[component];
532 }
533 if (dest_elem == 0)
534 return;
535
536 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
537
538 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
539 if (dest_elem <= 2) {
540 SetRegister(bb, instr.gpr0, first_value);
541 return;
542 }
543
544 SetTemporary(bb, 0, first_value);
545 SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
546
547 SetRegister(bb, instr.gpr0, GetTemporary(0));
548 SetRegister(bb, instr.gpr28, GetTemporary(1));
549}
550
/// Emits the actual texture sampling operation shared by TEX/TEXS paths.
/// 'array' and 'depth_compare' are null when unused; 'bias_offset' is the
/// register offset (from gpr20) where the lod/bias parameter lives;
/// 'bindless_reg' selects the bindless path when present.
/// @returns One node per output component (always four).
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                               TextureProcessMode process_mode, std::vector<Node> coords,
                               Node array, Node depth_compare, u32 bias_offset,
                               std::vector<Node> aoffi,
                               std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool is_array = array != nullptr;
    const bool is_shadow = depth_compare != nullptr;
    const bool is_bindless = bindless_reg.has_value();

    ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
               "Illegal texture type");

    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = is_shadow;
    info.is_buffer = false;

    Node index_var;
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
                    : GetSampler(instr.sampler, info);
    if (!sampler) {
        // Unknown sampler: return zeros so dependent IR stays well formed.
        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
    }

    // LZ/LL/LLA supply an explicit level of detail; the rest use implicit lod
    // (optionally biased).
    const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                            process_mode == TextureProcessMode::LL ||
                            process_mode == TextureProcessMode::LLA;
    const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture;

    Node bias;
    Node lod;
    switch (process_mode) {
    case TextureProcessMode::None:
        break;
    case TextureProcessMode::LZ:
        lod = Immediate(0.0f);
        break;
    case TextureProcessMode::LB:
        // If present, lod or bias are always stored in the register indexed by the gpr20 field with
        // an offset depending on the usage of the other registers.
        bias = GetRegister(instr.gpr20.Value() + bias_offset);
        break;
    case TextureProcessMode::LL:
        lod = GetRegister(instr.gpr20.Value() + bias_offset);
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
        break;
    }

    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        // NOTE: the MetaTexture initializer order matches its field order;
        // keep it in sync with the struct declaration.
        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
                         lod,      {},    element,       index_var};
        values[element] = Operation(opcode, meta, coords);
    }

    return values;
}
612
/// Gathers the operands for a TEX/TEX_B instruction (coordinates, array
/// index, AOFFI offsets, depth-compare value) and forwards them to
/// GetTextureCode. Parameters after the coordinates are read from consecutive
/// registers starting at gpr20, skipping the bindless handle and lod/bias
/// slots when present.
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                           TextureProcessMode process_mode, bool depth_compare, bool is_array,
                           bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool lod_bias_enabled{
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};

    const bool is_bindless = bindless_reg.has_value();

    u64 parameter_register = instr.gpr20.Value();
    if (is_bindless) {
        // gpr20 holds the bindless handle; parameters start after it.
        ++parameter_register;
    }

    const u32 bias_lod_offset = (is_bindless ? 1 : 0);
    if (lod_bias_enabled) {
        // Skip the lod/bias slot for the remaining parameters.
        ++parameter_register;
    }

    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
                                                              lod_bias_enabled, 4, 5);
    const auto coord_count = std::get<0>(coord_counts);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    std::vector<Node> aoffi;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
    }

    Node dc;
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        dc = GetRegister(parameter_register++);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
                          aoffi, bindless_reg);
}
665
/// Gathers the operands for a TEXS instruction and forwards them to
/// GetTextureCode. TEXS packs its operands more tightly than TEX: the last
/// coordinate may live in gpr20 rather than after the other coordinates.
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);

    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
                                                              lod_bias_enabled, 4, 4);
    const auto coord_count = std::get<0>(coord_counts);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The last coordinate moves to gpr20 when there is no room after gpr8's
    // packed operands (arrays, no lod/dc, or more than two coordinates).
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    const u32 bias_offset = coord_count > 2 ? 1 : 0;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;

    Node dc;
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
                          {});
}
704
// Decodes TLD4 (texture gather): fetches one selected component of the four texels that
// bilinear filtering would sample, returning them as a Node4.
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                            bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");

    const std::size_t coord_count = GetCoordCount(texture_type);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }

    // Extra operands (bindless handle, AOFFI/PTP offsets, depth reference) are packed in
    // consecutive registers starting at gpr20; the cursor advances as each is consumed.
    u64 parameter_register = instr.gpr20.Value();

    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = depth_compare;

    Node index_var;
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
                    : GetSampler(instr.sampler, info);
    Node4 values;
    if (!sampler) {
        // Sampler could not be resolved: return zero for all four components.
        for (u32 element = 0; element < values.size(); ++element) {
            values[element] = Immediate(0);
        }
        return values;
    }

    std::vector<Node> aoffi, ptp;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
    } else if (is_ptp) {
        // PTP offsets consume two consecutive parameter registers.
        ptp = GetPtpCoordinates(
            {GetRegister(parameter_register++), GetRegister(parameter_register++)});
    }

    Node dc;
    if (depth_compare) {
        dc = GetRegister(parameter_register++);
    }

    // The gathered component index comes from a different encoding field when bindless.
    const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
                                       : Immediate(static_cast<u32>(instr.tld4.component));

    // Emit one TextureGather per output element, sharing coordinates and metadata.
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{
            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
            index_var};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }

    return values;
}
766
767Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
768 const auto texture_type{instr.tld.texture_type};
769 const bool is_array{instr.tld.is_array != 0};
770 const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
771 const std::size_t coord_count{GetCoordCount(texture_type)};
772
773 u64 gpr8_cursor{instr.gpr8.Value()};
774 const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr};
775
776 std::vector<Node> coords;
777 coords.reserve(coord_count);
778 for (std::size_t i = 0; i < coord_count; ++i) {
779 coords.push_back(GetRegister(gpr8_cursor++));
780 }
781
782 u64 gpr20_cursor{instr.gpr20.Value()};
783 // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr};
784 const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)};
785 // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
786 // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
787
788 const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});
789
790 Node4 values;
791 for (u32 element = 0; element < values.size(); ++element) {
792 auto coords_copy = coords;
793 MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
794 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
795 }
796
797 return values;
798}
799
// Decodes TLDS (scalar texel fetch), returning the four fetched components as a Node4.
Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = false;
    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);

    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
    const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);

    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // if is array gpr20 is used
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();

    // When the coordinates do not all fit after coord_register, the last one spills to
    // gpr20 — unless arrays already claimed gpr20 above.
    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;

    std::vector<Node> coords;
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        // The final coordinate may come from the spill register; when AOFFI is enabled that
        // slot holds the offsets instead, so keep reading sequentially.
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(
            GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
    }

    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);

    std::vector<Node> aoffi;
    if (aoffi_enabled) {
        aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
    }

    // Emit one TexelFetch per output component, all sharing the same coordinates/metadata.
    Node4 values;
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
}
845
846std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
847 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
848 std::size_t max_coords, std::size_t max_inputs) {
849 const std::size_t coord_count = GetCoordCount(texture_type);
850
851 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
852 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
853 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
854 UNIMPLEMENTED_MSG("Unsupported Texture operation");
855 total_coord_count = std::min(total_coord_count, max_coords);
856 }
857 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
858 total_coord_count +=
859 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
860
861 return {coord_count, total_coord_count};
862}
863
864std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
865 bool is_tld4) {
866 const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
867 const u32 size = is_tld4 ? 6 : 4;
868 const s32 wrap_value = is_tld4 ? 32 : 8;
869 const s32 diff_value = is_tld4 ? 64 : 16;
870 const u32 mask = (1U << size) - 1;
871
872 std::vector<Node> aoffi;
873 aoffi.reserve(coord_count);
874
875 const auto aoffi_immediate{
876 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
877 if (!aoffi_immediate) {
878 // Variable access, not supported on AMD.
879 LOG_WARNING(HW_GPU,
880 "AOFFI constant folding failed, some hardware might have graphical issues");
881 for (std::size_t coord = 0; coord < coord_count; ++coord) {
882 const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size);
883 const Node condition =
884 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
885 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
886 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
887 }
888 return aoffi;
889 }
890
891 for (std::size_t coord = 0; coord < coord_count; ++coord) {
892 s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask;
893 if (value >= wrap_value) {
894 value -= diff_value;
895 }
896 aoffi.push_back(Immediate(value));
897 }
898 return aoffi;
899}
900
901std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
902 static constexpr u32 num_entries = 8;
903
904 std::vector<Node> ptp;
905 ptp.reserve(num_entries);
906
907 const auto global_size = static_cast<s64>(global_code.size());
908 const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
909 const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
910 if (!low || !high) {
911 for (u32 entry = 0; entry < num_entries; ++entry) {
912 const u32 reg = entry / 4;
913 const u32 offset = entry % 4;
914 const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
915 const Node condition =
916 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
917 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
918 ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
919 }
920 return ptp;
921 }
922
923 const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
924 for (u32 entry = 0; entry < num_entries; ++entry) {
925 s32 value = (immediate >> (entry * 8)) & 0b111111;
926 if (value >= 32) {
927 value -= 64;
928 }
929 ptp.push_back(Immediate(value));
930 }
931
932 return ptp;
933}
934
935} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
deleted file mode 100644
index 1c0957277..000000000
--- a/src/video_core/shader/decode/video.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using std::move;
14using Tegra::Shader::Instruction;
15using Tegra::Shader::OpCode;
16using Tegra::Shader::Pred;
17using Tegra::Shader::VideoType;
18using Tegra::Shader::VmadShr;
19using Tegra::Shader::VmnmxOperation;
20using Tegra::Shader::VmnmxType;
21
// Decodes video (sub-word integer) instructions: VMAD, VSETP and VMNMX.
// Returns the (unmodified) program counter.
u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // VMNMX uses a different encoding and is handled by its own decoder.
    if (opcode->get().GetId() == OpCode::Id::VMNMX) {
        DecodeVMNMX(bb, instr);
        return pc;
    }

    // Operand A always comes from gpr8, with the sub-word selected by the video fields.
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
    // Operand B is either a register (same sub-word selection) or a 16-bit immediate,
    // sign-extended when signed_b is set.
    const Node op_b = [this, instr] {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (instr.video.signed_b) {
            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(imm));
        } else {
            return Immediate(instr.alu.GetImm20_16());
        }
    }();

    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // a * b + c, optionally arithmetic-shifted right by 7 or 15 bits.
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);

        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);

        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }

        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));

        // Compare A against B, then combine with a second predicate.
        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);

        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);

        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));

        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }

    return pc;
}
93
94Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
95 u64 byte_height) {
96 if (!is_chunk) {
97 return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
98 }
99
100 switch (type) {
101 case VideoType::Size16_Low:
102 return BitfieldExtract(op, 0, 16);
103 case VideoType::Size16_High:
104 return BitfieldExtract(op, 16, 16);
105 case VideoType::Size32:
106 // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
107 // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
108 UNIMPLEMENTED();
109 return Immediate(0);
110 case VideoType::Invalid:
111 UNREACHABLE_MSG("Invalid instruction encoding");
112 return Immediate(0);
113 default:
114 UNREACHABLE();
115 return Immediate(0);
116 }
117}
118
119void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
120 UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
121 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
122 UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
123 UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
124 UNIMPLEMENTED_IF(instr.vmnmx.sat);
125 UNIMPLEMENTED_IF(instr.generates_cc);
126
127 Node op_a = GetRegister(instr.gpr8);
128 Node op_b = GetRegister(instr.gpr20);
129 Node op_c = GetRegister(instr.gpr39);
130
131 const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed
132 const bool is_oper2_signed = instr.vmnmx.is_dest_signed;
133
134 const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
135 Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b));
136
137 switch (instr.vmnmx.operation) {
138 case VmnmxOperation::Mrg_16H:
139 value = BitfieldInsert(move(op_c), move(value), 16, 16);
140 break;
141 case VmnmxOperation::Mrg_16L:
142 value = BitfieldInsert(move(op_c), move(value), 0, 16);
143 break;
144 case VmnmxOperation::Mrg_8B0:
145 value = BitfieldInsert(move(op_c), move(value), 0, 8);
146 break;
147 case VmnmxOperation::Mrg_8B2:
148 value = BitfieldInsert(move(op_c), move(value), 16, 8);
149 break;
150 case VmnmxOperation::Acc:
151 value = Operation(OperationCode::IAdd, move(value), move(op_c));
152 break;
153 case VmnmxOperation::Min:
154 value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c));
155 break;
156 case VmnmxOperation::Max:
157 value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c));
158 break;
159 case VmnmxOperation::Nop:
160 break;
161 default:
162 UNREACHABLE();
163 break;
164 }
165
166 SetRegister(bb, instr.gpr0, move(value));
167}
168
169} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
deleted file mode 100644
index 37433d783..000000000
--- a/src/video_core/shader/decode/warp.cpp
+++ /dev/null
@@ -1,117 +0,0 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
17using Tegra::Shader::VoteOperation;
18
19namespace {
20
21OperationCode GetOperationCode(VoteOperation vote_op) {
22 switch (vote_op) {
23 case VoteOperation::All:
24 return OperationCode::VoteAll;
25 case VoteOperation::Any:
26 return OperationCode::VoteAny;
27 case VoteOperation::Eq:
28 return OperationCode::VoteEqual;
29 default:
30 UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
31 return OperationCode::VoteAll;
32 }
33}
34
35} // Anonymous namespace
36
// Decodes warp-wide instructions: VOTE, SHFL and FSWZADD.
// Returns the (unmodified) program counter.
u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    // Signal the backend that this shader uses warp instructions.
    uses_warps = true;

    switch (opcode->get().GetId()) {
    case OpCode::Id::VOTE: {
        // gpr0 receives the ballot mask; the destination predicate receives the vote result.
        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
        const Node active = Operation(OperationCode::BallotThread, value);
        const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
        SetRegister(bb, instr.gpr0, active);
        SetPredicate(bb, instr.vote.dest_pred, vote);
        break;
    }
    case OpCode::Id::SHFL: {
        // Mask and index can each be an immediate or a register.
        Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
                                           : GetRegister(instr.gpr39);
        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
                                             : GetRegister(instr.gpr20);

        // The mask packs two fields: bits 0-4 clamp the lane, bits 8-23 form the segment mask.
        Node thread_id = Operation(OperationCode::ThreadId);
        Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
        Node seg_mask = BitfieldExtract(mask, 8, 16);

        // Compute the [min, max] lane range this thread's shuffle may source from.
        Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
        Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
        Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
                                       Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));

        // Source lane depends on the shuffle mode (indexed/down/up/butterfly).
        Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
            switch (instr.shfl.operation) {
            case ShuffleOperation::Idx:
                return Operation(OperationCode::IBitwiseOr,
                                 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
                                 min_thread_id);
            case ShuffleOperation::Down:
                return Operation(OperationCode::IAdd, thread_id, index);
            case ShuffleOperation::Up:
                return Operation(OperationCode::IAdd, thread_id,
                                 Operation(OperationCode::INegate, index));
            case ShuffleOperation::Bfly:
                return Operation(OperationCode::IBitwiseXor, thread_id, index);
            }
            UNREACHABLE();
            return Immediate(0U);
        }();

        // The predicate reports whether the source lane stayed inside the allowed range;
        // Up shifts downward so it checks the lower bound, the rest check the upper one.
        Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
            if (instr.shfl.operation == ShuffleOperation::Up) {
                return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
            } else {
                return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
            }
        }();

        SetPredicate(bb, instr.shfl.pred48, in_bounds);
        SetRegister(
            bb, instr.gpr0,
            Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
        break;
    }
    case OpCode::Id::FSWZADD: {
        UNIMPLEMENTED_IF(instr.fswzadd.ndv);

        // The swizzle immediate is forwarded untouched to the backend's FSwizzleAdd.
        Node op_a = GetRegister(instr.gpr8);
        Node op_b = GetRegister(instr.gpr20);
        Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
        SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
        break;
    }

    return pc;
}
116
117} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
deleted file mode 100644
index 233b8fa42..000000000
--- a/src/video_core/shader/decode/xmad.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6#include "common/common_types.h"
7#include "video_core/engines/shader_bytecode.h"
8#include "video_core/shader/node_helper.h"
9#include "video_core/shader/shader_ir.h"
10
11namespace VideoCommon::Shader {
12
13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode;
15using Tegra::Shader::PredCondition;
16
// Decodes XMAD: a 16x16-bit multiply with 32-bit accumulate and several merge/shift modes.
// Returns the (unmodified) program counter.
u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");

    Node op_a = GetRegister(instr.gpr8);

    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;

    // Operand sources and mode bits differ per encoding (register/const-buffer/immediate).
    auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56,
                    instr.xmad.product_shift_left_second,
                    instr.xmad.high_b,
                    instr.xmad.mode_cbf,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
                    instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false,
                    false,
                    instr.xmad.high_b,
                    instr.xmad.mode_cbf,
                    GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37,
                    instr.xmad.product_shift_left,
                    false,
                    instr.xmad.mode,
                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        default:
            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
            return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
        }
    }();

    // Select the low or high 16 bits of each multiplicand.
    op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
                           instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));

    const Node original_b = op_b_binding;
    const Node op_b =
        SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
                        is_high_b ? Immediate(16) : Immediate(0), Immediate(16));

    // we already check sign_a and sign_b is difference or not before so just use one in here.
    Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
    if (is_psl) {
        // PSL shifts the 16x16 product left by 16 before accumulation.
        product =
            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
    }
    SetTemporary(bb, 0, product);
    product = GetTemporary(0);

    Node original_c = op_c;
    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
    // Transform the accumulator operand according to the XMAD mode.
    op_c = [&] {
        switch (set_mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
            return BitfieldExtract(std::move(original_c), 0, 16);
        case Tegra::Shader::XmadMode::CHi:
            return BitfieldExtract(std::move(original_c), 16, 16);
        case Tegra::Shader::XmadMode::CBcc: {
            // CBcc adds operand B shifted left by 16 into the accumulator.
            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                             original_b, Immediate(16));
            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
                                   std::move(shifted_b));
        }
        case Tegra::Shader::XmadMode::CSfu: {
            const Node comp_a =
                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
            const Node comp_b =
                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
            const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);

            const Node comp_minus_a = GetPredicateComparisonInteger(
                PredCondition::NE, is_signed_a,
                SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
                                Immediate(0x80000000)),
                Immediate(0));
            const Node comp_minus_b = GetPredicateComparisonInteger(
                PredCondition::NE, is_signed_b,
                SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
                                Immediate(0x80000000)),
                Immediate(0));

            // Subtract 0x10000 for each operand with its top bit set, unless either is zero.
            Node new_c = Operation(
                OperationCode::Select, comp_minus_a,
                SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
                original_c);
            new_c = Operation(
                OperationCode::Select, comp_minus_b,
                SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
                std::move(new_c));

            return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
        }
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    SetTemporary(bb, 1, op_c);
    op_c = GetTemporary(1);

    // TODO(Rodrigo): Use an appropiate sign for this operation
    Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
    SetTemporary(bb, 2, sum);
    sum = GetTemporary(2);
    if (is_merge) {
        // Merge combines the low 16 bits of the sum with operand B shifted into the high half.
        const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
                                       Immediate(0), Immediate(16));
        const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
                                       Immediate(16));
        sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
    }

    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, std::move(sum));

    return pc;
}
155
156} // namespace VideoCommon::Shader