summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp, 2018-12-27 16:50:36 -0300
committer: ReinUsesLisp, 2019-01-15 17:54:53 -0300
commit: 2d6c064e66bac4cb871aa26a12066441a8852008 (patch)
tree: 52baf7c971830bbe9cb5c8631235f1ebcda95d30 /src
parent: shader_ir: Remove composite primitives and use temporals instead (diff)
download: yuzu-2d6c064e66bac4cb871aa26a12066441a8852008.tar.gz
yuzu-2d6c064e66bac4cb871aa26a12066441a8852008.tar.xz
yuzu-2d6c064e66bac4cb871aa26a12066441a8852008.zip
shader_decode: Improve zero flag implementation
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/shader/decode/arithmetic.cpp 15
-rw-r--r-- src/video_core/shader/decode/arithmetic_immediate.cpp 6
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer.cpp 31
-rw-r--r-- src/video_core/shader/decode/arithmetic_integer_immediate.cpp 18
-rw-r--r-- src/video_core/shader/decode/bfe.cpp 1
-rw-r--r-- src/video_core/shader/decode/bfi.cpp 4
-rw-r--r-- src/video_core/shader/decode/conversion.cpp 11
-rw-r--r-- src/video_core/shader/decode/ffma.cpp 3
-rw-r--r-- src/video_core/shader/decode/float_set.cpp 11
-rw-r--r-- src/video_core/shader/decode/predicate_set_register.cpp 6
-rw-r--r-- src/video_core/shader/decode/shift.cpp 14
-rw-r--r-- src/video_core/shader/decode/video.cpp 5
-rw-r--r-- src/video_core/shader/decode/xmad.cpp 1
-rw-r--r-- src/video_core/shader/shader_ir.cpp 19
-rw-r--r-- src/video_core/shader/shader_ir.h 9
15 files changed, 79 insertions, 75 deletions
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index ef846bd9a..926abcc8e 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -45,8 +45,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
45 UNIMPLEMENTED_IF_MSG( 45 UNIMPLEMENTED_IF_MSG(
46 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented", 46 instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
47 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default 47 instr.fmul.tab5c68_0.Value()); // SMO typical sends 1 here which seems to be the default
48 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
49 "Condition codes generation in FMUL is not implemented");
50 48
51 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); 49 op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
52 50
@@ -75,21 +73,20 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
75 73
76 value = GetSaturatedFloat(value, instr.alu.saturate_d); 74 value = GetSaturatedFloat(value, instr.alu.saturate_d);
77 75
76 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
78 SetRegister(bb, instr.gpr0, value); 77 SetRegister(bb, instr.gpr0, value);
79 break; 78 break;
80 } 79 }
81 case OpCode::Id::FADD_C: 80 case OpCode::Id::FADD_C:
82 case OpCode::Id::FADD_R: 81 case OpCode::Id::FADD_R:
83 case OpCode::Id::FADD_IMM: { 82 case OpCode::Id::FADD_IMM: {
84 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
85 "Condition codes generation in FADD is not implemented");
86
87 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); 83 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
88 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); 84 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
89 85
90 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); 86 Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
91 value = GetSaturatedFloat(value, instr.alu.saturate_d); 87 value = GetSaturatedFloat(value, instr.alu.saturate_d);
92 88
89 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
93 SetRegister(bb, instr.gpr0, value); 90 SetRegister(bb, instr.gpr0, value);
94 break; 91 break;
95 } 92 }
@@ -126,9 +123,6 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
126 case OpCode::Id::FMNMX_C: 123 case OpCode::Id::FMNMX_C:
127 case OpCode::Id::FMNMX_R: 124 case OpCode::Id::FMNMX_R:
128 case OpCode::Id::FMNMX_IMM: { 125 case OpCode::Id::FMNMX_IMM: {
129 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
130 "Condition codes generation in FMNMX is not implemented");
131
132 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); 126 op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
133 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); 127 op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
134 128
@@ -136,9 +130,10 @@ u32 ShaderIR::DecodeArithmetic(BasicBlock& bb, u32 pc) {
136 130
137 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); 131 const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
138 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); 132 const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
133 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
139 134
140 SetRegister(bb, instr.gpr0, 135 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
141 Operation(OperationCode::Select, NO_PRECISE, condition, min, max)); 136 SetRegister(bb, instr.gpr0, value);
142 break; 137 break;
143 } 138 }
144 case OpCode::Id::RRO_C: 139 case OpCode::Id::RRO_C:
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp
index 996b2537a..1c6da94b4 100644
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@@ -22,24 +22,22 @@ u32 ShaderIR::DecodeArithmeticImmediate(BasicBlock& bb, u32 pc) {
22 break; 22 break;
23 } 23 }
24 case OpCode::Id::FMUL32_IMM: { 24 case OpCode::Id::FMUL32_IMM: {
25 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
26 "Condition codes generation in FMUL32 is not implemented");
27 Node value = 25 Node value =
28 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); 26 Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
29 value = GetSaturatedFloat(value, instr.fmul32.saturate); 27 value = GetSaturatedFloat(value, instr.fmul32.saturate);
30 28
29 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
31 SetRegister(bb, instr.gpr0, value); 30 SetRegister(bb, instr.gpr0, value);
32 break; 31 break;
33 } 32 }
34 case OpCode::Id::FADD32I: { 33 case OpCode::Id::FADD32I: {
35 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
36 "Condition codes generation in FADD32I is not implemented");
37 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, 34 const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
38 instr.fadd32i.negate_a); 35 instr.fadd32i.negate_a);
39 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, 36 const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
40 instr.fadd32i.negate_b); 37 instr.fadd32i.negate_b);
41 38
42 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); 39 const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
40 SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc);
43 SetRegister(bb, instr.gpr0, value); 41 SetRegister(bb, instr.gpr0, value);
44 break; 42 break;
45 } 43 }
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index 931e0fa1d..edd1695f4 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -34,22 +34,20 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
34 case OpCode::Id::IADD_C: 34 case OpCode::Id::IADD_C:
35 case OpCode::Id::IADD_R: 35 case OpCode::Id::IADD_R:
36 case OpCode::Id::IADD_IMM: { 36 case OpCode::Id::IADD_IMM: {
37 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
38 "Condition codes generation in IADD is not implemented");
39 UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented"); 37 UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD saturation not implemented");
40 38
41 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); 39 op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
42 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); 40 op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
43 41
44 SetRegister(bb, instr.gpr0, Operation(OperationCode::IAdd, PRECISE, op_a, op_b)); 42 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
43
// IADD_C/IADD_R/IADD_IMM must read the condition-code enable from instr.generates_cc: that is
// the field the removed UNIMPLEMENTED check in this case tested. instr.op_32.generates_cc is
// the field the 32-bit-immediate opcodes (e.g. IADD32I) pass, so using it here samples the
// wrong field for this encoding.
44 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
45 SetRegister(bb, instr.gpr0, value);
45 break; 46 break;
46 } 47 }
47 case OpCode::Id::IADD3_C: 48 case OpCode::Id::IADD3_C:
48 case OpCode::Id::IADD3_R: 49 case OpCode::Id::IADD3_R:
49 case OpCode::Id::IADD3_IMM: { 50 case OpCode::Id::IADD3_IMM: {
50 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
51 "Condition codes generation in IADD3 is not implemented");
52
53 Node op_c = GetRegister(instr.gpr39); 51 Node op_c = GetRegister(instr.gpr39);
54 52
55 const auto ApplyHeight = [&](IAdd3Height height, Node value) { 53 const auto ApplyHeight = [&](IAdd3Height height, Node value) {
@@ -100,6 +98,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
100 return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); 98 return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
101 }(); 99 }();
102 100
101 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
103 SetRegister(bb, instr.gpr0, value); 102 SetRegister(bb, instr.gpr0, value);
104 break; 103 break;
105 } 104 }
@@ -115,6 +114,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
115 const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); 114 const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
116 const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); 115 const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
117 const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); 116 const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
117
118 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
118 SetRegister(bb, instr.gpr0, value); 119 SetRegister(bb, instr.gpr0, value);
119 break; 120 break;
120 } 121 }
@@ -139,24 +140,19 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
139 case OpCode::Id::LOP_C: 140 case OpCode::Id::LOP_C:
140 case OpCode::Id::LOP_R: 141 case OpCode::Id::LOP_R:
141 case OpCode::Id::LOP_IMM: { 142 case OpCode::Id::LOP_IMM: {
142 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
143 "Condition codes generation in LOP is not implemented");
144
145 if (instr.alu.lop.invert_a) 143 if (instr.alu.lop.invert_a)
146 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); 144 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
147 if (instr.alu.lop.invert_b) 145 if (instr.alu.lop.invert_b)
148 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); 146 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
149 147
150 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, 148 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
151 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48); 149 instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
150 instr.generates_cc);
152 break; 151 break;
153 } 152 }
154 case OpCode::Id::LOP3_C: 153 case OpCode::Id::LOP3_C:
155 case OpCode::Id::LOP3_R: 154 case OpCode::Id::LOP3_R:
156 case OpCode::Id::LOP3_IMM: { 155 case OpCode::Id::LOP3_IMM: {
157 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
158 "Condition codes generation in LOP3 is not implemented");
159
160 const Node op_c = GetRegister(instr.gpr39); 156 const Node op_c = GetRegister(instr.gpr39);
161 const Node lut = [&]() { 157 const Node lut = [&]() {
162 if (opcode->get().GetId() == OpCode::Id::LOP3_R) { 158 if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
@@ -166,15 +162,13 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
166 } 162 }
167 }(); 163 }();
168 164
169 WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut); 165 WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
170 break; 166 break;
171 } 167 }
172 case OpCode::Id::IMNMX_C: 168 case OpCode::Id::IMNMX_C:
173 case OpCode::Id::IMNMX_R: 169 case OpCode::Id::IMNMX_R:
174 case OpCode::Id::IMNMX_IMM: { 170 case OpCode::Id::IMNMX_IMM: {
175 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); 171 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
176 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
177 "Condition codes generation in IMNMX is not implemented");
178 172
179 const bool is_signed = instr.imnmx.is_signed; 173 const bool is_signed = instr.imnmx.is_signed;
180 174
@@ -182,6 +176,8 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
182 const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); 176 const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
183 const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); 177 const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
184 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); 178 const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
179
180 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
185 SetRegister(bb, instr.gpr0, value); 181 SetRegister(bb, instr.gpr0, value);
186 break; 182 break;
187 } 183 }
@@ -247,7 +243,7 @@ u32 ShaderIR::DecodeArithmeticInteger(BasicBlock& bb, u32 pc) {
247} 243}
248 244
249void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, 245void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
250 Node imm_lut) { 246 Node imm_lut, bool sets_cc) {
251 constexpr u32 lop_iterations = 32; 247 constexpr u32 lop_iterations = 32;
252 const Node one = Immediate(1); 248 const Node one = Immediate(1);
253 const Node two = Immediate(2); 249 const Node two = Immediate(2);
@@ -284,6 +280,7 @@ void ShaderIR::WriteLop3Instruction(BasicBlock& bb, Register dest, Node op_a, No
284 } 280 }
285 } 281 }
286 282
283 SetInternalFlagsFromInteger(bb, value, sets_cc);
287 SetRegister(bb, dest, value); 284 SetRegister(bb, dest, value);
288} 285}
289 286
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 3b8a60c6b..3cbaeeaf5 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -25,20 +25,17 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
25 25
26 switch (opcode->get().GetId()) { 26 switch (opcode->get().GetId()) {
27 case OpCode::Id::IADD32I: { 27 case OpCode::Id::IADD32I: {
28 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
29 "Condition codes generation in IADD32I is not implemented");
30 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); 28 UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
31 29
32 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); 30 op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
33 31
34 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); 32 const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
33
34 SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
35 SetRegister(bb, instr.gpr0, value); 35 SetRegister(bb, instr.gpr0, value);
36 break; 36 break;
37 } 37 }
38 case OpCode::Id::LOP32I: { 38 case OpCode::Id::LOP32I: {
39 UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
40 "Condition codes generation in LOP32I is not implemented");
41
42 if (instr.alu.lop32i.invert_a) 39 if (instr.alu.lop32i.invert_a)
43 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); 40 op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
44 41
@@ -46,8 +43,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
46 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); 43 op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
47 44
48 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, 45 WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
49 Tegra::Shader::PredicateResultMode::None, 46 PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
50 Tegra::Shader::Pred::UnusedIndex);
51 break; 47 break;
52 } 48 }
53 default: 49 default:
@@ -60,7 +56,7 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(BasicBlock& bb, u32 pc) {
60 56
61void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op, 57void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation logic_op,
62 Node op_a, Node op_b, PredicateResultMode predicate_mode, 58 Node op_a, Node op_b, PredicateResultMode predicate_mode,
63 Pred predicate) { 59 Pred predicate, bool sets_cc) {
64 const Node result = [&]() { 60 const Node result = [&]() {
65 switch (logic_op) { 61 switch (logic_op) {
66 case LogicOperation::And: 62 case LogicOperation::And:
@@ -77,11 +73,9 @@ void ShaderIR::WriteLogicOperation(BasicBlock& bb, Register dest, LogicOperation
77 } 73 }
78 }(); 74 }();
79 75
80 if (dest != Register::ZeroIndex) { 76 SetInternalFlagsFromInteger(bb, result, sets_cc);
81 SetRegister(bb, dest, result); 77 SetRegister(bb, dest, result);
82 }
83 78
84 using Tegra::Shader::PredicateResultMode;
85 // Write the predicate value depending on the predicate mode. 79 // Write the predicate value depending on the predicate mode.
86 switch (predicate_mode) { 80 switch (predicate_mode) {
87 case PredicateResultMode::None: 81 case PredicateResultMode::None:
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp
index 6532a3bce..d3244fd40 100644
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@@ -35,6 +35,7 @@ u32 ShaderIR::DecodeBfe(BasicBlock& bb, u32 pc) {
35 const Node outer_shift = 35 const Node outer_shift =
36 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm); 36 Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, inner_shift, outer_shift_imm);
37 37
38 SetInternalFlagsFromInteger(bb, outer_shift, instr.generates_cc);
38 SetRegister(bb, instr.gpr0, outer_shift); 39 SetRegister(bb, instr.gpr0, outer_shift);
39 break; 40 break;
40 } 41 }
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp
index b0d8d9eba..ddb1872c6 100644
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@@ -16,8 +16,6 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
16 const Instruction instr = {program_code[pc]}; 16 const Instruction instr = {program_code[pc]};
17 const auto opcode = OpCode::Decode(instr); 17 const auto opcode = OpCode::Decode(instr);
18 18
19 UNIMPLEMENTED_IF(instr.generates_cc);
20
21 const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> { 19 const auto [base, packed_shift] = [&]() -> std::tuple<Node, Node> {
22 switch (opcode->get().GetId()) { 20 switch (opcode->get().GetId()) {
23 case OpCode::Id::BFI_IMM_R: 21 case OpCode::Id::BFI_IMM_R:
@@ -33,6 +31,8 @@ u32 ShaderIR::DecodeBfi(BasicBlock& bb, u32 pc) {
33 31
34 const Node value = 32 const Node value =
35 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); 33 Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);
34
35 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
36 SetRegister(bb, instr.gpr0, value); 36 SetRegister(bb, instr.gpr0, value);
37 37
38 return pc; 38 return pc;
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 791f03fe0..d5c75e8eb 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -33,15 +33,8 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
33 value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value); 33 value = SignedOperation(OperationCode::ICastUnsigned, output_signed, NO_PRECISE, value);
34 } 34 }
35 35
36 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
36 SetRegister(bb, instr.gpr0, value); 37 SetRegister(bb, instr.gpr0, value);
37
38 if (instr.generates_cc) {
39 const Node zero_condition =
40 SignedOperation(OperationCode::LogicalIEqual, output_signed, value, Immediate(0));
41 SetInternalFlag(bb, InternalFlag::Zero, zero_condition);
42 LOG_WARNING(HW_GPU, "I2I Condition codes implementation is incomplete.");
43 }
44
45 break; 38 break;
46 } 39 }
47 case OpCode::Id::I2F_R: 40 case OpCode::Id::I2F_R:
@@ -64,6 +57,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
64 value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); 57 value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
65 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); 58 value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
66 59
60 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
67 SetRegister(bb, instr.gpr0, value); 61 SetRegister(bb, instr.gpr0, value);
68 break; 62 break;
69 } 63 }
@@ -103,6 +97,7 @@ u32 ShaderIR::DecodeConversion(BasicBlock& bb, u32 pc) {
103 }(); 97 }();
104 value = GetSaturatedFloat(value, instr.alu.saturate_d); 98 value = GetSaturatedFloat(value, instr.alu.saturate_d);
105 99
100 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
106 SetRegister(bb, instr.gpr0, value); 101 SetRegister(bb, instr.gpr0, value);
107 break; 102 break;
108 } 103 }
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index a17ebd6db..f3ab3d2e8 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -21,8 +21,6 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
21 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO 21 instr.ffma.tab5980_0.Value()); // Seems to be 1 by default based on SMO
22 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented", 22 UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
23 instr.ffma.tab5980_1.Value()); 23 instr.ffma.tab5980_1.Value());
24 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
25 "Condition codes generation in FFMA is not implemented");
26 24
27 const Node op_a = GetRegister(instr.gpr8); 25 const Node op_a = GetRegister(instr.gpr8);
28 26
@@ -52,6 +50,7 @@ u32 ShaderIR::DecodeFfma(BasicBlock& bb, u32 pc) {
52 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); 50 Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c);
53 value = GetSaturatedFloat(value, instr.alu.saturate_d); 51 value = GetSaturatedFloat(value, instr.alu.saturate_d);
54 52
53 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
55 SetRegister(bb, instr.gpr0, value); 54 SetRegister(bb, instr.gpr0, value);
56 55
57 return pc; 56 return pc;
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index b69d94c2e..8e266cc4e 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -45,13 +45,12 @@ u32 ShaderIR::DecodeFloatSet(BasicBlock& bb, u32 pc) {
45 const Node value = 45 const Node value =
46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); 46 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
47 47
48 SetRegister(bb, instr.gpr0, value); 48 if (instr.fset.bf) {
49 49 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
50 if (instr.generates_cc) { 50 } else {
51 const Node is_zero = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f)); 51 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
52 SetInternalFlag(bb, InternalFlag::Zero, is_zero);
53 LOG_WARNING(HW_GPU, "FSET condition code is incomplete");
54 } 52 }
53 SetRegister(bb, instr.gpr0, value);
55 54
56 return pc; 55 return pc;
57} 56}
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index 6c58496c2..58d20ceb5 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -32,6 +32,12 @@ u32 ShaderIR::DecodePredicateSetRegister(BasicBlock& bb, u32 pc) {
32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); 32 const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0);
33 const Node value = 33 const Node value =
34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); 34 Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value);
35
36 if (instr.pset.bf) {
37 SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
38 } else {
39 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
40 }
35 SetRegister(bb, instr.gpr0, value); 41 SetRegister(bb, instr.gpr0, value);
36 42
37 return pc; 43 return pc;
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index 3ba039d21..e8ffdb818 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -31,22 +31,20 @@ u32 ShaderIR::DecodeShift(BasicBlock& bb, u32 pc) {
31 case OpCode::Id::SHR_C: 31 case OpCode::Id::SHR_C:
32 case OpCode::Id::SHR_R: 32 case OpCode::Id::SHR_R:
33 case OpCode::Id::SHR_IMM: { 33 case OpCode::Id::SHR_IMM: {
34 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
35 "Condition codes generation in SHR is not implemented");
36
37 const Node value = SignedOperation(OperationCode::IArithmeticShiftRight, 34 const Node value = SignedOperation(OperationCode::IArithmeticShiftRight,
38 instr.shift.is_signed, PRECISE, op_a, op_b); 35 instr.shift.is_signed, PRECISE, op_a, op_b);
36 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
39 SetRegister(bb, instr.gpr0, value); 37 SetRegister(bb, instr.gpr0, value);
40 break; 38 break;
41 } 39 }
42 case OpCode::Id::SHL_C: 40 case OpCode::Id::SHL_C:
43 case OpCode::Id::SHL_R: 41 case OpCode::Id::SHL_R:
44 case OpCode::Id::SHL_IMM: 42 case OpCode::Id::SHL_IMM: {
45 UNIMPLEMENTED_IF_MSG(instr.generates_cc, 43 const Node value = Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b);
46 "Condition codes generation in SHL is not implemented"); 44 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
47 SetRegister(bb, instr.gpr0, 45 SetRegister(bb, instr.gpr0, value);
48 Operation(OperationCode::ILogicalShiftLeft, PRECISE, op_a, op_b));
49 break; 46 break;
47 }
50 default: 48 default:
51 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); 49 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
52 } 50 }
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index b491fbadb..609b3a257 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -38,9 +38,6 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
38 38
39 switch (opcode->get().GetId()) { 39 switch (opcode->get().GetId()) {
40 case OpCode::Id::VMAD: { 40 case OpCode::Id::VMAD: {
41 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
42 "Condition codes generation in VMAD is not implemented");
43
44 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; 41 const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
45 const Node op_c = GetRegister(instr.gpr39); 42 const Node op_c = GetRegister(instr.gpr39);
46 43
@@ -53,8 +50,8 @@ u32 ShaderIR::DecodeVideo(BasicBlock& bb, u32 pc) {
53 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); 50 SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
54 } 51 }
55 52
53 SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
56 SetRegister(bb, instr.gpr0, value); 54 SetRegister(bb, instr.gpr0, value);
57
58 break; 55 break;
59 } 56 }
60 case OpCode::Id::VSETP: { 57 case OpCode::Id::VSETP: {
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index 3e37aee4a..88f1be27d 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -86,6 +86,7 @@ u32 ShaderIR::DecodeXmad(BasicBlock& bb, u32 pc) {
86 sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b); 86 sum = Operation(OperationCode::IBitwiseOr, NO_PRECISE, a, b);
87 } 87 }
88 88
89 SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
89 SetRegister(bb, instr.gpr0, sum); 90 SetRegister(bb, instr.gpr0, sum);
90 91
91 return pc; 92 return pc;
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 4474af7c4..d7747103e 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -7,6 +7,7 @@
7 7
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/logging/log.h"
10#include "video_core/engines/shader_bytecode.h" 11#include "video_core/engines/shader_bytecode.h"
11#include "video_core/shader/shader_ir.h" 12#include "video_core/shader/shader_ir.h"
12 13
@@ -356,6 +357,24 @@ void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) {
356 SetRegister(bb, Register::ZeroIndex + 1 + id, value); 357 SetRegister(bb, Register::ZeroIndex + 1 + id, value);
357} 358}
358 359
/// Updates the Zero internal flag from a float result.
/// @param bb      Basic block forwarded to SetInternalFlag.
/// @param value   Node whose value is compared against 0.0f.
/// @param sets_cc When false the function returns immediately and writes nothing.
360void ShaderIR::SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc) {
361 if (!sets_cc) {
362 return;
363 }
// Zero flag condition: float equality of the result with 0.0f.
364 const Node zerop = Operation(OperationCode::LogicalFEqual, value, Immediate(0.0f));
365 SetInternalFlag(bb, InternalFlag::Zero, zerop);
// InternalFlag::Zero is the only flag written here; the warning is logged on every call
// that reaches this point (i.e. whenever sets_cc is true).
366 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
367}
368
/// Updates the Zero internal flag from an integer result.
/// @param bb      Basic block forwarded to SetInternalFlag.
/// @param value   Node whose value is compared against the integer immediate 0.
/// @param sets_cc When false the function returns immediately and writes nothing.
369void ShaderIR::SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc) {
370 if (!sets_cc) {
371 return;
372 }
// Zero flag condition: integer equality of the result with 0.
373 const Node zerop = Operation(OperationCode::LogicalIEqual, value, Immediate(0));
374 SetInternalFlag(bb, InternalFlag::Zero, zerop);
// InternalFlag::Zero is the only flag written here; the warning is logged on every call
// that reaches this point (i.e. whenever sets_cc is true).
375 LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
376}
377
359Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { 378Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
360 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), 379 return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset),
361 Immediate(bits)); 380 Immediate(bits));
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0c8f4a265..47f460bcf 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -649,6 +649,11 @@ private:
649 /// Sets a temporal. Internally it uses a post-RZ register 649 /// Sets a temporal. Internally it uses a post-RZ register
650 void SetTemporal(BasicBlock& bb, u32 id, Node value); 650 void SetTemporal(BasicBlock& bb, u32 id, Node value);
651 651
652 /// Sets internal flags from a float
653 void SetInternalFlagsFromFloat(BasicBlock& bb, Node value, bool sets_cc = true);
654 /// Sets internal flags from an integer
655 void SetInternalFlagsFromInteger(BasicBlock& bb, Node value, bool sets_cc = true);
656
652 /// Conditionally absolute/negated float. Absolute is applied first 657 /// Conditionally absolute/negated float. Absolute is applied first
653 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); 658 Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
654 /// Conditionally saturates a float 659 /// Conditionally saturates a float
@@ -725,9 +730,9 @@ private:
725 void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest, 730 void WriteLogicOperation(BasicBlock& bb, Tegra::Shader::Register dest,
726 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, 731 Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
727 Tegra::Shader::PredicateResultMode predicate_mode, 732 Tegra::Shader::PredicateResultMode predicate_mode,
728 Tegra::Shader::Pred predicate); 733 Tegra::Shader::Pred predicate, bool sets_cc);
729 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, 734 void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
730 Node op_c, Node imm_lut); 735 Node op_c, Node imm_lut, bool sets_cc);
731 736
732 template <typename... T> 737 template <typename... T>
733 Node Operation(OperationCode code, const T*... operands) { 738 Node Operation(OperationCode code, const T*... operands) {