summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: ReinUsesLisp 2021-02-22 22:59:16 -0300
committer: ameerj 2021-07-22 21:51:22 -0400
commit: e44752ddc8804961eb84f8c225bb36d5b4c77bc1 (patch)
tree: 84df0e38680470a0ee8c2230625193c4156ddea6 /src
parent: shader: Fix MOV(reg), add SHL variants and emit neg and abs instructions (diff)
download: yuzu-e44752ddc8804961eb84f8c225bb36d5b4c77bc1.tar.gz
yuzu-e44752ddc8804961eb84f8c225bb36d5b4c77bc1.tar.xz
yuzu-e44752ddc8804961eb84f8c225bb36d5b4c77bc1.zip
shader: FMUL, select, RRO, and MUFU fixes
Diffstat (limited to 'src')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt 1
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv.h 67
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp 192
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_select.cpp 21
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.cpp 144
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.h 18
-rw-r--r-- src/shader_recompiler/frontend/ir/opcodes.inc 50
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h 3
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp 2
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp 4
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp 8
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp 42
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp 41
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp 12
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/impl.h 5
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp 2
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp 12
-rw-r--r-- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp 2
18 files changed, 507 insertions, 119 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 802527255..5574feaa6 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -65,6 +65,7 @@ add_library(shader_recompiler STATIC
65 frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp 65 frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
66 frontend/maxwell/translate/impl/floating_point_multi_function.cpp 66 frontend/maxwell/translate/impl/floating_point_multi_function.cpp
67 frontend/maxwell/translate/impl/floating_point_multiply.cpp 67 frontend/maxwell/translate/impl/floating_point_multiply.cpp
68 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
68 frontend/maxwell/translate/impl/impl.cpp 69 frontend/maxwell/translate/impl/impl.cpp
69 frontend/maxwell/translate/impl/impl.h 70 frontend/maxwell/translate/impl/impl.h
70 frontend/maxwell/translate/impl/integer_add.cpp 71 frontend/maxwell/translate/impl/integer_add.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 1b9be445e..130c71996 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -108,10 +108,12 @@ void EmitCompositeConstructF64x4(EmitContext& ctx);
108void EmitCompositeExtractF64x2(EmitContext& ctx); 108void EmitCompositeExtractF64x2(EmitContext& ctx);
109void EmitCompositeExtractF64x3(EmitContext& ctx); 109void EmitCompositeExtractF64x3(EmitContext& ctx);
110void EmitCompositeExtractF64x4(EmitContext& ctx); 110void EmitCompositeExtractF64x4(EmitContext& ctx);
111void EmitSelect8(EmitContext& ctx); 111Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
112void EmitSelect16(EmitContext& ctx); 112Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
113Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value); 113Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
114void EmitSelect64(EmitContext& ctx); 114Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value);
115Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
116Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
115void EmitBitCastU16F16(EmitContext& ctx); 117void EmitBitCastU16F16(EmitContext& ctx);
116Id EmitBitCastU32F32(EmitContext& ctx, Id value); 118Id EmitBitCastU32F32(EmitContext& ctx, Id value);
117void EmitBitCastU64F64(EmitContext& ctx); 119void EmitBitCastU64F64(EmitContext& ctx);
@@ -149,18 +151,15 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
149Id EmitFPNeg16(EmitContext& ctx, Id value); 151Id EmitFPNeg16(EmitContext& ctx, Id value);
150Id EmitFPNeg32(EmitContext& ctx, Id value); 152Id EmitFPNeg32(EmitContext& ctx, Id value);
151Id EmitFPNeg64(EmitContext& ctx, Id value); 153Id EmitFPNeg64(EmitContext& ctx, Id value);
152void EmitFPRecip32(EmitContext& ctx); 154Id EmitFPSin(EmitContext& ctx, Id value);
153void EmitFPRecip64(EmitContext& ctx); 155Id EmitFPCos(EmitContext& ctx, Id value);
154void EmitFPRecipSqrt32(EmitContext& ctx); 156Id EmitFPExp2(EmitContext& ctx, Id value);
155void EmitFPRecipSqrt64(EmitContext& ctx); 157Id EmitFPLog2(EmitContext& ctx, Id value);
156void EmitFPSqrt(EmitContext& ctx); 158Id EmitFPRecip32(EmitContext& ctx, Id value);
157void EmitFPSin(EmitContext& ctx); 159Id EmitFPRecip64(EmitContext& ctx, Id value);
158void EmitFPSinNotReduced(EmitContext& ctx); 160Id EmitFPRecipSqrt32(EmitContext& ctx, Id value);
159void EmitFPExp2(EmitContext& ctx); 161Id EmitFPRecipSqrt64(EmitContext& ctx, Id value);
160void EmitFPExp2NotReduced(EmitContext& ctx); 162Id EmitFPSqrt(EmitContext& ctx, Id value);
161void EmitFPCos(EmitContext& ctx);
162void EmitFPCosNotReduced(EmitContext& ctx);
163void EmitFPLog2(EmitContext& ctx);
164Id EmitFPSaturate16(EmitContext& ctx, Id value); 163Id EmitFPSaturate16(EmitContext& ctx, Id value);
165Id EmitFPSaturate32(EmitContext& ctx, Id value); 164Id EmitFPSaturate32(EmitContext& ctx, Id value);
166Id EmitFPSaturate64(EmitContext& ctx, Id value); 165Id EmitFPSaturate64(EmitContext& ctx, Id value);
@@ -176,6 +175,42 @@ Id EmitFPCeil64(EmitContext& ctx, Id value);
176Id EmitFPTrunc16(EmitContext& ctx, Id value); 175Id EmitFPTrunc16(EmitContext& ctx, Id value);
177Id EmitFPTrunc32(EmitContext& ctx, Id value); 176Id EmitFPTrunc32(EmitContext& ctx, Id value);
178Id EmitFPTrunc64(EmitContext& ctx, Id value); 177Id EmitFPTrunc64(EmitContext& ctx, Id value);
178Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs);
179Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs);
180Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs);
181Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs);
182Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs);
183Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs);
184Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
185Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
186Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
187Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs);
188Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs);
189Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs);
190Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs);
191Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs);
192Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs);
193Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs);
194Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs);
195Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs);
196Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
197Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
198Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
199Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs);
200Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs);
201Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs);
202Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
203Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
204Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
205Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
206Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
207Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
208Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
209Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
210Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
211Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
212Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
213Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
179Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); 214Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
180void EmitIAdd64(EmitContext& ctx); 215void EmitIAdd64(EmitContext& ctx);
181Id EmitISub32(EmitContext& ctx, Id a, Id b); 216Id EmitISub32(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index 5d0b74f9b..749f11742 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -100,52 +100,40 @@ Id EmitFPNeg64(EmitContext& ctx, Id value) {
100 return ctx.OpFNegate(ctx.F64[1], value); 100 return ctx.OpFNegate(ctx.F64[1], value);
101} 101}
102 102
103void EmitFPRecip32(EmitContext&) { 103Id EmitFPSin(EmitContext& ctx, Id value) {
104 throw NotImplementedException("SPIR-V Instruction"); 104 return ctx.OpSin(ctx.F32[1], value);
105}
106
107void EmitFPRecip64(EmitContext&) {
108 throw NotImplementedException("SPIR-V Instruction");
109} 105}
110 106
111void EmitFPRecipSqrt32(EmitContext&) { 107Id EmitFPCos(EmitContext& ctx, Id value) {
112 throw NotImplementedException("SPIR-V Instruction"); 108 return ctx.OpCos(ctx.F32[1], value);
113} 109}
114 110
115void EmitFPRecipSqrt64(EmitContext&) { 111Id EmitFPExp2(EmitContext& ctx, Id value) {
116 throw NotImplementedException("SPIR-V Instruction"); 112 return ctx.OpExp2(ctx.F32[1], value);
117} 113}
118 114
119void EmitFPSqrt(EmitContext&) { 115Id EmitFPLog2(EmitContext& ctx, Id value) {
120 throw NotImplementedException("SPIR-V Instruction"); 116 return ctx.OpLog2(ctx.F32[1], value);
121} 117}
122 118
123void EmitFPSin(EmitContext&) { 119Id EmitFPRecip32(EmitContext& ctx, Id value) {
124 throw NotImplementedException("SPIR-V Instruction"); 120 return ctx.OpFDiv(ctx.F32[1], ctx.Constant(ctx.F32[1], 1.0f), value);
125} 121}
126 122
127void EmitFPSinNotReduced(EmitContext&) { 123Id EmitFPRecip64(EmitContext& ctx, Id value) {
128 throw NotImplementedException("SPIR-V Instruction"); 124 return ctx.OpFDiv(ctx.F64[1], ctx.Constant(ctx.F64[1], 1.0f), value);
129} 125}
130 126
131void EmitFPExp2(EmitContext&) { 127Id EmitFPRecipSqrt32(EmitContext& ctx, Id value) {
132 throw NotImplementedException("SPIR-V Instruction"); 128 return ctx.OpInverseSqrt(ctx.F32[1], value);
133} 129}
134 130
135void EmitFPExp2NotReduced(EmitContext&) { 131Id EmitFPRecipSqrt64(EmitContext& ctx, Id value) {
136 throw NotImplementedException("SPIR-V Instruction"); 132 return ctx.OpInverseSqrt(ctx.F64[1], value);
137} 133}
138 134
139void EmitFPCos(EmitContext&) { 135Id EmitFPSqrt(EmitContext& ctx, Id value) {
140 throw NotImplementedException("SPIR-V Instruction"); 136 return ctx.OpSqrt(ctx.F32[1], value);
141}
142
143void EmitFPCosNotReduced(EmitContext&) {
144 throw NotImplementedException("SPIR-V Instruction");
145}
146
147void EmitFPLog2(EmitContext&) {
148 throw NotImplementedException("SPIR-V Instruction");
149} 137}
150 138
151Id EmitFPSaturate16(EmitContext& ctx, Id value) { 139Id EmitFPSaturate16(EmitContext& ctx, Id value) {
@@ -214,4 +202,148 @@ Id EmitFPTrunc64(EmitContext& ctx, Id value) {
214 return ctx.OpTrunc(ctx.F64[1], value); 202 return ctx.OpTrunc(ctx.F64[1], value);
215} 203}
216 204
205Id EmitFPOrdEqual16(EmitContext& ctx, Id lhs, Id rhs) {
206 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
207}
208
209Id EmitFPOrdEqual32(EmitContext& ctx, Id lhs, Id rhs) {
210 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
211}
212
213Id EmitFPOrdEqual64(EmitContext& ctx, Id lhs, Id rhs) {
214 return ctx.OpFOrdEqual(ctx.U1, lhs, rhs);
215}
216
217Id EmitFPUnordEqual16(EmitContext& ctx, Id lhs, Id rhs) {
218 return ctx.OpFUnordEqual(ctx.U1, lhs, rhs);
219}
220
221Id EmitFPUnordEqual32(EmitContext& ctx, Id lhs, Id rhs) {
222 return ctx.OpFUnordEqual(ctx.U1, lhs, rhs);
223}
224
225Id EmitFPUnordEqual64(EmitContext& ctx, Id lhs, Id rhs) {
226 return ctx.OpFUnordEqual(ctx.U1, lhs, rhs);
227}
228
229Id EmitFPOrdNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
230 return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
231}
232
233Id EmitFPOrdNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
234 return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
235}
236
237Id EmitFPOrdNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
238 return ctx.OpFOrdNotEqual(ctx.U1, lhs, rhs);
239}
240
241Id EmitFPUnordNotEqual16(EmitContext& ctx, Id lhs, Id rhs) {
242 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
243}
244
245Id EmitFPUnordNotEqual32(EmitContext& ctx, Id lhs, Id rhs) {
246 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
247}
248
249Id EmitFPUnordNotEqual64(EmitContext& ctx, Id lhs, Id rhs) {
250 return ctx.OpFUnordNotEqual(ctx.U1, lhs, rhs);
251}
252
253Id EmitFPOrdLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
254 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
255}
256
257Id EmitFPOrdLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
258 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
259}
260
261Id EmitFPOrdLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
262 return ctx.OpFOrdLessThan(ctx.U1, lhs, rhs);
263}
264
265Id EmitFPUnordLessThan16(EmitContext& ctx, Id lhs, Id rhs) {
266 return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs);
267}
268
269Id EmitFPUnordLessThan32(EmitContext& ctx, Id lhs, Id rhs) {
270 return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs);
271}
272
273Id EmitFPUnordLessThan64(EmitContext& ctx, Id lhs, Id rhs) {
274 return ctx.OpFUnordLessThan(ctx.U1, lhs, rhs);
275}
276
277Id EmitFPOrdGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
278 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
279}
280
281Id EmitFPOrdGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
282 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
283}
284
285Id EmitFPOrdGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
286 return ctx.OpFOrdGreaterThan(ctx.U1, lhs, rhs);
287}
288
289Id EmitFPUnordGreaterThan16(EmitContext& ctx, Id lhs, Id rhs) {
290 return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs);
291}
292
293Id EmitFPUnordGreaterThan32(EmitContext& ctx, Id lhs, Id rhs) {
294 return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs);
295}
296
297Id EmitFPUnordGreaterThan64(EmitContext& ctx, Id lhs, Id rhs) {
298 return ctx.OpFUnordGreaterThan(ctx.U1, lhs, rhs);
299}
300
301Id EmitFPOrdLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
302 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
303}
304
305Id EmitFPOrdLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
306 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
307}
308
309Id EmitFPOrdLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
310 return ctx.OpFOrdLessThanEqual(ctx.U1, lhs, rhs);
311}
312
313Id EmitFPUnordLessThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
314 return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs);
315}
316
317Id EmitFPUnordLessThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
318 return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs);
319}
320
321Id EmitFPUnordLessThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
322 return ctx.OpFUnordLessThanEqual(ctx.U1, lhs, rhs);
323}
324
325Id EmitFPOrdGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
326 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
327}
328
329Id EmitFPOrdGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
330 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
331}
332
333Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
334 return ctx.OpFOrdGreaterThanEqual(ctx.U1, lhs, rhs);
335}
336
337Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs) {
338 return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
339}
340
341Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs) {
342 return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
343}
344
345Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
346 return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
347}
348
217} // namespace Shader::Backend::SPIRV 349} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
index eb1926a4d..21cca4455 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_select.cpp
@@ -6,20 +6,29 @@
6 6
7namespace Shader::Backend::SPIRV { 7namespace Shader::Backend::SPIRV {
8 8
9void EmitSelect8(EmitContext&) { 9Id EmitSelectU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] Id cond,
10 [[maybe_unused]] Id true_value, [[maybe_unused]] Id false_value) {
10 throw NotImplementedException("SPIR-V Instruction"); 11 throw NotImplementedException("SPIR-V Instruction");
11} 12}
12 13
13void EmitSelect16(EmitContext&) { 14Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
14 throw NotImplementedException("SPIR-V Instruction"); 15 return ctx.OpSelect(ctx.U16, cond, true_value, false_value);
15} 16}
16 17
17Id EmitSelect32(EmitContext& ctx, Id cond, Id true_value, Id false_value) { 18Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
18 return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value); 19 return ctx.OpSelect(ctx.U32[1], cond, true_value, false_value);
19} 20}
20 21
21void EmitSelect64(EmitContext&) { 22Id EmitSelectU64(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
22 throw NotImplementedException("SPIR-V Instruction"); 23 return ctx.OpSelect(ctx.U64, cond, true_value, false_value);
24}
25
26Id EmitSelectF16(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
27 return ctx.OpSelect(ctx.F16[1], cond, true_value, false_value);
28}
29
30Id EmitSelectF32(EmitContext& ctx, Id cond, Id true_value, Id false_value) {
31 return ctx.OpSelect(ctx.F32[1], cond, true_value, false_value);
23} 32}
24 33
25} // namespace Shader::Backend::SPIRV 34} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 34c2f67fb..8ba86e614 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -361,19 +361,21 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
361 } 361 }
362} 362}
363 363
364UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { 364Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
365 if (true_value.Type() != false_value.Type()) { 365 if (true_value.Type() != false_value.Type()) {
366 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); 366 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
367 } 367 }
368 switch (true_value.Type()) { 368 switch (true_value.Type()) {
369 case Type::U8: 369 case Type::U8:
370 return Inst<UAny>(Opcode::Select8, condition, true_value, false_value); 370 return Inst(Opcode::SelectU8, condition, true_value, false_value);
371 case Type::U16: 371 case Type::U16:
372 return Inst<UAny>(Opcode::Select16, condition, true_value, false_value); 372 return Inst(Opcode::SelectU16, condition, true_value, false_value);
373 case Type::U32: 373 case Type::U32:
374 return Inst<UAny>(Opcode::Select32, condition, true_value, false_value); 374 return Inst(Opcode::SelectU32, condition, true_value, false_value);
375 case Type::U64: 375 case Type::U64:
376 return Inst<UAny>(Opcode::Select64, condition, true_value, false_value); 376 return Inst(Opcode::SelectU64, condition, true_value, false_value);
377 case Type::F32:
378 return Inst(Opcode::SelectF32, condition, true_value, false_value);
377 default: 379 default:
378 throw InvalidArgument("Invalid type {}", true_value.Type()); 380 throw InvalidArgument("Invalid type {}", true_value.Type());
379 } 381 }
@@ -503,12 +505,16 @@ F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
503 return result; 505 return result;
504} 506}
505 507
506F32 IREmitter::FPCosNotReduced(const F32& value) { 508F32 IREmitter::FPCos(const F32& value) {
507 return Inst<F32>(Opcode::FPCosNotReduced, value); 509 return Inst<F32>(Opcode::FPCos, value);
510}
511
512F32 IREmitter::FPSin(const F32& value) {
513 return Inst<F32>(Opcode::FPSin, value);
508} 514}
509 515
510F32 IREmitter::FPExp2NotReduced(const F32& value) { 516F32 IREmitter::FPExp2(const F32& value) {
511 return Inst<F32>(Opcode::FPExp2NotReduced, value); 517 return Inst<F32>(Opcode::FPExp2, value);
512} 518}
513 519
514F32 IREmitter::FPLog2(const F32& value) { 520F32 IREmitter::FPLog2(const F32& value) {
@@ -517,9 +523,9 @@ F32 IREmitter::FPLog2(const F32& value) {
517 523
518F32F64 IREmitter::FPRecip(const F32F64& value) { 524F32F64 IREmitter::FPRecip(const F32F64& value) {
519 switch (value.Type()) { 525 switch (value.Type()) {
520 case Type::U32: 526 case Type::F32:
521 return Inst<F32>(Opcode::FPRecip32, value); 527 return Inst<F32>(Opcode::FPRecip32, value);
522 case Type::U64: 528 case Type::F64:
523 return Inst<F64>(Opcode::FPRecip64, value); 529 return Inst<F64>(Opcode::FPRecip64, value);
524 default: 530 default:
525 ThrowInvalidType(value.Type()); 531 ThrowInvalidType(value.Type());
@@ -528,19 +534,15 @@ F32F64 IREmitter::FPRecip(const F32F64& value) {
528 534
529F32F64 IREmitter::FPRecipSqrt(const F32F64& value) { 535F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
530 switch (value.Type()) { 536 switch (value.Type()) {
531 case Type::U32: 537 case Type::F32:
532 return Inst<F32>(Opcode::FPRecipSqrt32, value); 538 return Inst<F32>(Opcode::FPRecipSqrt32, value);
533 case Type::U64: 539 case Type::F64:
534 return Inst<F64>(Opcode::FPRecipSqrt64, value); 540 return Inst<F64>(Opcode::FPRecipSqrt64, value);
535 default: 541 default:
536 ThrowInvalidType(value.Type()); 542 ThrowInvalidType(value.Type());
537 } 543 }
538} 544}
539 545
540F32 IREmitter::FPSinNotReduced(const F32& value) {
541 return Inst<F32>(Opcode::FPSinNotReduced, value);
542}
543
544F32 IREmitter::FPSqrt(const F32& value) { 546F32 IREmitter::FPSqrt(const F32& value) {
545 return Inst<F32>(Opcode::FPSqrt, value); 547 return Inst<F32>(Opcode::FPSqrt, value);
546} 548}
@@ -610,6 +612,114 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
610 } 612 }
611} 613}
612 614
615U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
616 if (lhs.Type() != rhs.Type()) {
617 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
618 }
619 switch (lhs.Type()) {
620 case Type::F16:
621 return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs);
622 case Type::F32:
623 return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs);
624 case Type::F64:
625 return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs);
626 default:
627 ThrowInvalidType(lhs.Type());
628 }
629}
630
631U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
632 if (lhs.Type() != rhs.Type()) {
633 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
634 }
635 switch (lhs.Type()) {
636 case Type::F16:
637 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs);
638 case Type::F32:
639 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs);
640 case Type::F64:
641 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs);
642 default:
643 ThrowInvalidType(lhs.Type());
644 }
645}
646
647U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
648 if (lhs.Type() != rhs.Type()) {
649 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
650 }
651 switch (lhs.Type()) {
652 case Type::F16:
653 return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs);
654 case Type::F32:
655 return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs);
656 case Type::F64:
657 return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs);
658 default:
659 ThrowInvalidType(lhs.Type());
660 }
661}
662
663U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
664 if (lhs.Type() != rhs.Type()) {
665 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
666 }
667 switch (lhs.Type()) {
668 case Type::F16:
669 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs,
670 rhs);
671 case Type::F32:
672 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs,
673 rhs);
674 case Type::F64:
675 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs,
676 rhs);
677 default:
678 ThrowInvalidType(lhs.Type());
679 }
680}
681
682U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
683 if (lhs.Type() != rhs.Type()) {
684 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
685 }
686 switch (lhs.Type()) {
687 case Type::F16:
688 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
689 lhs, rhs);
690 case Type::F32:
691 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
692 lhs, rhs);
693 case Type::F64:
694 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
695 lhs, rhs);
696 default:
697 ThrowInvalidType(lhs.Type());
698 }
699}
700
701U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
702 if (lhs.Type() != rhs.Type()) {
703 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
704 }
705 switch (lhs.Type()) {
706 case Type::F16:
707 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
708 : Opcode::FPUnordGreaterThanEqual16,
709 lhs, rhs);
710 case Type::F32:
711 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
712 : Opcode::FPUnordGreaterThanEqual32,
713 lhs, rhs);
714 case Type::F64:
715 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
716 : Opcode::FPUnordGreaterThanEqual64,
717 lhs, rhs);
718 default:
719 ThrowInvalidType(lhs.Type());
720 }
721}
722
613U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { 723U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
614 if (a.Type() != b.Type()) { 724 if (a.Type() != b.Type()) {
615 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); 725 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 959f4f9da..2c923716a 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -98,7 +98,8 @@ public:
98 const Value& e4); 98 const Value& e4);
99 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); 99 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
100 100
101 [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); 101 [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
102 const Value& false_value);
102 103
103 template <typename Dest, typename Source> 104 template <typename Dest, typename Source>
104 [[nodiscard]] Dest BitCast(const Source& value); 105 [[nodiscard]] Dest BitCast(const Source& value);
@@ -121,12 +122,12 @@ public:
121 [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value); 122 [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
122 [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg); 123 [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
123 124
124 [[nodiscard]] F32 FPCosNotReduced(const F32& value); 125 [[nodiscard]] F32 FPCos(const F32& value);
125 [[nodiscard]] F32 FPExp2NotReduced(const F32& value); 126 [[nodiscard]] F32 FPSin(const F32& value);
127 [[nodiscard]] F32 FPExp2(const F32& value);
126 [[nodiscard]] F32 FPLog2(const F32& value); 128 [[nodiscard]] F32 FPLog2(const F32& value);
127 [[nodiscard]] F32F64 FPRecip(const F32F64& value); 129 [[nodiscard]] F32F64 FPRecip(const F32F64& value);
128 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); 130 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
129 [[nodiscard]] F32 FPSinNotReduced(const F32& value);
130 [[nodiscard]] F32 FPSqrt(const F32& value); 131 [[nodiscard]] F32 FPSqrt(const F32& value);
131 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); 132 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
132 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); 133 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
@@ -134,6 +135,15 @@ public:
134 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); 135 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
135 [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); 136 [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
136 137
138 [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
139 [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
140 [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
141 [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
142 [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
143 bool ordered = true);
144 [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
145 bool ordered = true);
146
137 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); 147 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
138 [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); 148 [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
139 [[nodiscard]] U32 IMul(const U32& a, const U32& b); 149 [[nodiscard]] U32 IMul(const U32& a, const U32& b);
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 50da77535..f2d71144a 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -103,10 +103,12 @@ OPCODE(CompositeExtractF64x3, F64, F64x
103OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) 103OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
104 104
105// Select operations 105// Select operations
106OPCODE(Select8, U8, U1, U8, U8, ) 106OPCODE(SelectU8, U8, U1, U8, U8, )
107OPCODE(Select16, U16, U1, U16, U16, ) 107OPCODE(SelectU16, U16, U1, U16, U16, )
108OPCODE(Select32, U32, U1, U32, U32, ) 108OPCODE(SelectU32, U32, U1, U32, U32, )
109OPCODE(Select64, U64, U1, U64, U64, ) 109OPCODE(SelectU64, U64, U1, U64, U64, )
110OPCODE(SelectF16, F16, U1, F16, F16, )
111OPCODE(SelectF32, F32, U1, F32, F32, )
110 112
111// Bitwise conversions 113// Bitwise conversions
112OPCODE(BitCastU16F16, U16, F16, ) 114OPCODE(BitCastU16F16, U16, F16, )
@@ -156,11 +158,8 @@ OPCODE(FPRecipSqrt32, F32, F32,
156OPCODE(FPRecipSqrt64, F64, F64, ) 158OPCODE(FPRecipSqrt64, F64, F64, )
157OPCODE(FPSqrt, F32, F32, ) 159OPCODE(FPSqrt, F32, F32, )
158OPCODE(FPSin, F32, F32, ) 160OPCODE(FPSin, F32, F32, )
159OPCODE(FPSinNotReduced, F32, F32, )
160OPCODE(FPExp2, F32, F32, ) 161OPCODE(FPExp2, F32, F32, )
161OPCODE(FPExp2NotReduced, F32, F32, )
162OPCODE(FPCos, F32, F32, ) 162OPCODE(FPCos, F32, F32, )
163OPCODE(FPCosNotReduced, F32, F32, )
164OPCODE(FPLog2, F32, F32, ) 163OPCODE(FPLog2, F32, F32, )
165OPCODE(FPSaturate16, F16, F16, ) 164OPCODE(FPSaturate16, F16, F16, )
166OPCODE(FPSaturate32, F32, F32, ) 165OPCODE(FPSaturate32, F32, F32, )
@@ -178,6 +177,43 @@ OPCODE(FPTrunc16, F16, F16,
178OPCODE(FPTrunc32, F32, F32, ) 177OPCODE(FPTrunc32, F32, F32, )
179OPCODE(FPTrunc64, F64, F64, ) 178OPCODE(FPTrunc64, F64, F64, )
180 179
180OPCODE(FPOrdEqual16, U1, F16, F16, )
181OPCODE(FPOrdEqual32, U1, F32, F32, )
182OPCODE(FPOrdEqual64, U1, F64, F64, )
183OPCODE(FPUnordEqual16, U1, F16, F16, )
184OPCODE(FPUnordEqual32, U1, F32, F32, )
185OPCODE(FPUnordEqual64, U1, F64, F64, )
186OPCODE(FPOrdNotEqual16, U1, F16, F16, )
187OPCODE(FPOrdNotEqual32, U1, F32, F32, )
188OPCODE(FPOrdNotEqual64, U1, F64, F64, )
189OPCODE(FPUnordNotEqual16, U1, F16, F16, )
190OPCODE(FPUnordNotEqual32, U1, F32, F32, )
191OPCODE(FPUnordNotEqual64, U1, F64, F64, )
192OPCODE(FPOrdLessThan16, U1, F16, F16, )
193OPCODE(FPOrdLessThan32, U1, F32, F32, )
194OPCODE(FPOrdLessThan64, U1, F64, F64, )
195OPCODE(FPUnordLessThan16, U1, F16, F16, )
196OPCODE(FPUnordLessThan32, U1, F32, F32, )
197OPCODE(FPUnordLessThan64, U1, F64, F64, )
198OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
199OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
200OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
201OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
202OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
203OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
204OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
205OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
206OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
207OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
208OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
209OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
210OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
211OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
212OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
213OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
214OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
215OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
216
181// Integer operations 217// Integer operations
182OPCODE(IAdd32, U32, U32, U32, ) 218OPCODE(IAdd32, U32, U32, U32, )
183OPCODE(IAdd64, U64, U64, U64, ) 219OPCODE(IAdd64, U64, U64, U64, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
index 3da37a2bb..fd73f656c 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -46,7 +46,8 @@ inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
46 case FmzMode::FTZ: 46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ; 47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ: 48 case FmzMode::FMZ:
49 return IR::FmzMode::FMZ; 49 // FMZ is manually handled in the instruction
50 return IR::FmzMode::FTZ;
50 case FmzMode::INVALIDFMZ3: 51 case FmzMode::INVALIDFMZ3:
51 break; 52 break;
52 } 53 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
index 219ffcc6a..76a807d4e 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
53} // Anonymous namespace 53} // Anonymous namespace
54 54
55void TranslatorVisitor::FADD_reg(u64 insn) { 55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetRegFloat20(insn)); 56 FADD(*this, insn, GetFloatReg20(insn));
57} 57}
58 58
59void TranslatorVisitor::FADD_cbuf(u64 insn) { 59void TranslatorVisitor::FADD_cbuf(u64 insn) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
index 758700d3c..c2ca0873b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& s
51} // Anonymous namespace 51} // Anonymous namespace
52 52
53void TranslatorVisitor::FFMA_reg(u64 insn) { 53void TranslatorVisitor::FFMA_reg(u64 insn) {
54 FFMA(*this, insn, GetRegFloat20(insn), GetRegFloat39(insn)); 54 FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
55} 55}
56 56
57void TranslatorVisitor::FFMA_rc(u64) { 57void TranslatorVisitor::FFMA_rc(u64) {
@@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) {
59} 59}
60 60
61void TranslatorVisitor::FFMA_cr(u64 insn) { 61void TranslatorVisitor::FFMA_cr(u64 insn) {
62 FFMA(*this, insn, GetFloatCbuf(insn), GetRegFloat39(insn)); 62 FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
63} 63}
64 64
65void TranslatorVisitor::FFMA_imm(u64) { 65void TranslatorVisitor::FFMA_imm(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
index ba005fbf4..2f8605619 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -10,7 +10,7 @@
10 10
11namespace Shader::Maxwell { 11namespace Shader::Maxwell {
12namespace { 12namespace {
13enum class Operation { 13enum class Operation : u64 {
14 Cos = 0, 14 Cos = 0,
15 Sin = 1, 15 Sin = 1,
16 Ex2 = 2, // Base 2 exponent 16 Ex2 = 2, // Base 2 exponent
@@ -39,11 +39,11 @@ void TranslatorVisitor::MUFU(u64 insn) {
39 IR::F32 value{[&]() -> IR::F32 { 39 IR::F32 value{[&]() -> IR::F32 {
40 switch (mufu.operation) { 40 switch (mufu.operation) {
41 case Operation::Cos: 41 case Operation::Cos:
42 return ir.FPCosNotReduced(op_a); 42 return ir.FPCos(op_a);
43 case Operation::Sin: 43 case Operation::Sin:
44 return ir.FPSinNotReduced(op_a); 44 return ir.FPSin(op_a);
45 case Operation::Ex2: 45 case Operation::Ex2:
46 return ir.FPExp2NotReduced(op_a); 46 return ir.FPExp2(op_a);
47 case Operation::Lg2: 47 case Operation::Lg2:
48 return ir.FPLog2(op_a); 48 return ir.FPLog2(op_a);
49 case Operation::Rcp: 49 case Operation::Rcp:
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
index 5c38d3fc1..edf2cadae 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -55,9 +55,6 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
55 if (cc) { 55 if (cc) {
56 throw NotImplementedException("FMUL CC"); 56 throw NotImplementedException("FMUL CC");
57 } 57 }
58 if (sat) {
59 throw NotImplementedException("FMUL SAT");
60 }
61 IR::F32 op_a{v.F(fmul.src_a)}; 58 IR::F32 op_a{v.F(fmul.src_a)};
62 if (scale != Scale::None) { 59 if (scale != Scale::None) {
63 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { 60 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
@@ -71,7 +68,20 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode
71 .rounding{CastFpRounding(fp_rounding)}, 68 .rounding{CastFpRounding(fp_rounding)},
72 .fmz_mode{CastFmzMode(fmz_mode)}, 69 .fmz_mode{CastFmzMode(fmz_mode)},
73 }; 70 };
74 v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); 71 IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
72 if (fmz_mode == FmzMode::FMZ && !sat) {
73 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
74 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
75 const IR::F32 zero{v.ir.Imm32(0.0f)};
76 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
77 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
78 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
79 value = IR::F32{v.ir.Select(any_zero, zero, value)};
80 }
81 if (sat) {
82 value = v.ir.FPSaturate(value);
83 }
84 v.F(fmul.dest_reg, value);
75} 85}
76 86
77void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { 87void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
@@ -83,27 +93,33 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
83 BitField<47, 1, u64> cc; 93 BitField<47, 1, u64> cc;
84 BitField<48, 1, u64> neg_b; 94 BitField<48, 1, u64> neg_b;
85 BitField<50, 1, u64> sat; 95 BitField<50, 1, u64> sat;
86 } fmul{insn}; 96 } const fmul{insn};
87
88 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, 97 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
89 fmul.neg_b != 0); 98 fmul.neg_b != 0);
90} 99}
91} // Anonymous namespace 100} // Anonymous namespace
92 101
93void TranslatorVisitor::FMUL_reg(u64 insn) { 102void TranslatorVisitor::FMUL_reg(u64 insn) {
94 return FMUL(*this, insn, GetRegFloat20(insn)); 103 return FMUL(*this, insn, GetFloatReg20(insn));
95} 104}
96 105
97void TranslatorVisitor::FMUL_cbuf(u64) { 106void TranslatorVisitor::FMUL_cbuf(u64 insn) {
98 throw NotImplementedException("FMUL (cbuf)"); 107 return FMUL(*this, insn, GetFloatCbuf(insn));
99} 108}
100 109
101void TranslatorVisitor::FMUL_imm(u64) { 110void TranslatorVisitor::FMUL_imm(u64 insn) {
102 throw NotImplementedException("FMUL (imm)"); 111 return FMUL(*this, insn, GetFloatImm20(insn));
103} 112}
104 113
105void TranslatorVisitor::FMUL32I(u64) { 114void TranslatorVisitor::FMUL32I(u64 insn) {
106 throw NotImplementedException("FMUL32I"); 115 union {
116 u64 raw;
117 BitField<52, 1, u64> cc;
118 BitField<53, 2, FmzMode> fmz;
119 BitField<55, 1, u64> sat;
120 } const fmul32i{insn};
121 FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
122 fmul32i.sat != 0, fmul32i.cc != 0, false);
107} 123}
108 124
109} // namespace Shader::Maxwell \ No newline at end of file 125} // namespace Shader::Maxwell \ No newline at end of file
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 SINCOS,
13 EX2,
14};
15
16void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
17 union {
18 u64 raw;
19 BitField<0, 8, IR::Reg> dest_reg;
20 BitField<39, 1, Mode> mode;
21 BitField<45, 1, u64> neg;
22 BitField<49, 1, u64> abs;
23 } const rro{insn};
24
25 v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
26}
27} // Anonymous namespace
28
29void TranslatorVisitor::RRO_reg(u64 insn) {
30 RRO(*this, insn, GetFloatReg20(insn));
31}
32
33void TranslatorVisitor::RRO_cbuf(u64 insn) {
34 RRO(*this, insn, GetFloatCbuf(insn));
35}
36
37void TranslatorVisitor::RRO_imm(u64) {
38 throw NotImplementedException("RRO (imm)");
39}
40
41} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 165d475b9..a5a0e1a9b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -48,11 +48,11 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
48 return X(reg.index); 48 return X(reg.index);
49} 49}
50 50
51IR::F32 TranslatorVisitor::GetRegFloat20(u64 insn) { 51IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
52 return ir.BitCast<IR::F32>(GetReg20(insn)); 52 return ir.BitCast<IR::F32>(GetReg20(insn));
53} 53}
54 54
55IR::F32 TranslatorVisitor::GetRegFloat39(u64 insn) { 55IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
56 return ir.BitCast<IR::F32>(GetReg39(insn)); 56 return ir.BitCast<IR::F32>(GetReg39(insn));
57} 57}
58 58
@@ -110,6 +110,14 @@ IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
110 return ir.Imm32(static_cast<u32>(imm.value)); 110 return ir.Imm32(static_cast<u32>(imm.value));
111} 111}
112 112
113IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
114 union {
115 u64 raw;
116 BitField<20, 32, u64> value;
117 } const imm{insn};
118 return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
119}
120
113void TranslatorVisitor::SetZFlag(const IR::U1& value) { 121void TranslatorVisitor::SetZFlag(const IR::U1& value) {
114 ir.SetZFlag(value); 122 ir.SetZFlag(value);
115} 123}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index 4d4cf2ebf..4e722e205 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -304,8 +304,8 @@ public:
304 [[nodiscard]] IR::U32 GetReg8(u64 insn); 304 [[nodiscard]] IR::U32 GetReg8(u64 insn);
305 [[nodiscard]] IR::U32 GetReg20(u64 insn); 305 [[nodiscard]] IR::U32 GetReg20(u64 insn);
306 [[nodiscard]] IR::U32 GetReg39(u64 insn); 306 [[nodiscard]] IR::U32 GetReg39(u64 insn);
307 [[nodiscard]] IR::F32 GetRegFloat20(u64 insn); 307 [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
308 [[nodiscard]] IR::F32 GetRegFloat39(u64 insn); 308 [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
309 309
310 [[nodiscard]] IR::U32 GetCbuf(u64 insn); 310 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
311 [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); 311 [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
@@ -314,6 +314,7 @@ public:
314 [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); 314 [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
315 315
316 [[nodiscard]] IR::U32 GetImm32(u64 insn); 316 [[nodiscard]] IR::U32 GetImm32(u64 insn);
317 [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
317 318
318 void SetZFlag(const IR::U1& value); 319 void SetZFlag(const IR::U1& value);
319 void SetSFlag(const IR::U1& value); 320 void SetSFlag(const IR::U1& value);
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
index d8a5158b5..20af68852 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -50,7 +50,7 @@ void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
50 // 50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; 51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; 52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0)); 53 result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
54 } 54 }
55 v.X(shl.dest_reg, result); 55 v.X(shl.dest_reg, result);
56} 56}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 628cf1c14..4114e10be 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -721,18 +721,6 @@ void TranslatorVisitor::RET(u64) {
721 ThrowNotImplemented(Opcode::RET); 721 ThrowNotImplemented(Opcode::RET);
722} 722}
723 723
724void TranslatorVisitor::RRO_reg(u64) {
725 ThrowNotImplemented(Opcode::RRO_reg);
726}
727
728void TranslatorVisitor::RRO_cbuf(u64) {
729 ThrowNotImplemented(Opcode::RRO_cbuf);
730}
731
732void TranslatorVisitor::RRO_imm(u64) {
733 ThrowNotImplemented(Opcode::RRO_imm);
734}
735
736void TranslatorVisitor::RTT(u64) { 724void TranslatorVisitor::RTT(u64) {
737 ThrowNotImplemented(Opcode::RTT); 725 ThrowNotImplemented(Opcode::RTT);
738} 726}
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d4e88259..ae3d5a7d6 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -330,7 +330,7 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
330 return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32); 330 return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
331 case IR::Opcode::IAdd64: 331 case IR::Opcode::IAdd64:
332 return FoldAdd<u64>(block, inst); 332 return FoldAdd<u64>(block, inst);
333 case IR::Opcode::Select32: 333 case IR::Opcode::SelectU32:
334 return FoldSelect<u32>(inst); 334 return FoldSelect<u32>(inst);
335 case IR::Opcode::LogicalAnd: 335 case IR::Opcode::LogicalAnd:
336 return FoldLogicalAnd(inst); 336 return FoldLogicalAnd(inst);