summary | refs | log | tree | commit | diff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv.h 3
-rw-r--r-- src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp 20
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.cpp 18
-rw-r--r-- src/shader_recompiler/frontend/ir/ir_emitter.h 1
-rw-r--r-- src/shader_recompiler/frontend/ir/opcodes.inc 3
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp 88
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp 17
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/impl.h 2
-rw-r--r-- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 2
-rw-r--r-- src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp 2
10 files changed, 147 insertions, 9 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 7fefcf2f2..6d4adafc7 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -189,6 +189,9 @@ Id EmitFPSqrt(EmitContext& ctx, Id value);
189Id EmitFPSaturate16(EmitContext& ctx, Id value); 189Id EmitFPSaturate16(EmitContext& ctx, Id value);
190Id EmitFPSaturate32(EmitContext& ctx, Id value); 190Id EmitFPSaturate32(EmitContext& ctx, Id value);
191Id EmitFPSaturate64(EmitContext& ctx, Id value); 191Id EmitFPSaturate64(EmitContext& ctx, Id value);
192Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value);
193Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value);
194Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value);
192Id EmitFPRoundEven16(EmitContext& ctx, Id value); 195Id EmitFPRoundEven16(EmitContext& ctx, Id value);
193Id EmitFPRoundEven32(EmitContext& ctx, Id value); 196Id EmitFPRoundEven32(EmitContext& ctx, Id value);
194Id EmitFPRoundEven64(EmitContext& ctx, Id value); 197Id EmitFPRoundEven64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
index 1fdf66cb6..24300af39 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@@ -15,7 +15,7 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) {
15 return op; 15 return op;
16} 16}
17 17
18Id Saturate(EmitContext& ctx, Id type, Id value, Id zero, Id one) { 18Id Clamp(EmitContext& ctx, Id type, Id value, Id zero, Id one) {
19 if (ctx.profile.has_broken_spirv_clamp) { 19 if (ctx.profile.has_broken_spirv_clamp) {
20 return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one); 20 return ctx.OpFMin(type, ctx.OpFMax(type, value, zero), one);
21 } else { 21 } else {
@@ -139,19 +139,31 @@ Id EmitFPSqrt(EmitContext& ctx, Id value) {
139Id EmitFPSaturate16(EmitContext& ctx, Id value) { 139Id EmitFPSaturate16(EmitContext& ctx, Id value) {
140 const Id zero{ctx.Constant(ctx.F16[1], u16{0})}; 140 const Id zero{ctx.Constant(ctx.F16[1], u16{0})};
141 const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})}; 141 const Id one{ctx.Constant(ctx.F16[1], u16{0x3c00})};
142 return Saturate(ctx, ctx.F16[1], value, zero, one); 142 return Clamp(ctx, ctx.F16[1], value, zero, one);
143} 143}
144 144
145Id EmitFPSaturate32(EmitContext& ctx, Id value) { 145Id EmitFPSaturate32(EmitContext& ctx, Id value) {
146 const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})}; 146 const Id zero{ctx.Constant(ctx.F32[1], f32{0.0})};
147 const Id one{ctx.Constant(ctx.F32[1], f32{1.0})}; 147 const Id one{ctx.Constant(ctx.F32[1], f32{1.0})};
148 return Saturate(ctx, ctx.F32[1], value, zero, one); 148 return Clamp(ctx, ctx.F32[1], value, zero, one);
149} 149}
150 150
151Id EmitFPSaturate64(EmitContext& ctx, Id value) { 151Id EmitFPSaturate64(EmitContext& ctx, Id value) {
152 const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})}; 152 const Id zero{ctx.Constant(ctx.F64[1], f64{0.0})};
153 const Id one{ctx.Constant(ctx.F64[1], f64{1.0})}; 153 const Id one{ctx.Constant(ctx.F64[1], f64{1.0})};
154 return Saturate(ctx, ctx.F64[1], value, zero, one); 154 return Clamp(ctx, ctx.F64[1], value, zero, one);
155}
156
157Id EmitFPClamp16(EmitContext& ctx, Id value, Id min_value, Id max_value) {
158 return Clamp(ctx, ctx.F16[1], value, min_value, max_value);
159}
160
161Id EmitFPClamp32(EmitContext& ctx, Id value, Id min_value, Id max_value) {
162 return Clamp(ctx, ctx.F32[1], value, min_value, max_value);
163}
164
165Id EmitFPClamp64(EmitContext& ctx, Id value, Id min_value, Id max_value) {
166 return Clamp(ctx, ctx.F64[1], value, min_value, max_value);
155} 167}
156 168
157Id EmitFPRoundEven16(EmitContext& ctx, Id value) { 169Id EmitFPRoundEven16(EmitContext& ctx, Id value) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index ce610799a..6280c08f6 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -731,6 +731,24 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
731 } 731 }
732} 732}
733 733
734F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
735 const F16F32F64& max_value) {
736 if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
737 throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
738 max_value.Type());
739 }
740 switch (value.Type()) {
741 case Type::F16:
742 return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
743 case Type::F32:
744 return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
745 case Type::F64:
746 return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
747 default:
748 ThrowInvalidType(value.Type());
749 }
750}
751
734F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { 752F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
735 switch (value.Type()) { 753 switch (value.Type()) {
736 case Type::F16: 754 case Type::F16:
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 39109b0de..ebbda78a9 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -147,6 +147,7 @@ public:
147 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); 147 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
148 [[nodiscard]] F32 FPSqrt(const F32& value); 148 [[nodiscard]] F32 FPSqrt(const F32& value);
149 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); 149 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
150 [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value);
150 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); 151 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
151 [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); 152 [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
152 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); 153 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 8945c7b04..dd17212a1 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -192,6 +192,9 @@ OPCODE(FPLog2, F32, F32,
192OPCODE(FPSaturate16, F16, F16, ) 192OPCODE(FPSaturate16, F16, F16, )
193OPCODE(FPSaturate32, F32, F32, ) 193OPCODE(FPSaturate32, F32, F32, )
194OPCODE(FPSaturate64, F64, F64, ) 194OPCODE(FPSaturate64, F64, F64, )
195OPCODE(FPClamp16, F16, F16, F16, F16, )
196OPCODE(FPClamp32, F32, F32, F32, F32, )
197OPCODE(FPClamp64, F64, F64, F64, F64, )
195OPCODE(FPRoundEven16, F16, F16, ) 198OPCODE(FPRoundEven16, F16, F16, )
196OPCODE(FPRoundEven32, F32, F32, ) 199OPCODE(FPRoundEven32, F32, F32, )
197OPCODE(FPRoundEven64, F64, F64, ) 200OPCODE(FPRoundEven64, F64, F64, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
index 81175627f..7c5a72800 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -2,6 +2,8 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <limits>
6
5#include "common/common_types.h" 7#include "common/common_types.h"
6#include "shader_recompiler/exception.h" 8#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h" 9#include "shader_recompiler/frontend/maxwell/opcodes.h"
@@ -55,6 +57,37 @@ size_t BitSize(DestFormat dest_format) {
55 } 57 }
56} 58}
57 59
60std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
61 if (is_signed) {
62 switch (format) {
63 case DestFormat::I16:
64 return {static_cast<f64>(std::numeric_limits<s16>::max()),
65 static_cast<f64>(std::numeric_limits<s16>::min())};
66 case DestFormat::I32:
67 return {static_cast<f64>(std::numeric_limits<s32>::max()),
68 static_cast<f64>(std::numeric_limits<s32>::min())};
69 case DestFormat::I64:
70 return {static_cast<f64>(std::numeric_limits<s64>::max()),
71 static_cast<f64>(std::numeric_limits<s64>::min())};
72 default: {}
73 }
74 } else {
75 switch (format) {
76 case DestFormat::I16:
77 return {static_cast<f64>(std::numeric_limits<u16>::max()),
78 static_cast<f64>(std::numeric_limits<u16>::min())};
79 case DestFormat::I32:
80 return {static_cast<f64>(std::numeric_limits<u32>::max()),
81 static_cast<f64>(std::numeric_limits<u32>::min())};
82 case DestFormat::I64:
83 return {static_cast<f64>(std::numeric_limits<u64>::max()),
84 static_cast<f64>(std::numeric_limits<u64>::min())};
85 default: {}
86 }
87 }
88 throw NotImplementedException("Invalid destination format {}", format);
89}
90
58IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { 91IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
59 union { 92 union {
60 u64 raw; 93 u64 raw;
@@ -112,13 +145,58 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
112 // For example converting F32 65537.0 to U16, the expected value is 0xffff, 145 // For example converting F32 65537.0 to U16, the expected value is 0xffff,
113 146
114 const bool is_signed{f2i.is_signed != 0}; 147 const bool is_signed{f2i.is_signed != 0};
115 const size_t bitsize{BitSize(f2i.dest_format)}; 148 const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
116 const IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, rounded_value)}; 149
150 IR::F16F32F64 intermediate;
151 switch (f2i.src_format) {
152 case SrcFormat::F16: {
153 const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
154 const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
155 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
156 break;
157 }
158 case SrcFormat::F32: {
159 const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
160 const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
161 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
162 break;
163 }
164 case SrcFormat::F64: {
165 const IR::F64 max_val{v.ir.Imm64(max_bound)};
166 const IR::F64 min_val{v.ir.Imm64(min_bound)};
167 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
168 break;
169 }
170 default:
171 throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
172 }
173
174 const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
175 IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
176
177 bool handled_special_case = false;
178 const bool special_nan_cases =
179 (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
180 if (special_nan_cases) {
181 if (f2i.dest_format == DestFormat::I32) {
182 handled_special_case = true;
183 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
184 } else if (f2i.dest_format == DestFormat::I64) {
185 handled_special_case = true;
186 result = IR::U64{
187 v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000ULL), result)};
188 }
189 }
190 if (!handled_special_case && is_signed) {
191 if (bitsize != 64) {
192 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
193 } else {
194 result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0ULL), result)};
195 }
196 }
117 197
118 if (bitsize == 64) { 198 if (bitsize == 64) {
119 const IR::Value vector{v.ir.UnpackUint2x32(result)}; 199 v.L(f2i.dest_reg, result);
120 v.X(f2i.dest_reg + 0, IR::U32{v.ir.CompositeExtract(vector, 0)});
121 v.X(f2i.dest_reg + 1, IR::U32{v.ir.CompositeExtract(vector, 1)});
122 } else { 200 } else {
123 v.X(f2i.dest_reg, result); 201 v.X(f2i.dest_reg, result);
124 } 202 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
index 758a0230a..9bae89c10 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -21,6 +21,13 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) {
21 return ir.GetReg(reg); 21 return ir.GetReg(reg);
22} 22}
23 23
24IR::U64 TranslatorVisitor::L(IR::Reg reg) {
25 if (!IR::IsAligned(reg, 2)) {
26 throw NotImplementedException("Unaligned source register {}", reg);
27 }
28 return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
29}
30
24IR::F32 TranslatorVisitor::F(IR::Reg reg) { 31IR::F32 TranslatorVisitor::F(IR::Reg reg) {
25 return ir.BitCast<IR::F32>(X(reg)); 32 return ir.BitCast<IR::F32>(X(reg));
26} 33}
@@ -36,6 +43,16 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
36 ir.SetReg(dest_reg, value); 43 ir.SetReg(dest_reg, value);
37} 44}
38 45
46void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
47 if (!IR::IsAligned(dest_reg, 2)) {
48 throw NotImplementedException("Unaligned destination register {}", dest_reg);
49 }
50 const IR::Value result{ir.UnpackUint2x32(value)};
51 for (int i = 0; i < 2; i++) {
52 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, i)});
53 }
54}
55
39void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { 56void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
40 X(dest_reg, ir.BitCast<IR::U32>(value)); 57 X(dest_reg, ir.BitCast<IR::U32>(value));
41} 58}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
index c994fe803..54c31deb4 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -341,10 +341,12 @@ public:
341 void XMAD_imm(u64 insn); 341 void XMAD_imm(u64 insn);
342 342
343 [[nodiscard]] IR::U32 X(IR::Reg reg); 343 [[nodiscard]] IR::U32 X(IR::Reg reg);
344 [[nodiscard]] IR::U64 L(IR::Reg reg);
344 [[nodiscard]] IR::F32 F(IR::Reg reg); 345 [[nodiscard]] IR::F32 F(IR::Reg reg);
345 [[nodiscard]] IR::F64 D(IR::Reg reg); 346 [[nodiscard]] IR::F64 D(IR::Reg reg);
346 347
347 void X(IR::Reg dest_reg, const IR::U32& value); 348 void X(IR::Reg dest_reg, const IR::U32& value);
349 void L(IR::Reg dest_reg, const IR::U64& value);
348 void F(IR::Reg dest_reg, const IR::F32& value); 350 void F(IR::Reg dest_reg, const IR::F32& value);
349 void D(IR::Reg dest_reg, const IR::F64& value); 351 void D(IR::Reg dest_reg, const IR::F64& value);
350 352
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index db5138e4d..32f276f3b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -105,6 +105,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
105 case IR::Opcode::FPNeg16: 105 case IR::Opcode::FPNeg16:
106 case IR::Opcode::FPRoundEven16: 106 case IR::Opcode::FPRoundEven16:
107 case IR::Opcode::FPSaturate16: 107 case IR::Opcode::FPSaturate16:
108 case IR::Opcode::FPClamp16:
108 case IR::Opcode::FPTrunc16: 109 case IR::Opcode::FPTrunc16:
109 case IR::Opcode::FPOrdEqual16: 110 case IR::Opcode::FPOrdEqual16:
110 case IR::Opcode::FPUnordEqual16: 111 case IR::Opcode::FPUnordEqual16:
@@ -148,6 +149,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
148 case IR::Opcode::FPRecipSqrt64: 149 case IR::Opcode::FPRecipSqrt64:
149 case IR::Opcode::FPRoundEven64: 150 case IR::Opcode::FPRoundEven64:
150 case IR::Opcode::FPSaturate64: 151 case IR::Opcode::FPSaturate64:
152 case IR::Opcode::FPClamp64:
151 case IR::Opcode::FPTrunc64: 153 case IR::Opcode::FPTrunc64:
152 case IR::Opcode::FPOrdEqual64: 154 case IR::Opcode::FPOrdEqual64:
153 case IR::Opcode::FPUnordEqual64: 155 case IR::Opcode::FPUnordEqual64:
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 0e8862f45..0d2c91ed6 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -30,6 +30,8 @@ IR::Opcode Replace(IR::Opcode op) {
30 return IR::Opcode::FPRoundEven32; 30 return IR::Opcode::FPRoundEven32;
31 case IR::Opcode::FPSaturate16: 31 case IR::Opcode::FPSaturate16:
32 return IR::Opcode::FPSaturate32; 32 return IR::Opcode::FPSaturate32;
33 case IR::Opcode::FPClamp16:
34 return IR::Opcode::FPClamp32;
33 case IR::Opcode::FPTrunc16: 35 case IR::Opcode::FPTrunc16:
34 return IR::Opcode::FPTrunc32; 36 return IR::Opcode::FPTrunc32;
35 case IR::Opcode::CompositeConstructF16x2: 37 case IR::Opcode::CompositeConstructF16x2: