summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: FernandoS27, 2021-03-21 01:03:21 +0100
committer: ameerj, 2021-07-22 21:51:23 -0400
commit: 28dff6a6298b714019aa10a47f5a9e3f3f689067 (patch)
tree: 63ddbf5f41f08c9f2300e995598950b86db5ceb3 /src
parent: spirv: Implement VertexId and InstanceId, refactor code (diff)
download: yuzu-28dff6a6298b714019aa10a47f5a9e3f3f689067.tar.gz
          yuzu-28dff6a6298b714019aa10a47f5a9e3f3f689067.tar.xz
          yuzu-28dff6a6298b714019aa10a47f5a9e3f3f689067.zip
shader: Implement HFMA2
Diffstat (limited to 'src')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt | 1
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | 170
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp | 13
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h | 8
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | 20
5 files changed, 192 insertions, 20 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 21c66ce13..42be817ec 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -78,6 +78,7 @@ add_library(shader_recompiler STATIC
78 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp 78 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
79 frontend/maxwell/translate/impl/floating_point_set_predicate.cpp 79 frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
80 frontend/maxwell/translate/impl/half_floating_point_add.cpp 80 frontend/maxwell/translate/impl/half_floating_point_add.cpp
81 frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
81 frontend/maxwell/translate/impl/half_floating_point_helper.cpp 82 frontend/maxwell/translate/impl/half_floating_point_helper.cpp
82 frontend/maxwell/translate/impl/half_floating_point_helper.h 83 frontend/maxwell/translate/impl/half_floating_point_helper.h
83 frontend/maxwell/translate/impl/impl.cpp 84 frontend/maxwell/translate/impl/impl.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..2f3996274
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,170 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9
10void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
11 Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
12 bool sat, HalfPrecision precision) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const hfma2{insn};
18
19 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
20 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
21 auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
22 const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
23 if (promotion) {
24 if (lhs_a.Type() == IR::Type::F16) {
25 lhs_a = v.ir.FPConvert(32, lhs_a);
26 rhs_a = v.ir.FPConvert(32, rhs_a);
27 }
28 if (lhs_b.Type() == IR::Type::F16) {
29 lhs_b = v.ir.FPConvert(32, lhs_b);
30 rhs_b = v.ir.FPConvert(32, rhs_b);
31 }
32 if (lhs_c.Type() == IR::Type::F16) {
33 lhs_c = v.ir.FPConvert(32, lhs_c);
34 rhs_c = v.ir.FPConvert(32, rhs_c);
35 }
36 }
37
38 lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
39 rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
40
41 lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
42 rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
43
44 const IR::FpControl fp_control{
45 .no_contraction{true},
46 .rounding{IR::FpRounding::DontCare},
47 .fmz_mode{HalfPrecision2FmzMode(precision)},
48 };
49 IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
50 IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
51 if (precision == HalfPrecision::FMZ && !sat) {
52 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
53 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
54 const IR::F32 zero{v.ir.Imm32(0.0f)};
55 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
56 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
57 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
58 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
59
60 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
61 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
62 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
63 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
64 }
65 if (sat) {
66 lhs = v.ir.FPSaturate(lhs);
67 rhs = v.ir.FPSaturate(rhs);
68 }
69 if (promotion) {
70 lhs = v.ir.FPConvert(16, lhs);
71 rhs = v.ir.FPConvert(16, rhs);
72 }
73 v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
74}
75
76void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
77 Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
78 HalfPrecision precision) {
79 union {
80 u64 raw;
81 BitField<47, 2, Swizzle> swizzle_a;
82 BitField<49, 2, Merge> merge;
83 } const hfma2{insn};
84
85 HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
86 sat, precision);
87}
88
89} // namespace
90
91void TranslatorVisitor::HFMA2_reg(u64 insn) {
92 union {
93 u64 raw;
94 BitField<28, 2, Swizzle> swizzle_b;
95 BitField<32, 1, u64> saturate;
96 BitField<31, 1, u64> neg_b;
97 BitField<30, 1, u64> neg_c;
98 BitField<35, 2, Swizzle> swizzle_c;
99 BitField<37, 2, HalfPrecision> precision;
100 } const hfma2{insn};
101
102 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
103 GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
104}
105
106void TranslatorVisitor::HFMA2_rc(u64 insn) {
107 union {
108 u64 raw;
109 BitField<51, 1, u64> neg_c;
110 BitField<52, 1, u64> saturate;
111 BitField<53, 2, Swizzle> swizzle_b;
112 BitField<56, 1, u64> neg_b;
113 BitField<57, 2, HalfPrecision> precision;
114 } const hfma2{insn};
115
116 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
117 GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
118}
119
120void TranslatorVisitor::HFMA2_cr(u64 insn) {
121 union {
122 u64 raw;
123 BitField<51, 1, u64> neg_c;
124 BitField<52, 1, u64> saturate;
125 BitField<53, 2, Swizzle> swizzle_c;
126 BitField<56, 1, u64> neg_b;
127 BitField<57, 2, HalfPrecision> precision;
128 } const hfma2{insn};
129
130 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
131 GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
132}
133
134void TranslatorVisitor::HFMA2_imm(u64 insn) {
135 union {
136 u64 raw;
137 BitField<51, 1, u64> neg_c;
138 BitField<52, 1, u64> saturate;
139 BitField<53, 2, Swizzle> swizzle_c;
140
141 BitField<56, 1, u64> neg_high;
142 BitField<30, 9, u64> high;
143 BitField<29, 1, u64> neg_low;
144 BitField<20, 9, u64> low;
145 BitField<57, 2, HalfPrecision> precision;
146 } const hfma2{insn};
147
148 const u32 imm{static_cast<u32>(hfma2.low << 6) | ((hfma2.neg_low != 0 ? 1 : 0) << 15) |
149 static_cast<u32>(hfma2.high << 22) | ((hfma2.neg_high != 0 ? 1 : 0) << 31)};
150
151 HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
152 GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
153}
154
155void TranslatorVisitor::HFMA2_32I(u64 insn) {
156 union {
157 u64 raw;
158 BitField<0, 8, IR::Reg> src_c;
159 BitField<20, 32, u64> imm32;
160 BitField<52, 1, u64> neg_c;
161 BitField<53, 2, Swizzle> swizzle_a;
162 BitField<55, 2, HalfPrecision> precision;
163 } const hfma2{insn};
164
165 const u32 imm{static_cast<u32>(hfma2.imm32)};
166 HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
167 Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
168}
169
170} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
index 930822092..d0c6ba1aa 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -6,6 +6,19 @@
6 6
7namespace Shader::Maxwell { 7namespace Shader::Maxwell {
8 8
9IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
10 switch (precision) {
11 case HalfPrecision::None:
12 return IR::FmzMode::None;
13 case HalfPrecision::FTZ:
14 return IR::FmzMode::FTZ;
15 case HalfPrecision::FMZ:
16 return IR::FmzMode::FMZ;
17 default:
18 return IR::FmzMode::DontCare;
19 }
20}
21
9std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { 22std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
10 switch (swizzle) { 23 switch (swizzle) {
11 case Swizzle::H1_H0: { 24 case Swizzle::H1_H0: {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
index 0933b595e..f26ef0949 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -23,6 +23,14 @@ enum class Swizzle : u64 {
23 H1_H1, 23 H1_H1,
24}; 24};
25 25
26enum class HalfPrecision : u64 {
27 None = 0,
28 FTZ = 1,
29 FMZ = 2,
30};
31
32IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
33
26std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); 34std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
27 35
28IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, 36IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 4078feafa..ddfca8d7a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -181,26 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
181 ThrowNotImplemented(Opcode::GETLMEMBASE); 181 ThrowNotImplemented(Opcode::GETLMEMBASE);
182} 182}
183 183
184void TranslatorVisitor::HFMA2_reg(u64) {
185 ThrowNotImplemented(Opcode::HFMA2_reg);
186}
187
188void TranslatorVisitor::HFMA2_rc(u64) {
189 ThrowNotImplemented(Opcode::HFMA2_rc);
190}
191
192void TranslatorVisitor::HFMA2_cr(u64) {
193 ThrowNotImplemented(Opcode::HFMA2_cr);
194}
195
196void TranslatorVisitor::HFMA2_imm(u64) {
197 ThrowNotImplemented(Opcode::HFMA2_imm);
198}
199
200void TranslatorVisitor::HFMA2_32I(u64) {
201 ThrowNotImplemented(Opcode::HFMA2_32I);
202}
203
204void TranslatorVisitor::HMUL2_reg(u64) { 184void TranslatorVisitor::HMUL2_reg(u64) {
205 ThrowNotImplemented(Opcode::HMUL2_reg); 185 ThrowNotImplemented(Opcode::HMUL2_reg);
206} 186}