summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: FernandoS27, 2021-03-21 02:08:04 +0100
committer: ameerj, 2021-07-22 21:51:23 -0400
commit: ed6cd3c94ac10b434a1240fc3cbed2050766be65 (patch)
tree: b2c14c6c225c18bd535b4490a105aafa2a3f58da /src
parentshader: Implement HFMA2 (diff)
downloadyuzu-ed6cd3c94ac10b434a1240fc3cbed2050766be65.tar.gz
yuzu-ed6cd3c94ac10b434a1240fc3cbed2050766be65.tar.xz
yuzu-ed6cd3c94ac10b434a1240fc3cbed2050766be65.zip
shader: Implement HMUL2
Diffstat (limited to 'src')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt | 1
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | 143
-rw-r--r-- src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | 16
3 files changed, 144 insertions, 16 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 42be817ec..24a2d61e0 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -81,6 +81,7 @@ add_library(shader_recompiler STATIC
81 frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp 81 frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
82 frontend/maxwell/translate/impl/half_floating_point_helper.cpp 82 frontend/maxwell/translate/impl/half_floating_point_helper.cpp
83 frontend/maxwell/translate/impl/half_floating_point_helper.h 83 frontend/maxwell/translate/impl/half_floating_point_helper.h
84 frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
84 frontend/maxwell/translate/impl/impl.cpp 85 frontend/maxwell/translate/impl/impl.cpp
85 frontend/maxwell/translate/impl/impl.h 86 frontend/maxwell/translate/impl/impl.h
86 frontend/maxwell/translate/impl/integer_add.cpp 87 frontend/maxwell/translate/impl/integer_add.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..ff34a8c8f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9
10void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
11 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
12 HalfPrecision precision) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const hmul2{insn};
18
19 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
20 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
21 const bool promotion{lhs_a.Type() != lhs_b.Type()};
22 if (promotion) {
23 if (lhs_a.Type() == IR::Type::F16) {
24 lhs_a = v.ir.FPConvert(32, lhs_a);
25 rhs_a = v.ir.FPConvert(32, rhs_a);
26 }
27 if (lhs_b.Type() == IR::Type::F16) {
28 lhs_b = v.ir.FPConvert(32, lhs_b);
29 rhs_b = v.ir.FPConvert(32, rhs_b);
30 }
31 }
32 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
33 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
34
35 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
36 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
37
38 const IR::FpControl fp_control{
39 .no_contraction{true},
40 .rounding{IR::FpRounding::DontCare},
41 .fmz_mode{HalfPrecision2FmzMode(precision)},
42 };
43 IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
44 IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
45 if (precision == HalfPrecision::FMZ && !sat) {
46 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
47 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
48 const IR::F32 zero{v.ir.Imm32(0.0f)};
49 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
50 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
51 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
52 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
53
54 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
55 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
56 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
57 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
58 }
59 if (sat) {
60 lhs = v.ir.FPSaturate(lhs);
61 rhs = v.ir.FPSaturate(rhs);
62 }
63 if (promotion) {
64 lhs = v.ir.FPConvert(16, lhs);
65 rhs = v.ir.FPConvert(16, rhs);
66 }
67 v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
68}
69
70void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
71 Swizzle swizzle_b, const IR::U32& src_b) {
72 union {
73 u64 raw;
74 BitField<49, 2, Merge> merge;
75 BitField<47, 2, Swizzle> swizzle_a;
76 BitField<39, 2, HalfPrecision> precision;
77 } const hmul2{insn};
78
79 HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
80 hmul2.precision);
81}
82} // namespace
83
84void TranslatorVisitor::HMUL2_reg(u64 insn) {
85 union {
86 u64 raw;
87 BitField<32, 1, u64> sat;
88 BitField<31, 1, u64> neg_b;
89 BitField<30, 1, u64> abs_b;
90 BitField<44, 1, u64> abs_a;
91 BitField<28, 2, Swizzle> swizzle_b;
92 } const hmul2{insn};
93
94 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
95 hmul2.swizzle_b, GetReg20(insn));
96}
97
98void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
99 union {
100 u64 raw;
101 BitField<52, 1, u64> sat;
102 BitField<54, 1, u64> abs_b;
103 BitField<43, 1, u64> neg_a;
104 BitField<44, 1, u64> abs_a;
105 } const hmul2{insn};
106
107 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
108 Swizzle::F32, GetCbuf(insn));
109}
110
111void TranslatorVisitor::HMUL2_imm(u64 insn) {
112 union {
113 u64 raw;
114 BitField<52, 1, u64> sat;
115 BitField<56, 1, u64> neg_high;
116 BitField<30, 9, u64> high;
117 BitField<29, 1, u64> neg_low;
118 BitField<20, 9, u64> low;
119 BitField<43, 1, u64> neg_a;
120 BitField<44, 1, u64> abs_a;
121 } const hmul2{insn};
122
123 const u32 imm{static_cast<u32>(hmul2.low << 6) | ((hmul2.neg_low != 0 ? 1 : 0) << 15) |
124 static_cast<u32>(hmul2.high << 22) | ((hmul2.neg_high != 0 ? 1 : 0) << 31)};
125 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
126 Swizzle::H1_H0, ir.Imm32(imm));
127}
128
129void TranslatorVisitor::HMUL2_32I(u64 insn) {
130 union {
131 u64 raw;
132 BitField<55, 2, HalfPrecision> precision;
133 BitField<52, 1, u64> sat;
134 BitField<53, 2, Swizzle> swizzle_a;
135 BitField<20, 32, u64> imm32;
136 } const hmul2{insn};
137
138 const u32 imm{static_cast<u32>(hmul2.imm32)};
139 HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
140 Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
141}
142
143} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index ddfca8d7a..6c159301f 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -181,22 +181,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
181 ThrowNotImplemented(Opcode::GETLMEMBASE); 181 ThrowNotImplemented(Opcode::GETLMEMBASE);
182} 182}
183 183
184void TranslatorVisitor::HMUL2_reg(u64) {
185 ThrowNotImplemented(Opcode::HMUL2_reg);
186}
187
188void TranslatorVisitor::HMUL2_cbuf(u64) {
189 ThrowNotImplemented(Opcode::HMUL2_cbuf);
190}
191
192void TranslatorVisitor::HMUL2_imm(u64) {
193 ThrowNotImplemented(Opcode::HMUL2_imm);
194}
195
196void TranslatorVisitor::HMUL2_32I(u64) {
197 ThrowNotImplemented(Opcode::HMUL2_32I);
198}
199
200void TranslatorVisitor::HSET2_reg(u64) { 184void TranslatorVisitor::HSET2_reg(u64) {
201 ThrowNotImplemented(Opcode::HSET2_reg); 185 ThrowNotImplemented(Opcode::HSET2_reg);
202} 186}