summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/frontend/maxwell
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp16
3 files changed, 185 insertions, 17 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..6965adfb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class Merge : u64 {
13 H1_H0,
14 F32,
15 MRG_H0,
16 MRG_H1,
17};
18
19enum class Swizzle : u64 {
20 H1_H0,
21 F32,
22 H0_H0,
23 H1_H1,
24};
25
26std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
27 switch (swizzle) {
28 case Swizzle::H1_H0: {
29 const IR::Value vector{ir.UnpackFloat2x16(value)};
30 return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
31 }
32 case Swizzle::H0_H0: {
33 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
34 return {scalar, scalar};
35 }
36 case Swizzle::H1_H1: {
37 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
38 return {scalar, scalar};
39 }
40 case Swizzle::F32: {
41 const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
42 return {scalar, scalar};
43 }
44 }
45 throw InvalidArgument("Invalid swizzle {}", swizzle);
46}
47
48IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
49 Merge merge) {
50 switch (merge) {
51 case Merge::H1_H0:
52 return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
53 case Merge::F32:
54 return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
55 case Merge::MRG_H0:
56 case Merge::MRG_H1: {
57 const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
58 const bool h0{merge == Merge::MRG_H0};
59 const IR::F16& insert{h0 ? lhs : rhs};
60 return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1));
61 }
62 }
63 throw InvalidArgument("Invalid merge {}", merge);
64}
65
66void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
67 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
68 union {
69 u64 raw;
70 BitField<0, 8, IR::Reg> dest_reg;
71 BitField<8, 8, IR::Reg> src_a;
72 } const hadd2{insn};
73
74 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
75 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
76 const bool promotion{lhs_a.Type() != lhs_b.Type()};
77 if (promotion) {
78 if (lhs_a.Type() == IR::Type::F16) {
79 lhs_a = v.ir.FPConvert(32, lhs_a);
80 rhs_a = v.ir.FPConvert(32, rhs_a);
81 }
82 if (lhs_b.Type() == IR::Type::F16) {
83 lhs_b = v.ir.FPConvert(32, lhs_b);
84 rhs_b = v.ir.FPConvert(32, rhs_b);
85 }
86 }
87 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
88 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
89
90 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
91 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
92
93 const IR::FpControl fp_control{
94 .no_contraction{true},
95 .rounding{IR::FpRounding::DontCare},
96 .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
97 };
98 IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
99 IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
100 if (sat) {
101 lhs = v.ir.FPSaturate(lhs);
102 rhs = v.ir.FPSaturate(rhs);
103 }
104 if (promotion) {
105 lhs = v.ir.FPConvert(16, lhs);
106 rhs = v.ir.FPConvert(16, rhs);
107 }
108 v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
109}
110} // Anonymous namespace
111
112void TranslatorVisitor::HADD2_reg(u64 insn) {
113 union {
114 u64 raw;
115 BitField<49, 2, Merge> merge;
116 BitField<39, 1, u64> ftz;
117 BitField<32, 1, u64> sat;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 BitField<47, 2, Swizzle> swizzle_a;
121 BitField<31, 1, u64> neg_b;
122 BitField<30, 1, u64> abs_b;
123 BitField<28, 2, Swizzle> swizzle_b;
124 } const hadd2{insn};
125
126 HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
127 hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
128 GetReg20(insn));
129}
130
131void TranslatorVisitor::HADD2_cbuf(u64 insn) {
132 union {
133 u64 raw;
134 BitField<49, 2, Merge> merge;
135 BitField<39, 1, u64> ftz;
136 BitField<52, 1, u64> sat;
137 BitField<43, 1, u64> neg_a;
138 BitField<44, 1, u64> abs_a;
139 BitField<47, 2, Swizzle> swizzle_a;
140 BitField<56, 1, u64> neg_b;
141 BitField<54, 1, u64> abs_b;
142 } const hadd2{insn};
143
144 HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
145 hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
146 GetCbuf(insn));
147}
148
149void TranslatorVisitor::HADD2_imm(u64 insn) {
150 union {
151 u64 raw;
152 BitField<49, 2, Merge> merge;
153 BitField<39, 1, u64> ftz;
154 BitField<52, 1, u64> sat;
155 BitField<43, 1, u64> neg_a;
156 BitField<44, 1, u64> abs_a;
157 BitField<47, 2, Swizzle> swizzle_a;
158 BitField<56, 1, u64> neg_high;
159 BitField<30, 9, u64> high;
160 BitField<29, 1, u64> neg_low;
161 BitField<20, 9, u64> low;
162 } const hadd2{insn};
163
164 const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
165 static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
166 HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
167 hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
168}
169
170void TranslatorVisitor::HADD2_32I(u64 insn) {
171 union {
172 u64 raw;
173 BitField<55, 1, u64> ftz;
174 BitField<52, 1, u64> sat;
175 BitField<56, 1, u64> neg_a;
176 BitField<53, 2, Swizzle> swizzle_a;
177 BitField<20, 32, u64> imm32;
178 } const hadd2{insn};
179
180 const u32 imm{static_cast<u32>(hadd2.imm32)};
181 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
182 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
183}
184} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index 727524284..748b856c9 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) {
59 const IR::U64 address{[&]() -> IR::U64 { 59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) { 60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it 61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.ConvertU(64, v.X(mem.addr_reg)); 62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 } 63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) { 64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register"); 65 throw NotImplementedException("Unaligned address register");
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a0535f1c2..c24f29ff7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
265 ThrowNotImplemented(Opcode::GETLMEMBASE); 265 ThrowNotImplemented(Opcode::GETLMEMBASE);
266} 266}
267 267
268void TranslatorVisitor::HADD2_reg(u64) {
269 ThrowNotImplemented(Opcode::HADD2_reg);
270}
271
272void TranslatorVisitor::HADD2_cbuf(u64) {
273 ThrowNotImplemented(Opcode::HADD2_cbuf);
274}
275
276void TranslatorVisitor::HADD2_imm(u64) {
277 ThrowNotImplemented(Opcode::HADD2_imm);
278}
279
280void TranslatorVisitor::HADD2_32I(u64) {
281 ThrowNotImplemented(Opcode::HADD2_32I);
282}
283
284void TranslatorVisitor::HFMA2_reg(u64) { 268void TranslatorVisitor::HFMA2_reg(u64) {
285 ThrowNotImplemented(Opcode::HFMA2_reg); 269 ThrowNotImplemented(Opcode::HFMA2_reg);
286} 270}