summaryrefslogtreecommitdiff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--src/shader_recompiler/CMakeLists.txt1
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h28
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp72
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp16
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp90
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h4
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc17
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp16
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp2
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp10
12 files changed, 400 insertions, 42 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 6506413a8..cb73e03fb 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC
71 frontend/maxwell/translate/impl/floating_point_multi_function.cpp 71 frontend/maxwell/translate/impl/floating_point_multi_function.cpp
72 frontend/maxwell/translate/impl/floating_point_multiply.cpp 72 frontend/maxwell/translate/impl/floating_point_multiply.cpp
73 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp 73 frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
74 frontend/maxwell/translate/impl/half_floating_point_add.cpp
74 frontend/maxwell/translate/impl/impl.cpp 75 frontend/maxwell/translate/impl/impl.cpp
75 frontend/maxwell/translate/impl/impl.h 76 frontend/maxwell/translate/impl/impl.h
76 frontend/maxwell/translate/impl/integer_add.cpp 77 frontend/maxwell/translate/impl/integer_add.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 5446d6ab6..bed43c094 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -90,24 +90,36 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
90Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); 90Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
91Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); 91Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
92Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); 92Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
93void EmitCompositeConstructF16x2(EmitContext& ctx); 93Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
94void EmitCompositeConstructF16x3(EmitContext& ctx); 94Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
95void EmitCompositeConstructF16x4(EmitContext& ctx); 95Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
96Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
97Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
98Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
96Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); 99Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
97Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); 100Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
98Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); 101Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
99void EmitCompositeConstructF32x2(EmitContext& ctx); 102Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
100void EmitCompositeConstructF32x3(EmitContext& ctx); 103Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
101void EmitCompositeConstructF32x4(EmitContext& ctx); 104Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
105Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
106Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
107Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
102Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); 108Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
103Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); 109Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
104Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); 110Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
111Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
112Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
113Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
105void EmitCompositeConstructF64x2(EmitContext& ctx); 114void EmitCompositeConstructF64x2(EmitContext& ctx);
106void EmitCompositeConstructF64x3(EmitContext& ctx); 115void EmitCompositeConstructF64x3(EmitContext& ctx);
107void EmitCompositeConstructF64x4(EmitContext& ctx); 116void EmitCompositeConstructF64x4(EmitContext& ctx);
108void EmitCompositeExtractF64x2(EmitContext& ctx); 117void EmitCompositeExtractF64x2(EmitContext& ctx);
109void EmitCompositeExtractF64x3(EmitContext& ctx); 118void EmitCompositeExtractF64x3(EmitContext& ctx);
110void EmitCompositeExtractF64x4(EmitContext& ctx); 119void EmitCompositeExtractF64x4(EmitContext& ctx);
120Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
121Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
122Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
111Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); 123Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
112Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); 124Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
113Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); 125Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
@@ -270,5 +282,9 @@ Id EmitConvertU64F32(EmitContext& ctx, Id value);
270Id EmitConvertU64F64(EmitContext& ctx, Id value); 282Id EmitConvertU64F64(EmitContext& ctx, Id value);
271Id EmitConvertU64U32(EmitContext& ctx, Id value); 283Id EmitConvertU64U32(EmitContext& ctx, Id value);
272Id EmitConvertU32U64(EmitContext& ctx, Id value); 284Id EmitConvertU32U64(EmitContext& ctx, Id value);
285Id EmitConvertF16F32(EmitContext& ctx, Id value);
286Id EmitConvertF32F16(EmitContext& ctx, Id value);
287Id EmitConvertF32F64(EmitContext& ctx, Id value);
288Id EmitConvertF64F32(EmitContext& ctx, Id value);
273 289
274} // namespace Shader::Backend::SPIRV 290} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
index c950854a0..616e63676 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
@@ -30,16 +30,28 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
30 return ctx.OpCompositeExtract(ctx.U32[1], composite, index); 30 return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
31} 31}
32 32
33void EmitCompositeConstructF16x2(EmitContext&) { 33Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
34 throw NotImplementedException("SPIR-V Instruction"); 34 return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
35} 35}
36 36
37void EmitCompositeConstructF16x3(EmitContext&) { 37Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
38 throw NotImplementedException("SPIR-V Instruction"); 38 return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
39} 39}
40 40
41void EmitCompositeConstructF16x4(EmitContext&) { 41Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
42 throw NotImplementedException("SPIR-V Instruction"); 42 return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
43}
44
45Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
46 return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
47}
48
49Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
50 return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
51}
52
53Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
54 return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
43} 55}
44 56
45Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { 57Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
@@ -54,16 +66,28 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
54 return ctx.OpCompositeExtract(ctx.F16[1], composite, index); 66 return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
55} 67}
56 68
57void EmitCompositeConstructF32x2(EmitContext&) { 69Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
58 throw NotImplementedException("SPIR-V Instruction"); 70 return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
59} 71}
60 72
61void EmitCompositeConstructF32x3(EmitContext&) { 73Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
62 throw NotImplementedException("SPIR-V Instruction"); 74 return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
63} 75}
64 76
65void EmitCompositeConstructF32x4(EmitContext&) { 77Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
66 throw NotImplementedException("SPIR-V Instruction"); 78 return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
79}
80
81Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
82 return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
83}
84
85Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
86 return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
87}
88
89Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
90 return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
67} 91}
68 92
69Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { 93Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
@@ -78,6 +102,18 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
78 return ctx.OpCompositeExtract(ctx.F32[1], composite, index); 102 return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
79} 103}
80 104
105Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
106 return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
107}
108
109Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
110 return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
111}
112
113Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
114 return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
115}
116
81void EmitCompositeConstructF64x2(EmitContext&) { 117void EmitCompositeConstructF64x2(EmitContext&) {
82 throw NotImplementedException("SPIR-V Instruction"); 118 throw NotImplementedException("SPIR-V Instruction");
83} 119}
@@ -102,4 +138,16 @@ void EmitCompositeExtractF64x4(EmitContext&) {
102 throw NotImplementedException("SPIR-V Instruction"); 138 throw NotImplementedException("SPIR-V Instruction");
103} 139}
104 140
141Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
142 return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
143}
144
145Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
146 return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
147}
148
149Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
150 return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
151}
152
105} // namespace Shader::Backend::SPIRV 153} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
index 76ccaffce..edcc2a1cc 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp
@@ -86,4 +86,20 @@ Id EmitConvertU32U64(EmitContext& ctx, Id value) {
86 return ctx.OpUConvert(ctx.U32[1], value); 86 return ctx.OpUConvert(ctx.U32[1], value);
87} 87}
88 88
89Id EmitConvertF16F32(EmitContext& ctx, Id value) {
90 return ctx.OpFConvert(ctx.F16[1], value);
91}
92
93Id EmitConvertF32F16(EmitContext& ctx, Id value) {
94 return ctx.OpFConvert(ctx.F32[1], value);
95}
96
97Id EmitConvertF32F64(EmitContext& ctx, Id value) {
98 return ctx.OpFConvert(ctx.F32[1], value);
99}
100
101Id EmitConvertF64F32(EmitContext& ctx, Id value) {
102 return ctx.OpFConvert(ctx.F64[1], value);
103}
104
89} // namespace Shader::Backend::SPIRV 105} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 0f1cab57a..186920d8f 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
334} 334}
335 335
336Value IREmitter::CompositeExtract(const Value& vector, size_t element) { 336Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
337 const auto read = [&](Opcode opcode, size_t limit) -> Value { 337 const auto read{[&](Opcode opcode, size_t limit) -> Value {
338 if (element >= limit) { 338 if (element >= limit) {
339 throw InvalidArgument("Out of bounds element {}", element); 339 throw InvalidArgument("Out of bounds element {}", element);
340 } 340 }
341 return Inst(opcode, vector, Value{static_cast<u32>(element)}); 341 return Inst(opcode, vector, Value{static_cast<u32>(element)});
342 }; 342 }};
343 switch (vector.Type()) { 343 switch (vector.Type()) {
344 case Type::U32x2: 344 case Type::U32x2:
345 return read(Opcode::CompositeExtractU32x2, 2); 345 return read(Opcode::CompositeExtractU32x2, 2);
@@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
370 } 370 }
371} 371}
372 372
373Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
374 const auto insert{[&](Opcode opcode, size_t limit) {
375 if (element >= limit) {
376 throw InvalidArgument("Out of bounds element {}", element);
377 }
378 return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
379 }};
380 switch (vector.Type()) {
381 case Type::U32x2:
382 return insert(Opcode::CompositeInsertU32x2, 2);
383 case Type::U32x3:
384 return insert(Opcode::CompositeInsertU32x3, 3);
385 case Type::U32x4:
386 return insert(Opcode::CompositeInsertU32x4, 4);
387 case Type::F16x2:
388 return insert(Opcode::CompositeInsertF16x2, 2);
389 case Type::F16x3:
390 return insert(Opcode::CompositeInsertF16x3, 3);
391 case Type::F16x4:
392 return insert(Opcode::CompositeInsertF16x4, 4);
393 case Type::F32x2:
394 return insert(Opcode::CompositeInsertF32x2, 2);
395 case Type::F32x3:
396 return insert(Opcode::CompositeInsertF32x3, 3);
397 case Type::F32x4:
398 return insert(Opcode::CompositeInsertF32x4, 4);
399 case Type::F64x2:
400 return insert(Opcode::CompositeInsertF64x2, 2);
401 case Type::F64x3:
402 return insert(Opcode::CompositeInsertF64x3, 3);
403 case Type::F64x4:
404 return insert(Opcode::CompositeInsertF64x4, 4);
405 default:
406 ThrowInvalidType(vector.Type());
407 }
408}
409
373Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { 410Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
374 if (true_value.Type() != false_value.Type()) { 411 if (true_value.Type() != false_value.Type()) {
375 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); 412 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
@@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) {
433} 470}
434 471
435Value IREmitter::UnpackFloat2x16(const U32& value) { 472Value IREmitter::UnpackFloat2x16(const U32& value) {
436 return Inst<Value>(Opcode::UnpackFloat2x16, value); 473 return Inst(Opcode::UnpackFloat2x16, value);
437} 474}
438 475
439F64 IREmitter::PackDouble2x32(const Value& vector) { 476F64 IREmitter::PackDouble2x32(const Value& vector) {
@@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
968 } 1005 }
969} 1006}
970 1007
971U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { 1008U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
972 switch (result_bitsize) { 1009 switch (result_bitsize) {
973 case 32: 1010 case 32:
974 switch (value.Type()) { 1011 switch (value.Type()) {
@@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
995 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); 1032 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
996} 1033}
997 1034
1035F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
1036 switch (result_bitsize) {
1037 case 16:
1038 switch (value.Type()) {
1039 case Type::F16:
1040 // Nothing to do
1041 return value;
1042 case Type::F32:
1043 return Inst<F16>(Opcode::ConvertF16F32, value);
1044 case Type::F64:
1045 throw LogicError("Illegal conversion from F64 to F16");
1046 default:
1047 break;
1048 }
1049 break;
1050 case 32:
1051 switch (value.Type()) {
1052 case Type::F16:
1053 return Inst<F32>(Opcode::ConvertF32F16, value);
1054 case Type::F32:
1055 // Nothing to do
1056 return value;
1057 case Type::F64:
1058 return Inst<F64>(Opcode::ConvertF32F64, value);
1059 default:
1060 break;
1061 }
1062 break;
1063 case 64:
1064 switch (value.Type()) {
1065 case Type::F16:
1066 throw LogicError("Illegal conversion from F16 to F64");
1067 case Type::F32:
1068 // Nothing to do
1069 return value;
1070 case Type::F64:
1071 return Inst<F64>(Opcode::ConvertF32F64, value);
1072 default:
1073 break;
1074 }
1075 break;
1076 }
1077 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1078}
1079
998} // namespace Shader::IR 1080} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 03a67985f..5beb99895 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -97,6 +97,7 @@ public:
97 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, 97 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
98 const Value& e4); 98 const Value& e4);
99 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); 99 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
100 [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
100 101
101 [[nodiscard]] Value Select(const U1& condition, const Value& true_value, 102 [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
102 const Value& false_value); 103 const Value& false_value);
@@ -186,7 +187,8 @@ public:
186 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); 187 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
187 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); 188 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
188 189
189 [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); 190 [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
191 [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
190 192
191private: 193private:
192 IR::Block::iterator insertion_point; 194 IR::Block::iterator insertion_point;
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index aedbc5c3e..acfc0a829 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32,
83OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) 83OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
84OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) 84OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
85OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) 85OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
86OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
87OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
88OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
86OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) 89OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
87OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) 90OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
88OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) 91OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
89OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) 92OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
90OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) 93OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
91OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) 94OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
95OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
96OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
97OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
92OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) 98OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
93OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) 99OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
94OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) 100OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
95OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) 101OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
96OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) 102OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
97OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) 103OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
104OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
105OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
106OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
98OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) 107OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
99OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) 108OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
100OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) 109OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
101OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) 110OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
102OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) 111OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
103OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) 112OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
113OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
114OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
115OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
104 116
105// Select operations 117// Select operations
106OPCODE(SelectU8, U8, U1, U8, U8, ) 118OPCODE(SelectU8, U8, U1, U8, U8, )
@@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64,
277OPCODE(ConvertU64F16, U64, F16, ) 289OPCODE(ConvertU64F16, U64, F16, )
278OPCODE(ConvertU64F32, U64, F32, ) 290OPCODE(ConvertU64F32, U64, F32, )
279OPCODE(ConvertU64F64, U64, F64, ) 291OPCODE(ConvertU64F64, U64, F64, )
280
281OPCODE(ConvertU64U32, U64, U32, ) 292OPCODE(ConvertU64U32, U64, U32, )
282OPCODE(ConvertU32U64, U32, U64, ) 293OPCODE(ConvertU32U64, U32, U64, )
294OPCODE(ConvertF16F32, F16, F32, )
295OPCODE(ConvertF32F16, F32, F16, )
296OPCODE(ConvertF32F64, F32, F64, )
297OPCODE(ConvertF64F32, F64, F32, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..6965adfb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class Merge : u64 {
13 H1_H0,
14 F32,
15 MRG_H0,
16 MRG_H1,
17};
18
19enum class Swizzle : u64 {
20 H1_H0,
21 F32,
22 H0_H0,
23 H1_H1,
24};
25
26std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
27 switch (swizzle) {
28 case Swizzle::H1_H0: {
29 const IR::Value vector{ir.UnpackFloat2x16(value)};
30 return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
31 }
32 case Swizzle::H0_H0: {
33 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
34 return {scalar, scalar};
35 }
36 case Swizzle::H1_H1: {
37 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
38 return {scalar, scalar};
39 }
40 case Swizzle::F32: {
41 const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
42 return {scalar, scalar};
43 }
44 }
45 throw InvalidArgument("Invalid swizzle {}", swizzle);
46}
47
48IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
49 Merge merge) {
50 switch (merge) {
51 case Merge::H1_H0:
52 return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
53 case Merge::F32:
54 return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
55 case Merge::MRG_H0:
56 case Merge::MRG_H1: {
57 const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
58 const bool h0{merge == Merge::MRG_H0};
59 const IR::F16& insert{h0 ? lhs : rhs};
60 return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1));
61 }
62 }
63 throw InvalidArgument("Invalid merge {}", merge);
64}
65
66void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
67 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
68 union {
69 u64 raw;
70 BitField<0, 8, IR::Reg> dest_reg;
71 BitField<8, 8, IR::Reg> src_a;
72 } const hadd2{insn};
73
74 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
75 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
76 const bool promotion{lhs_a.Type() != lhs_b.Type()};
77 if (promotion) {
78 if (lhs_a.Type() == IR::Type::F16) {
79 lhs_a = v.ir.FPConvert(32, lhs_a);
80 rhs_a = v.ir.FPConvert(32, rhs_a);
81 }
82 if (lhs_b.Type() == IR::Type::F16) {
83 lhs_b = v.ir.FPConvert(32, lhs_b);
84 rhs_b = v.ir.FPConvert(32, rhs_b);
85 }
86 }
87 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
88 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
89
90 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
91 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
92
93 const IR::FpControl fp_control{
94 .no_contraction{true},
95 .rounding{IR::FpRounding::DontCare},
96 .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
97 };
98 IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
99 IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
100 if (sat) {
101 lhs = v.ir.FPSaturate(lhs);
102 rhs = v.ir.FPSaturate(rhs);
103 }
104 if (promotion) {
105 lhs = v.ir.FPConvert(16, lhs);
106 rhs = v.ir.FPConvert(16, rhs);
107 }
108 v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
109}
110} // Anonymous namespace
111
112void TranslatorVisitor::HADD2_reg(u64 insn) {
113 union {
114 u64 raw;
115 BitField<49, 2, Merge> merge;
116 BitField<39, 1, u64> ftz;
117 BitField<32, 1, u64> sat;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 BitField<47, 2, Swizzle> swizzle_a;
121 BitField<31, 1, u64> neg_b;
122 BitField<30, 1, u64> abs_b;
123 BitField<28, 2, Swizzle> swizzle_b;
124 } const hadd2{insn};
125
126 HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
127 hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
128 GetReg20(insn));
129}
130
131void TranslatorVisitor::HADD2_cbuf(u64 insn) {
132 union {
133 u64 raw;
134 BitField<49, 2, Merge> merge;
135 BitField<39, 1, u64> ftz;
136 BitField<52, 1, u64> sat;
137 BitField<43, 1, u64> neg_a;
138 BitField<44, 1, u64> abs_a;
139 BitField<47, 2, Swizzle> swizzle_a;
140 BitField<56, 1, u64> neg_b;
141 BitField<54, 1, u64> abs_b;
142 } const hadd2{insn};
143
144 HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
145 hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
146 GetCbuf(insn));
147}
148
149void TranslatorVisitor::HADD2_imm(u64 insn) {
150 union {
151 u64 raw;
152 BitField<49, 2, Merge> merge;
153 BitField<39, 1, u64> ftz;
154 BitField<52, 1, u64> sat;
155 BitField<43, 1, u64> neg_a;
156 BitField<44, 1, u64> abs_a;
157 BitField<47, 2, Swizzle> swizzle_a;
158 BitField<56, 1, u64> neg_high;
159 BitField<30, 9, u64> high;
160 BitField<29, 1, u64> neg_low;
161 BitField<20, 9, u64> low;
162 } const hadd2{insn};
163
164 const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
165 static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
166 HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
167 hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
168}
169
170void TranslatorVisitor::HADD2_32I(u64 insn) {
171 union {
172 u64 raw;
173 BitField<55, 1, u64> ftz;
174 BitField<52, 1, u64> sat;
175 BitField<56, 1, u64> neg_a;
176 BitField<53, 2, Swizzle> swizzle_a;
177 BitField<20, 32, u64> imm32;
178 } const hadd2{insn};
179
180 const u32 imm{static_cast<u32>(hadd2.imm32)};
181 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
182 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
183}
184} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
index 727524284..748b856c9 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) {
59 const IR::U64 address{[&]() -> IR::U64 { 59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) { 60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it 61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.ConvertU(64, v.X(mem.addr_reg)); 62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 } 63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) { 64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register"); 65 throw NotImplementedException("Unaligned address register");
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index a0535f1c2..c24f29ff7 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
265 ThrowNotImplemented(Opcode::GETLMEMBASE); 265 ThrowNotImplemented(Opcode::GETLMEMBASE);
266} 266}
267 267
268void TranslatorVisitor::HADD2_reg(u64) {
269 ThrowNotImplemented(Opcode::HADD2_reg);
270}
271
272void TranslatorVisitor::HADD2_cbuf(u64) {
273 ThrowNotImplemented(Opcode::HADD2_cbuf);
274}
275
276void TranslatorVisitor::HADD2_imm(u64) {
277 ThrowNotImplemented(Opcode::HADD2_imm);
278}
279
280void TranslatorVisitor::HADD2_32I(u64) {
281 ThrowNotImplemented(Opcode::HADD2_32I);
282}
283
284void TranslatorVisitor::HFMA2_reg(u64) { 268void TranslatorVisitor::HFMA2_reg(u64) {
285 ThrowNotImplemented(Opcode::HFMA2_reg); 269 ThrowNotImplemented(Opcode::HFMA2_reg);
286} 270}
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 98e3dfef7..965e52135 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
298 offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); 298 offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
299 } 299 }
300 } else { 300 } else {
301 offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); 301 offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
302 } 302 }
303 // Subtract the least significant 32 bits from the guest offset. The result is the storage 303 // Subtract the least significant 32 bits from the guest offset. The result is the storage
304 // buffer offset in bytes. 304 // buffer offset in bytes.
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index c7032f168..14a5cb50f 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) {
44 return IR::Opcode::CompositeExtractF32x3; 44 return IR::Opcode::CompositeExtractF32x3;
45 case IR::Opcode::CompositeExtractF16x4: 45 case IR::Opcode::CompositeExtractF16x4:
46 return IR::Opcode::CompositeExtractF32x4; 46 return IR::Opcode::CompositeExtractF32x4;
47 case IR::Opcode::CompositeInsertF16x2:
48 return IR::Opcode::CompositeInsertF32x2;
49 case IR::Opcode::CompositeInsertF16x3:
50 return IR::Opcode::CompositeInsertF32x3;
51 case IR::Opcode::CompositeInsertF16x4:
52 return IR::Opcode::CompositeInsertF32x4;
47 case IR::Opcode::ConvertS16F16: 53 case IR::Opcode::ConvertS16F16:
48 return IR::Opcode::ConvertS16F32; 54 return IR::Opcode::ConvertS16F32;
49 case IR::Opcode::ConvertS32F16: 55 case IR::Opcode::ConvertS32F16:
@@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) {
60 return IR::Opcode::PackHalf2x16; 66 return IR::Opcode::PackHalf2x16;
61 case IR::Opcode::UnpackFloat2x16: 67 case IR::Opcode::UnpackFloat2x16:
62 return IR::Opcode::UnpackHalf2x16; 68 return IR::Opcode::UnpackHalf2x16;
69 case IR::Opcode::ConvertF32F16:
70 return IR::Opcode::Identity;
71 case IR::Opcode::ConvertF16F32:
72 return IR::Opcode::Identity;
63 default: 73 default:
64 return op; 74 return op;
65 } 75 }