diff options
Diffstat (limited to 'src')
12 files changed, 400 insertions, 42 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 6506413a8..cb73e03fb 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC | |||
| 71 | frontend/maxwell/translate/impl/floating_point_multi_function.cpp | 71 | frontend/maxwell/translate/impl/floating_point_multi_function.cpp |
| 72 | frontend/maxwell/translate/impl/floating_point_multiply.cpp | 72 | frontend/maxwell/translate/impl/floating_point_multiply.cpp |
| 73 | frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | 73 | frontend/maxwell/translate/impl/floating_point_range_reduction.cpp |
| 74 | frontend/maxwell/translate/impl/half_floating_point_add.cpp | ||
| 74 | frontend/maxwell/translate/impl/impl.cpp | 75 | frontend/maxwell/translate/impl/impl.cpp |
| 75 | frontend/maxwell/translate/impl/impl.h | 76 | frontend/maxwell/translate/impl/impl.h |
| 76 | frontend/maxwell/translate/impl/integer_add.cpp | 77 | frontend/maxwell/translate/impl/integer_add.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 5446d6ab6..bed43c094 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -90,24 +90,36 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | |||
| 90 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); | 90 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); |
| 91 | Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); | 91 | Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); |
| 92 | Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); | 92 | Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); |
| 93 | void EmitCompositeConstructF16x2(EmitContext& ctx); | 93 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); |
| 94 | void EmitCompositeConstructF16x3(EmitContext& ctx); | 94 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); |
| 95 | void EmitCompositeConstructF16x4(EmitContext& ctx); | 95 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); |
| 96 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); | ||
| 97 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 98 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 96 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); | 99 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); |
| 97 | Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); | 100 | Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); |
| 98 | Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); | 101 | Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); |
| 99 | void EmitCompositeConstructF32x2(EmitContext& ctx); | 102 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); |
| 100 | void EmitCompositeConstructF32x3(EmitContext& ctx); | 103 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); |
| 101 | void EmitCompositeConstructF32x4(EmitContext& ctx); | 104 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); |
| 105 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); | ||
| 106 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | ||
| 107 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | ||
| 102 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); | 108 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); |
| 103 | Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); | 109 | Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); |
| 104 | Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); | 110 | Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); |
| 111 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 112 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 113 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 105 | void EmitCompositeConstructF64x2(EmitContext& ctx); | 114 | void EmitCompositeConstructF64x2(EmitContext& ctx); |
| 106 | void EmitCompositeConstructF64x3(EmitContext& ctx); | 115 | void EmitCompositeConstructF64x3(EmitContext& ctx); |
| 107 | void EmitCompositeConstructF64x4(EmitContext& ctx); | 116 | void EmitCompositeConstructF64x4(EmitContext& ctx); |
| 108 | void EmitCompositeExtractF64x2(EmitContext& ctx); | 117 | void EmitCompositeExtractF64x2(EmitContext& ctx); |
| 109 | void EmitCompositeExtractF64x3(EmitContext& ctx); | 118 | void EmitCompositeExtractF64x3(EmitContext& ctx); |
| 110 | void EmitCompositeExtractF64x4(EmitContext& ctx); | 119 | void EmitCompositeExtractF64x4(EmitContext& ctx); |
| 120 | Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 121 | Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 122 | Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index); | ||
| 111 | Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); | 123 | Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); |
| 112 | Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); | 124 | Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); |
| 113 | Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); | 125 | Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); |
| @@ -270,5 +282,9 @@ Id EmitConvertU64F32(EmitContext& ctx, Id value); | |||
| 270 | Id EmitConvertU64F64(EmitContext& ctx, Id value); | 282 | Id EmitConvertU64F64(EmitContext& ctx, Id value); |
| 271 | Id EmitConvertU64U32(EmitContext& ctx, Id value); | 283 | Id EmitConvertU64U32(EmitContext& ctx, Id value); |
| 272 | Id EmitConvertU32U64(EmitContext& ctx, Id value); | 284 | Id EmitConvertU32U64(EmitContext& ctx, Id value); |
| 285 | Id EmitConvertF16F32(EmitContext& ctx, Id value); | ||
| 286 | Id EmitConvertF32F16(EmitContext& ctx, Id value); | ||
| 287 | Id EmitConvertF32F64(EmitContext& ctx, Id value); | ||
| 288 | Id EmitConvertF64F32(EmitContext& ctx, Id value); | ||
| 273 | 289 | ||
| 274 | } // namespace Shader::Backend::SPIRV | 290 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index c950854a0..616e63676 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp | |||
| @@ -30,16 +30,28 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) { | |||
| 30 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); | 30 | return ctx.OpCompositeExtract(ctx.U32[1], composite, index); |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | void EmitCompositeConstructF16x2(EmitContext&) { | 33 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) { |
| 34 | throw NotImplementedException("SPIR-V Instruction"); | 34 | return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index); |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void EmitCompositeConstructF16x3(EmitContext&) { | 37 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) { |
| 38 | throw NotImplementedException("SPIR-V Instruction"); | 38 | return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index); |
| 39 | } | 39 | } |
| 40 | 40 | ||
| 41 | void EmitCompositeConstructF16x4(EmitContext&) { | 41 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) { |
| 42 | throw NotImplementedException("SPIR-V Instruction"); | 42 | return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); |
| 43 | } | ||
| 44 | |||
| 45 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 46 | return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); | ||
| 47 | } | ||
| 48 | |||
| 49 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 50 | return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3); | ||
| 51 | } | ||
| 52 | |||
| 53 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 54 | return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4); | ||
| 43 | } | 55 | } |
| 44 | 56 | ||
| 45 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { | 57 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { |
| @@ -54,16 +66,28 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) { | |||
| 54 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); | 66 | return ctx.OpCompositeExtract(ctx.F16[1], composite, index); |
| 55 | } | 67 | } |
| 56 | 68 | ||
| 57 | void EmitCompositeConstructF32x2(EmitContext&) { | 69 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) { |
| 58 | throw NotImplementedException("SPIR-V Instruction"); | 70 | return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index); |
| 59 | } | 71 | } |
| 60 | 72 | ||
| 61 | void EmitCompositeConstructF32x3(EmitContext&) { | 73 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) { |
| 62 | throw NotImplementedException("SPIR-V Instruction"); | 74 | return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index); |
| 63 | } | 75 | } |
| 64 | 76 | ||
| 65 | void EmitCompositeConstructF32x4(EmitContext&) { | 77 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) { |
| 66 | throw NotImplementedException("SPIR-V Instruction"); | 78 | return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); |
| 79 | } | ||
| 80 | |||
| 81 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { | ||
| 82 | return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); | ||
| 83 | } | ||
| 84 | |||
| 85 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) { | ||
| 86 | return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3); | ||
| 87 | } | ||
| 88 | |||
| 89 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) { | ||
| 90 | return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4); | ||
| 67 | } | 91 | } |
| 68 | 92 | ||
| 69 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { | 93 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { |
| @@ -78,6 +102,18 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) { | |||
| 78 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); | 102 | return ctx.OpCompositeExtract(ctx.F32[1], composite, index); |
| 79 | } | 103 | } |
| 80 | 104 | ||
| 105 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 106 | return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index); | ||
| 107 | } | ||
| 108 | |||
| 109 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 110 | return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index); | ||
| 111 | } | ||
| 112 | |||
| 113 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 114 | return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index); | ||
| 115 | } | ||
| 116 | |||
| 81 | void EmitCompositeConstructF64x2(EmitContext&) { | 117 | void EmitCompositeConstructF64x2(EmitContext&) { |
| 82 | throw NotImplementedException("SPIR-V Instruction"); | 118 | throw NotImplementedException("SPIR-V Instruction"); |
| 83 | } | 119 | } |
| @@ -102,4 +138,16 @@ void EmitCompositeExtractF64x4(EmitContext&) { | |||
| 102 | throw NotImplementedException("SPIR-V Instruction"); | 138 | throw NotImplementedException("SPIR-V Instruction"); |
| 103 | } | 139 | } |
| 104 | 140 | ||
| 141 | Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 142 | return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index); | ||
| 143 | } | ||
| 144 | |||
| 145 | Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 146 | return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index); | ||
| 147 | } | ||
| 148 | |||
| 149 | Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) { | ||
| 150 | return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); | ||
| 151 | } | ||
| 152 | |||
| 105 | } // namespace Shader::Backend::SPIRV | 153 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp index 76ccaffce..edcc2a1cc 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_convert.cpp | |||
| @@ -86,4 +86,20 @@ Id EmitConvertU32U64(EmitContext& ctx, Id value) { | |||
| 86 | return ctx.OpUConvert(ctx.U32[1], value); | 86 | return ctx.OpUConvert(ctx.U32[1], value); |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | Id EmitConvertF16F32(EmitContext& ctx, Id value) { | ||
| 90 | return ctx.OpFConvert(ctx.F16[1], value); | ||
| 91 | } | ||
| 92 | |||
| 93 | Id EmitConvertF32F16(EmitContext& ctx, Id value) { | ||
| 94 | return ctx.OpFConvert(ctx.F32[1], value); | ||
| 95 | } | ||
| 96 | |||
| 97 | Id EmitConvertF32F64(EmitContext& ctx, Id value) { | ||
| 98 | return ctx.OpFConvert(ctx.F32[1], value); | ||
| 99 | } | ||
| 100 | |||
| 101 | Id EmitConvertF64F32(EmitContext& ctx, Id value) { | ||
| 102 | return ctx.OpFConvert(ctx.F64[1], value); | ||
| 103 | } | ||
| 104 | |||
| 89 | } // namespace Shader::Backend::SPIRV | 105 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 0f1cab57a..186920d8f 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu | |||
| 334 | } | 334 | } |
| 335 | 335 | ||
| 336 | Value IREmitter::CompositeExtract(const Value& vector, size_t element) { | 336 | Value IREmitter::CompositeExtract(const Value& vector, size_t element) { |
| 337 | const auto read = [&](Opcode opcode, size_t limit) -> Value { | 337 | const auto read{[&](Opcode opcode, size_t limit) -> Value { |
| 338 | if (element >= limit) { | 338 | if (element >= limit) { |
| 339 | throw InvalidArgument("Out of bounds element {}", element); | 339 | throw InvalidArgument("Out of bounds element {}", element); |
| 340 | } | 340 | } |
| 341 | return Inst(opcode, vector, Value{static_cast<u32>(element)}); | 341 | return Inst(opcode, vector, Value{static_cast<u32>(element)}); |
| 342 | }; | 342 | }}; |
| 343 | switch (vector.Type()) { | 343 | switch (vector.Type()) { |
| 344 | case Type::U32x2: | 344 | case Type::U32x2: |
| 345 | return read(Opcode::CompositeExtractU32x2, 2); | 345 | return read(Opcode::CompositeExtractU32x2, 2); |
| @@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) { | |||
| 370 | } | 370 | } |
| 371 | } | 371 | } |
| 372 | 372 | ||
| 373 | Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) { | ||
| 374 | const auto insert{[&](Opcode opcode, size_t limit) { | ||
| 375 | if (element >= limit) { | ||
| 376 | throw InvalidArgument("Out of bounds element {}", element); | ||
| 377 | } | ||
| 378 | return Inst(opcode, vector, object, Value{static_cast<u32>(element)}); | ||
| 379 | }}; | ||
| 380 | switch (vector.Type()) { | ||
| 381 | case Type::U32x2: | ||
| 382 | return insert(Opcode::CompositeInsertU32x2, 2); | ||
| 383 | case Type::U32x3: | ||
| 384 | return insert(Opcode::CompositeInsertU32x3, 3); | ||
| 385 | case Type::U32x4: | ||
| 386 | return insert(Opcode::CompositeInsertU32x4, 4); | ||
| 387 | case Type::F16x2: | ||
| 388 | return insert(Opcode::CompositeInsertF16x2, 2); | ||
| 389 | case Type::F16x3: | ||
| 390 | return insert(Opcode::CompositeInsertF16x3, 3); | ||
| 391 | case Type::F16x4: | ||
| 392 | return insert(Opcode::CompositeInsertF16x4, 4); | ||
| 393 | case Type::F32x2: | ||
| 394 | return insert(Opcode::CompositeInsertF32x2, 2); | ||
| 395 | case Type::F32x3: | ||
| 396 | return insert(Opcode::CompositeInsertF32x3, 3); | ||
| 397 | case Type::F32x4: | ||
| 398 | return insert(Opcode::CompositeInsertF32x4, 4); | ||
| 399 | case Type::F64x2: | ||
| 400 | return insert(Opcode::CompositeInsertF64x2, 2); | ||
| 401 | case Type::F64x3: | ||
| 402 | return insert(Opcode::CompositeInsertF64x3, 3); | ||
| 403 | case Type::F64x4: | ||
| 404 | return insert(Opcode::CompositeInsertF64x4, 4); | ||
| 405 | default: | ||
| 406 | ThrowInvalidType(vector.Type()); | ||
| 407 | } | ||
| 408 | } | ||
| 409 | |||
| 373 | Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { | 410 | Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { |
| 374 | if (true_value.Type() != false_value.Type()) { | 411 | if (true_value.Type() != false_value.Type()) { |
| 375 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); | 412 | throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); |
| @@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) { | |||
| 433 | } | 470 | } |
| 434 | 471 | ||
| 435 | Value IREmitter::UnpackFloat2x16(const U32& value) { | 472 | Value IREmitter::UnpackFloat2x16(const U32& value) { |
| 436 | return Inst<Value>(Opcode::UnpackFloat2x16, value); | 473 | return Inst(Opcode::UnpackFloat2x16, value); |
| 437 | } | 474 | } |
| 438 | 475 | ||
| 439 | F64 IREmitter::PackDouble2x32(const Value& vector) { | 476 | F64 IREmitter::PackDouble2x32(const Value& vector) { |
| @@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v | |||
| 968 | } | 1005 | } |
| 969 | } | 1006 | } |
| 970 | 1007 | ||
| 971 | U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { | 1008 | U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) { |
| 972 | switch (result_bitsize) { | 1009 | switch (result_bitsize) { |
| 973 | case 32: | 1010 | case 32: |
| 974 | switch (value.Type()) { | 1011 | switch (value.Type()) { |
| @@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { | |||
| 995 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | 1032 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); |
| 996 | } | 1033 | } |
| 997 | 1034 | ||
| 1035 | F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) { | ||
| 1036 | switch (result_bitsize) { | ||
| 1037 | case 16: | ||
| 1038 | switch (value.Type()) { | ||
| 1039 | case Type::F16: | ||
| 1040 | // Nothing to do | ||
| 1041 | return value; | ||
| 1042 | case Type::F32: | ||
| 1043 | return Inst<F16>(Opcode::ConvertF16F32, value); | ||
| 1044 | case Type::F64: | ||
| 1045 | throw LogicError("Illegal conversion from F64 to F16"); | ||
| 1046 | default: | ||
| 1047 | break; | ||
| 1048 | } | ||
| 1049 | break; | ||
| 1050 | case 32: | ||
| 1051 | switch (value.Type()) { | ||
| 1052 | case Type::F16: | ||
| 1053 | return Inst<F32>(Opcode::ConvertF32F16, value); | ||
| 1054 | case Type::F32: | ||
| 1055 | // Nothing to do | ||
| 1056 | return value; | ||
| 1057 | case Type::F64: | ||
| 1058 | return Inst<F64>(Opcode::ConvertF32F64, value); | ||
| 1059 | default: | ||
| 1060 | break; | ||
| 1061 | } | ||
| 1062 | break; | ||
| 1063 | case 64: | ||
| 1064 | switch (value.Type()) { | ||
| 1065 | case Type::F16: | ||
| 1066 | throw LogicError("Illegal conversion from F16 to F64"); | ||
| 1067 | case Type::F32: | ||
| 1068 | // Nothing to do | ||
| 1069 | return value; | ||
| 1070 | case Type::F64: | ||
| 1071 | return Inst<F64>(Opcode::ConvertF32F64, value); | ||
| 1072 | default: | ||
| 1073 | break; | ||
| 1074 | } | ||
| 1075 | break; | ||
| 1076 | } | ||
| 1077 | throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); | ||
| 1078 | } | ||
| 1079 | |||
| 998 | } // namespace Shader::IR | 1080 | } // namespace Shader::IR |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index 03a67985f..5beb99895 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -97,6 +97,7 @@ public: | |||
| 97 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, | 97 | [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, |
| 98 | const Value& e4); | 98 | const Value& e4); |
| 99 | [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); | 99 | [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); |
| 100 | [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element); | ||
| 100 | 101 | ||
| 101 | [[nodiscard]] Value Select(const U1& condition, const Value& true_value, | 102 | [[nodiscard]] Value Select(const U1& condition, const Value& true_value, |
| 102 | const Value& false_value); | 103 | const Value& false_value); |
| @@ -186,7 +187,8 @@ public: | |||
| 186 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); | 187 | [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); |
| 187 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); | 188 | [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); |
| 188 | 189 | ||
| 189 | [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); | 190 | [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value); |
| 191 | [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value); | ||
| 190 | 192 | ||
| 191 | private: | 193 | private: |
| 192 | IR::Block::iterator insertion_point; | 194 | IR::Block::iterator insertion_point; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index aedbc5c3e..acfc0a829 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32, | |||
| 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) | 83 | OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) |
| 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) | 84 | OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) |
| 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) | 85 | OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) |
| 86 | OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, ) | ||
| 87 | OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, ) | ||
| 88 | OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, ) | ||
| 86 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) | 89 | OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) |
| 87 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) | 90 | OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) |
| 88 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) | 91 | OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) |
| 89 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) | 92 | OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) |
| 90 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) | 93 | OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) |
| 91 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) | 94 | OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) |
| 95 | OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, ) | ||
| 96 | OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, ) | ||
| 97 | OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, ) | ||
| 92 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) | 98 | OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) |
| 93 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) | 99 | OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) |
| 94 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) | 100 | OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) |
| 95 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) | 101 | OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) |
| 96 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) | 102 | OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) |
| 97 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) | 103 | OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) |
| 104 | OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, ) | ||
| 105 | OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, ) | ||
| 106 | OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, ) | ||
| 98 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) | 107 | OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) |
| 99 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) | 108 | OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) |
| 100 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) | 109 | OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) |
| 101 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) | 110 | OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) |
| 102 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) | 111 | OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) |
| 103 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) | 112 | OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) |
| 113 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | ||
| 114 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | ||
| 115 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | ||
| 104 | 116 | ||
| 105 | // Select operations | 117 | // Select operations |
| 106 | OPCODE(SelectU8, U8, U1, U8, U8, ) | 118 | OPCODE(SelectU8, U8, U1, U8, U8, ) |
| @@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64, | |||
| 277 | OPCODE(ConvertU64F16, U64, F16, ) | 289 | OPCODE(ConvertU64F16, U64, F16, ) |
| 278 | OPCODE(ConvertU64F32, U64, F32, ) | 290 | OPCODE(ConvertU64F32, U64, F32, ) |
| 279 | OPCODE(ConvertU64F64, U64, F64, ) | 291 | OPCODE(ConvertU64F64, U64, F64, ) |
| 280 | |||
| 281 | OPCODE(ConvertU64U32, U64, U32, ) | 292 | OPCODE(ConvertU64U32, U64, U32, ) |
| 282 | OPCODE(ConvertU32U64, U32, U64, ) | 293 | OPCODE(ConvertU32U64, U32, U64, ) |
| 294 | OPCODE(ConvertF16F32, F16, F32, ) | ||
| 295 | OPCODE(ConvertF32F16, F32, F16, ) | ||
| 296 | OPCODE(ConvertF32F64, F32, F64, ) | ||
| 297 | OPCODE(ConvertF64F32, F64, F32, ) | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..6965adfb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class Merge : u64 { | ||
| 13 | H1_H0, | ||
| 14 | F32, | ||
| 15 | MRG_H0, | ||
| 16 | MRG_H1, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Swizzle : u64 { | ||
| 20 | H1_H0, | ||
| 21 | F32, | ||
| 22 | H0_H0, | ||
| 23 | H1_H1, | ||
| 24 | }; | ||
| 25 | |||
| 26 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { | ||
| 27 | switch (swizzle) { | ||
| 28 | case Swizzle::H1_H0: { | ||
| 29 | const IR::Value vector{ir.UnpackFloat2x16(value)}; | ||
| 30 | return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; | ||
| 31 | } | ||
| 32 | case Swizzle::H0_H0: { | ||
| 33 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; | ||
| 34 | return {scalar, scalar}; | ||
| 35 | } | ||
| 36 | case Swizzle::H1_H1: { | ||
| 37 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; | ||
| 38 | return {scalar, scalar}; | ||
| 39 | } | ||
| 40 | case Swizzle::F32: { | ||
| 41 | const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; | ||
| 42 | return {scalar, scalar}; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | throw InvalidArgument("Invalid swizzle {}", swizzle); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 49 | Merge merge) { | ||
| 50 | switch (merge) { | ||
| 51 | case Merge::H1_H0: | ||
| 52 | return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); | ||
| 53 | case Merge::F32: | ||
| 54 | return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); | ||
| 55 | case Merge::MRG_H0: | ||
| 56 | case Merge::MRG_H1: { | ||
| 57 | const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; | ||
| 58 | const bool h0{merge == Merge::MRG_H0}; | ||
| 59 | const IR::F16& insert{h0 ? lhs : rhs}; | ||
| 60 | return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1)); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | throw InvalidArgument("Invalid merge {}", merge); | ||
| 64 | } | ||
| 65 | |||
| 66 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | ||
| 67 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 68 | union { | ||
| 69 | u64 raw; | ||
| 70 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 71 | BitField<8, 8, IR::Reg> src_a; | ||
| 72 | } const hadd2{insn}; | ||
| 73 | |||
| 74 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; | ||
| 75 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 76 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 77 | if (promotion) { | ||
| 78 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 79 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 80 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 81 | } | ||
| 82 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 83 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 84 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 88 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 89 | |||
| 90 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 91 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 92 | |||
| 93 | const IR::FpControl fp_control{ | ||
| 94 | .no_contraction{true}, | ||
| 95 | .rounding{IR::FpRounding::DontCare}, | ||
| 96 | .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||
| 97 | }; | ||
| 98 | IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; | ||
| 99 | IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; | ||
| 100 | if (sat) { | ||
| 101 | lhs = v.ir.FPSaturate(lhs); | ||
| 102 | rhs = v.ir.FPSaturate(rhs); | ||
| 103 | } | ||
| 104 | if (promotion) { | ||
| 105 | lhs = v.ir.FPConvert(16, lhs); | ||
| 106 | rhs = v.ir.FPConvert(16, rhs); | ||
| 107 | } | ||
| 108 | v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); | ||
| 109 | } | ||
| 110 | } // Anonymous namespace | ||
| 111 | |||
| 112 | void TranslatorVisitor::HADD2_reg(u64 insn) { | ||
| 113 | union { | ||
| 114 | u64 raw; | ||
| 115 | BitField<49, 2, Merge> merge; | ||
| 116 | BitField<39, 1, u64> ftz; | ||
| 117 | BitField<32, 1, u64> sat; | ||
| 118 | BitField<43, 1, u64> neg_a; | ||
| 119 | BitField<44, 1, u64> abs_a; | ||
| 120 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 121 | BitField<31, 1, u64> neg_b; | ||
| 122 | BitField<30, 1, u64> abs_b; | ||
| 123 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 124 | } const hadd2{insn}; | ||
| 125 | |||
| 126 | HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, | ||
| 127 | hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, | ||
| 128 | GetReg20(insn)); | ||
| 129 | } | ||
| 130 | |||
| 131 | void TranslatorVisitor::HADD2_cbuf(u64 insn) { | ||
| 132 | union { | ||
| 133 | u64 raw; | ||
| 134 | BitField<49, 2, Merge> merge; | ||
| 135 | BitField<39, 1, u64> ftz; | ||
| 136 | BitField<52, 1, u64> sat; | ||
| 137 | BitField<43, 1, u64> neg_a; | ||
| 138 | BitField<44, 1, u64> abs_a; | ||
| 139 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 140 | BitField<56, 1, u64> neg_b; | ||
| 141 | BitField<54, 1, u64> abs_b; | ||
| 142 | } const hadd2{insn}; | ||
| 143 | |||
| 144 | HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, | ||
| 145 | hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, | ||
| 146 | GetCbuf(insn)); | ||
| 147 | } | ||
| 148 | |||
| 149 | void TranslatorVisitor::HADD2_imm(u64 insn) { | ||
| 150 | union { | ||
| 151 | u64 raw; | ||
| 152 | BitField<49, 2, Merge> merge; | ||
| 153 | BitField<39, 1, u64> ftz; | ||
| 154 | BitField<52, 1, u64> sat; | ||
| 155 | BitField<43, 1, u64> neg_a; | ||
| 156 | BitField<44, 1, u64> abs_a; | ||
| 157 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 158 | BitField<56, 1, u64> neg_high; | ||
| 159 | BitField<30, 9, u64> high; | ||
| 160 | BitField<29, 1, u64> neg_low; | ||
| 161 | BitField<20, 9, u64> low; | ||
| 162 | } const hadd2{insn}; | ||
| 163 | |||
| 164 | const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 165 | static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 166 | HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0, | ||
| 167 | hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 168 | } | ||
| 169 | |||
| 170 | void TranslatorVisitor::HADD2_32I(u64 insn) { | ||
| 171 | union { | ||
| 172 | u64 raw; | ||
| 173 | BitField<55, 1, u64> ftz; | ||
| 174 | BitField<52, 1, u64> sat; | ||
| 175 | BitField<56, 1, u64> neg_a; | ||
| 176 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 177 | BitField<20, 32, u64> imm32; | ||
| 178 | } const hadd2{insn}; | ||
| 179 | |||
| 180 | const u32 imm{static_cast<u32>(hadd2.imm32)}; | ||
| 181 | HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, | ||
| 182 | hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 183 | } | ||
| 184 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp index 727524284..748b856c9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) { | |||
| 59 | const IR::U64 address{[&]() -> IR::U64 { | 59 | const IR::U64 address{[&]() -> IR::U64 { |
| 60 | if (mem.e == 0) { | 60 | if (mem.e == 0) { |
| 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it | 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it |
| 62 | return v.ir.ConvertU(64, v.X(mem.addr_reg)); | 62 | return v.ir.UConvert(64, v.X(mem.addr_reg)); |
| 63 | } | 63 | } |
| 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { | 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { |
| 65 | throw NotImplementedException("Unaligned address register"); | 65 | throw NotImplementedException("Unaligned address register"); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index a0535f1c2..c24f29ff7 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) { | |||
| 265 | ThrowNotImplemented(Opcode::GETLMEMBASE); | 265 | ThrowNotImplemented(Opcode::GETLMEMBASE); |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | void TranslatorVisitor::HADD2_reg(u64) { | ||
| 269 | ThrowNotImplemented(Opcode::HADD2_reg); | ||
| 270 | } | ||
| 271 | |||
| 272 | void TranslatorVisitor::HADD2_cbuf(u64) { | ||
| 273 | ThrowNotImplemented(Opcode::HADD2_cbuf); | ||
| 274 | } | ||
| 275 | |||
| 276 | void TranslatorVisitor::HADD2_imm(u64) { | ||
| 277 | ThrowNotImplemented(Opcode::HADD2_imm); | ||
| 278 | } | ||
| 279 | |||
| 280 | void TranslatorVisitor::HADD2_32I(u64) { | ||
| 281 | ThrowNotImplemented(Opcode::HADD2_32I); | ||
| 282 | } | ||
| 283 | |||
| 284 | void TranslatorVisitor::HFMA2_reg(u64) { | 268 | void TranslatorVisitor::HFMA2_reg(u64) { |
| 285 | ThrowNotImplemented(Opcode::HFMA2_reg); | 269 | ThrowNotImplemented(Opcode::HFMA2_reg); |
| 286 | } | 270 | } |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 98e3dfef7..965e52135 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer | |||
| 298 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); | 298 | offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); |
| 299 | } | 299 | } |
| 300 | } else { | 300 | } else { |
| 301 | offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); | 301 | offset = ir.UConvert(32, IR::U64{inst.Arg(0)}); |
| 302 | } | 302 | } |
| 303 | // Subtract the least significant 32 bits from the guest offset. The result is the storage | 303 | // Subtract the least significant 32 bits from the guest offset. The result is the storage |
| 304 | // buffer offset in bytes. | 304 | // buffer offset in bytes. |
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index c7032f168..14a5cb50f 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) { | |||
| 44 | return IR::Opcode::CompositeExtractF32x3; | 44 | return IR::Opcode::CompositeExtractF32x3; |
| 45 | case IR::Opcode::CompositeExtractF16x4: | 45 | case IR::Opcode::CompositeExtractF16x4: |
| 46 | return IR::Opcode::CompositeExtractF32x4; | 46 | return IR::Opcode::CompositeExtractF32x4; |
| 47 | case IR::Opcode::CompositeInsertF16x2: | ||
| 48 | return IR::Opcode::CompositeInsertF32x2; | ||
| 49 | case IR::Opcode::CompositeInsertF16x3: | ||
| 50 | return IR::Opcode::CompositeInsertF32x3; | ||
| 51 | case IR::Opcode::CompositeInsertF16x4: | ||
| 52 | return IR::Opcode::CompositeInsertF32x4; | ||
| 47 | case IR::Opcode::ConvertS16F16: | 53 | case IR::Opcode::ConvertS16F16: |
| 48 | return IR::Opcode::ConvertS16F32; | 54 | return IR::Opcode::ConvertS16F32; |
| 49 | case IR::Opcode::ConvertS32F16: | 55 | case IR::Opcode::ConvertS32F16: |
| @@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) { | |||
| 60 | return IR::Opcode::PackHalf2x16; | 66 | return IR::Opcode::PackHalf2x16; |
| 61 | case IR::Opcode::UnpackFloat2x16: | 67 | case IR::Opcode::UnpackFloat2x16: |
| 62 | return IR::Opcode::UnpackHalf2x16; | 68 | return IR::Opcode::UnpackHalf2x16; |
| 69 | case IR::Opcode::ConvertF32F16: | ||
| 70 | return IR::Opcode::Identity; | ||
| 71 | case IR::Opcode::ConvertF16F32: | ||
| 72 | return IR::Opcode::Identity; | ||
| 63 | default: | 73 | default: |
| 64 | return op; | 74 | return op; |
| 65 | } | 75 | } |