diff options
Diffstat (limited to 'src/shader_recompiler')
15 files changed, 111 insertions, 28 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 3d028db0f..7d8b938d1 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -169,6 +169,7 @@ void EmitContext::DefineCommonTypes(const Info& info) { | |||
| 169 | AddCapability(spv::Capability::Float64); | 169 | AddCapability(spv::Capability::Float64); |
| 170 | F64.Define(*this, TypeFloat(64), "f64"); | 170 | F64.Define(*this, TypeFloat(64), "f64"); |
| 171 | } | 171 | } |
| 172 | array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2"); | ||
| 172 | } | 173 | } |
| 173 | 174 | ||
| 174 | void EmitContext::DefineCommonConstants() { | 175 | void EmitContext::DefineCommonConstants() { |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index 5ed815c06..0a1e85408 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -65,6 +65,7 @@ public: | |||
| 65 | VectorTypes U32; | 65 | VectorTypes U32; |
| 66 | VectorTypes F16; | 66 | VectorTypes F16; |
| 67 | VectorTypes F64; | 67 | VectorTypes F64; |
| 68 | Id array_U32x2; | ||
| 68 | 69 | ||
| 69 | Id true_value{}; | 70 | Id true_value{}; |
| 70 | Id false_value{}; | 71 | Id false_value{}; |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 4f945b917..eaf94dad5 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va | |||
| 95 | Id value); | 95 | Id value); |
| 96 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | 96 | void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, |
| 97 | Id value); | 97 | Id value); |
| 98 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2); | 98 | Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); |
| 99 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | 99 | Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3); |
| 100 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | 100 | Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); |
| 101 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); | 101 | Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); |
| @@ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); | |||
| 104 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); | 104 | Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index); |
| 105 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); | 105 | Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index); |
| 106 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); | 106 | Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index); |
| 107 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2); | 107 | Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); |
| 108 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); | 108 | Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3); |
| 109 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | 109 | Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); |
| 110 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); | 110 | Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); |
| @@ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); | |||
| 113 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); | 113 | Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index); |
| 114 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); | 114 | Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index); |
| 115 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); | 115 | Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index); |
| 116 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2); | 116 | Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2); |
| 117 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); | 117 | Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3); |
| 118 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); | 118 | Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4); |
| 119 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); | 119 | Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); |
| @@ -122,6 +122,7 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); | |||
| 122 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); | 122 | Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index); |
| 123 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); | 123 | Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index); |
| 124 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); | 124 | Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index); |
| 125 | Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4); | ||
| 125 | void EmitCompositeConstructF64x2(EmitContext& ctx); | 126 | void EmitCompositeConstructF64x2(EmitContext& ctx); |
| 126 | void EmitCompositeConstructF64x3(EmitContext& ctx); | 127 | void EmitCompositeConstructF64x3(EmitContext& ctx); |
| 127 | void EmitCompositeConstructF64x4(EmitContext& ctx); | 128 | void EmitCompositeConstructF64x4(EmitContext& ctx); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp index 616e63676..0da682859 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp | |||
| @@ -3,10 +3,15 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" |
| 6 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 6 | 7 | ||
| 7 | namespace Shader::Backend::SPIRV { | 8 | namespace Shader::Backend::SPIRV { |
| 8 | 9 | ||
| 9 | Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) { | 10 | Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { |
| 11 | const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||
| 12 | if (info.is_constant) { | ||
| 13 | return ctx.ConstantComposite(ctx.U32[2], e1, e2); | ||
| 14 | } | ||
| 10 | return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); | 15 | return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2); |
| 11 | } | 16 | } |
| 12 | 17 | ||
| @@ -42,7 +47,12 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index | |||
| 42 | return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); | 47 | return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index); |
| 43 | } | 48 | } |
| 44 | 49 | ||
| 45 | Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) { | 50 | Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { |
| 51 | |||
| 52 | const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||
| 53 | if (info.is_constant) { | ||
| 54 | return ctx.ConstantComposite(ctx.F16[2], e1, e2); | ||
| 55 | } | ||
| 46 | return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); | 56 | return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2); |
| 47 | } | 57 | } |
| 48 | 58 | ||
| @@ -78,7 +88,11 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index | |||
| 78 | return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); | 88 | return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index); |
| 79 | } | 89 | } |
| 80 | 90 | ||
| 81 | Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) { | 91 | Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) { |
| 92 | const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||
| 93 | if (info.is_constant) { | ||
| 94 | return ctx.ConstantComposite(ctx.F32[2], e1, e2); | ||
| 95 | } | ||
| 82 | return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); | 96 | return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2); |
| 83 | } | 97 | } |
| 84 | 98 | ||
| @@ -150,4 +164,15 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index | |||
| 150 | return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); | 164 | return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index); |
| 151 | } | 165 | } |
| 152 | 166 | ||
| 167 | Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) { | ||
| 168 | const auto info{inst->Flags<IR::CompositeDecoration>()}; | ||
| 169 | if (info.is_constant) { | ||
| 170 | return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4); | ||
| 171 | } | ||
| 172 | if (ctx.profile.support_variadic_ptp) { | ||
| 173 | return OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4); | ||
| 174 | } | ||
| 175 | return {}; | ||
| 176 | } | ||
| 177 | |||
| 153 | } // namespace Shader::Backend::SPIRV | 178 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 589013773..776afd4ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -30,10 +30,13 @@ public: | |||
| 30 | } | 30 | } |
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset) { | 33 | explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) { |
| 34 | if (Sirit::ValidId(offset)) { | 34 | if (Sirit::ValidId(offset)) { |
| 35 | Add(spv::ImageOperandsMask::Offset, offset); | 35 | Add(spv::ImageOperandsMask::Offset, offset); |
| 36 | } | 36 | } |
| 37 | if (Sirit::ValidId(offset2)) { | ||
| 38 | Add(spv::ImageOperandsMask::ConstOffsets, offset2); | ||
| 39 | } | ||
| 37 | } | 40 | } |
| 38 | 41 | ||
| 39 | void Add(spv::ImageOperandsMask new_mask, Id value) { | 42 | void Add(spv::ImageOperandsMask new_mask, Id value) { |
| @@ -177,7 +180,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va | |||
| 177 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, | 180 | Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset, |
| 178 | [[maybe_unused]] Id offset2) { | 181 | [[maybe_unused]] Id offset2) { |
| 179 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 182 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 180 | const ImageOperands operands(ctx, offset); | 183 | const ImageOperands operands(ctx, offset, offset2); |
| 181 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, | 184 | return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst, |
| 182 | ctx.F32[4], Texture(ctx, index), coords, | 185 | ctx.F32[4], Texture(ctx, index), coords, |
| 183 | ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), | 186 | ctx.Constant(ctx.U32[1], info.gather_component.Value()), operands.Mask(), |
| @@ -187,7 +190,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id | |||
| 187 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 190 | Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 188 | Id offset, [[maybe_unused]] Id offset2, Id dref) { | 191 | Id offset, [[maybe_unused]] Id offset2, Id dref) { |
| 189 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 192 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 190 | const ImageOperands operands(ctx, offset); | 193 | const ImageOperands operands(ctx, offset, offset2); |
| 191 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, | 194 | return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst, |
| 192 | ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); | 195 | ctx.F32[4], Texture(ctx, index), coords, dref, operands.Mask(), operands.Span()); |
| 193 | } | 196 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b365a8a6e..f49c30484 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -398,15 +398,16 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) { | |||
| 398 | if (e1.Type() != e2.Type()) { | 398 | if (e1.Type() != e2.Type()) { |
| 399 | throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); | 399 | throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); |
| 400 | } | 400 | } |
| 401 | CompositeDecoration decor{}; | ||
| 401 | switch (e1.Type()) { | 402 | switch (e1.Type()) { |
| 402 | case Type::U32: | 403 | case Type::U32: |
| 403 | return Inst(Opcode::CompositeConstructU32x2, e1, e2); | 404 | return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2); |
| 404 | case Type::F16: | 405 | case Type::F16: |
| 405 | return Inst(Opcode::CompositeConstructF16x2, e1, e2); | 406 | return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2); |
| 406 | case Type::F32: | 407 | case Type::F32: |
| 407 | return Inst(Opcode::CompositeConstructF32x2, e1, e2); | 408 | return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2); |
| 408 | case Type::F64: | 409 | case Type::F64: |
| 409 | return Inst(Opcode::CompositeConstructF64x2, e1, e2); | 410 | return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2); |
| 410 | default: | 411 | default: |
| 411 | ThrowInvalidType(e1.Type()); | 412 | ThrowInvalidType(e1.Type()); |
| 412 | } | 413 | } |
| @@ -436,6 +437,7 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu | |||
| 436 | throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), | 437 | throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), |
| 437 | e3.Type(), e4.Type()); | 438 | e3.Type(), e4.Type()); |
| 438 | } | 439 | } |
| 440 | CompositeDecoration decor{}; | ||
| 439 | switch (e1.Type()) { | 441 | switch (e1.Type()) { |
| 440 | case Type::U32: | 442 | case Type::U32: |
| 441 | return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); | 443 | return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4); |
| @@ -445,6 +447,8 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu | |||
| 445 | return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); | 447 | return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4); |
| 446 | case Type::F64: | 448 | case Type::F64: |
| 447 | return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); | 449 | return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4); |
| 450 | case Type::U32x2: | ||
| 451 | return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4); | ||
| 448 | default: | 452 | default: |
| 449 | ThrowInvalidType(e1.Type()); | 453 | ThrowInvalidType(e1.Type()); |
| 450 | } | 454 | } |
| @@ -1481,7 +1485,7 @@ Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Val | |||
| 1481 | } | 1485 | } |
| 1482 | 1486 | ||
| 1483 | Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, | 1487 | Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset, |
| 1484 | const Value& offset2, const F32& dref, TextureInstInfo info) { | 1488 | const Value& offset2, const F32& dref, TextureInstInfo info) { |
| 1485 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref | 1489 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref |
| 1486 | : Opcode::BindlessImageGatherDref}; | 1490 | : Opcode::BindlessImageGatherDref}; |
| 1487 | return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); | 1491 | return Inst(op, Flags{info}, handle, coords, offset, offset2, dref); |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.h b/src/shader_recompiler/frontend/ir/microinstruction.h index 770bbd550..77296cfa4 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.h +++ b/src/shader_recompiler/frontend/ir/microinstruction.h | |||
| @@ -99,6 +99,12 @@ public: | |||
| 99 | return ret; | 99 | return ret; |
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | template <typename FlagsType> | ||
| 103 | requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>) | ||
| 104 | [[nodiscard]] void SetFlags(FlagsType& new_val) noexcept { | ||
| 105 | std::memcpy(&flags, &new_val, sizeof(new_val)); | ||
| 106 | } | ||
| 107 | |||
| 102 | /// Intrusively store the host definition of this instruction. | 108 | /// Intrusively store the host definition of this instruction. |
| 103 | template <typename DefinitionType> | 109 | template <typename DefinitionType> |
| 104 | void SetDefinition(DefinitionType def) { | 110 | void SetDefinition(DefinitionType def) { |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 4f09a4b39..20fb14fea 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -32,6 +32,11 @@ struct FpControl { | |||
| 32 | }; | 32 | }; |
| 33 | static_assert(sizeof(FpControl) <= sizeof(u32)); | 33 | static_assert(sizeof(FpControl) <= sizeof(u32)); |
| 34 | 34 | ||
| 35 | struct CompositeDecoration { | ||
| 36 | bool is_constant{false}; | ||
| 37 | }; | ||
| 38 | static_assert(sizeof(CompositeDecoration) <= sizeof(u32)); | ||
| 39 | |||
| 35 | union TextureInstInfo { | 40 | union TextureInstInfo { |
| 36 | u32 raw; | 41 | u32 raw; |
| 37 | BitField<0, 8, TextureType> type; | 42 | BitField<0, 8, TextureType> type; |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 60a0bc980..0dc0aabdf 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -126,6 +126,7 @@ OPCODE(CompositeExtractF64x4, F64, F64x | |||
| 126 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) | 126 | OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, ) |
| 127 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) | 127 | OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, ) |
| 128 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) | 128 | OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, ) |
| 129 | OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, ) | ||
| 129 | 130 | ||
| 130 | // Select operations | 131 | // Select operations |
| 131 | OPCODE(SelectU1, U1, U1, U1, U1, ) | 132 | OPCODE(SelectU1, U1, U1, U1, U1, ) |
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp index e8e4662e7..7671fc3d8 100644 --- a/src/shader_recompiler/frontend/ir/value.cpp +++ b/src/shader_recompiler/frontend/ir/value.cpp | |||
| @@ -44,6 +44,20 @@ bool Value::IsEmpty() const noexcept { | |||
| 44 | return type == Type::Void; | 44 | return type == Type::Void; |
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | bool Value::IsConstantContainer() const { | ||
| 48 | if (IsImmediate()) { | ||
| 49 | return true; | ||
| 50 | } | ||
| 51 | ValidateAccess(Type::Opaque); | ||
| 52 | auto num_args = inst->NumArgs(); | ||
| 53 | for (size_t i = 0; i < num_args; i++) { | ||
| 54 | if (!inst->Arg(i).IsConstantContainer()) { | ||
| 55 | return false; | ||
| 56 | } | ||
| 57 | } | ||
| 58 | return true; | ||
| 59 | } | ||
| 60 | |||
| 47 | bool Value::IsImmediate() const noexcept { | 61 | bool Value::IsImmediate() const noexcept { |
| 48 | if (IsIdentity()) { | 62 | if (IsIdentity()) { |
| 49 | return inst->Arg(0).IsImmediate(); | 63 | return inst->Arg(0).IsImmediate(); |
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h index b27601e70..5d6e74c14 100644 --- a/src/shader_recompiler/frontend/ir/value.h +++ b/src/shader_recompiler/frontend/ir/value.h | |||
| @@ -38,6 +38,7 @@ public: | |||
| 38 | [[nodiscard]] bool IsImmediate() const noexcept; | 38 | [[nodiscard]] bool IsImmediate() const noexcept; |
| 39 | [[nodiscard]] bool IsLabel() const noexcept; | 39 | [[nodiscard]] bool IsLabel() const noexcept; |
| 40 | [[nodiscard]] IR::Type Type() const noexcept; | 40 | [[nodiscard]] IR::Type Type() const noexcept; |
| 41 | [[nodiscard]] bool IsConstantContainer() const; | ||
| 41 | 42 | ||
| 42 | [[nodiscard]] IR::Inst* Inst() const; | 43 | [[nodiscard]] IR::Inst* Inst() const; |
| 43 | [[nodiscard]] IR::Block* Label() const; | 44 | [[nodiscard]] IR::Block* Label() const; |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 1f1689c43..b2da079f9 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -101,16 +101,18 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | |||
| 101 | switch (type) { | 101 | switch (type) { |
| 102 | case TextureType::_1D: | 102 | case TextureType::_1D: |
| 103 | case TextureType::ARRAY_1D: | 103 | case TextureType::ARRAY_1D: |
| 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)); | 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); |
| 105 | case TextureType::_2D: | 105 | case TextureType::_2D: |
| 106 | case TextureType::ARRAY_2D: | 106 | case TextureType::ARRAY_2D: |
| 107 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), | 107 | return v.ir.CompositeConstruct( |
| 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4))); | 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), |
| 109 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 109 | case TextureType::_3D: | 110 | case TextureType::_3D: |
| 110 | case TextureType::ARRAY_3D: | 111 | case TextureType::ARRAY_3D: |
| 111 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)), | 112 | return v.ir.CompositeConstruct( |
| 112 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)), | 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), |
| 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4))); | 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), |
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 114 | case TextureType::CUBE: | 116 | case TextureType::CUBE: |
| 115 | case TextureType::ARRAY_CUBE: | 117 | case TextureType::ARRAY_CUBE: |
| 116 | throw NotImplementedException("Illegal offset on CUBE sample"); | 118 | throw NotImplementedException("Illegal offset on CUBE sample"); |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp index 8c6384040..cdf5cb5c4 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | |||
| @@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | |||
| 106 | throw NotImplementedException("Invalid texture type {}", type); | 106 | throw NotImplementedException("Invalid texture type {}", type); |
| 107 | } | 107 | } |
| 108 | 108 | ||
| 109 | std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | 109 | IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { |
| 110 | const IR::U32 value1{v.X(reg++)}; | 110 | const IR::U32 value1{v.X(reg++)}; |
| 111 | const IR::U32 value2{v.X(reg++)}; | 111 | const IR::U32 value2{v.X(reg++)}; |
| 112 | const auto getVector = ([&v](const IR::U32& value) { | 112 | const IR::U32 bitsize = v.ir.Imm32(6); |
| 113 | const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) { | ||
| 113 | return v.ir.CompositeConstruct( | 114 | return v.ir.CompositeConstruct( |
| 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true), |
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true)); |
| 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true), | ||
| 117 | v.ir.BitFieldExtract(value, v.ir.Imm32(24), v.ir.Imm32(6), true)); | ||
| 118 | }); | 117 | }); |
| 119 | return {getVector(value1), getVector(value2)}; | 118 | return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16), |
| 119 | getVector(value2, 0), getVector(value2, 16)); | ||
| 120 | } | 120 | } |
| 121 | 121 | ||
| 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, |
| @@ -155,7 +155,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy | |||
| 155 | break; | 155 | break; |
| 156 | } | 156 | } |
| 157 | case OffsetType::PTP: { | 157 | case OffsetType::PTP: { |
| 158 | std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); | 158 | offset2 = MakeOffsetPTP(v, meta_reg); |
| 159 | break; | 159 | break; |
| 160 | } | 160 | } |
| 161 | default: | 161 | default: |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 28060dccf..12159e738 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -355,6 +355,17 @@ void FoldBranchConditional(IR::Inst& inst) { | |||
| 355 | } | 355 | } |
| 356 | } | 356 | } |
| 357 | 357 | ||
| 358 | void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) { | ||
| 359 | for (size_t i = 0; i < amount; i++) { | ||
| 360 | if (!inst.Arg(i).IsConstantContainer()) { | ||
| 361 | return; | ||
| 362 | } | ||
| 363 | } | ||
| 364 | auto info{inst.Flags<IR::CompositeDecoration>()}; | ||
| 365 | info.is_constant = true; | ||
| 366 | inst.SetFlags(info); | ||
| 367 | } | ||
| 368 | |||
| 358 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | 369 | void ConstantPropagation(IR::Block& block, IR::Inst& inst) { |
| 359 | switch (inst.Opcode()) { | 370 | switch (inst.Opcode()) { |
| 360 | case IR::Opcode::GetRegister: | 371 | case IR::Opcode::GetRegister: |
| @@ -380,6 +391,13 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | |||
| 380 | case IR::Opcode::SelectF32: | 391 | case IR::Opcode::SelectF32: |
| 381 | case IR::Opcode::SelectF64: | 392 | case IR::Opcode::SelectF64: |
| 382 | return FoldSelect(inst); | 393 | return FoldSelect(inst); |
| 394 | case IR::Opcode::CompositeConstructU32x2: | ||
| 395 | case IR::Opcode::CompositeConstructF16x2: | ||
| 396 | case IR::Opcode::CompositeConstructF32x2: | ||
| 397 | case IR::Opcode::CompositeConstructF64x2: | ||
| 398 | return FoldConstantComposite(inst, 2); | ||
| 399 | case IR::Opcode::CompositeConstructArrayU32x2: | ||
| 400 | return FoldConstantComposite(inst, 4); | ||
| 383 | case IR::Opcode::FPMul32: | 401 | case IR::Opcode::FPMul32: |
| 384 | return FoldFPMul32(inst); | 402 | return FoldFPMul32(inst); |
| 385 | case IR::Opcode::LogicalAnd: | 403 | case IR::Opcode::LogicalAnd: |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 41550bfc6..64031f49c 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -30,6 +30,7 @@ struct Profile { | |||
| 30 | bool support_fp32_signed_zero_nan_preserve{}; | 30 | bool support_fp32_signed_zero_nan_preserve{}; |
| 31 | bool support_fp64_signed_zero_nan_preserve{}; | 31 | bool support_fp64_signed_zero_nan_preserve{}; |
| 32 | bool support_vote{}; | 32 | bool support_vote{}; |
| 33 | bool support_variadic_ptp{}; | ||
| 33 | bool warp_size_potentially_larger_than_guest{}; | 34 | bool warp_size_potentially_larger_than_guest{}; |
| 34 | 35 | ||
| 35 | // FClamp is broken and OpFMax + OpFMin should be used instead | 36 | // FClamp is broken and OpFMax + OpFMin should be used instead |