diff options
Diffstat (limited to 'src/shader_recompiler')
16 files changed, 147 insertions, 59 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 83b763447..19db17c6d 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC | |||
| 231 | ir_opt/rescaling_pass.cpp | 231 | ir_opt/rescaling_pass.cpp |
| 232 | ir_opt/ssa_rewrite_pass.cpp | 232 | ir_opt/ssa_rewrite_pass.cpp |
| 233 | ir_opt/texture_pass.cpp | 233 | ir_opt/texture_pass.cpp |
| 234 | ir_opt/vendor_workaround_pass.cpp | ||
| 234 | ir_opt/verification_pass.cpp | 235 | ir_opt/verification_pass.cpp |
| 235 | object_pool.h | 236 | object_pool.h |
| 236 | precompiled_headers.h | 237 | precompiled_headers.h |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index d0e308124..64e7bad75 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 559 | const IR::Value& offset, const IR::Value& lod_clamp) { | 559 | const IR::Value& offset, const IR::Value& lod_clamp) { |
| 560 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | 560 | const auto info{inst.Flags<IR::TextureInstInfo>()}; |
| 561 | ScopedRegister dpdx, dpdy, coords; | 561 | ScopedRegister dpdx, dpdy, coords; |
| 562 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | 562 | const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp}; |
| 563 | if (multi_component) { | 563 | if (multi_component) { |
| 564 | // Allocate this early to avoid aliasing other registers | 564 | // Allocate this early to avoid aliasing other registers |
| 565 | dpdx = ScopedRegister{ctx.reg_alloc}; | 565 | dpdx = ScopedRegister{ctx.reg_alloc}; |
| 566 | dpdy = ScopedRegister{ctx.reg_alloc}; | 566 | dpdy = ScopedRegister{ctx.reg_alloc}; |
| 567 | if (info.num_derivates >= 3) { | 567 | if (info.num_derivatives >= 3) { |
| 568 | coords = ScopedRegister{ctx.reg_alloc}; | 568 | coords = ScopedRegister{ctx.reg_alloc}; |
| 569 | } | 569 | } |
| 570 | } | 570 | } |
| @@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 584 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | 584 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, |
| 585 | dpdy.reg, derivatives_vec); | 585 | dpdy.reg, derivatives_vec); |
| 586 | Register final_coord; | 586 | Register final_coord; |
| 587 | if (info.num_derivates >= 3) { | 587 | if (info.num_derivatives >= 3) { |
| 588 | ctx.Add("MOV.F {}.z,{}.x;" | 588 | ctx.Add("MOV.F {}.z,{}.x;" |
| 589 | "MOV.F {}.z,{}.y;", | 589 | "MOV.F {}.z,{}.y;", |
| 590 | dpdx.reg, coord_vec, dpdy.reg, coord_vec); | 590 | dpdx.reg, coord_vec, dpdy.reg, coord_vec); |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index d9872ecc2..6e940bd5a 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 548 | if (sparse_inst) { | 548 | if (sparse_inst) { |
| 549 | throw NotImplementedException("EmitImageGradient Sparse"); | 549 | throw NotImplementedException("EmitImageGradient Sparse"); |
| 550 | } | 550 | } |
| 551 | if (!offset.IsEmpty() && info.num_derivates <= 2) { | 551 | if (!offset.IsEmpty() && info.num_derivatives <= 2) { |
| 552 | throw NotImplementedException("EmitImageGradient offset"); | 552 | throw NotImplementedException("EmitImageGradient offset"); |
| 553 | } | 553 | } |
| 554 | const auto texture{Texture(ctx, info, index)}; | 554 | const auto texture{Texture(ctx, info, index)}; |
| 555 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; | 555 | const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)}; |
| 556 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | 556 | const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp}; |
| 557 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; | 557 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; |
| 558 | if (multi_component) { | 558 | if (multi_component) { |
| 559 | if (info.num_derivates >= 3) { | 559 | if (info.num_derivatives >= 3) { |
| 560 | const auto offset_vec{ctx.var_alloc.Consume(offset)}; | 560 | const auto offset_vec{ctx.var_alloc.Consume(offset)}; |
| 561 | ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture, | 561 | ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture, |
| 562 | coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); | 562 | coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 34592a01f..0031fa5fb 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -407,7 +407,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | |||
| 407 | } | 407 | } |
| 408 | ctx.AddCapability(spv::Capability::DemoteToHelperInvocation); | 408 | ctx.AddCapability(spv::Capability::DemoteToHelperInvocation); |
| 409 | } | 409 | } |
| 410 | if (info.stores[IR::Attribute::ViewportIndex]) { | 410 | if (info.stores[IR::Attribute::ViewportIndex] && profile.support_multi_viewport) { |
| 411 | ctx.AddCapability(spv::Capability::MultiViewport); | 411 | ctx.AddCapability(spv::Capability::MultiViewport); |
| 412 | } | 412 | } |
| 413 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { | 413 | if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 1d77426e0..e5a78a914 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | |||
| @@ -84,6 +84,10 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | |||
| 84 | } | 84 | } |
| 85 | return std::nullopt; | 85 | return std::nullopt; |
| 86 | case IR::Attribute::ViewportIndex: | 86 | case IR::Attribute::ViewportIndex: |
| 87 | if (!ctx.profile.support_multi_viewport) { | ||
| 88 | LOG_WARNING(Shader, "Ignoring viewport index store on non-supporting driver"); | ||
| 89 | return std::nullopt; | ||
| 90 | } | ||
| 87 | if (ctx.profile.support_viewport_index_layer_non_geometry || | 91 | if (ctx.profile.support_viewport_index_layer_non_geometry || |
| 88 | ctx.stage == Shader::Stage::Geometry) { | 92 | ctx.stage == Shader::Stage::Geometry) { |
| 89 | return OutAttr{ctx.viewport_index, ctx.U32[1]}; | 93 | return OutAttr{ctx.viewport_index, ctx.U32[1]}; |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 8decdf399..22ceca19c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -67,22 +67,22 @@ public: | |||
| 67 | } | 67 | } |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates, | 70 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives, |
| 71 | Id offset, Id lod_clamp) { | 71 | u32 num_derivatives, Id offset, Id lod_clamp) { |
| 72 | if (!Sirit::ValidId(derivates)) { | 72 | if (!Sirit::ValidId(derivatives)) { |
| 73 | throw LogicError("Derivates must be present"); | 73 | throw LogicError("Derivatives must be present"); |
| 74 | } | 74 | } |
| 75 | boost::container::static_vector<Id, 3> deriv_x_accum; | 75 | boost::container::static_vector<Id, 3> deriv_x_accum; |
| 76 | boost::container::static_vector<Id, 3> deriv_y_accum; | 76 | boost::container::static_vector<Id, 3> deriv_y_accum; |
| 77 | for (u32 i = 0; i < num_derivates; ++i) { | 77 | for (u32 i = 0; i < num_derivatives; ++i) { |
| 78 | deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2)); | 78 | deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2)); |
| 79 | deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1)); | 79 | deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1)); |
| 80 | } | 80 | } |
| 81 | const Id derivates_X{ctx.OpCompositeConstruct( | 81 | const Id derivatives_X{ctx.OpCompositeConstruct( |
| 82 | ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; | 82 | ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})}; |
| 83 | const Id derivates_Y{ctx.OpCompositeConstruct( | 83 | const Id derivatives_Y{ctx.OpCompositeConstruct( |
| 84 | ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; | 84 | ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})}; |
| 85 | Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y); | 85 | Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y); |
| 86 | if (Sirit::ValidId(offset)) { | 86 | if (Sirit::ValidId(offset)) { |
| 87 | Add(spv::ImageOperandsMask::Offset, offset); | 87 | Add(spv::ImageOperandsMask::Offset, offset); |
| 88 | } | 88 | } |
| @@ -91,26 +91,26 @@ public: | |||
| 91 | } | 91 | } |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2, | 94 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2, |
| 95 | Id offset, Id lod_clamp) { | 95 | Id offset, Id lod_clamp) { |
| 96 | if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) { | 96 | if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) { |
| 97 | throw LogicError("Derivates must be present"); | 97 | throw LogicError("Derivatives must be present"); |
| 98 | } | 98 | } |
| 99 | boost::container::static_vector<Id, 3> deriv_1_accum{ | 99 | boost::container::static_vector<Id, 3> deriv_1_accum{ |
| 100 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0), | 100 | ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0), |
| 101 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2), | 101 | ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2), |
| 102 | ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0), | 102 | ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0), |
| 103 | }; | 103 | }; |
| 104 | boost::container::static_vector<Id, 3> deriv_2_accum{ | 104 | boost::container::static_vector<Id, 3> deriv_2_accum{ |
| 105 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1), | 105 | ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1), |
| 106 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3), | 106 | ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3), |
| 107 | ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1), | 107 | ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1), |
| 108 | }; | 108 | }; |
| 109 | const Id derivates_id1{ctx.OpCompositeConstruct( | 109 | const Id derivatives_id1{ctx.OpCompositeConstruct( |
| 110 | ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})}; | 110 | ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})}; |
| 111 | const Id derivates_id2{ctx.OpCompositeConstruct( | 111 | const Id derivatives_id2{ctx.OpCompositeConstruct( |
| 112 | ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})}; | 112 | ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})}; |
| 113 | Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2); | 113 | Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2); |
| 114 | if (Sirit::ValidId(offset)) { | 114 | if (Sirit::ValidId(offset)) { |
| 115 | Add(spv::ImageOperandsMask::Offset, offset); | 115 | Add(spv::ImageOperandsMask::Offset, offset); |
| 116 | } | 116 | } |
| @@ -548,12 +548,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I | |||
| 548 | } | 548 | } |
| 549 | 549 | ||
| 550 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 550 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 551 | Id derivates, Id offset, Id lod_clamp) { | 551 | Id derivatives, Id offset, Id lod_clamp) { |
| 552 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 552 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 553 | const auto operands = | 553 | const auto operands = |
| 554 | info.num_derivates == 3 | 554 | info.num_derivatives == 3 |
| 555 | ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp) | 555 | ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp) |
| 556 | : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, | 556 | : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset, |
| 557 | lod_clamp); | 557 | lod_clamp); |
| 558 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | 558 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, |
| 559 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | 559 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index a440b557d..7d34575c8 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | |||
| @@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i | |||
| 543 | const IR::Value& skip_mips); | 543 | const IR::Value& skip_mips); |
| 544 | Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | 544 | Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); |
| 545 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 545 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 546 | Id derivates, Id offset, Id lod_clamp); | 546 | Id derivatives, Id offset, Id lod_clamp); |
| 547 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); | 547 | Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords); |
| 548 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); | 548 | void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color); |
| 549 | Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); | 549 | Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index); |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index b7caa4246..49171c470 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -1864,11 +1864,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture | |||
| 1864 | return Inst(op, Flags{info}, handle, coords); | 1864 | return Inst(op, Flags{info}, handle, coords); |
| 1865 | } | 1865 | } |
| 1866 | 1866 | ||
| 1867 | Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates, | 1867 | Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives, |
| 1868 | const Value& offset, const F32& lod_clamp, TextureInstInfo info) { | 1868 | const Value& offset, const F32& lod_clamp, TextureInstInfo info) { |
| 1869 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient | 1869 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient |
| 1870 | : Opcode::BindlessImageGradient}; | 1870 | : Opcode::BindlessImageGradient}; |
| 1871 | return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp); | 1871 | return Inst(op, Flags{info}, handle, coords, derivatives, offset, lod_clamp); |
| 1872 | } | 1872 | } |
| 1873 | 1873 | ||
| 1874 | Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { | 1874 | Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) { |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index f3c81dbe1..6c30897f4 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -335,7 +335,7 @@ public: | |||
| 335 | [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, | 335 | [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset, |
| 336 | const U32& lod, const U32& multisampling, TextureInstInfo info); | 336 | const U32& lod, const U32& multisampling, TextureInstInfo info); |
| 337 | [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, | 337 | [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords, |
| 338 | const Value& derivates, const Value& offset, | 338 | const Value& derivatives, const Value& offset, |
| 339 | const F32& lod_clamp, TextureInstInfo info); | 339 | const F32& lod_clamp, TextureInstInfo info); |
| 340 | [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); | 340 | [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info); |
| 341 | void ImageWrite(const Value& handle, const Value& coords, const Value& color, | 341 | void ImageWrite(const Value& handle, const Value& coords, const Value& color, |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 1e9e8c8f5..c20c2401f 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -40,7 +40,7 @@ union TextureInstInfo { | |||
| 40 | BitField<21, 1, u32> has_lod_clamp; | 40 | BitField<21, 1, u32> has_lod_clamp; |
| 41 | BitField<22, 1, u32> relaxed_precision; | 41 | BitField<22, 1, u32> relaxed_precision; |
| 42 | BitField<23, 2, u32> gather_component; | 42 | BitField<23, 2, u32> gather_component; |
| 43 | BitField<25, 2, u32> num_derivates; | 43 | BitField<25, 2, u32> num_derivatives; |
| 44 | BitField<27, 3, ImageFormat> image_format; | 44 | BitField<27, 3, ImageFormat> image_format; |
| 45 | BitField<30, 1, u32> ndv_is_active; | 45 | BitField<30, 1, u32> ndv_is_active; |
| 46 | }; | 46 | }; |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp index dd34507bc..4ce3dd0cd 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | |||
| @@ -59,7 +59,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 59 | BitField<51, 3, IR::Pred> sparse_pred; | 59 | BitField<51, 3, IR::Pred> sparse_pred; |
| 60 | BitField<0, 8, IR::Reg> dest_reg; | 60 | BitField<0, 8, IR::Reg> dest_reg; |
| 61 | BitField<8, 8, IR::Reg> coord_reg; | 61 | BitField<8, 8, IR::Reg> coord_reg; |
| 62 | BitField<20, 8, IR::Reg> derivate_reg; | 62 | BitField<20, 8, IR::Reg> derivative_reg; |
| 63 | BitField<28, 3, TextureType> type; | 63 | BitField<28, 3, TextureType> type; |
| 64 | BitField<31, 4, u64> mask; | 64 | BitField<31, 4, u64> mask; |
| 65 | BitField<36, 13, u64> cbuf_offset; | 65 | BitField<36, 13, u64> cbuf_offset; |
| @@ -71,7 +71,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | IR::Value coords; | 73 | IR::Value coords; |
| 74 | u32 num_derivates{}; | 74 | u32 num_derivatives{}; |
| 75 | IR::Reg base_reg{txd.coord_reg}; | 75 | IR::Reg base_reg{txd.coord_reg}; |
| 76 | IR::Reg last_reg; | 76 | IR::Reg last_reg; |
| 77 | IR::Value handle; | 77 | IR::Value handle; |
| @@ -90,42 +90,42 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 90 | switch (txd.type) { | 90 | switch (txd.type) { |
| 91 | case TextureType::_1D: { | 91 | case TextureType::_1D: { |
| 92 | coords = v.F(base_reg); | 92 | coords = v.F(base_reg); |
| 93 | num_derivates = 1; | 93 | num_derivatives = 1; |
| 94 | last_reg = base_reg + 1; | 94 | last_reg = base_reg + 1; |
| 95 | break; | 95 | break; |
| 96 | } | 96 | } |
| 97 | case TextureType::ARRAY_1D: { | 97 | case TextureType::ARRAY_1D: { |
| 98 | last_reg = base_reg + 1; | 98 | last_reg = base_reg + 1; |
| 99 | coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); | 99 | coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); |
| 100 | num_derivates = 1; | 100 | num_derivatives = 1; |
| 101 | break; | 101 | break; |
| 102 | } | 102 | } |
| 103 | case TextureType::_2D: { | 103 | case TextureType::_2D: { |
| 104 | last_reg = base_reg + 2; | 104 | last_reg = base_reg + 2; |
| 105 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); | 105 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); |
| 106 | num_derivates = 2; | 106 | num_derivatives = 2; |
| 107 | break; | 107 | break; |
| 108 | } | 108 | } |
| 109 | case TextureType::ARRAY_2D: { | 109 | case TextureType::ARRAY_2D: { |
| 110 | last_reg = base_reg + 2; | 110 | last_reg = base_reg + 2; |
| 111 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); | 111 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); |
| 112 | num_derivates = 2; | 112 | num_derivatives = 2; |
| 113 | break; | 113 | break; |
| 114 | } | 114 | } |
| 115 | default: | 115 | default: |
| 116 | throw NotImplementedException("Invalid texture type"); | 116 | throw NotImplementedException("Invalid texture type"); |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | const IR::Reg derivate_reg{txd.derivate_reg}; | 119 | const IR::Reg derivative_reg{txd.derivative_reg}; |
| 120 | IR::Value derivates; | 120 | IR::Value derivatives; |
| 121 | switch (num_derivates) { | 121 | switch (num_derivatives) { |
| 122 | case 1: { | 122 | case 1: { |
| 123 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); | 123 | derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1)); |
| 124 | break; | 124 | break; |
| 125 | } | 125 | } |
| 126 | case 2: { | 126 | case 2: { |
| 127 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), | 127 | derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1), |
| 128 | v.F(derivate_reg + 2), v.F(derivate_reg + 3)); | 128 | v.F(derivative_reg + 2), v.F(derivative_reg + 3)); |
| 129 | break; | 129 | break; |
| 130 | } | 130 | } |
| 131 | default: | 131 | default: |
| @@ -150,9 +150,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | |||
| 150 | 150 | ||
| 151 | IR::TextureInstInfo info{}; | 151 | IR::TextureInstInfo info{}; |
| 152 | info.type.Assign(GetType(txd.type)); | 152 | info.type.Assign(GetType(txd.type)); |
| 153 | info.num_derivates.Assign(num_derivates); | 153 | info.num_derivatives.Assign(num_derivatives); |
| 154 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); | 154 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); |
| 155 | const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; | 155 | const IR::Value sample{ |
| 156 | v.ir.ImageGradient(handle, coords, derivatives, offset, lod_clamp, info)}; | ||
| 156 | 157 | ||
| 157 | IR::Reg dest_reg{txd.dest_reg}; | 158 | IR::Reg dest_reg{txd.dest_reg}; |
| 158 | for (size_t element = 0; element < 4; ++element) { | 159 | for (size_t element = 0; element < 4; ++element) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp index 928b35561..8fac6bad3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 310 | } | 310 | } |
| 311 | Optimization::CollectShaderInfoPass(env, program); | 311 | Optimization::CollectShaderInfoPass(env, program); |
| 312 | Optimization::LayerPass(program, host_info); | 312 | Optimization::LayerPass(program, host_info); |
| 313 | Optimization::VendorWorkaroundPass(program); | ||
| 313 | 314 | ||
| 314 | CollectInterpolationInfo(env, program); | 315 | CollectInterpolationInfo(env, program); |
| 315 | AddNVNStorageBuffers(program); | 316 | AddNVNStorageBuffers(program); |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index f46e55122..ec12c843a 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) { | |||
| 428 | } | 428 | } |
| 429 | } | 429 | } |
| 430 | 430 | ||
| 431 | bool FoldDerivateYFromCorrection(IR::Inst& inst) { | 431 | bool FoldDerivativeYFromCorrection(IR::Inst& inst) { |
| 432 | const IR::Value lhs_value{inst.Arg(0)}; | 432 | const IR::Value lhs_value{inst.Arg(0)}; |
| 433 | const IR::Value rhs_value{inst.Arg(1)}; | 433 | const IR::Value rhs_value{inst.Arg(1)}; |
| 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| @@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) { | |||
| 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { |
| 465 | return; | 465 | return; |
| 466 | } | 466 | } |
| 467 | if (FoldDerivateYFromCorrection(inst)) { | 467 | if (FoldDerivativeYFromCorrection(inst)) { |
| 468 | return; | 468 | return; |
| 469 | } | 469 | } |
| 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| @@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 699 | } | 699 | } |
| 700 | } | 700 | } |
| 701 | 701 | ||
| 702 | bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) { | 702 | bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) { |
| 703 | if (coord.IsImmediate()) { | 703 | if (coord.IsImmediate()) { |
| 704 | return false; | 704 | return false; |
| 705 | } | 705 | } |
| @@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | |||
| 834 | IR::Inst* const inst2 = coords.InstRecursive(); | 834 | IR::Inst* const inst2 = coords.InstRecursive(); |
| 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; | 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; |
| 836 | for (size_t i = 0; i < 3; i++) { | 836 | for (size_t i = 0; i < 3; i++) { |
| 837 | if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) { | 837 | if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) { |
| 838 | return; | 838 | return; |
| 839 | } | 839 | } |
| 840 | } | 840 | } |
| @@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | |||
| 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], | 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], |
| 853 | results_matrix[1][1], results_matrix[1][2]); | 853 | results_matrix[1][1], results_matrix[1][2]); |
| 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); | 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); |
| 855 | info.num_derivates.Assign(3); | 855 | info.num_derivatives.Assign(3); |
| 856 | IR::Value new_gradient_instruction = | 856 | IR::Value new_gradient_instruction = |
| 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); | 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); |
| 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); | 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); |
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 629d18fa1..d4d5285e5 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program); | |||
| 26 | void PositionPass(Environment& env, IR::Program& program); | 26 | void PositionPass(Environment& env, IR::Program& program); |
| 27 | void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); | 27 | void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info); |
| 28 | void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); | 28 | void LayerPass(IR::Program& program, const HostTranslateInfo& host_info); |
| 29 | void VendorWorkaroundPass(IR::Program& program); | ||
| 29 | void VerificationPass(const IR::Program& program); | 30 | void VerificationPass(const IR::Program& program); |
| 30 | 31 | ||
| 31 | // Dual Vertex | 32 | // Dual Vertex |
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp new file mode 100644 index 000000000..08c658cb8 --- /dev/null +++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 5 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 7 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 8 | |||
| 9 | namespace Shader::Optimization { | ||
| 10 | |||
| 11 | namespace { | ||
| 12 | void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) { | ||
| 13 | /* | ||
| 14 | * Workaround for an NVIDIA bug seen in Super Mario RPG | ||
| 15 | * | ||
| 16 | * We are looking for this pattern: | ||
| 17 | * %lhs_bfe = BitFieldUExtract %factor_a, #0, #16 | ||
| 18 | * %lhs_mul = IMul32 %lhs_bfe, %factor_b // optional, the shift may take %lhs_bfe directly | ||
| 19 | * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 | ||
| 20 | * %rhs_bfe = BitFieldUExtract %factor_a, #16, #16 | ||
| 21 | * %result = IAdd32 %lhs_shl, %rhs_bfe | ||
| 22 | * | ||
| 23 | * And replacing the IAdd32 with a BitwiseOr32 (same result, no set bits overlap) | ||
| 24 | * %result = BitwiseOr32 %lhs_shl, %rhs_bfe | ||
| 25 | * | ||
| 26 | */ | ||
| 27 | IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()}; | ||
| 28 | IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()}; | ||
| 29 | if (!lhs_shl || !rhs_bfe) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || | ||
| 33 | lhs_shl->Arg(1) != IR::Value{16U}) { | ||
| 34 | return; | ||
| 35 | } | ||
| 36 | if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} || | ||
| 37 | rhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()}; | ||
| 41 | if (!lhs_mul) { | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract}; | ||
| 45 | if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 && | ||
| 46 | lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 47 | return; | ||
| 48 | } | ||
| 49 | IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()}; | ||
| 50 | if (!lhs_bfe) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { | ||
| 54 | return; | ||
| 55 | } | ||
| 56 | if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) { | ||
| 57 | return; | ||
| 58 | } | ||
| 59 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 60 | inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)})); | ||
| 61 | } | ||
| 62 | |||
| 63 | } // Anonymous namespace | ||
| 64 | |||
| 65 | void VendorWorkaroundPass(IR::Program& program) { | ||
| 66 | for (IR::Block* const block : program.post_order_blocks) { | ||
| 67 | for (IR::Inst& inst : block->Instructions()) { | ||
| 68 | switch (inst.GetOpcode()) { | ||
| 69 | case IR::Opcode::IAdd32: | ||
| 70 | AddingByteSwapsWorkaround(*block, inst); | ||
| 71 | break; | ||
| 72 | default: | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 38d820db2..a9de9f4a9 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -43,6 +43,7 @@ struct Profile { | |||
| 43 | bool support_gl_sparse_textures{}; | 43 | bool support_gl_sparse_textures{}; |
| 44 | bool support_gl_derivative_control{}; | 44 | bool support_gl_derivative_control{}; |
| 45 | bool support_scaled_attributes{}; | 45 | bool support_scaled_attributes{}; |
| 46 | bool support_multi_viewport{}; | ||
| 46 | 47 | ||
| 47 | bool warp_size_potentially_larger_than_guest{}; | 48 | bool warp_size_potentially_larger_than_guest{}; |
| 48 | 49 | ||