Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--  src/shader_recompiler/CMakeLists.txt | 1
-rw-r--r--  src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | 6
-rw-r--r--  src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | 6
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv.cpp | 2
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp | 4
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | 56
-rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_instructions.h | 2
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.cpp | 4
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.h | 2
-rw-r--r--  src/shader_recompiler/frontend/ir/modifiers.h | 2
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | 29
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp | 1
-rw-r--r--  src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | 10
-rw-r--r--  src/shader_recompiler/ir_opt/passes.h | 1
-rw-r--r--  src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp | 79
-rw-r--r--  src/shader_recompiler/profile.h | 1
16 files changed, 147 insertions, 59 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 83b763447..19db17c6d 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC
     ir_opt/rescaling_pass.cpp
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/texture_pass.cpp
+    ir_opt/vendor_workaround_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
     precompiled_headers.h
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index d0e308124..64e7bad75 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& offset, const IR::Value& lod_clamp) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     ScopedRegister dpdx, dpdy, coords;
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     if (multi_component) {
         // Allocate this early to avoid aliasing other registers
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             coords = ScopedRegister{ctx.reg_alloc};
         }
     }
@@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
                 dpdy.reg, derivatives_vec);
         Register final_coord;
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             ctx.Add("MOV.F {}.z,{}.x;"
                     "MOV.F {}.z,{}.y;",
                     dpdx.reg, coord_vec, dpdy.reg, coord_vec);
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index d9872ecc2..6e940bd5a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (sparse_inst) {
         throw NotImplementedException("EmitImageGradient Sparse");
     }
-    if (!offset.IsEmpty() && info.num_derivates <= 2) {
+    if (!offset.IsEmpty() && info.num_derivatives <= 2) {
         throw NotImplementedException("EmitImageGradient offset");
     }
     const auto texture{Texture(ctx, info, index)};
     const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
     if (multi_component) {
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             const auto offset_vec{ctx.var_alloc.Consume(offset)};
             ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
                     coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 34592a01f..0031fa5fb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -407,7 +407,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
         }
         ctx.AddCapability(spv::Capability::DemoteToHelperInvocation);
     }
-    if (info.stores[IR::Attribute::ViewportIndex]) {
+    if (info.stores[IR::Attribute::ViewportIndex] && profile.support_multi_viewport) {
         ctx.AddCapability(spv::Capability::MultiViewport);
     }
     if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 1d77426e0..e5a78a914 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -84,6 +84,10 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         }
         return std::nullopt;
     case IR::Attribute::ViewportIndex:
+        if (!ctx.profile.support_multi_viewport) {
+            LOG_WARNING(Shader, "Ignoring viewport index store on non-supporting driver");
+            return std::nullopt;
+        }
         if (ctx.profile.support_viewport_index_layer_non_geometry ||
             ctx.stage == Shader::Stage::Geometry) {
             return OutAttr{ctx.viewport_index, ctx.U32[1]};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 8decdf399..22ceca19c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -67,22 +67,22 @@ public:
         }
     }
 
-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
-                           Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates)) {
-            throw LogicError("Derivates must be present");
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
+                           u32 num_derivatives, Id offset, Id lod_clamp) {
+        if (!Sirit::ValidId(derivatives)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_x_accum;
         boost::container::static_vector<Id, 3> deriv_y_accum;
-        for (u32 i = 0; i < num_derivates; ++i) {
-            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
-            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+        for (u32 i = 0; i < num_derivatives; ++i) {
+            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2));
+            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1));
         }
-        const Id derivates_X{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
-        const Id derivates_Y{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+        const Id derivatives_X{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+        const Id derivatives_Y{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+        Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -91,26 +91,26 @@ public:
         }
     }
 
-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
                            Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
-            throw LogicError("Derivates must be present");
+        if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_1_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0),
         };
         boost::container::static_vector<Id, 3> deriv_2_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1),
         };
-        const Id derivates_id1{ctx.OpCompositeConstruct(
+        const Id derivatives_id1{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
-        const Id derivates_id2{ctx.OpCompositeConstruct(
+        const Id derivatives_id2{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
+        Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -548,12 +548,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
 }
 
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp) {
+                     Id derivatives, Id offset, Id lod_clamp) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const auto operands =
-        info.num_derivates == 3
-            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
-            : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
+        info.num_derivatives == 3
+            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
+            : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
                             lod_clamp);
     return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a440b557d..7d34575c8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
                             const IR::Value& skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp);
+                     Id derivatives, Id offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
 Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index b7caa4246..49171c470 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1864,11 +1864,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
     return Inst(op, Flags{info}, handle, coords);
 }
 
-Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
                                const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
                                          : Opcode::BindlessImageGradient};
-    return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
+    return Inst(op, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
 }
 
 Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index f3c81dbe1..6c30897f4 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -335,7 +335,7 @@ public:
     [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
                                    const U32& lod, const U32& multisampling, TextureInstInfo info);
     [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
-                                      const Value& derivates, const Value& offset,
+                                      const Value& derivatives, const Value& offset,
                                       const F32& lod_clamp, TextureInstInfo info);
     [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
     void ImageWrite(const Value& handle, const Value& coords, const Value& color,
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 1e9e8c8f5..c20c2401f 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -40,7 +40,7 @@ union TextureInstInfo {
     BitField<21, 1, u32> has_lod_clamp;
     BitField<22, 1, u32> relaxed_precision;
     BitField<23, 2, u32> gather_component;
-    BitField<25, 2, u32> num_derivates;
+    BitField<25, 2, u32> num_derivatives;
     BitField<27, 3, ImageFormat> image_format;
     BitField<30, 1, u32> ndv_is_active;
 };
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
index dd34507bc..4ce3dd0cd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -59,7 +59,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
         BitField<51, 3, IR::Pred> sparse_pred;
         BitField<0, 8, IR::Reg> dest_reg;
         BitField<8, 8, IR::Reg> coord_reg;
-        BitField<20, 8, IR::Reg> derivate_reg;
+        BitField<20, 8, IR::Reg> derivative_reg;
         BitField<28, 3, TextureType> type;
         BitField<31, 4, u64> mask;
         BitField<36, 13, u64> cbuf_offset;
@@ -71,7 +71,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     }
 
     IR::Value coords;
-    u32 num_derivates{};
+    u32 num_derivatives{};
     IR::Reg base_reg{txd.coord_reg};
     IR::Reg last_reg;
     IR::Value handle;
@@ -90,42 +90,42 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     switch (txd.type) {
     case TextureType::_1D: {
         coords = v.F(base_reg);
-        num_derivates = 1;
+        num_derivatives = 1;
         last_reg = base_reg + 1;
         break;
     }
     case TextureType::ARRAY_1D: {
         last_reg = base_reg + 1;
         coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
-        num_derivates = 1;
+        num_derivatives = 1;
         break;
     }
     case TextureType::_2D: {
         last_reg = base_reg + 2;
         coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
-        num_derivates = 2;
+        num_derivatives = 2;
        break;
     }
     case TextureType::ARRAY_2D: {
         last_reg = base_reg + 2;
         coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
-        num_derivates = 2;
+        num_derivatives = 2;
         break;
     }
     default:
         throw NotImplementedException("Invalid texture type");
     }
 
-    const IR::Reg derivate_reg{txd.derivate_reg};
-    IR::Value derivates;
-    switch (num_derivates) {
+    const IR::Reg derivative_reg{txd.derivative_reg};
+    IR::Value derivatives;
+    switch (num_derivatives) {
     case 1: {
-        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+        derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1));
         break;
     }
     case 2: {
-        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
-                                            v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+        derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1),
+                                              v.F(derivative_reg + 2), v.F(derivative_reg + 3));
         break;
     }
     default:
@@ -150,9 +150,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
 
     IR::TextureInstInfo info{};
     info.type.Assign(GetType(txd.type));
-    info.num_derivates.Assign(num_derivates);
+    info.num_derivatives.Assign(num_derivatives);
     info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
-    const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+    const IR::Value sample{
+        v.ir.ImageGradient(handle, coords, derivatives, offset, lod_clamp, info)};
 
     IR::Reg dest_reg{txd.dest_reg};
     for (size_t element = 0; element < 4; ++element) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 928b35561..8fac6bad3 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     }
     Optimization::CollectShaderInfoPass(env, program);
     Optimization::LayerPass(program, host_info);
+    Optimization::VendorWorkaroundPass(program);
 
     CollectInterpolationInfo(env, program);
     AddNVNStorageBuffers(program);
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
     }
 }
 
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
     const IR::Value lhs_value{inst.Arg(0)};
     const IR::Value rhs_value{inst.Arg(1)};
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
-    if (FoldDerivateYFromCorrection(inst)) {
+    if (FoldDerivativeYFromCorrection(inst)) {
        return;
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
     if (coord.IsImmediate()) {
         return false;
     }
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
-        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+        if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
     }
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
                                                     results_matrix[1][1], results_matrix[1][2]);
     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
-    info.num_derivates.Assign(3);
+    info.num_derivatives.Assign(3);
     IR::Value new_gradient_instruction =
         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..d4d5285e5 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
 void VerificationPass(const IR::Program& program);
 
 // Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+    /*
+     * Workaround for an NVIDIA bug seen in Super Mario RPG
+     *
+     * We are looking for this pattern:
+     * %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     * %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+     * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     * %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     * %result = IAdd32 %lhs_shl, %rhs_bfe
+     *
+     * And replacing the IAdd32 with a BitwiseOr32
+     * %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+     *
+     */
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_bfe) {
+        return;
+    }
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
+        return;
+    }
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+        rhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
+        return;
+    }
+    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe) {
+        return;
+    }
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::IAdd32:
+                AddingByteSwapsWorkaround(*block, inst);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 38d820db2..a9de9f4a9 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
     bool support_gl_sparse_textures{};
     bool support_gl_derivative_control{};
     bool support_scaled_attributes{};
+    bool support_multi_viewport{};
 
     bool warp_size_potentially_larger_than_guest{};
 