summaryrefslogtreecommitdiff
path: root/src/shader_recompiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler')
-rw-r--r--src/shader_recompiler/CMakeLists.txt2
-rw-r--r--src/shader_recompiler/backend/glasm/emit_glasm_image.cpp23
-rw-r--r--src/shader_recompiler/backend/glsl/emit_glsl_image.cpp8
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_image.cpp39
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp10
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.h1
-rw-r--r--src/shader_recompiler/frontend/ir/modifiers.h1
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp2
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp6
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp3
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp4
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp1
-rw-r--r--src/shader_recompiler/ir_opt/constant_propagation_pass.cpp227
13 files changed, 304 insertions, 23 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 07e75f9d8..83b763447 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -245,8 +245,6 @@ target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit)
245 245
246if (MSVC) 246if (MSVC)
247 target_compile_options(shader_recompiler PRIVATE 247 target_compile_options(shader_recompiler PRIVATE
248 /W4
249
250 /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data 248 /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data
251 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data 249 /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
252 /we4800 # Implicit conversion from 'type' to bool. Possible information loss 250 /we4800 # Implicit conversion from 'type' to bool. Possible information loss
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index 85ee27333..d0e308124 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
558 const IR::Value& coord, const IR::Value& derivatives, 558 const IR::Value& coord, const IR::Value& derivatives,
559 const IR::Value& offset, const IR::Value& lod_clamp) { 559 const IR::Value& offset, const IR::Value& lod_clamp) {
560 const auto info{inst.Flags<IR::TextureInstInfo>()}; 560 const auto info{inst.Flags<IR::TextureInstInfo>()};
561 ScopedRegister dpdx, dpdy; 561 ScopedRegister dpdx, dpdy, coords;
562 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; 562 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
563 if (multi_component) { 563 if (multi_component) {
564 // Allocate this early to avoid aliasing other registers 564 // Allocate this early to avoid aliasing other registers
565 dpdx = ScopedRegister{ctx.reg_alloc}; 565 dpdx = ScopedRegister{ctx.reg_alloc};
566 dpdy = ScopedRegister{ctx.reg_alloc}; 566 dpdy = ScopedRegister{ctx.reg_alloc};
567 if (info.num_derivates >= 3) {
568 coords = ScopedRegister{ctx.reg_alloc};
569 }
567 } 570 }
568 const auto sparse_inst{PrepareSparse(inst)}; 571 const auto sparse_inst{PrepareSparse(inst)};
569 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; 572 const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
@@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
580 "MOV.F {}.y,{}.w;", 583 "MOV.F {}.y,{}.w;",
581 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, 584 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
582 dpdy.reg, derivatives_vec); 585 dpdy.reg, derivatives_vec);
586 Register final_coord;
587 if (info.num_derivates >= 3) {
588 ctx.Add("MOV.F {}.z,{}.x;"
589 "MOV.F {}.z,{}.y;",
590 dpdx.reg, coord_vec, dpdy.reg, coord_vec);
591 ctx.Add("MOV.F {}.x,0;"
592 "MOV.F {}.y,0;"
593 "MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg);
594 final_coord = coords.reg;
595 } else {
596 final_coord = coord_vec;
597 }
583 if (info.has_lod_clamp) { 598 if (info.has_lod_clamp) {
584 const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; 599 const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
585 ctx.Add("MOV.F {}.w,{};" 600 ctx.Add("MOV.F {}.w,{};"
586 "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", 601 "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
587 dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, 602 dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg,
588 texture, type, offset_vec); 603 texture, type, offset_vec);
589 } else { 604 } else {
590 ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, 605 ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg,
591 texture, type, offset_vec); 606 dpdy.reg, texture, type, offset_vec);
592 } 607 }
593 } else { 608 } else {
594 ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, 609 ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index 418505475..d9872ecc2 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
548 if (sparse_inst) { 548 if (sparse_inst) {
549 throw NotImplementedException("EmitImageGradient Sparse"); 549 throw NotImplementedException("EmitImageGradient Sparse");
550 } 550 }
551 if (!offset.IsEmpty()) { 551 if (!offset.IsEmpty() && info.num_derivates <= 2) {
552 throw NotImplementedException("EmitImageGradient offset"); 552 throw NotImplementedException("EmitImageGradient offset");
553 } 553 }
554 const auto texture{Texture(ctx, info, index)}; 554 const auto texture{Texture(ctx, info, index)};
@@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
556 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; 556 const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
557 const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; 557 const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
558 if (multi_component) { 558 if (multi_component) {
559 if (info.num_derivates >= 3) {
560 const auto offset_vec{ctx.var_alloc.Consume(offset)};
561 ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
562 coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
563 return;
564 }
559 ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, 565 ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
560 derivatives_vec, derivatives_vec); 566 derivatives_vec, derivatives_vec);
561 } else { 567 } else {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 7d901c04b..8decdf399 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -91,6 +91,34 @@ public:
91 } 91 }
92 } 92 }
93 93
94 explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
95 Id offset, Id lod_clamp) {
96 if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
97 throw LogicError("Derivates must be present");
98 }
99 boost::container::static_vector<Id, 3> deriv_1_accum{
100 ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
101 ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
102 ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
103 };
104 boost::container::static_vector<Id, 3> deriv_2_accum{
105 ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
106 ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
107 ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
108 };
109 const Id derivates_id1{ctx.OpCompositeConstruct(
110 ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
111 const Id derivates_id2{ctx.OpCompositeConstruct(
112 ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
113 Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
114 if (Sirit::ValidId(offset)) {
115 Add(spv::ImageOperandsMask::Offset, offset);
116 }
117 if (has_lod_clamp) {
118 Add(spv::ImageOperandsMask::MinLod, lod_clamp);
119 }
120 }
121
94 std::span<const Id> Span() const noexcept { 122 std::span<const Id> Span() const noexcept {
95 return std::span{operands.data(), operands.size()}; 123 return std::span{operands.data(), operands.size()};
96 } 124 }
@@ -176,9 +204,7 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind
176 if (def.count > 1) { 204 if (def.count > 1) {
177 throw NotImplementedException("Indirect texture sample"); 205 throw NotImplementedException("Indirect texture sample");
178 } 206 }
179 const Id sampler_id{def.id}; 207 return ctx.OpLoad(ctx.image_buffer_type, def.id);
180 const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)};
181 return ctx.OpImage(ctx.image_buffer_type, id);
182 } else { 208 } else {
183 const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; 209 const TextureDefinition& def{ctx.textures.at(info.descriptor_index)};
184 if (def.count > 1) { 210 if (def.count > 1) {
@@ -524,8 +550,11 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
524Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, 550Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
525 Id derivates, Id offset, Id lod_clamp) { 551 Id derivates, Id offset, Id lod_clamp) {
526 const auto info{inst->Flags<IR::TextureInstInfo>()}; 552 const auto info{inst->Flags<IR::TextureInstInfo>()};
527 const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, 553 const auto operands =
528 offset, lod_clamp); 554 info.num_derivates == 3
555 ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
556 : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
557 lod_clamp);
529 return Emit(&EmitContext::OpImageSparseSampleExplicitLod, 558 return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
530 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], 559 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
531 Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); 560 Texture(ctx, info, index), coords, operands.Mask(), operands.Span());
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index bec5db173..72f69b7aa 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -74,6 +74,11 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
74 throw InvalidArgument("Invalid image format {}", format); 74 throw InvalidArgument("Invalid image format {}", format);
75} 75}
76 76
77spv::ImageFormat GetImageFormatForBuffer(ImageFormat format) {
78 const auto spv_format = GetImageFormat(format);
79 return spv_format == spv::ImageFormat::Unknown ? spv::ImageFormat::R32ui : spv_format;
80}
81
77Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { 82Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
78 const spv::ImageFormat format{GetImageFormat(desc.format)}; 83 const spv::ImageFormat format{GetImageFormat(desc.format)};
79 const Id type{ctx.U32[1]}; 84 const Id type{ctx.U32[1]};
@@ -1242,9 +1247,8 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
1242 } 1247 }
1243 const spv::ImageFormat format{spv::ImageFormat::Unknown}; 1248 const spv::ImageFormat format{spv::ImageFormat::Unknown};
1244 image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); 1249 image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format);
1245 sampled_texture_buffer_type = TypeSampledImage(image_buffer_type);
1246 1250
1247 const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; 1251 const Id type{TypePointer(spv::StorageClass::UniformConstant, image_buffer_type)};
1248 texture_buffers.reserve(info.texture_buffer_descriptors.size()); 1252 texture_buffers.reserve(info.texture_buffer_descriptors.size());
1249 for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { 1253 for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) {
1250 if (desc.count != 1) { 1254 if (desc.count != 1) {
@@ -1271,7 +1275,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) {
1271 if (desc.count != 1) { 1275 if (desc.count != 1) {
1272 throw NotImplementedException("Array of image buffers"); 1276 throw NotImplementedException("Array of image buffers");
1273 } 1277 }
1274 const spv::ImageFormat format{GetImageFormat(desc.format)}; 1278 const spv::ImageFormat format{GetImageFormatForBuffer(desc.format)};
1275 const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; 1279 const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)};
1276 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; 1280 const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
1277 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; 1281 const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index e63330f11..7c49fd504 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -206,7 +206,6 @@ public:
206 Id output_u32{}; 206 Id output_u32{};
207 207
208 Id image_buffer_type{}; 208 Id image_buffer_type{};
209 Id sampled_texture_buffer_type{};
210 Id image_u32{}; 209 Id image_u32{};
211 210
212 std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; 211 std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{};
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 69035d462..1e9e8c8f5 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -42,6 +42,7 @@ union TextureInstInfo {
42 BitField<23, 2, u32> gather_component; 42 BitField<23, 2, u32> gather_component;
43 BitField<25, 2, u32> num_derivates; 43 BitField<25, 2, u32> num_derivates;
44 BitField<27, 3, ImageFormat> image_format; 44 BitField<27, 3, ImageFormat> image_format;
45 BitField<30, 1, u32> ndv_is_active;
45}; 46};
46static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); 47static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
47 48
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
index ef4ffa54b..f00e20023 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
19 } const fswzadd{insn}; 19 } const fswzadd{insn};
20 20
21 if (fswzadd.ndv != 0) { 21 if (fswzadd.ndv != 0) {
22 throw NotImplementedException("FSWZADD NDV"); 22 LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode");
23 } 23 }
24 24
25 const IR::F32 src_a{GetFloatReg8(insn)}; 25 const IR::F32 src_a{GetFloatReg8(insn)};
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
index 82aec3b73..1ddfeab06 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa
16 BitField<12, 4, u64> mov32i_mask; 16 BitField<12, 4, u64> mov32i_mask;
17 } const mov{insn}; 17 } const mov{insn};
18 18
19 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { 19 u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask;
20 throw NotImplementedException("Non-full move mask"); 20 if (mask != 0xf && mask != 0x1) {
21 LOG_WARNING(Shader, "(STUBBED) Masked Mov");
22 return;
21 } 23 }
22 v.X(mov.dest_reg, src); 24 v.X(mov.dest_reg, src);
23} 25}
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
index 753c62098..e593132e6 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -161,7 +161,8 @@ enum class SpecialRegister : u64 {
161 LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); 161 LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
162 return ir.Imm32(0); // This is the default value hardware returns. 162 return ir.Imm32(0); // This is the default value hardware returns.
163 default: 163 default:
164 throw NotImplementedException("S2R special register {}", special_register); 164 LOG_CRITICAL(Shader, "(STUBBED) Special register {}", special_register);
165 return ir.Imm32(0); // This is the default value hardware returns.
165 } 166 }
166} 167}
167} // Anonymous namespace 168} // Anonymous namespace
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 2f930f1ea..6203003b3 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) {
209} 209}
210 210
211void TranslatorVisitor::RAM(u64) { 211void TranslatorVisitor::RAM(u64) {
212 ThrowNotImplemented(Opcode::RAM); 212 LOG_WARNING(Shader, "(STUBBED) RAM Instruction");
213} 213}
214 214
215void TranslatorVisitor::RET(u64) { 215void TranslatorVisitor::RET(u64) {
@@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) {
221} 221}
222 222
223void TranslatorVisitor::SAM(u64) { 223void TranslatorVisitor::SAM(u64) {
224 ThrowNotImplemented(Opcode::SAM); 224 LOG_WARNING(Shader, "(STUBBED) SAM Instruction");
225} 225}
226 226
227void TranslatorVisitor::SETCRSPTR(u64) { 227void TranslatorVisitor::SETCRSPTR(u64) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
index 2459fc30d..7a9b7fff8 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
172 info.is_depth.Assign(tex.dc != 0 ? 1 : 0); 172 info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
173 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); 173 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
174 info.has_lod_clamp.Assign(lc ? 1 : 0); 174 info.has_lod_clamp.Assign(lc ? 1 : 0);
175 info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0);
175 176
176 const IR::Value sample{[&]() -> IR::Value { 177 const IR::Value sample{[&]() -> IR::Value {
177 if (tex.dc == 0) { 178 if (tex.dc == 0) {
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 4d81e9336..f46e55122 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -10,6 +10,7 @@
10#include "shader_recompiler/environment.h" 10#include "shader_recompiler/environment.h"
11#include "shader_recompiler/exception.h" 11#include "shader_recompiler/exception.h"
12#include "shader_recompiler/frontend/ir/ir_emitter.h" 12#include "shader_recompiler/frontend/ir/ir_emitter.h"
13#include "shader_recompiler/frontend/ir/modifiers.h"
13#include "shader_recompiler/frontend/ir/value.h" 14#include "shader_recompiler/frontend/ir/value.h"
14#include "shader_recompiler/ir_opt/passes.h" 15#include "shader_recompiler/ir_opt/passes.h"
15 16
@@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
410 } 411 }
411} 412}
412 413
414void FoldFPAdd32(IR::Inst& inst) {
415 if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
416 return;
417 }
418 const IR::Value lhs_value{inst.Arg(0)};
419 const IR::Value rhs_value{inst.Arg(1)};
420 const auto check_neutral = [](const IR::Value& one_operand) {
421 return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
422 };
423 if (check_neutral(lhs_value)) {
424 inst.ReplaceUsesWith(rhs_value);
425 }
426 if (check_neutral(rhs_value)) {
427 inst.ReplaceUsesWith(lhs_value);
428 }
429}
430
431bool FoldDerivateYFromCorrection(IR::Inst& inst) {
432 const IR::Value lhs_value{inst.Arg(0)};
433 const IR::Value rhs_value{inst.Arg(1)};
434 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
435 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
436 if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
437 if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
438 return false;
439 }
440 inst.ReplaceUsesWith(rhs_value);
441 return true;
442 }
443 if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
444 return false;
445 }
446 if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
447 return false;
448 }
449 inst.ReplaceUsesWith(lhs_value);
450 return true;
451}
452
413void FoldFPMul32(IR::Inst& inst) { 453void FoldFPMul32(IR::Inst& inst) {
454 if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
455 return;
456 }
414 const auto control{inst.Flags<IR::FpControl>()}; 457 const auto control{inst.Flags<IR::FpControl>()};
415 if (control.no_contraction) { 458 if (control.no_contraction) {
416 return; 459 return;
@@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
421 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { 464 if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
422 return; 465 return;
423 } 466 }
467 if (FoldDerivateYFromCorrection(inst)) {
468 return;
469 }
424 IR::Inst* const lhs_op{lhs_value.InstRecursive()}; 470 IR::Inst* const lhs_op{lhs_value.InstRecursive()};
425 IR::Inst* const rhs_op{rhs_value.InstRecursive()}; 471 IR::Inst* const rhs_op{rhs_value.InstRecursive()};
426 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || 472 if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
@@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
622 } 668 }
623 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; 669 const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
624 if (value_2 != value_3) { 670 if (value_2 != value_3) {
625 return; 671 if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
672 return;
673 }
674 if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
675 return;
676 }
626 } 677 }
627 const IR::Value index{inst2->Arg(1)}; 678 const IR::Value index{inst2->Arg(1)};
628 const IR::Value clamp{inst2->Arg(2)}; 679 const IR::Value clamp{inst2->Arg(2)};
@@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
648 } 699 }
649} 700}
650 701
702bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
703 if (coord.IsImmediate()) {
704 return false;
705 }
706 const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
707 const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
708 IR::Inst* const inst2{value_1.InstRecursive()};
709 if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
710 return false;
711 }
712 const IR::Value index{inst2->Arg(1).Resolve()};
713 const IR::Value clamp{inst2->Arg(2).Resolve()};
714 const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
715 if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
716 return false;
717 }
718 if (index.U32() != 3 && clamp.U32() != 3) {
719 return false;
720 }
721 result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
722 return true;
723 };
724 IR::Inst* const inst = coord.InstRecursive();
725 if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
726 return false;
727 }
728 std::array<IR::Value, 3> temporary_values;
729 IR::Value value_1 = inst->Arg(0).Resolve();
730 IR::Value value_2 = inst->Arg(1).Resolve();
731 IR::Value value_3 = inst->Arg(2).Resolve();
732 std::array<u32, 4> swizzles_mask_a{};
733 std::array<u32, 4> swizzles_mask_b{};
734 const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
735 u32 value = mask.U32();
736 for (size_t i = 0; i < 4; i++) {
737 mask_results[i] = (value >> (i * 2)) & 0x3;
738 }
739 };
740 resolve_mask(swizzles_mask_a, value_3);
741 size_t coordinate_index = 0;
742 const auto resolve_pending = [&](IR::Value resolve_v) {
743 IR::Inst* const inst_r = resolve_v.InstRecursive();
744 if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
745 return false;
746 }
747 if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
748 return false;
749 }
750 if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
751 return false;
752 }
753 resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
754 return true;
755 };
756 if (value_1.IsImmediate() || value_2.IsImmediate()) {
757 return false;
758 }
759 bool should_continue = false;
760 if (resolve_pending(value_1)) {
761 should_continue = check_through_shuffle(value_2, temporary_values[0]);
762 coordinate_index = 0;
763 }
764 if (resolve_pending(value_2)) {
765 should_continue = check_through_shuffle(value_1, temporary_values[0]);
766 coordinate_index = 2;
767 }
768 if (!should_continue) {
769 return false;
770 }
771 // figure out which derivative is which
772 size_t zero_mask_a = 0;
773 size_t zero_mask_b = 0;
774 for (size_t i = 0; i < 4; i++) {
775 if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
776 // the last operand can be inverted, so we cannot determine a result.
777 return false;
778 }
779 zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
780 zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
781 }
782 static constexpr size_t ddx_pattern = 0b1010;
783 static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
784 if (std::popcount(zero_mask_a) != 2) {
785 return false;
786 }
787 if (std::popcount(zero_mask_b) != 2) {
788 return false;
789 }
790 if (zero_mask_a == zero_mask_b) {
791 return false;
792 }
793 results[0] = temporary_values[coordinate_index];
794
795 if (coordinate_index == 0) {
796 if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
797 results[1] = temporary_values[1];
798 results[2] = temporary_values[2];
799 return true;
800 }
801 results[2] = temporary_values[1];
802 results[1] = temporary_values[2];
803 } else {
804 const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
805 if (mask == ddx_pattern || mask == ddx_pattern_inv) {
806 results[1] = temporary_value;
807 return;
808 }
809 results[2] = temporary_value;
810 };
811 assign_result(temporary_values[1], zero_mask_b);
812 assign_result(temporary_values[0], zero_mask_a);
813 }
814
815 return true;
816}
817
818void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
819 IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
820 auto orig_opcode = inst.GetOpcode();
821 if (info.ndv_is_active == 0) {
822 return;
823 }
824 if (info.type != TextureType::Color3D) {
825 return;
826 }
827 const IR::Value handle{inst.Arg(0)};
828 const IR::Value coords{inst.Arg(1)};
829 const IR::Value bias_lc{inst.Arg(2)};
830 const IR::Value offset{inst.Arg(3)};
831 if (!offset.IsImmediate()) {
832 return;
833 }
834 IR::Inst* const inst2 = coords.InstRecursive();
835 std::array<std::array<IR::Value, 3>, 3> results_matrix;
836 for (size_t i = 0; i < 3; i++) {
837 if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
838 return;
839 }
840 }
841 IR::F32 lod_clamp{};
842 if (info.has_lod_clamp != 0) {
843 if (!bias_lc.IsImmediate()) {
844 lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
845 } else {
846 lod_clamp = IR::F32{bias_lc};
847 }
848 }
849 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
850 IR::Value new_coords =
851 ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
852 IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
853 results_matrix[1][1], results_matrix[1][2]);
854 IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
855 info.num_derivates.Assign(3);
856 IR::Value new_gradient_instruction =
857 ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
858 IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
859 if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
860 new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
861 }
862 inst.ReplaceUsesWith(new_gradient_instruction);
863}
864
651void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { 865void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
652 const IR::Value bank{inst.Arg(0)}; 866 const IR::Value bank{inst.Arg(0)};
653 const IR::Value offset{inst.Arg(1)}; 867 const IR::Value offset{inst.Arg(1)};
@@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
743 case IR::Opcode::SelectF32: 957 case IR::Opcode::SelectF32:
744 case IR::Opcode::SelectF64: 958 case IR::Opcode::SelectF64:
745 return FoldSelect(inst); 959 return FoldSelect(inst);
960 case IR::Opcode::FPNeg32:
961 FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
962 return;
963 case IR::Opcode::FPAdd32:
964 FoldFPAdd32(inst);
965 return;
746 case IR::Opcode::FPMul32: 966 case IR::Opcode::FPMul32:
747 return FoldFPMul32(inst); 967 return FoldFPMul32(inst);
748 case IR::Opcode::LogicalAnd: 968 case IR::Opcode::LogicalAnd:
@@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
858 FoldDriverConstBuffer(env, block, inst, 1); 1078 FoldDriverConstBuffer(env, block, inst, 1);
859 } 1079 }
860 break; 1080 break;
1081 case IR::Opcode::BindlessImageSampleImplicitLod:
1082 case IR::Opcode::BoundImageSampleImplicitLod:
1083 case IR::Opcode::ImageSampleImplicitLod:
1084 FoldImageSampleImplicitLod(block, inst);
1085 break;
861 default: 1086 default:
862 break; 1087 break;
863 } 1088 }