diff options
Diffstat (limited to 'src/shader_recompiler')
13 files changed, 304 insertions, 23 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 07e75f9d8..83b763447 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -245,8 +245,6 @@ target_link_libraries(shader_recompiler PUBLIC common fmt::fmt sirit) | |||
| 245 | 245 | ||
| 246 | if (MSVC) | 246 | if (MSVC) |
| 247 | target_compile_options(shader_recompiler PRIVATE | 247 | target_compile_options(shader_recompiler PRIVATE |
| 248 | /W4 | ||
| 249 | |||
| 250 | /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data | 248 | /we4242 # 'identifier': conversion from 'type1' to 'type2', possible loss of data |
| 251 | /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data | 249 | /we4254 # 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data |
| 252 | /we4800 # Implicit conversion from 'type' to bool. Possible information loss | 250 | /we4800 # Implicit conversion from 'type' to bool. Possible information loss |
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index 85ee27333..d0e308124 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp | |||
| @@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 558 | const IR::Value& coord, const IR::Value& derivatives, | 558 | const IR::Value& coord, const IR::Value& derivatives, |
| 559 | const IR::Value& offset, const IR::Value& lod_clamp) { | 559 | const IR::Value& offset, const IR::Value& lod_clamp) { |
| 560 | const auto info{inst.Flags<IR::TextureInstInfo>()}; | 560 | const auto info{inst.Flags<IR::TextureInstInfo>()}; |
| 561 | ScopedRegister dpdx, dpdy; | 561 | ScopedRegister dpdx, dpdy, coords; |
| 562 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | 562 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; |
| 563 | if (multi_component) { | 563 | if (multi_component) { |
| 564 | // Allocate this early to avoid aliasing other registers | 564 | // Allocate this early to avoid aliasing other registers |
| 565 | dpdx = ScopedRegister{ctx.reg_alloc}; | 565 | dpdx = ScopedRegister{ctx.reg_alloc}; |
| 566 | dpdy = ScopedRegister{ctx.reg_alloc}; | 566 | dpdy = ScopedRegister{ctx.reg_alloc}; |
| 567 | if (info.num_derivates >= 3) { | ||
| 568 | coords = ScopedRegister{ctx.reg_alloc}; | ||
| 569 | } | ||
| 567 | } | 570 | } |
| 568 | const auto sparse_inst{PrepareSparse(inst)}; | 571 | const auto sparse_inst{PrepareSparse(inst)}; |
| 569 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; | 572 | const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; |
| @@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 580 | "MOV.F {}.y,{}.w;", | 583 | "MOV.F {}.y,{}.w;", |
| 581 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, | 584 | dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec, |
| 582 | dpdy.reg, derivatives_vec); | 585 | dpdy.reg, derivatives_vec); |
| 586 | Register final_coord; | ||
| 587 | if (info.num_derivates >= 3) { | ||
| 588 | ctx.Add("MOV.F {}.z,{}.x;" | ||
| 589 | "MOV.F {}.z,{}.y;", | ||
| 590 | dpdx.reg, coord_vec, dpdy.reg, coord_vec); | ||
| 591 | ctx.Add("MOV.F {}.x,0;" | ||
| 592 | "MOV.F {}.y,0;" | ||
| 593 | "MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg); | ||
| 594 | final_coord = coords.reg; | ||
| 595 | } else { | ||
| 596 | final_coord = coord_vec; | ||
| 597 | } | ||
| 583 | if (info.has_lod_clamp) { | 598 | if (info.has_lod_clamp) { |
| 584 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; | 599 | const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)}; |
| 585 | ctx.Add("MOV.F {}.w,{};" | 600 | ctx.Add("MOV.F {}.w,{};" |
| 586 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", | 601 | "TXD.F.LODCLAMP{} {},{},{},{},{},{}{};", |
| 587 | dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | 602 | dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg, |
| 588 | texture, type, offset_vec); | 603 | texture, type, offset_vec); |
| 589 | } else { | 604 | } else { |
| 590 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg, | 605 | ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg, |
| 591 | texture, type, offset_vec); | 606 | dpdy.reg, texture, type, offset_vec); |
| 592 | } | 607 | } |
| 593 | } else { | 608 | } else { |
| 594 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, | 609 | ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec, |
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp index 418505475..d9872ecc2 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp | |||
| @@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 548 | if (sparse_inst) { | 548 | if (sparse_inst) { |
| 549 | throw NotImplementedException("EmitImageGradient Sparse"); | 549 | throw NotImplementedException("EmitImageGradient Sparse"); |
| 550 | } | 550 | } |
| 551 | if (!offset.IsEmpty()) { | 551 | if (!offset.IsEmpty() && info.num_derivates <= 2) { |
| 552 | throw NotImplementedException("EmitImageGradient offset"); | 552 | throw NotImplementedException("EmitImageGradient offset"); |
| 553 | } | 553 | } |
| 554 | const auto texture{Texture(ctx, info, index)}; | 554 | const auto texture{Texture(ctx, info, index)}; |
| @@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, | |||
| 556 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; | 556 | const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp}; |
| 557 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; | 557 | const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)}; |
| 558 | if (multi_component) { | 558 | if (multi_component) { |
| 559 | if (info.num_derivates >= 3) { | ||
| 560 | const auto offset_vec{ctx.var_alloc.Consume(offset)}; | ||
| 561 | ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture, | ||
| 562 | coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec); | ||
| 563 | return; | ||
| 564 | } | ||
| 559 | ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, | 565 | ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords, |
| 560 | derivatives_vec, derivatives_vec); | 566 | derivatives_vec, derivatives_vec); |
| 561 | } else { | 567 | } else { |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp index 7d901c04b..8decdf399 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp | |||
| @@ -91,6 +91,34 @@ public: | |||
| 91 | } | 91 | } |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2, | ||
| 95 | Id offset, Id lod_clamp) { | ||
| 96 | if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) { | ||
| 97 | throw LogicError("Derivates must be present"); | ||
| 98 | } | ||
| 99 | boost::container::static_vector<Id, 3> deriv_1_accum{ | ||
| 100 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0), | ||
| 101 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2), | ||
| 102 | ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0), | ||
| 103 | }; | ||
| 104 | boost::container::static_vector<Id, 3> deriv_2_accum{ | ||
| 105 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1), | ||
| 106 | ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3), | ||
| 107 | ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1), | ||
| 108 | }; | ||
| 109 | const Id derivates_id1{ctx.OpCompositeConstruct( | ||
| 110 | ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})}; | ||
| 111 | const Id derivates_id2{ctx.OpCompositeConstruct( | ||
| 112 | ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})}; | ||
| 113 | Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2); | ||
| 114 | if (Sirit::ValidId(offset)) { | ||
| 115 | Add(spv::ImageOperandsMask::Offset, offset); | ||
| 116 | } | ||
| 117 | if (has_lod_clamp) { | ||
| 118 | Add(spv::ImageOperandsMask::MinLod, lod_clamp); | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 94 | std::span<const Id> Span() const noexcept { | 122 | std::span<const Id> Span() const noexcept { |
| 95 | return std::span{operands.data(), operands.size()}; | 123 | return std::span{operands.data(), operands.size()}; |
| 96 | } | 124 | } |
| @@ -176,9 +204,7 @@ Id TextureImage(EmitContext& ctx, IR::TextureInstInfo info, const IR::Value& ind | |||
| 176 | if (def.count > 1) { | 204 | if (def.count > 1) { |
| 177 | throw NotImplementedException("Indirect texture sample"); | 205 | throw NotImplementedException("Indirect texture sample"); |
| 178 | } | 206 | } |
| 179 | const Id sampler_id{def.id}; | 207 | return ctx.OpLoad(ctx.image_buffer_type, def.id); |
| 180 | const Id id{ctx.OpLoad(ctx.sampled_texture_buffer_type, sampler_id)}; | ||
| 181 | return ctx.OpImage(ctx.image_buffer_type, id); | ||
| 182 | } else { | 208 | } else { |
| 183 | const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; | 209 | const TextureDefinition& def{ctx.textures.at(info.descriptor_index)}; |
| 184 | if (def.count > 1) { | 210 | if (def.count > 1) { |
| @@ -524,8 +550,11 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I | |||
| 524 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, | 550 | Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, |
| 525 | Id derivates, Id offset, Id lod_clamp) { | 551 | Id derivates, Id offset, Id lod_clamp) { |
| 526 | const auto info{inst->Flags<IR::TextureInstInfo>()}; | 552 | const auto info{inst->Flags<IR::TextureInstInfo>()}; |
| 527 | const ImageOperands operands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, | 553 | const auto operands = |
| 528 | offset, lod_clamp); | 554 | info.num_derivates == 3 |
| 555 | ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp) | ||
| 556 | : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset, | ||
| 557 | lod_clamp); | ||
| 529 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, | 558 | return Emit(&EmitContext::OpImageSparseSampleExplicitLod, |
| 530 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], | 559 | &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], |
| 531 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); | 560 | Texture(ctx, info, index), coords, operands.Mask(), operands.Span()); |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index bec5db173..72f69b7aa 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp | |||
| @@ -74,6 +74,11 @@ spv::ImageFormat GetImageFormat(ImageFormat format) { | |||
| 74 | throw InvalidArgument("Invalid image format {}", format); | 74 | throw InvalidArgument("Invalid image format {}", format); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | spv::ImageFormat GetImageFormatForBuffer(ImageFormat format) { | ||
| 78 | const auto spv_format = GetImageFormat(format); | ||
| 79 | return spv_format == spv::ImageFormat::Unknown ? spv::ImageFormat::R32ui : spv_format; | ||
| 80 | } | ||
| 81 | |||
| 77 | Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { | 82 | Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) { |
| 78 | const spv::ImageFormat format{GetImageFormat(desc.format)}; | 83 | const spv::ImageFormat format{GetImageFormat(desc.format)}; |
| 79 | const Id type{ctx.U32[1]}; | 84 | const Id type{ctx.U32[1]}; |
| @@ -1242,9 +1247,8 @@ void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { | |||
| 1242 | } | 1247 | } |
| 1243 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | 1248 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; |
| 1244 | image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); | 1249 | image_buffer_type = TypeImage(F32[1], spv::Dim::Buffer, 0U, false, false, 1, format); |
| 1245 | sampled_texture_buffer_type = TypeSampledImage(image_buffer_type); | ||
| 1246 | 1250 | ||
| 1247 | const Id type{TypePointer(spv::StorageClass::UniformConstant, sampled_texture_buffer_type)}; | 1251 | const Id type{TypePointer(spv::StorageClass::UniformConstant, image_buffer_type)}; |
| 1248 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); | 1252 | texture_buffers.reserve(info.texture_buffer_descriptors.size()); |
| 1249 | for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { | 1253 | for (const TextureBufferDescriptor& desc : info.texture_buffer_descriptors) { |
| 1250 | if (desc.count != 1) { | 1254 | if (desc.count != 1) { |
| @@ -1271,7 +1275,7 @@ void EmitContext::DefineImageBuffers(const Info& info, u32& binding) { | |||
| 1271 | if (desc.count != 1) { | 1275 | if (desc.count != 1) { |
| 1272 | throw NotImplementedException("Array of image buffers"); | 1276 | throw NotImplementedException("Array of image buffers"); |
| 1273 | } | 1277 | } |
| 1274 | const spv::ImageFormat format{GetImageFormat(desc.format)}; | 1278 | const spv::ImageFormat format{GetImageFormatForBuffer(desc.format)}; |
| 1275 | const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; | 1279 | const Id image_type{TypeImage(U32[1], spv::Dim::Buffer, false, false, false, 2, format)}; |
| 1276 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; | 1280 | const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)}; |
| 1277 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; | 1281 | const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)}; |
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index e63330f11..7c49fd504 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h | |||
| @@ -206,7 +206,6 @@ public: | |||
| 206 | Id output_u32{}; | 206 | Id output_u32{}; |
| 207 | 207 | ||
| 208 | Id image_buffer_type{}; | 208 | Id image_buffer_type{}; |
| 209 | Id sampled_texture_buffer_type{}; | ||
| 210 | Id image_u32{}; | 209 | Id image_u32{}; |
| 211 | 210 | ||
| 212 | std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; | 211 | std::array<UniformDefinitions, Info::MAX_CBUFS> cbufs{}; |
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h index 69035d462..1e9e8c8f5 100644 --- a/src/shader_recompiler/frontend/ir/modifiers.h +++ b/src/shader_recompiler/frontend/ir/modifiers.h | |||
| @@ -42,6 +42,7 @@ union TextureInstInfo { | |||
| 42 | BitField<23, 2, u32> gather_component; | 42 | BitField<23, 2, u32> gather_component; |
| 43 | BitField<25, 2, u32> num_derivates; | 43 | BitField<25, 2, u32> num_derivates; |
| 44 | BitField<27, 3, ImageFormat> image_format; | 44 | BitField<27, 3, ImageFormat> image_format; |
| 45 | BitField<30, 1, u32> ndv_is_active; | ||
| 45 | }; | 46 | }; |
| 46 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); | 47 | static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); |
| 47 | 48 | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp index ef4ffa54b..f00e20023 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | |||
| @@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) { | |||
| 19 | } const fswzadd{insn}; | 19 | } const fswzadd{insn}; |
| 20 | 20 | ||
| 21 | if (fswzadd.ndv != 0) { | 21 | if (fswzadd.ndv != 0) { |
| 22 | throw NotImplementedException("FSWZADD NDV"); | 22 | LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode"); |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | const IR::F32 src_a{GetFloatReg8(insn)}; | 25 | const IR::F32 src_a{GetFloatReg8(insn)}; |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp index 82aec3b73..1ddfeab06 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa | |||
| 16 | BitField<12, 4, u64> mov32i_mask; | 16 | BitField<12, 4, u64> mov32i_mask; |
| 17 | } const mov{insn}; | 17 | } const mov{insn}; |
| 18 | 18 | ||
| 19 | if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { | 19 | u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask; |
| 20 | throw NotImplementedException("Non-full move mask"); | 20 | if (mask != 0xf && mask != 0x1) { |
| 21 | LOG_WARNING(Shader, "(STUBBED) Masked Mov"); | ||
| 22 | return; | ||
| 21 | } | 23 | } |
| 22 | v.X(mov.dest_reg, src); | 24 | v.X(mov.dest_reg, src); |
| 23 | } | 25 | } |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index 753c62098..e593132e6 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -161,7 +161,8 @@ enum class SpecialRegister : u64 { | |||
| 161 | LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); | 161 | LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); |
| 162 | return ir.Imm32(0); // This is the default value hardware returns. | 162 | return ir.Imm32(0); // This is the default value hardware returns. |
| 163 | default: | 163 | default: |
| 164 | throw NotImplementedException("S2R special register {}", special_register); | 164 | LOG_CRITICAL(Shader, "(STUBBED) Special register {}", special_register); |
| 165 | return ir.Imm32(0); // This is the default value hardware returns. | ||
| 165 | } | 166 | } |
| 166 | } | 167 | } |
| 167 | } // Anonymous namespace | 168 | } // Anonymous namespace |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 2f930f1ea..6203003b3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) { | |||
| 209 | } | 209 | } |
| 210 | 210 | ||
| 211 | void TranslatorVisitor::RAM(u64) { | 211 | void TranslatorVisitor::RAM(u64) { |
| 212 | ThrowNotImplemented(Opcode::RAM); | 212 | LOG_WARNING(Shader, "(STUBBED) RAM Instruction"); |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | void TranslatorVisitor::RET(u64) { | 215 | void TranslatorVisitor::RET(u64) { |
| @@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) { | |||
| 221 | } | 221 | } |
| 222 | 222 | ||
| 223 | void TranslatorVisitor::SAM(u64) { | 223 | void TranslatorVisitor::SAM(u64) { |
| 224 | ThrowNotImplemented(Opcode::SAM); | 224 | LOG_WARNING(Shader, "(STUBBED) SAM Instruction"); |
| 225 | } | 225 | } |
| 226 | 226 | ||
| 227 | void TranslatorVisitor::SETCRSPTR(u64) { | 227 | void TranslatorVisitor::SETCRSPTR(u64) { |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp index 2459fc30d..7a9b7fff8 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | |||
| 172 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | 172 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); |
| 173 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | 173 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); |
| 174 | info.has_lod_clamp.Assign(lc ? 1 : 0); | 174 | info.has_lod_clamp.Assign(lc ? 1 : 0); |
| 175 | info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0); | ||
| 175 | 176 | ||
| 176 | const IR::Value sample{[&]() -> IR::Value { | 177 | const IR::Value sample{[&]() -> IR::Value { |
| 177 | if (tex.dc == 0) { | 178 | if (tex.dc == 0) { |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 4d81e9336..f46e55122 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "shader_recompiler/environment.h" | 10 | #include "shader_recompiler/environment.h" |
| 11 | #include "shader_recompiler/exception.h" | 11 | #include "shader_recompiler/exception.h" |
| 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 12 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| 13 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 13 | #include "shader_recompiler/frontend/ir/value.h" | 14 | #include "shader_recompiler/frontend/ir/value.h" |
| 14 | #include "shader_recompiler/ir_opt/passes.h" | 15 | #include "shader_recompiler/ir_opt/passes.h" |
| 15 | 16 | ||
| @@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) { | |||
| 410 | } | 411 | } |
| 411 | } | 412 | } |
| 412 | 413 | ||
| 414 | void FoldFPAdd32(IR::Inst& inst) { | ||
| 415 | if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) { | ||
| 416 | return; | ||
| 417 | } | ||
| 418 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 419 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 420 | const auto check_neutral = [](const IR::Value& one_operand) { | ||
| 421 | return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f; | ||
| 422 | }; | ||
| 423 | if (check_neutral(lhs_value)) { | ||
| 424 | inst.ReplaceUsesWith(rhs_value); | ||
| 425 | } | ||
| 426 | if (check_neutral(rhs_value)) { | ||
| 427 | inst.ReplaceUsesWith(lhs_value); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | bool FoldDerivateYFromCorrection(IR::Inst& inst) { | ||
| 432 | const IR::Value lhs_value{inst.Arg(0)}; | ||
| 433 | const IR::Value rhs_value{inst.Arg(1)}; | ||
| 434 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | ||
| 435 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | ||
| 436 | if (lhs_op->GetOpcode() == IR::Opcode::YDirection) { | ||
| 437 | if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||
| 438 | return false; | ||
| 439 | } | ||
| 440 | inst.ReplaceUsesWith(rhs_value); | ||
| 441 | return true; | ||
| 442 | } | ||
| 443 | if (rhs_op->GetOpcode() != IR::Opcode::YDirection) { | ||
| 444 | return false; | ||
| 445 | } | ||
| 446 | if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) { | ||
| 447 | return false; | ||
| 448 | } | ||
| 449 | inst.ReplaceUsesWith(lhs_value); | ||
| 450 | return true; | ||
| 451 | } | ||
| 452 | |||
| 413 | void FoldFPMul32(IR::Inst& inst) { | 453 | void FoldFPMul32(IR::Inst& inst) { |
| 454 | if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) { | ||
| 455 | return; | ||
| 456 | } | ||
| 414 | const auto control{inst.Flags<IR::FpControl>()}; | 457 | const auto control{inst.Flags<IR::FpControl>()}; |
| 415 | if (control.no_contraction) { | 458 | if (control.no_contraction) { |
| 416 | return; | 459 | return; |
| @@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) { | |||
| 421 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { | 464 | if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) { |
| 422 | return; | 465 | return; |
| 423 | } | 466 | } |
| 467 | if (FoldDerivateYFromCorrection(inst)) { | ||
| 468 | return; | ||
| 469 | } | ||
| 424 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; | 470 | IR::Inst* const lhs_op{lhs_value.InstRecursive()}; |
| 425 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; | 471 | IR::Inst* const rhs_op{rhs_value.InstRecursive()}; |
| 426 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || | 472 | if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 || |
| @@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 622 | } | 668 | } |
| 623 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | 669 | const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; |
| 624 | if (value_2 != value_3) { | 670 | if (value_2 != value_3) { |
| 625 | return; | 671 | if (!value_2.IsImmediate() || !value_3.IsImmediate()) { |
| 672 | return; | ||
| 673 | } | ||
| 674 | if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) { | ||
| 675 | return; | ||
| 676 | } | ||
| 626 | } | 677 | } |
| 627 | const IR::Value index{inst2->Arg(1)}; | 678 | const IR::Value index{inst2->Arg(1)}; |
| 628 | const IR::Value clamp{inst2->Arg(2)}; | 679 | const IR::Value clamp{inst2->Arg(2)}; |
| @@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | |||
| 648 | } | 699 | } |
| 649 | } | 700 | } |
| 650 | 701 | ||
| 702 | bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) { | ||
| 703 | if (coord.IsImmediate()) { | ||
| 704 | return false; | ||
| 705 | } | ||
| 706 | const auto check_through_shuffle = [](IR::Value input, IR::Value& result) { | ||
| 707 | const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)}; | ||
| 708 | IR::Inst* const inst2{value_1.InstRecursive()}; | ||
| 709 | if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) { | ||
| 710 | return false; | ||
| 711 | } | ||
| 712 | const IR::Value index{inst2->Arg(1).Resolve()}; | ||
| 713 | const IR::Value clamp{inst2->Arg(2).Resolve()}; | ||
| 714 | const IR::Value segmentation_mask{inst2->Arg(3).Resolve()}; | ||
| 715 | if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) { | ||
| 716 | return false; | ||
| 717 | } | ||
| 718 | if (index.U32() != 3 && clamp.U32() != 3) { | ||
| 719 | return false; | ||
| 720 | } | ||
| 721 | result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32); | ||
| 722 | return true; | ||
| 723 | }; | ||
| 724 | IR::Inst* const inst = coord.InstRecursive(); | ||
| 725 | if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||
| 726 | return false; | ||
| 727 | } | ||
| 728 | std::array<IR::Value, 3> temporary_values; | ||
| 729 | IR::Value value_1 = inst->Arg(0).Resolve(); | ||
| 730 | IR::Value value_2 = inst->Arg(1).Resolve(); | ||
| 731 | IR::Value value_3 = inst->Arg(2).Resolve(); | ||
| 732 | std::array<u32, 4> swizzles_mask_a{}; | ||
| 733 | std::array<u32, 4> swizzles_mask_b{}; | ||
| 734 | const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) { | ||
| 735 | u32 value = mask.U32(); | ||
| 736 | for (size_t i = 0; i < 4; i++) { | ||
| 737 | mask_results[i] = (value >> (i * 2)) & 0x3; | ||
| 738 | } | ||
| 739 | }; | ||
| 740 | resolve_mask(swizzles_mask_a, value_3); | ||
| 741 | size_t coordinate_index = 0; | ||
| 742 | const auto resolve_pending = [&](IR::Value resolve_v) { | ||
| 743 | IR::Inst* const inst_r = resolve_v.InstRecursive(); | ||
| 744 | if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) { | ||
| 745 | return false; | ||
| 746 | } | ||
| 747 | if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) { | ||
| 748 | return false; | ||
| 749 | } | ||
| 750 | if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) { | ||
| 751 | return false; | ||
| 752 | } | ||
| 753 | resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve()); | ||
| 754 | return true; | ||
| 755 | }; | ||
| 756 | if (value_1.IsImmediate() || value_2.IsImmediate()) { | ||
| 757 | return false; | ||
| 758 | } | ||
| 759 | bool should_continue = false; | ||
| 760 | if (resolve_pending(value_1)) { | ||
| 761 | should_continue = check_through_shuffle(value_2, temporary_values[0]); | ||
| 762 | coordinate_index = 0; | ||
| 763 | } | ||
| 764 | if (resolve_pending(value_2)) { | ||
| 765 | should_continue = check_through_shuffle(value_1, temporary_values[0]); | ||
| 766 | coordinate_index = 2; | ||
| 767 | } | ||
| 768 | if (!should_continue) { | ||
| 769 | return false; | ||
| 770 | } | ||
| 771 | // figure which is which | ||
| 772 | size_t zero_mask_a = 0; | ||
| 773 | size_t zero_mask_b = 0; | ||
| 774 | for (size_t i = 0; i < 4; i++) { | ||
| 775 | if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) { | ||
| 776 | // last operand can be inversed, we cannot determine a result. | ||
| 777 | return false; | ||
| 778 | } | ||
| 779 | zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i; | ||
| 780 | zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i; | ||
| 781 | } | ||
| 782 | static constexpr size_t ddx_pattern = 0b1010; | ||
| 783 | static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111; | ||
| 784 | if (std::popcount(zero_mask_a) != 2) { | ||
| 785 | return false; | ||
| 786 | } | ||
| 787 | if (std::popcount(zero_mask_b) != 2) { | ||
| 788 | return false; | ||
| 789 | } | ||
| 790 | if (zero_mask_a == zero_mask_b) { | ||
| 791 | return false; | ||
| 792 | } | ||
| 793 | results[0] = temporary_values[coordinate_index]; | ||
| 794 | |||
| 795 | if (coordinate_index == 0) { | ||
| 796 | if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) { | ||
| 797 | results[1] = temporary_values[1]; | ||
| 798 | results[2] = temporary_values[2]; | ||
| 799 | return true; | ||
| 800 | } | ||
| 801 | results[2] = temporary_values[1]; | ||
| 802 | results[1] = temporary_values[2]; | ||
| 803 | } else { | ||
| 804 | const auto assign_result = [&results](IR::Value temporary_value, size_t mask) { | ||
| 805 | if (mask == ddx_pattern || mask == ddx_pattern_inv) { | ||
| 806 | results[1] = temporary_value; | ||
| 807 | return; | ||
| 808 | } | ||
| 809 | results[2] = temporary_value; | ||
| 810 | }; | ||
| 811 | assign_result(temporary_values[1], zero_mask_b); | ||
| 812 | assign_result(temporary_values[0], zero_mask_a); | ||
| 813 | } | ||
| 814 | |||
| 815 | return true; | ||
| 816 | } | ||
| 817 | |||
| 818 | void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) { | ||
| 819 | IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>(); | ||
| 820 | auto orig_opcode = inst.GetOpcode(); | ||
| 821 | if (info.ndv_is_active == 0) { | ||
| 822 | return; | ||
| 823 | } | ||
| 824 | if (info.type != TextureType::Color3D) { | ||
| 825 | return; | ||
| 826 | } | ||
| 827 | const IR::Value handle{inst.Arg(0)}; | ||
| 828 | const IR::Value coords{inst.Arg(1)}; | ||
| 829 | const IR::Value bias_lc{inst.Arg(2)}; | ||
| 830 | const IR::Value offset{inst.Arg(3)}; | ||
| 831 | if (!offset.IsImmediate()) { | ||
| 832 | return; | ||
| 833 | } | ||
| 834 | IR::Inst* const inst2 = coords.InstRecursive(); | ||
| 835 | std::array<std::array<IR::Value, 3>, 3> results_matrix; | ||
| 836 | for (size_t i = 0; i < 3; i++) { | ||
| 837 | if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) { | ||
| 838 | return; | ||
| 839 | } | ||
| 840 | } | ||
| 841 | IR::F32 lod_clamp{}; | ||
| 842 | if (info.has_lod_clamp != 0) { | ||
| 843 | if (!bias_lc.IsImmediate()) { | ||
| 844 | lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()}; | ||
| 845 | } else { | ||
| 846 | lod_clamp = IR::F32{bias_lc}; | ||
| 847 | } | ||
| 848 | } | ||
| 849 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 850 | IR::Value new_coords = | ||
| 851 | ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]); | ||
| 852 | IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2], | ||
| 853 | results_matrix[1][1], results_matrix[1][2]); | ||
| 854 | IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]); | ||
| 855 | info.num_derivates.Assign(3); | ||
| 856 | IR::Value new_gradient_instruction = | ||
| 857 | ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info); | ||
| 858 | IR::Inst* const new_inst = new_gradient_instruction.InstRecursive(); | ||
| 859 | if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) { | ||
| 860 | new_inst->ReplaceOpcode(IR::Opcode::ImageGradient); | ||
| 861 | } | ||
| 862 | inst.ReplaceUsesWith(new_gradient_instruction); | ||
| 863 | } | ||
| 864 | |||
| 651 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | 865 | void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { |
| 652 | const IR::Value bank{inst.Arg(0)}; | 866 | const IR::Value bank{inst.Arg(0)}; |
| 653 | const IR::Value offset{inst.Arg(1)}; | 867 | const IR::Value offset{inst.Arg(1)}; |
| @@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | |||
| 743 | case IR::Opcode::SelectF32: | 957 | case IR::Opcode::SelectF32: |
| 744 | case IR::Opcode::SelectF64: | 958 | case IR::Opcode::SelectF64: |
| 745 | return FoldSelect(inst); | 959 | return FoldSelect(inst); |
| 960 | case IR::Opcode::FPNeg32: | ||
| 961 | FoldWhenAllImmediates(inst, [](f32 a) { return -a; }); | ||
| 962 | return; | ||
| 963 | case IR::Opcode::FPAdd32: | ||
| 964 | FoldFPAdd32(inst); | ||
| 965 | return; | ||
| 746 | case IR::Opcode::FPMul32: | 966 | case IR::Opcode::FPMul32: |
| 747 | return FoldFPMul32(inst); | 967 | return FoldFPMul32(inst); |
| 748 | case IR::Opcode::LogicalAnd: | 968 | case IR::Opcode::LogicalAnd: |
| @@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | |||
| 858 | FoldDriverConstBuffer(env, block, inst, 1); | 1078 | FoldDriverConstBuffer(env, block, inst, 1); |
| 859 | } | 1079 | } |
| 860 | break; | 1080 | break; |
| 1081 | case IR::Opcode::BindlessImageSampleImplicitLod: | ||
| 1082 | case IR::Opcode::BoundImageSampleImplicitLod: | ||
| 1083 | case IR::Opcode::ImageSampleImplicitLod: | ||
| 1084 | FoldImageSampleImplicitLod(block, inst); | ||
| 1085 | break; | ||
| 861 | default: | 1086 | default: |
| 862 | break; | 1087 | break; |
| 863 | } | 1088 | } |