Diffstat (limited to 'src/shader_recompiler')
21 files changed, 174 insertions, 70 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 83b763447..19db17c6d 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -231,6 +231,7 @@ add_library(shader_recompiler STATIC
     ir_opt/rescaling_pass.cpp
     ir_opt/ssa_rewrite_pass.cpp
     ir_opt/texture_pass.cpp
+    ir_opt/vendor_workaround_pass.cpp
     ir_opt/verification_pass.cpp
     object_pool.h
     precompiled_headers.h
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
index d0e308124..64e7bad75 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp
@@ -559,12 +559,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                        const IR::Value& offset, const IR::Value& lod_clamp) {
     const auto info{inst.Flags<IR::TextureInstInfo>()};
     ScopedRegister dpdx, dpdy, coords;
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     if (multi_component) {
         // Allocate this early to avoid aliasing other registers
         dpdx = ScopedRegister{ctx.reg_alloc};
         dpdy = ScopedRegister{ctx.reg_alloc};
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             coords = ScopedRegister{ctx.reg_alloc};
         }
     }
@@ -584,7 +584,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
                 dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
                 dpdy.reg, derivatives_vec);
         Register final_coord;
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             ctx.Add("MOV.F {}.z,{}.x;"
                     "MOV.F {}.z,{}.y;",
                     dpdx.reg, coord_vec, dpdy.reg, coord_vec);
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 2705ab140..9319ea007 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -5,6 +5,7 @@
 #include "shader_recompiler/backend/glasm/glasm_emit_context.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Backend::GLASM {
@@ -35,7 +36,9 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
             continue;
         }
         const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
-        ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
+        const u64 ssbo_align_mask{~(ctx.profile.min_ssbo_alignment - 1U)};
+        ctx.Add("LDC.U64 DC.x,c{}[{}];" // unaligned_ssbo_addr
+                "AND.U64 DC.x,DC.x,{};" // ssbo_addr = unaligned_ssbo_addr & ssbo_align_mask
                 "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
                 "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
                 "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
@@ -44,8 +47,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
                 "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
                 "IF NE.x;" // if cond
                 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
-                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
-                address, address);
+                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo_align_mask, ssbo.cbuf_index,
+                ssbo.cbuf_offset + 8, address, address, address);
         if (pointer_based) {
             ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
                     "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
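Note: the `~(min_ssbo_alignment - 1U)` expression above (and its GLSL and SPIR-V counterparts further down) is the usual power-of-two align-down mask, so it only works if the profile guarantees the alignment is a power of two. A minimal standalone sketch of the arithmetic, with a made-up 64-byte alignment standing in for the real Profile::min_ssbo_alignment:

    #include <cstdint>

    // Align an address down to a power-of-two boundary, mirroring the
    // "AND.U64 DC.x,DC.x,{mask}" instruction emitted above. The 64-byte
    // alignment is a placeholder value, not the project's actual setting.
    constexpr std::uint64_t AlignDown(std::uint64_t addr, std::uint64_t alignment) {
        return addr & ~(alignment - 1U); // e.g. 64 -> ...FFFFFFC0 mask
    }

    static_assert(AlignDown(0x1234'5678, 64) == 0x1234'5640);
    static_assert(AlignDown(0x1234'5640, 64) == 0x1234'5640); // already aligned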
diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
index d9872ecc2..6e940bd5a 100644
--- a/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
+++ b/src/shader_recompiler/backend/glsl/emit_glsl_image.cpp
@@ -548,15 +548,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
     if (sparse_inst) {
         throw NotImplementedException("EmitImageGradient Sparse");
     }
-    if (!offset.IsEmpty() && info.num_derivates <= 2) {
+    if (!offset.IsEmpty() && info.num_derivatives <= 2) {
         throw NotImplementedException("EmitImageGradient offset");
     }
     const auto texture{Texture(ctx, info, index)};
     const auto texel{ctx.var_alloc.Define(inst, GlslVarType::F32x4)};
-    const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
+    const bool multi_component{info.num_derivatives > 1 || info.has_lod_clamp};
     const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
     if (multi_component) {
-        if (info.num_derivates >= 3) {
+        if (info.num_derivatives >= 3) {
             const auto offset_vec{ctx.var_alloc.Consume(offset)};
             ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yw, {}.y));", texel, texture,
                     coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index 9ff4028c2..fd9a99449 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -601,7 +601,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
         addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
         size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
     }
-    const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
+    const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
+    const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
+    const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
+    const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
     const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
     func += addr_statment;
 
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 34592a01f..0031fa5fb 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -407,7 +407,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
         }
         ctx.AddCapability(spv::Capability::DemoteToHelperInvocation);
     }
-    if (info.stores[IR::Attribute::ViewportIndex]) {
+    if (info.stores[IR::Attribute::ViewportIndex] && profile.support_multi_viewport) {
         ctx.AddCapability(spv::Capability::MultiViewport);
     }
     if (info.stores[IR::Attribute::ViewportMask] && profile.support_viewport_mask) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 1d77426e0..e5a78a914 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -84,6 +84,10 @@ std::optional<OutAttr> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
         }
         return std::nullopt;
     case IR::Attribute::ViewportIndex:
+        if (!ctx.profile.support_multi_viewport) {
+            LOG_WARNING(Shader, "Ignoring viewport index store on non-supporting driver");
+            return std::nullopt;
+        }
         if (ctx.profile.support_viewport_index_layer_non_geometry ||
             ctx.stage == Shader::Stage::Geometry) {
             return OutAttr{ctx.viewport_index, ctx.U32[1]};
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 8decdf399..22ceca19c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -67,22 +67,22 @@ public:
         }
     }
 
-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates, u32 num_derivates,
-                           Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates)) {
-            throw LogicError("Derivates must be present");
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives,
+                           u32 num_derivatives, Id offset, Id lod_clamp) {
+        if (!Sirit::ValidId(derivatives)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_x_accum;
         boost::container::static_vector<Id, 3> deriv_y_accum;
-        for (u32 i = 0; i < num_derivates; ++i) {
-            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2));
-            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivates, i * 2 + 1));
+        for (u32 i = 0; i < num_derivatives; ++i) {
+            deriv_x_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2));
+            deriv_y_accum.push_back(ctx.OpCompositeExtract(ctx.F32[1], derivatives, i * 2 + 1));
         }
-        const Id derivates_X{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
-        const Id derivates_Y{ctx.OpCompositeConstruct(
-            ctx.F32[num_derivates], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_X, derivates_Y);
+        const Id derivatives_X{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_x_accum.data(), deriv_x_accum.size()})};
+        const Id derivatives_Y{ctx.OpCompositeConstruct(
+            ctx.F32[num_derivatives], std::span{deriv_y_accum.data(), deriv_y_accum.size()})};
+        Add(spv::ImageOperandsMask::Grad, derivatives_X, derivatives_Y);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -91,26 +91,26 @@ public:
         }
     }
 
-    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivates_1, Id derivates_2,
+    explicit ImageOperands(EmitContext& ctx, bool has_lod_clamp, Id derivatives_1, Id derivatives_2,
                            Id offset, Id lod_clamp) {
-        if (!Sirit::ValidId(derivates_1) || !Sirit::ValidId(derivates_2)) {
-            throw LogicError("Derivates must be present");
+        if (!Sirit::ValidId(derivatives_1) || !Sirit::ValidId(derivatives_2)) {
+            throw LogicError("Derivatives must be present");
         }
         boost::container::static_vector<Id, 3> deriv_1_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 0),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 2),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 0),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 2),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 0),
         };
         boost::container::static_vector<Id, 3> deriv_2_accum{
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 1),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_1, 3),
-            ctx.OpCompositeExtract(ctx.F32[1], derivates_2, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 1),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_1, 3),
+            ctx.OpCompositeExtract(ctx.F32[1], derivatives_2, 1),
         };
-        const Id derivates_id1{ctx.OpCompositeConstruct(
+        const Id derivatives_id1{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_1_accum.data(), deriv_1_accum.size()})};
-        const Id derivates_id2{ctx.OpCompositeConstruct(
+        const Id derivatives_id2{ctx.OpCompositeConstruct(
             ctx.F32[3], std::span{deriv_2_accum.data(), deriv_2_accum.size()})};
-        Add(spv::ImageOperandsMask::Grad, derivates_id1, derivates_id2);
+        Add(spv::ImageOperandsMask::Grad, derivatives_id1, derivatives_id2);
         if (Sirit::ValidId(offset)) {
             Add(spv::ImageOperandsMask::Offset, offset);
         }
@@ -548,12 +548,12 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
 }
 
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp) {
+                     Id derivatives, Id offset, Id lod_clamp) {
     const auto info{inst->Flags<IR::TextureInstInfo>()};
     const auto operands =
-        info.num_derivates == 3
-            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivates, offset, {}, lod_clamp)
-            : ImageOperands(ctx, info.has_lod_clamp != 0, derivates, info.num_derivates, offset,
+        info.num_derivatives == 3
+            ? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, offset, {}, lod_clamp)
+            : ImageOperands(ctx, info.has_lod_clamp != 0, derivatives, info.num_derivatives, offset,
                             lod_clamp);
     return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                 &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
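For context on the extraction loops above: the frontend (see texture_gradient.cpp below) packs derivatives interleaved as (dPdx.x, dPdy.x, dPdx.y, dPdy.y), so element `i * 2` belongs to the X gradient and `i * 2 + 1` to the Y gradient, which is exactly what the `OpCompositeExtract` calls pull apart. A rough host-side sketch of that de-interleaving, with hypothetical names:

    #include <array>
    #include <cstddef>
    #include <utility>

    // De-interleave a packed derivative vector the way the first
    // ImageOperands constructor does with OpCompositeExtract.
    template <std::size_t N>
    constexpr auto SplitDerivatives(const std::array<float, N * 2>& packed) {
        std::array<float, N> dx{};
        std::array<float, N> dy{};
        for (std::size_t i = 0; i < N; ++i) {
            dx[i] = packed[i * 2];     // dPdx component i
            dy[i] = packed[i * 2 + 1]; // dPdy component i
        }
        return std::pair{dx, dy};
    }

    // 2D case: {dPdx.x, dPdy.x, dPdx.y, dPdy.y} -> dx = {dPdx.x, dPdx.y}
    static_assert(SplitDerivatives<2>({1.f, 2.f, 3.f, 4.f}).first == std::array{1.f, 3.f});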
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index a440b557d..7d34575c8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -543,7 +543,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
                             const IR::Value& skip_mips);
 Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
-                     Id derivates, Id offset, Id lod_clamp);
+                     Id derivatives, Id offset, Id lod_clamp);
 Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
 void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color);
 Id EmitIsTextureScaled(EmitContext& ctx, const IR::Value& index);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 57df6fc34..3350f1f85 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -891,7 +891,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
         const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
                                                  zero, ssbo_size_cbuf_offset)};
 
-        const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+        const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)};
+        const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+        const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))};
         const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
         const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
         const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index b7caa4246..49171c470 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1864,11 +1864,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
     return Inst(op, Flags{info}, handle, coords);
 }
 
-Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
+Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
                                const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
                                          : Opcode::BindlessImageGradient};
-    return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
+    return Inst(op, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
 }
 
 Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index f3c81dbe1..6c30897f4 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -335,7 +335,7 @@ public:
     [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
                                    const U32& lod, const U32& multisampling, TextureInstInfo info);
     [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
-                                      const Value& derivates, const Value& offset,
+                                      const Value& derivatives, const Value& offset,
                                       const F32& lod_clamp, TextureInstInfo info);
     [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
     void ImageWrite(const Value& handle, const Value& coords, const Value& color,
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index 1e9e8c8f5..c20c2401f 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -40,7 +40,7 @@ union TextureInstInfo {
     BitField<21, 1, u32> has_lod_clamp;
     BitField<22, 1, u32> relaxed_precision;
     BitField<23, 2, u32> gather_component;
-    BitField<25, 2, u32> num_derivates;
+    BitField<25, 2, u32> num_derivatives;
    BitField<27, 3, ImageFormat> image_format;
     BitField<30, 1, u32> ndv_is_active;
 };
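One thing worth noting on this rename: `num_derivatives` still occupies only two bits (bits 25-26 of the packed word), so it can hold 0 through 3, and the value 3 is what the constant-propagation fold further down uses to flag a full 3D gradient. A hedged sketch of the pack/extract arithmetic that a `BitField<25, 2, u32>` performs (the helper names here are hypothetical, not the project's actual `BitField` API):

    #include <cstdint>

    // Insert/extract a 2-bit field at bit position 25, which is what
    // BitField<25, 2, u32> does behind its Assign()/conversion operators.
    constexpr std::uint32_t kPosition = 25;
    constexpr std::uint32_t kMask = ((1U << 2) - 1U) << kPosition;

    constexpr std::uint32_t AssignNumDerivatives(std::uint32_t raw, std::uint32_t value) {
        return (raw & ~kMask) | ((value << kPosition) & kMask);
    }

    constexpr std::uint32_t ExtractNumDerivatives(std::uint32_t raw) {
        return (raw & kMask) >> kPosition;
    }

    static_assert(ExtractNumDerivatives(AssignNumDerivatives(0, 3)) == 3);
    static_assert(ExtractNumDerivatives(AssignNumDerivatives(0xFFFF'FFFF, 1)) == 1);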
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
index dd34507bc..4ce3dd0cd 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -59,7 +59,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
         BitField<51, 3, IR::Pred> sparse_pred;
         BitField<0, 8, IR::Reg> dest_reg;
         BitField<8, 8, IR::Reg> coord_reg;
-        BitField<20, 8, IR::Reg> derivate_reg;
+        BitField<20, 8, IR::Reg> derivative_reg;
         BitField<28, 3, TextureType> type;
         BitField<31, 4, u64> mask;
         BitField<36, 13, u64> cbuf_offset;
@@ -71,7 +71,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     }
 
     IR::Value coords;
-    u32 num_derivates{};
+    u32 num_derivatives{};
     IR::Reg base_reg{txd.coord_reg};
     IR::Reg last_reg;
     IR::Value handle;
@@ -90,42 +90,42 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
     switch (txd.type) {
     case TextureType::_1D: {
         coords = v.F(base_reg);
-        num_derivates = 1;
+        num_derivatives = 1;
         last_reg = base_reg + 1;
         break;
     }
     case TextureType::ARRAY_1D: {
         last_reg = base_reg + 1;
         coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
-        num_derivates = 1;
+        num_derivatives = 1;
         break;
     }
     case TextureType::_2D: {
         last_reg = base_reg + 2;
         coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
-        num_derivates = 2;
+        num_derivatives = 2;
         break;
     }
     case TextureType::ARRAY_2D: {
         last_reg = base_reg + 2;
         coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
-        num_derivates = 2;
+        num_derivatives = 2;
         break;
     }
     default:
         throw NotImplementedException("Invalid texture type");
     }
 
-    const IR::Reg derivate_reg{txd.derivate_reg};
-    IR::Value derivates;
-    switch (num_derivates) {
+    const IR::Reg derivative_reg{txd.derivative_reg};
+    IR::Value derivatives;
+    switch (num_derivatives) {
     case 1: {
-        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
+        derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1));
         break;
     }
     case 2: {
-        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
-                                            v.F(derivate_reg + 2), v.F(derivate_reg + 3));
+        derivatives = v.ir.CompositeConstruct(v.F(derivative_reg), v.F(derivative_reg + 1),
+                                              v.F(derivative_reg + 2), v.F(derivative_reg + 3));
         break;
     }
     default:
@@ -150,9 +150,10 @@ void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
 
     IR::TextureInstInfo info{};
     info.type.Assign(GetType(txd.type));
-    info.num_derivates.Assign(num_derivates);
+    info.num_derivatives.Assign(num_derivatives);
     info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
-    const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
+    const IR::Value sample{
+        v.ir.ImageGradient(handle, coords, derivatives, offset, lod_clamp, info)};
 
     IR::Reg dest_reg{txd.dest_reg};
     for (size_t element = 0; element < 4; ++element) {
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 928b35561..321ea625b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -298,7 +298,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
 
     Optimization::PositionPass(env, program);
 
-    Optimization::GlobalMemoryToStorageBufferPass(program);
+    Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
     Optimization::TexturePass(env, program, host_info);
 
     if (Settings::values.resolution_info.active) {
@@ -310,6 +310,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
     }
     Optimization::CollectShaderInfoPass(env, program);
     Optimization::LayerPass(program, host_info);
+    Optimization::VendorWorkaroundPass(program);
 
     CollectInterpolationInfo(env, program);
     AddNVNStorageBuffers(program);
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 7d2ded907..1b53404fc 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -16,6 +16,7 @@ struct HostTranslateInfo {
     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
     bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
+    u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
     bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
                                                 ///< passthrough shaders
     bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index f46e55122..ec12c843a 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -428,7 +428,7 @@ void FoldFPAdd32(IR::Inst& inst) {
     }
 }
 
-bool FoldDerivateYFromCorrection(IR::Inst& inst) {
+bool FoldDerivativeYFromCorrection(IR::Inst& inst) {
     const IR::Value lhs_value{inst.Arg(0)};
     const IR::Value rhs_value{inst.Arg(1)};
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -464,7 +464,7 @@ void FoldFPMul32(IR::Inst& inst) {
     if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
         return;
     }
-    if (FoldDerivateYFromCorrection(inst)) {
+    if (FoldDerivativeYFromCorrection(inst)) {
         return;
     }
     IR::Inst* const lhs_op{lhs_value.InstRecursive()};
@@ -699,7 +699,7 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
     }
 }
 
-bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
+bool FindGradient3DDerivatives(std::array<IR::Value, 3>& results, IR::Value coord) {
     if (coord.IsImmediate()) {
         return false;
     }
@@ -834,7 +834,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Inst* const inst2 = coords.InstRecursive();
     std::array<std::array<IR::Value, 3>, 3> results_matrix;
     for (size_t i = 0; i < 3; i++) {
-        if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
+        if (!FindGradient3DDerivatives(results_matrix[i], inst2->Arg(i).Resolve())) {
             return;
         }
     }
@@ -852,7 +852,7 @@ void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
     IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
                                                     results_matrix[1][1], results_matrix[1][2]);
     IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
-    info.num_derivates.Assign(3);
+    info.num_derivatives.Assign(3);
     IR::Value new_gradient_instruction =
         ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
     IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index d1e59f22e..0cea79945 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
@@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
 }
 
 /// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::U32 offset;
     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     }
     // Subtract the least significant 32 bits from the guest offset. The result is the storage
     // buffer offset in bytes.
-    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+    // Align the offset base to match the host alignment requirements
+    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
     return ir.ISub(offset, low_cbuf);
 }
 
@@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     }
 } // Anonymous namespace
 
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
     StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
-        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+        const IR::U32 offset{
+            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
         Replace(*block, *inst, index, offset);
     }
 }
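This pass is the IR-side counterpart of the backend changes above: the backends now bind the SSBO at the aligned-down address, so the byte offset fed to storage instructions has to be measured from that same aligned base, otherwise every access would land short by `base % alignment` bytes. A small numeric check with made-up addresses and an assumed 64-byte alignment:

    #include <cstdint>

    // Made-up guest values; 64 stands in for host_info.min_ssbo_alignment.
    constexpr std::uint32_t kAlignment = 64;
    constexpr std::uint32_t kCbufBase = 0x0001'0023;  // unaligned SSBO base from the cbuf
    constexpr std::uint32_t kGuestAddr = 0x0001'0053; // one access inside the buffer

    // Offset relative to the unaligned base (the old behavior).
    constexpr std::uint32_t kOldOffset = kGuestAddr - kCbufBase; // 0x30

    // Offset relative to the aligned base, matching
    // "low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)))".
    constexpr std::uint32_t kAlignedBase = kCbufBase & ~(kAlignment - 1U); // 0x0001'0000
    constexpr std::uint32_t kNewOffset = kGuestAddr - kAlignedBase;        // 0x53

    // The host binds the buffer at kAlignedBase, so kNewOffset reaches the
    // right bytes; the two offsets differ by exactly the low bits that the
    // mask stripped from the base.
    static_assert(kNewOffset - kOldOffset == kCbufBase % kAlignment);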
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 629d18fa1..1e637cb23 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program);
 void ConditionalBarrierPass(IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp64ToFp32(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
@@ -26,6 +26,7 @@ void SsaRewritePass(IR::Program& program);
 void PositionPass(Environment& env, IR::Program& program);
 void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info);
 void LayerPass(IR::Program& program, const HostTranslateInfo& host_info);
+void VendorWorkaroundPass(IR::Program& program);
 void VerificationPass(const IR::Program& program);
 
 // Dual Vertex
diff --git a/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
new file mode 100644
index 000000000..08c658cb8
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/vendor_workaround_pass.cpp
@@ -0,0 +1,79 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "shader_recompiler/frontend/ir/basic_block.h"
+#include "shader_recompiler/frontend/ir/ir_emitter.h"
+#include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/ir_opt/passes.h"
+
+namespace Shader::Optimization {
+
+namespace {
+void AddingByteSwapsWorkaround(IR::Block& block, IR::Inst& inst) {
+    /*
+     * Workaround for an NVIDIA bug seen in Super Mario RPG
+     *
+     * We are looking for this pattern:
+     *   %lhs_bfe = BitFieldUExtract %factor_a, #0, #16
+     *   %lhs_mul = IMul32 %lhs_bfe, %factor_b // potentially optional?
+     *   %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16
+     *   %rhs_bfe = BitFieldUExtract %factor_a, #16, #16
+     *   %result = IAdd32 %lhs_shl, %rhs_bfe
+     *
+     * And replacing the IAdd32 with a BitwiseOr32
+     *   %result = BitwiseOr32 %lhs_shl, %rhs_bfe
+     *
+     */
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_bfe) {
+        return;
+    }
+    if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
+        lhs_shl->Arg(1) != IR::Value{16U}) {
+        return;
+    }
+    if (rhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract || rhs_bfe->Arg(1) != IR::Value{16U} ||
+        rhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
+        return;
+    }
+    const bool lhs_mul_optional{lhs_mul->GetOpcode() == IR::Opcode::BitFieldUExtract};
+    if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 &&
+        lhs_mul->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul_optional ? lhs_mul : lhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe) {
+        return;
+    }
+    if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return;
+    }
+    if (lhs_bfe->Arg(1) != IR::Value{0U} || lhs_bfe->Arg(2) != IR::Value{16U}) {
+        return;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.BitwiseOr(IR::U32{inst.Arg(0)}, IR::U32{inst.Arg(1)}));
+}
+
+} // Anonymous namespace
+
+void VendorWorkaroundPass(IR::Program& program) {
+    for (IR::Block* const block : program.post_order_blocks) {
+        for (IR::Inst& inst : block->Instructions()) {
+            switch (inst.GetOpcode()) {
+            case IR::Opcode::IAdd32:
+                AddingByteSwapsWorkaround(*block, inst);
+                break;
+            default:
+                break;
+            }
+        }
+    }
+}
+
+} // namespace Shader::Optimization
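The rewrite this pass performs is sound because of a disjoint-bits identity: once the pattern matches, the left operand ends in sixteen zero bits (it was shifted left by 16) and the right operand is a 16-bit field (its high bits are zero), so the addition can never carry and `a + b == a | b`. A quick standalone check of that identity:

    #include <cstdint>

    // lhs only has its high 16 bits set (ShiftLeftLogical32 by 16) and
    // rhs only its low 16 bits (BitFieldUExtract at offset 16, count 16),
    // so no bit positions overlap, no carries occur, and '+' equals '|'.
    // This is why the pass may replace IAdd32 with BitwiseOr32.
    constexpr bool PackEquivalence(std::uint32_t hi16, std::uint32_t lo16) {
        const std::uint32_t lhs = (hi16 & 0xFFFFU) << 16;
        const std::uint32_t rhs = lo16 & 0xFFFFU;
        return lhs + rhs == (lhs | rhs);
    }

    static_assert(PackEquivalence(0xABCDU, 0x1234U));
    static_assert(PackEquivalence(0xFFFFU, 0xFFFFU)); // worst case still carry-free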
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index 38d820db2..66901a965 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -43,6 +43,7 @@ struct Profile {
     bool support_gl_sparse_textures{};
     bool support_gl_derivative_control{};
     bool support_scaled_attributes{};
+    bool support_multi_viewport{};
 
     bool warp_size_potentially_larger_than_guest{};
 
@@ -84,6 +85,8 @@ struct Profile {
 
     /// Maxwell and earlier nVidia architectures have broken robust support
     bool has_broken_robust{};
+
+    u64 min_ssbo_alignment{};
 };
 
 } // namespace Shader