Diffstat (limited to 'src/shader_recompiler')
 src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp             |  9 ++++++---
 src/shader_recompiler/backend/glsl/glsl_emit_context.cpp              |  5 ++++-
 src/shader_recompiler/backend/spirv/spirv_emit_context.cpp            |  4 +++-
 src/shader_recompiler/frontend/maxwell/translate_program.cpp          |  2 +-
 src/shader_recompiler/host_translate_info.h                           |  1 +
 src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | 13 +++++++++----
 src/shader_recompiler/ir_opt/passes.h                                 |  2 +-
 src/shader_recompiler/profile.h                                       |  2 ++
 8 files changed, 27 insertions(+), 11 deletions(-)
diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
index 2705ab140..9319ea007 100644
--- a/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
+++ b/src/shader_recompiler/backend/glasm/emit_glasm_memory.cpp
@@ -5,6 +5,7 @@
 #include "shader_recompiler/backend/glasm/glasm_emit_context.h"
 #include "shader_recompiler/frontend/ir/program.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/profile.h"
 #include "shader_recompiler/runtime_info.h"
 
 namespace Shader::Backend::GLASM {
@@ -35,7 +36,9 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
             continue;
         }
         const auto& ssbo{ctx.info.storage_buffers_descriptors[index]};
-        ctx.Add("LDC.U64 DC.x,c{}[{}];" // ssbo_addr
+        const u64 ssbo_align_mask{~(ctx.profile.min_ssbo_alignment - 1U)};
+        ctx.Add("LDC.U64 DC.x,c{}[{}];" // unaligned_ssbo_addr
+                "AND.U64 DC.x,DC.x,{};" // ssbo_addr = unaligned_ssbo_addr & ssbo_align_mask
                 "LDC.U32 RC.x,c{}[{}];" // ssbo_size_u32
                 "CVT.U64.U32 DC.y,RC.x;" // ssbo_size = ssbo_size_u32
                 "ADD.U64 DC.y,DC.y,DC.x;" // ssbo_end = ssbo_addr + ssbo_size
@@ -44,8 +47,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std
                 "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b
                 "IF NE.x;" // if cond
                 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr
-                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address,
-                address, address);
+                ssbo.cbuf_index, ssbo.cbuf_offset, ssbo_align_mask, ssbo.cbuf_index,
+                ssbo.cbuf_offset + 8, address, address, address);
         if (pointer_based) {
             ctx.Add("PK64.U DC.y,c[{}];" // host_ssbo = cbuf
                     "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index 9ff4028c2..fd9a99449 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -601,7 +601,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() {
         addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc));
         size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc));
     }
-    const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])};
+    const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)};
+    const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)};
+    const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])};
+    const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)};
     const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)};
     func += addr_statment;
 
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 57df6fc34..3350f1f85 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -891,7 +891,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
         const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
                                                  zero, ssbo_size_cbuf_offset)};
 
-        const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+        const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)};
+        const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
+        const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))};
         const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
         const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
         const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index 8fac6bad3..321ea625b 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -298,7 +298,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
 
     Optimization::PositionPass(env, program);
 
-    Optimization::GlobalMemoryToStorageBufferPass(program);
+    Optimization::GlobalMemoryToStorageBufferPass(program, host_info);
     Optimization::TexturePass(env, program, host_info);
 
     if (Settings::values.resolution_info.active) {
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 7d2ded907..1b53404fc 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -16,6 +16,7 @@ struct HostTranslateInfo {
     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
     bool support_snorm_render_buffer{}; ///< True when the device supports SNORM render buffers
     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS
+    u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
    bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
                                                ///< passthrough shaders
     bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index d1e59f22e..0cea79945 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
 #include "shader_recompiler/frontend/ir/breadth_first_search.h"
 #include "shader_recompiler/frontend/ir/ir_emitter.h"
 #include "shader_recompiler/frontend/ir/value.h"
+#include "shader_recompiler/host_translate_info.h"
 #include "shader_recompiler/ir_opt/passes.h"
 
 namespace Shader::Optimization {
@@ -408,7 +409,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
 }
 
 /// Returns the offset in indices (not bytes) for an equivalent storage instruction
-IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) {
+IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     IR::U32 offset;
     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
@@ -421,7 +422,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
     }
     // Subtract the least significant 32 bits from the guest offset. The result is the storage
     // buffer offset in bytes.
-    const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};
+
+    // Align the offset base to match the host alignment requirements
+    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
     return ir.ISub(offset, low_cbuf);
 }
 
@@ -516,7 +520,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
     }
 } // Anonymous namespace
 
-void GlobalMemoryToStorageBufferPass(IR::Program& program) {
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
     StorageInfo info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
@@ -540,7 +544,8 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}};
         IR::Block* const block{storage_inst.block};
         IR::Inst* const inst{storage_inst.inst};
-        const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)};
+        const IR::U32 offset{
+            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
         Replace(*block, *inst, index, offset);
     }
 }
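A hedged sketch of what the reworked StorageOffset now computes (the names below are illustrative, not the pass's actual helpers): the guest address is offset from the SSBO base rounded down to the host's minimum SSBO alignment rather than from the raw base, so the resulting byte offset matches a host buffer bound at that aligned address.

#include <cstdint>

// Illustrative only: low 32 bits of the guest address and of the SSBO base read from the cbuf.
std::uint32_t StorageByteOffset(std::uint32_t guest_addr_lo, std::uint32_t ssbo_base_lo,
                                std::uint32_t min_ssbo_alignment) {
    const std::uint32_t aligned_base = ssbo_base_lo & ~(min_ssbo_alignment - 1U);
    return guest_addr_lo - aligned_base; // offset into the buffer the host actually binds
}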
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index d4d5285e5..1e637cb23 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -16,7 +16,7 @@ void CollectShaderInfoPass(Environment& env, IR::Program& program);
 void ConditionalBarrierPass(IR::Program& program);
 void ConstantPropagationPass(Environment& env, IR::Program& program);
 void DeadCodeEliminationPass(IR::Program& program);
-void GlobalMemoryToStorageBufferPass(IR::Program& program);
+void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info);
 void IdentityRemovalPass(IR::Program& program);
 void LowerFp64ToFp32(IR::Program& program);
 void LowerFp16ToFp32(IR::Program& program);
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index a9de9f4a9..66901a965 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -85,6 +85,8 @@ struct Profile {
 
     /// Maxwell and earlier nVidia architectures have broken robust support
     bool has_broken_robust{};
+
+    u64 min_ssbo_alignment{};
 };
 
 } // namespace Shader