diff options
| author | 2021-04-19 16:33:23 -0300 | |
|---|---|---|
| committer | 2021-07-22 21:51:28 -0400 | |
| commit | 7018e524f5e6217b3259333acc4ea09ad036d331 (patch) | |
| tree | 58e750b08d48e018accc4de9a05cb483d825904c | |
| parent | spirv: Fix ViewportMask (diff) | |
| download | yuzu-7018e524f5e6217b3259333acc4ea09ad036d331.tar.gz yuzu-7018e524f5e6217b3259333acc4ea09ad036d331.tar.xz yuzu-7018e524f5e6217b3259333acc4ea09ad036d331.zip | |
shader: Add NVN storage buffer fallbacks
When we can't track the SSBO origin of a global memory instruction,
leave it as a global memory operation and assume these pointers are in
the NVN storage buffer slots, then perform a linear search over those
slots at shader runtime.
Diffstat (limited to '')
9 files changed, 214 insertions, 62 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 2ffa8c453..7f16cb0dc 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -411,6 +411,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin | |||
| 411 | DefineTextures(program.info, binding); | 411 | DefineTextures(program.info, binding); |
| 412 | DefineImages(program.info, binding); | 412 | DefineImages(program.info, binding); |
| 413 | DefineAttributeMemAccess(program.info); | 413 | DefineAttributeMemAccess(program.info); |
| 414 | DefineGlobalMemoryFunctions(program.info); | ||
| 414 | DefineLabels(program); | 415 | DefineLabels(program); |
| 415 | } | 416 | } |
| 416 | 417 | ||
| @@ -762,6 +763,82 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | |||
| 762 | } | 763 | } |
| 763 | } | 764 | } |
| 764 | 765 | ||
| 766 | void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | ||
| 767 | if (!info.uses_global_memory) { | ||
| 768 | return; | ||
| 769 | } | ||
| 770 | using DefPtr = Id StorageDefinitions::*; | ||
| 771 | const Id zero{u32_zero_value}; | ||
| 772 | const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, | ||
| 773 | auto&& callback) { | ||
| 774 | AddLabel(); | ||
| 775 | const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||
| 776 | for (size_t index = 0; index < num_buffers; ++index) { | ||
| 777 | const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||
| 778 | const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; | ||
| 779 | const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; | ||
| 780 | const Id ssbo_addr_pointer{OpAccessChain( | ||
| 781 | uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; | ||
| 782 | const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, | ||
| 783 | zero, ssbo_size_cbuf_offset)}; | ||
| 784 | |||
| 785 | const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | ||
| 786 | const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; | ||
| 787 | const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; | ||
| 788 | const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), | ||
| 789 | OpULessThan(U1, addr, ssbo_end))}; | ||
| 790 | const Id then_label{OpLabel()}; | ||
| 791 | const Id else_label{OpLabel()}; | ||
| 792 | OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); | ||
| 793 | OpBranchConditional(cond, then_label, else_label); | ||
| 794 | AddLabel(then_label); | ||
| 795 | const Id ssbo_id{ssbos[index].*ssbo_member}; | ||
| 796 | const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; | ||
| 797 | const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; | ||
| 798 | const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; | ||
| 799 | callback(ssbo_pointer); | ||
| 800 | AddLabel(else_label); | ||
| 801 | } | ||
| 802 | }}; | ||
| 803 | const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||
| 804 | const Id function_type{TypeFunction(type, U64)}; | ||
| 805 | const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; | ||
| 806 | const Id addr{OpFunctionParameter(U64)}; | ||
| 807 | define_body(ssbo_member, addr, element_pointer, shift, | ||
| 808 | [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); | ||
| 809 | OpReturnValue(ConstantNull(type)); | ||
| 810 | OpFunctionEnd(); | ||
| 811 | return func_id; | ||
| 812 | }}; | ||
| 813 | const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||
| 814 | const Id function_type{TypeFunction(void_id, U64, type)}; | ||
| 815 | const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; | ||
| 816 | const Id addr{OpFunctionParameter(U64)}; | ||
| 817 | const Id data{OpFunctionParameter(type)}; | ||
| 818 | define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { | ||
| 819 | OpStore(ssbo_pointer, data); | ||
| 820 | OpReturn(); | ||
| 821 | }); | ||
| 822 | OpReturn(); | ||
| 823 | OpFunctionEnd(); | ||
| 824 | return func_id; | ||
| 825 | }}; | ||
| 826 | const auto define{ | ||
| 827 | [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { | ||
| 828 | const Id element_type{type_def.element}; | ||
| 829 | const u32 shift{static_cast<u32>(std::countr_zero(size))}; | ||
| 830 | const Id load_func{define_load(ssbo_member, element_type, type, shift)}; | ||
| 831 | const Id write_func{define_write(ssbo_member, element_type, type, shift)}; | ||
| 832 | return std::make_pair(load_func, write_func); | ||
| 833 | }}; | ||
| 834 | std::tie(load_global_func_u32, write_global_func_u32) = | ||
| 835 | define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); | ||
| 836 | std::tie(load_global_func_u32x2, write_global_func_u32x2) = | ||
| 837 | define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); | ||
| 838 | std::tie(load_global_func_u32x4, write_global_func_u32x4) = | ||
| 839 | define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); | ||
| 840 | } | ||
| 841 | |||
| 765 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | 842 | void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { |
| 766 | if (info.constant_buffer_descriptors.empty()) { | 843 | if (info.constant_buffer_descriptors.empty()) { |
| 767 | return; | 844 | return; |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index ef8507367..a4503c7ab 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -224,6 +224,13 @@ public: | |||
| 224 | Id f32x2_min_cas{}; | 224 | Id f32x2_min_cas{}; |
| 225 | Id f32x2_max_cas{}; | 225 | Id f32x2_max_cas{}; |
| 226 | 226 | ||
| 227 | Id load_global_func_u32{}; | ||
| 228 | Id load_global_func_u32x2{}; | ||
| 229 | Id load_global_func_u32x4{}; | ||
| 230 | Id write_global_func_u32{}; | ||
| 231 | Id write_global_func_u32x2{}; | ||
| 232 | Id write_global_func_u32x4{}; | ||
| 233 | |||
| 227 | Id input_position{}; | 234 | Id input_position{}; |
| 228 | std::array<Id, 32> input_generics{}; | 235 | std::array<Id, 32> input_generics{}; |
| 229 | 236 | ||
| @@ -255,6 +262,7 @@ private: | |||
| 255 | void DefineTextures(const Info& info, u32& binding); | 262 | void DefineTextures(const Info& info, u32& binding); |
| 256 | void DefineImages(const Info& info, u32& binding); | 263 | void DefineImages(const Info& info, u32& binding); |
| 257 | void DefineAttributeMemAccess(const Info& info); | 264 | void DefineAttributeMemAccess(const Info& info); |
| 265 | void DefineGlobalMemoryFunctions(const Info& info); | ||
| 258 | void DefineLabels(IR::Program& program); | 266 | void DefineLabels(IR::Program& program); |
| 259 | 267 | ||
| 260 | void DefineInputs(const Info& info); | 268 | void DefineInputs(const Info& info); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 67d06faa0..89a82e858 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -84,16 +84,16 @@ void EmitLoadGlobalU8(EmitContext& ctx); | |||
| 84 | void EmitLoadGlobalS8(EmitContext& ctx); | 84 | void EmitLoadGlobalS8(EmitContext& ctx); |
| 85 | void EmitLoadGlobalU16(EmitContext& ctx); | 85 | void EmitLoadGlobalU16(EmitContext& ctx); |
| 86 | void EmitLoadGlobalS16(EmitContext& ctx); | 86 | void EmitLoadGlobalS16(EmitContext& ctx); |
| 87 | void EmitLoadGlobal32(EmitContext& ctx); | 87 | Id EmitLoadGlobal32(EmitContext& ctx, Id address); |
| 88 | void EmitLoadGlobal64(EmitContext& ctx); | 88 | Id EmitLoadGlobal64(EmitContext& ctx, Id address); |
| 89 | void EmitLoadGlobal128(EmitContext& ctx); | 89 | Id EmitLoadGlobal128(EmitContext& ctx, Id address); |
| 90 | void EmitWriteGlobalU8(EmitContext& ctx); | 90 | void EmitWriteGlobalU8(EmitContext& ctx); |
| 91 | void EmitWriteGlobalS8(EmitContext& ctx); | 91 | void EmitWriteGlobalS8(EmitContext& ctx); |
| 92 | void EmitWriteGlobalU16(EmitContext& ctx); | 92 | void EmitWriteGlobalU16(EmitContext& ctx); |
| 93 | void EmitWriteGlobalS16(EmitContext& ctx); | 93 | void EmitWriteGlobalS16(EmitContext& ctx); |
| 94 | void EmitWriteGlobal32(EmitContext& ctx); | 94 | void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); |
| 95 | void EmitWriteGlobal64(EmitContext& ctx); | 95 | void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); |
| 96 | void EmitWriteGlobal128(EmitContext& ctx); | 96 | void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); |
| 97 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | 97 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); |
| 98 | Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | 98 | Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); |
| 99 | Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | 99 | Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); |
| @@ -277,9 +277,9 @@ Id EmitFPIsNan16(EmitContext& ctx, Id value); | |||
| 277 | Id EmitFPIsNan32(EmitContext& ctx, Id value); | 277 | Id EmitFPIsNan32(EmitContext& ctx, Id value); |
| 278 | Id EmitFPIsNan64(EmitContext& ctx, Id value); | 278 | Id EmitFPIsNan64(EmitContext& ctx, Id value); |
| 279 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | 279 | Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); |
| 280 | void EmitIAdd64(EmitContext& ctx); | 280 | Id EmitIAdd64(EmitContext& ctx, Id a, Id b); |
| 281 | Id EmitISub32(EmitContext& ctx, Id a, Id b); | 281 | Id EmitISub32(EmitContext& ctx, Id a, Id b); |
| 282 | void EmitISub64(EmitContext& ctx); | 282 | Id EmitISub64(EmitContext& ctx, Id a, Id b); |
| 283 | Id EmitIMul32(EmitContext& ctx, Id a, Id b); | 283 | Id EmitIMul32(EmitContext& ctx, Id a, Id b); |
| 284 | Id EmitINeg32(EmitContext& ctx, Id value); | 284 | Id EmitINeg32(EmitContext& ctx, Id value); |
| 285 | Id EmitINeg64(EmitContext& ctx, Id value); | 285 | Id EmitINeg64(EmitContext& ctx, Id value); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index c12d0a513..cd5b1f42c 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp | |||
| @@ -55,16 +55,16 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | |||
| 55 | return result; | 55 | return result; |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | void EmitIAdd64(EmitContext&) { | 58 | Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { |
| 59 | throw NotImplementedException("SPIR-V Instruction"); | 59 | return ctx.OpIAdd(ctx.U64, a, b); |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | Id EmitISub32(EmitContext& ctx, Id a, Id b) { | 62 | Id EmitISub32(EmitContext& ctx, Id a, Id b) { |
| 63 | return ctx.OpISub(ctx.U32[1], a, b); | 63 | return ctx.OpISub(ctx.U32[1], a, b); |
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | void EmitISub64(EmitContext&) { | 66 | Id EmitISub64(EmitContext& ctx, Id a, Id b) { |
| 67 | throw NotImplementedException("SPIR-V Instruction"); | 67 | return ctx.OpISub(ctx.U64, a, b); |
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | Id EmitIMul32(EmitContext& ctx, Id a, Id b) { | 70 | Id EmitIMul32(EmitContext& ctx, Id a, Id b) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 7bf828995..8849258e3 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp | |||
| @@ -64,16 +64,16 @@ void EmitLoadGlobalS16(EmitContext&) { | |||
| 64 | throw NotImplementedException("SPIR-V Instruction"); | 64 | throw NotImplementedException("SPIR-V Instruction"); |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | void EmitLoadGlobal32(EmitContext&) { | 67 | Id EmitLoadGlobal32(EmitContext& ctx, Id address) { |
| 68 | throw NotImplementedException("SPIR-V Instruction"); | 68 | return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void EmitLoadGlobal64(EmitContext&) { | 71 | Id EmitLoadGlobal64(EmitContext& ctx, Id address) { |
| 72 | throw NotImplementedException("SPIR-V Instruction"); | 72 | return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | void EmitLoadGlobal128(EmitContext&) { | 75 | Id EmitLoadGlobal128(EmitContext& ctx, Id address) { |
| 76 | throw NotImplementedException("SPIR-V Instruction"); | 76 | return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | void EmitWriteGlobalU8(EmitContext&) { | 79 | void EmitWriteGlobalU8(EmitContext&) { |
| @@ -92,16 +92,16 @@ void EmitWriteGlobalS16(EmitContext&) { | |||
| 92 | throw NotImplementedException("SPIR-V Instruction"); | 92 | throw NotImplementedException("SPIR-V Instruction"); |
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | void EmitWriteGlobal32(EmitContext&) { | 95 | void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { |
| 96 | throw NotImplementedException("SPIR-V Instruction"); | 96 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | void EmitWriteGlobal64(EmitContext&) { | 99 | void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { |
| 100 | throw NotImplementedException("SPIR-V Instruction"); | 100 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | void EmitWriteGlobal128(EmitContext&) { | 103 | void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { |
| 104 | throw NotImplementedException("SPIR-V Instruction"); | 104 | ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | 107 | Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { |
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp index 20a1d61cc..14180dcd9 100644 --- a/src/shader_recompiler/frontend/maxwell/program.cpp +++ b/src/shader_recompiler/frontend/maxwell/program.cpp | |||
| @@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) { | |||
| 60 | }(); | 60 | }(); |
| 61 | } | 61 | } |
| 62 | } | 62 | } |
| 63 | |||
| 64 | void AddNVNStorageBuffers(IR::Program& program) { | ||
| 65 | if (!program.info.uses_global_memory) { | ||
| 66 | return; | ||
| 67 | } | ||
| 68 | const u32 driver_cbuf{0}; | ||
| 69 | const u32 descriptor_size{0x10}; | ||
| 70 | const u32 num_buffers{16}; | ||
| 71 | const u32 base{[&] { | ||
| 72 | switch (program.stage) { | ||
| 73 | case Stage::VertexA: | ||
| 74 | case Stage::VertexB: | ||
| 75 | return 0x110u; | ||
| 76 | case Stage::TessellationControl: | ||
| 77 | return 0x210u; | ||
| 78 | case Stage::TessellationEval: | ||
| 79 | return 0x310u; | ||
| 80 | case Stage::Geometry: | ||
| 81 | return 0x410u; | ||
| 82 | case Stage::Fragment: | ||
| 83 | return 0x510u; | ||
| 84 | case Stage::Compute: | ||
| 85 | return 0x310u; | ||
| 86 | } | ||
| 87 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 88 | }()}; | ||
| 89 | auto& descs{program.info.storage_buffers_descriptors}; | ||
| 90 | for (u32 index = 0; index < num_buffers; ++index) { | ||
| 91 | const u32 offset{base + index * descriptor_size}; | ||
| 92 | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; | ||
| 93 | if (it != descs.end()) { | ||
| 94 | continue; | ||
| 95 | } | ||
| 96 | // Assume these are written for now | ||
| 97 | descs.push_back({ | ||
| 98 | .cbuf_index = driver_cbuf, | ||
| 99 | .cbuf_offset = offset, | ||
| 100 | .count = 1, | ||
| 101 | .is_written = true, | ||
| 102 | }); | ||
| 103 | } | ||
| 104 | } | ||
| 63 | } // Anonymous namespace | 105 | } // Anonymous namespace |
| 64 | 106 | ||
| 65 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | 107 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, |
| @@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||
| 105 | Optimization::VerificationPass(program); | 147 | Optimization::VerificationPass(program); |
| 106 | Optimization::CollectShaderInfoPass(env, program); | 148 | Optimization::CollectShaderInfoPass(env, program); |
| 107 | CollectInterpolationInfo(env, program); | 149 | CollectInterpolationInfo(env, program); |
| 150 | AddNVNStorageBuffers(program); | ||
| 108 | return program; | 151 | return program; |
| 109 | } | 152 | } |
| 110 | 153 | ||
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 0500a5141..cccf0909d 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -187,6 +187,8 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 187 | case IR::Opcode::FPUnordGreaterThanEqual16: | 187 | case IR::Opcode::FPUnordGreaterThanEqual16: |
| 188 | case IR::Opcode::FPIsNan16: | 188 | case IR::Opcode::FPIsNan16: |
| 189 | case IR::Opcode::GlobalAtomicAddF16x2: | 189 | case IR::Opcode::GlobalAtomicAddF16x2: |
| 190 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 191 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 190 | case IR::Opcode::StorageAtomicAddF16x2: | 192 | case IR::Opcode::StorageAtomicAddF16x2: |
| 191 | case IR::Opcode::StorageAtomicMinF16x2: | 193 | case IR::Opcode::StorageAtomicMinF16x2: |
| 192 | case IR::Opcode::StorageAtomicMaxF16x2: | 194 | case IR::Opcode::StorageAtomicMaxF16x2: |
| @@ -373,7 +375,58 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 373 | case IR::Opcode::StorageAtomicAnd64: | 375 | case IR::Opcode::StorageAtomicAnd64: |
| 374 | case IR::Opcode::StorageAtomicOr64: | 376 | case IR::Opcode::StorageAtomicOr64: |
| 375 | case IR::Opcode::StorageAtomicXor64: | 377 | case IR::Opcode::StorageAtomicXor64: |
| 378 | case IR::Opcode::StorageAtomicExchange64: | ||
| 379 | info.uses_int64 = true; | ||
| 380 | break; | ||
| 381 | default: | ||
| 382 | break; | ||
| 383 | } | ||
| 384 | switch (inst.GetOpcode()) { | ||
| 385 | case IR::Opcode::LoadGlobalU8: | ||
| 386 | case IR::Opcode::LoadGlobalS8: | ||
| 387 | case IR::Opcode::LoadGlobalU16: | ||
| 388 | case IR::Opcode::LoadGlobalS16: | ||
| 389 | case IR::Opcode::LoadGlobal32: | ||
| 390 | case IR::Opcode::LoadGlobal64: | ||
| 391 | case IR::Opcode::LoadGlobal128: | ||
| 392 | case IR::Opcode::WriteGlobalU8: | ||
| 393 | case IR::Opcode::WriteGlobalS8: | ||
| 394 | case IR::Opcode::WriteGlobalU16: | ||
| 395 | case IR::Opcode::WriteGlobalS16: | ||
| 396 | case IR::Opcode::WriteGlobal32: | ||
| 397 | case IR::Opcode::WriteGlobal64: | ||
| 398 | case IR::Opcode::WriteGlobal128: | ||
| 399 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 400 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 401 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 402 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 403 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 404 | case IR::Opcode::GlobalAtomicInc32: | ||
| 405 | case IR::Opcode::GlobalAtomicDec32: | ||
| 406 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 407 | case IR::Opcode::GlobalAtomicOr32: | ||
| 408 | case IR::Opcode::GlobalAtomicXor32: | ||
| 409 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 410 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 411 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 412 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 413 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 414 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 415 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 416 | case IR::Opcode::GlobalAtomicOr64: | ||
| 417 | case IR::Opcode::GlobalAtomicXor64: | ||
| 418 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 419 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 420 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 421 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 422 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 423 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 424 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 425 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 376 | info.uses_int64 = true; | 426 | info.uses_int64 = true; |
| 427 | info.uses_global_memory = true; | ||
| 428 | info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; | ||
| 429 | info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; | ||
| 377 | break; | 430 | break; |
| 378 | default: | 431 | default: |
| 379 | break; | 432 | break; |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 378a3a915..f294d297f 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include <boost/container/flat_set.hpp> | 11 | #include <boost/container/flat_set.hpp> |
| 12 | #include <boost/container/small_vector.hpp> | 12 | #include <boost/container/small_vector.hpp> |
| 13 | 13 | ||
| 14 | #include "common/alignment.h" | ||
| 14 | #include "shader_recompiler/frontend/ir/basic_block.h" | 15 | #include "shader_recompiler/frontend/ir/basic_block.h" |
| 15 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | 16 | #include "shader_recompiler/frontend/ir/breadth_first_search.h" |
| 16 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | 17 | #include "shader_recompiler/frontend/ir/ir_emitter.h" |
| @@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce | |||
| 244 | storage_buffer.offset < bias.offset_end; | 245 | storage_buffer.offset < bias.offset_end; |
| 245 | } | 246 | } |
| 246 | 247 | ||
| 247 | /// Discards a global memory operation, reads return zero and writes are ignored | ||
| 248 | void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) { | ||
| 249 | IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 250 | const IR::Value zero{u32{0}}; | ||
| 251 | switch (inst.GetOpcode()) { | ||
| 252 | case IR::Opcode::LoadGlobalS8: | ||
| 253 | case IR::Opcode::LoadGlobalU8: | ||
| 254 | case IR::Opcode::LoadGlobalS16: | ||
| 255 | case IR::Opcode::LoadGlobalU16: | ||
| 256 | case IR::Opcode::LoadGlobal32: | ||
| 257 | inst.ReplaceUsesWith(zero); | ||
| 258 | break; | ||
| 259 | case IR::Opcode::LoadGlobal64: | ||
| 260 | inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)}); | ||
| 261 | break; | ||
| 262 | case IR::Opcode::LoadGlobal128: | ||
| 263 | inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)}); | ||
| 264 | break; | ||
| 265 | case IR::Opcode::WriteGlobalS8: | ||
| 266 | case IR::Opcode::WriteGlobalU8: | ||
| 267 | case IR::Opcode::WriteGlobalS16: | ||
| 268 | case IR::Opcode::WriteGlobalU16: | ||
| 269 | case IR::Opcode::WriteGlobal32: | ||
| 270 | case IR::Opcode::WriteGlobal64: | ||
| 271 | case IR::Opcode::WriteGlobal128: | ||
| 272 | inst.Invalidate(); | ||
| 273 | break; | ||
| 274 | default: | ||
| 275 | throw LogicError("Invalid opcode to discard its global memory operation {}", | ||
| 276 | inst.GetOpcode()); | ||
| 277 | } | ||
| 278 | } | ||
| 279 | |||
| 280 | struct LowAddrInfo { | 248 | struct LowAddrInfo { |
| 281 | IR::U32 value; | 249 | IR::U32 value; |
| 282 | s32 imm_offset; | 250 | s32 imm_offset; |
| @@ -350,6 +318,10 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) | |||
| 350 | .index{index.U32()}, | 318 | .index{index.U32()}, |
| 351 | .offset{offset.U32()}, | 319 | .offset{offset.U32()}, |
| 352 | }; | 320 | }; |
| 321 | if (!Common::IsAligned(storage_buffer.offset, 16)) { | ||
| 322 | // The SSBO pointer has to be aligned | ||
| 323 | return std::nullopt; | ||
| 324 | } | ||
| 353 | if (bias && !MeetsBias(storage_buffer, *bias)) { | 325 | if (bias && !MeetsBias(storage_buffer, *bias)) { |
| 354 | // We have to blacklist some addresses in case we wrongly | 326 | // We have to blacklist some addresses in case we wrongly |
| 355 | // point to them | 327 | // point to them |
| @@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) | |||
| 372 | // Track the low address of the instruction | 344 | // Track the low address of the instruction |
| 373 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; | 345 | const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; |
| 374 | if (!low_addr_info) { | 346 | if (!low_addr_info) { |
| 375 | DiscardGlobalMemory(block, inst); | 347 | // Failed to track the low address, use NVN fallbacks |
| 376 | return; | 348 | return; |
| 377 | } | 349 | } |
| 378 | // First try to find storage buffers in the NVN address | 350 | // First try to find storage buffers in the NVN address |
| 379 | const IR::U32 low_addr{low_addr_info->value}; | 351 | const IR::U32 low_addr{low_addr_info->value}; |
| 380 | std::optional storage_buffer{Track(low_addr, &nvn_bias)}; | 352 | std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)}; |
| 381 | if (!storage_buffer) { | 353 | if (!storage_buffer) { |
| 382 | // If it fails, track without a bias | 354 | // If it fails, track without a bias |
| 383 | storage_buffer = Track(low_addr, nullptr); | 355 | storage_buffer = Track(low_addr, nullptr); |
| 384 | if (!storage_buffer) { | 356 | if (!storage_buffer) { |
| 385 | // If that also failed, drop the global memory usage | 357 | // If that also fails, use NVN fallbacks |
| 386 | // LOG_ERROR | ||
| 387 | DiscardGlobalMemory(block, inst); | ||
| 388 | return; | 358 | return; |
| 389 | } | 359 | } |
| 390 | } | 360 | } |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index f808adeba..50b4d1c05 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -162,6 +162,7 @@ struct Info { | |||
| 162 | bool uses_atomic_f32x2_min{}; | 162 | bool uses_atomic_f32x2_min{}; |
| 163 | bool uses_atomic_f32x2_max{}; | 163 | bool uses_atomic_f32x2_max{}; |
| 164 | bool uses_int64_bit_atomics{}; | 164 | bool uses_int64_bit_atomics{}; |
| 165 | bool uses_global_memory{}; | ||
| 165 | 166 | ||
| 166 | IR::Type used_constant_buffer_types{}; | 167 | IR::Type used_constant_buffer_types{}; |
| 167 | IR::Type used_storage_buffer_types{}; | 168 | IR::Type used_storage_buffer_types{}; |