summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-04-19 16:33:23 -0300
committerGravatar ameerj2021-07-22 21:51:28 -0400
commit7018e524f5e6217b3259333acc4ea09ad036d331 (patch)
tree58e750b08d48e018accc4de9a05cb483d825904c
parentspirv: Fix ViewportMask (diff)
downloadyuzu-7018e524f5e6217b3259333acc4ea09ad036d331.tar.gz
yuzu-7018e524f5e6217b3259333acc4ea09ad036d331.tar.xz
yuzu-7018e524f5e6217b3259333acc4ea09ad036d331.zip
shader: Add NVN storage buffer fallbacks
When we can't track the SSBO origin of a global memory instruction, leave it as a global memory operation and assume these pointers fall within the NVN storage buffer slots, then perform a linear search over those slots at shader runtime to resolve the buffer.
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp77
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h8
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h16
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp8
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp24
-rw-r--r--src/shader_recompiler/frontend/maxwell/program.cpp43
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp53
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp46
-rw-r--r--src/shader_recompiler/shader_info.h1
9 files changed, 214 insertions, 62 deletions
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 2ffa8c453..7f16cb0dc 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -411,6 +411,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin
411 DefineTextures(program.info, binding); 411 DefineTextures(program.info, binding);
412 DefineImages(program.info, binding); 412 DefineImages(program.info, binding);
413 DefineAttributeMemAccess(program.info); 413 DefineAttributeMemAccess(program.info);
414 DefineGlobalMemoryFunctions(program.info);
414 DefineLabels(program); 415 DefineLabels(program);
415} 416}
416 417
@@ -762,6 +763,82 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
762 } 763 }
763} 764}
764 765
766void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
767 if (!info.uses_global_memory) {
768 return;
769 }
770 using DefPtr = Id StorageDefinitions::*;
771 const Id zero{u32_zero_value};
772 const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift,
773 auto&& callback) {
774 AddLabel();
775 const size_t num_buffers{info.storage_buffers_descriptors.size()};
776 for (size_t index = 0; index < num_buffers; ++index) {
777 const auto& ssbo{info.storage_buffers_descriptors[index]};
778 const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
779 const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
780 const Id ssbo_addr_pointer{OpAccessChain(
781 uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)};
782 const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32,
783 zero, ssbo_size_cbuf_offset)};
784
785 const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))};
786 const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))};
787 const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)};
788 const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr),
789 OpULessThan(U1, addr, ssbo_end))};
790 const Id then_label{OpLabel()};
791 const Id else_label{OpLabel()};
792 OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone);
793 OpBranchConditional(cond, then_label, else_label);
794 AddLabel(then_label);
795 const Id ssbo_id{ssbos[index].*ssbo_member};
796 const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))};
797 const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))};
798 const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)};
799 callback(ssbo_pointer);
800 AddLabel(else_label);
801 }
802 }};
803 const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
804 const Id function_type{TypeFunction(type, U64)};
805 const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)};
806 const Id addr{OpFunctionParameter(U64)};
807 define_body(ssbo_member, addr, element_pointer, shift,
808 [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); });
809 OpReturnValue(ConstantNull(type));
810 OpFunctionEnd();
811 return func_id;
812 }};
813 const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) {
814 const Id function_type{TypeFunction(void_id, U64, type)};
815 const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)};
816 const Id addr{OpFunctionParameter(U64)};
817 const Id data{OpFunctionParameter(type)};
818 define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) {
819 OpStore(ssbo_pointer, data);
820 OpReturn();
821 });
822 OpReturn();
823 OpFunctionEnd();
824 return func_id;
825 }};
826 const auto define{
827 [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) {
828 const Id element_type{type_def.element};
829 const u32 shift{static_cast<u32>(std::countr_zero(size))};
830 const Id load_func{define_load(ssbo_member, element_type, type, shift)};
831 const Id write_func{define_write(ssbo_member, element_type, type, shift)};
832 return std::make_pair(load_func, write_func);
833 }};
834 std::tie(load_global_func_u32, write_global_func_u32) =
835 define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32));
836 std::tie(load_global_func_u32x2, write_global_func_u32x2) =
837 define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2]));
838 std::tie(load_global_func_u32x4, write_global_func_u32x4) =
839 define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4]));
840}
841
765void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { 842void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
766 if (info.constant_buffer_descriptors.empty()) { 843 if (info.constant_buffer_descriptors.empty()) {
767 return; 844 return;
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index ef8507367..a4503c7ab 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -224,6 +224,13 @@ public:
224 Id f32x2_min_cas{}; 224 Id f32x2_min_cas{};
225 Id f32x2_max_cas{}; 225 Id f32x2_max_cas{};
226 226
227 Id load_global_func_u32{};
228 Id load_global_func_u32x2{};
229 Id load_global_func_u32x4{};
230 Id write_global_func_u32{};
231 Id write_global_func_u32x2{};
232 Id write_global_func_u32x4{};
233
227 Id input_position{}; 234 Id input_position{};
228 std::array<Id, 32> input_generics{}; 235 std::array<Id, 32> input_generics{};
229 236
@@ -255,6 +262,7 @@ private:
255 void DefineTextures(const Info& info, u32& binding); 262 void DefineTextures(const Info& info, u32& binding);
256 void DefineImages(const Info& info, u32& binding); 263 void DefineImages(const Info& info, u32& binding);
257 void DefineAttributeMemAccess(const Info& info); 264 void DefineAttributeMemAccess(const Info& info);
265 void DefineGlobalMemoryFunctions(const Info& info);
258 void DefineLabels(IR::Program& program); 266 void DefineLabels(IR::Program& program);
259 267
260 void DefineInputs(const Info& info); 268 void DefineInputs(const Info& info);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 67d06faa0..89a82e858 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -84,16 +84,16 @@ void EmitLoadGlobalU8(EmitContext& ctx);
84void EmitLoadGlobalS8(EmitContext& ctx); 84void EmitLoadGlobalS8(EmitContext& ctx);
85void EmitLoadGlobalU16(EmitContext& ctx); 85void EmitLoadGlobalU16(EmitContext& ctx);
86void EmitLoadGlobalS16(EmitContext& ctx); 86void EmitLoadGlobalS16(EmitContext& ctx);
87void EmitLoadGlobal32(EmitContext& ctx); 87Id EmitLoadGlobal32(EmitContext& ctx, Id address);
88void EmitLoadGlobal64(EmitContext& ctx); 88Id EmitLoadGlobal64(EmitContext& ctx, Id address);
89void EmitLoadGlobal128(EmitContext& ctx); 89Id EmitLoadGlobal128(EmitContext& ctx, Id address);
90void EmitWriteGlobalU8(EmitContext& ctx); 90void EmitWriteGlobalU8(EmitContext& ctx);
91void EmitWriteGlobalS8(EmitContext& ctx); 91void EmitWriteGlobalS8(EmitContext& ctx);
92void EmitWriteGlobalU16(EmitContext& ctx); 92void EmitWriteGlobalU16(EmitContext& ctx);
93void EmitWriteGlobalS16(EmitContext& ctx); 93void EmitWriteGlobalS16(EmitContext& ctx);
94void EmitWriteGlobal32(EmitContext& ctx); 94void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value);
95void EmitWriteGlobal64(EmitContext& ctx); 95void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value);
96void EmitWriteGlobal128(EmitContext& ctx); 96void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value);
97Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); 97Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
98Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); 98Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
99Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); 99Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
@@ -277,9 +277,9 @@ Id EmitFPIsNan16(EmitContext& ctx, Id value);
277Id EmitFPIsNan32(EmitContext& ctx, Id value); 277Id EmitFPIsNan32(EmitContext& ctx, Id value);
278Id EmitFPIsNan64(EmitContext& ctx, Id value); 278Id EmitFPIsNan64(EmitContext& ctx, Id value);
279Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); 279Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
280void EmitIAdd64(EmitContext& ctx); 280Id EmitIAdd64(EmitContext& ctx, Id a, Id b);
281Id EmitISub32(EmitContext& ctx, Id a, Id b); 281Id EmitISub32(EmitContext& ctx, Id a, Id b);
282void EmitISub64(EmitContext& ctx); 282Id EmitISub64(EmitContext& ctx, Id a, Id b);
283Id EmitIMul32(EmitContext& ctx, Id a, Id b); 283Id EmitIMul32(EmitContext& ctx, Id a, Id b);
284Id EmitINeg32(EmitContext& ctx, Id value); 284Id EmitINeg32(EmitContext& ctx, Id value);
285Id EmitINeg64(EmitContext& ctx, Id value); 285Id EmitINeg64(EmitContext& ctx, Id value);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
index c12d0a513..cd5b1f42c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp
@@ -55,16 +55,16 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
55 return result; 55 return result;
56} 56}
57 57
58void EmitIAdd64(EmitContext&) { 58Id EmitIAdd64(EmitContext& ctx, Id a, Id b) {
59 throw NotImplementedException("SPIR-V Instruction"); 59 return ctx.OpIAdd(ctx.U64, a, b);
60} 60}
61 61
62Id EmitISub32(EmitContext& ctx, Id a, Id b) { 62Id EmitISub32(EmitContext& ctx, Id a, Id b) {
63 return ctx.OpISub(ctx.U32[1], a, b); 63 return ctx.OpISub(ctx.U32[1], a, b);
64} 64}
65 65
66void EmitISub64(EmitContext&) { 66Id EmitISub64(EmitContext& ctx, Id a, Id b) {
67 throw NotImplementedException("SPIR-V Instruction"); 67 return ctx.OpISub(ctx.U64, a, b);
68} 68}
69 69
70Id EmitIMul32(EmitContext& ctx, Id a, Id b) { 70Id EmitIMul32(EmitContext& ctx, Id a, Id b) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
index 7bf828995..8849258e3 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp
@@ -64,16 +64,16 @@ void EmitLoadGlobalS16(EmitContext&) {
64 throw NotImplementedException("SPIR-V Instruction"); 64 throw NotImplementedException("SPIR-V Instruction");
65} 65}
66 66
67void EmitLoadGlobal32(EmitContext&) { 67Id EmitLoadGlobal32(EmitContext& ctx, Id address) {
68 throw NotImplementedException("SPIR-V Instruction"); 68 return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address);
69} 69}
70 70
71void EmitLoadGlobal64(EmitContext&) { 71Id EmitLoadGlobal64(EmitContext& ctx, Id address) {
72 throw NotImplementedException("SPIR-V Instruction"); 72 return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address);
73} 73}
74 74
75void EmitLoadGlobal128(EmitContext&) { 75Id EmitLoadGlobal128(EmitContext& ctx, Id address) {
76 throw NotImplementedException("SPIR-V Instruction"); 76 return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address);
77} 77}
78 78
79void EmitWriteGlobalU8(EmitContext&) { 79void EmitWriteGlobalU8(EmitContext&) {
@@ -92,16 +92,16 @@ void EmitWriteGlobalS16(EmitContext&) {
92 throw NotImplementedException("SPIR-V Instruction"); 92 throw NotImplementedException("SPIR-V Instruction");
93} 93}
94 94
95void EmitWriteGlobal32(EmitContext&) { 95void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) {
96 throw NotImplementedException("SPIR-V Instruction"); 96 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value);
97} 97}
98 98
99void EmitWriteGlobal64(EmitContext&) { 99void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) {
100 throw NotImplementedException("SPIR-V Instruction"); 100 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value);
101} 101}
102 102
103void EmitWriteGlobal128(EmitContext&) { 103void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) {
104 throw NotImplementedException("SPIR-V Instruction"); 104 ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value);
105} 105}
106 106
107Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { 107Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
diff --git a/src/shader_recompiler/frontend/maxwell/program.cpp b/src/shader_recompiler/frontend/maxwell/program.cpp
index 20a1d61cc..14180dcd9 100644
--- a/src/shader_recompiler/frontend/maxwell/program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/program.cpp
@@ -60,6 +60,48 @@ void CollectInterpolationInfo(Environment& env, IR::Program& program) {
60 }(); 60 }();
61 } 61 }
62} 62}
63
64void AddNVNStorageBuffers(IR::Program& program) {
65 if (!program.info.uses_global_memory) {
66 return;
67 }
68 const u32 driver_cbuf{0};
69 const u32 descriptor_size{0x10};
70 const u32 num_buffers{16};
71 const u32 base{[&] {
72 switch (program.stage) {
73 case Stage::VertexA:
74 case Stage::VertexB:
75 return 0x110u;
76 case Stage::TessellationControl:
77 return 0x210u;
78 case Stage::TessellationEval:
79 return 0x310u;
80 case Stage::Geometry:
81 return 0x410u;
82 case Stage::Fragment:
83 return 0x510u;
84 case Stage::Compute:
85 return 0x310u;
86 }
87 throw InvalidArgument("Invalid stage {}", program.stage);
88 }()};
89 auto& descs{program.info.storage_buffers_descriptors};
90 for (u32 index = 0; index < num_buffers; ++index) {
91 const u32 offset{base + index * descriptor_size};
92 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
93 if (it != descs.end()) {
94 continue;
95 }
96 // Assume these are written for now
97 descs.push_back({
98 .cbuf_index = driver_cbuf,
99 .cbuf_offset = offset,
100 .count = 1,
101 .is_written = true,
102 });
103 }
104}
63} // Anonymous namespace 105} // Anonymous namespace
64 106
65IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 107IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
@@ -105,6 +147,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
105 Optimization::VerificationPass(program); 147 Optimization::VerificationPass(program);
106 Optimization::CollectShaderInfoPass(env, program); 148 Optimization::CollectShaderInfoPass(env, program);
107 CollectInterpolationInfo(env, program); 149 CollectInterpolationInfo(env, program);
150 AddNVNStorageBuffers(program);
108 return program; 151 return program;
109} 152}
110 153
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 0500a5141..cccf0909d 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -187,6 +187,8 @@ void VisitUsages(Info& info, IR::Inst& inst) {
187 case IR::Opcode::FPUnordGreaterThanEqual16: 187 case IR::Opcode::FPUnordGreaterThanEqual16:
188 case IR::Opcode::FPIsNan16: 188 case IR::Opcode::FPIsNan16:
189 case IR::Opcode::GlobalAtomicAddF16x2: 189 case IR::Opcode::GlobalAtomicAddF16x2:
190 case IR::Opcode::GlobalAtomicMinF16x2:
191 case IR::Opcode::GlobalAtomicMaxF16x2:
190 case IR::Opcode::StorageAtomicAddF16x2: 192 case IR::Opcode::StorageAtomicAddF16x2:
191 case IR::Opcode::StorageAtomicMinF16x2: 193 case IR::Opcode::StorageAtomicMinF16x2:
192 case IR::Opcode::StorageAtomicMaxF16x2: 194 case IR::Opcode::StorageAtomicMaxF16x2:
@@ -373,7 +375,58 @@ void VisitUsages(Info& info, IR::Inst& inst) {
373 case IR::Opcode::StorageAtomicAnd64: 375 case IR::Opcode::StorageAtomicAnd64:
374 case IR::Opcode::StorageAtomicOr64: 376 case IR::Opcode::StorageAtomicOr64:
375 case IR::Opcode::StorageAtomicXor64: 377 case IR::Opcode::StorageAtomicXor64:
378 case IR::Opcode::StorageAtomicExchange64:
379 info.uses_int64 = true;
380 break;
381 default:
382 break;
383 }
384 switch (inst.GetOpcode()) {
385 case IR::Opcode::LoadGlobalU8:
386 case IR::Opcode::LoadGlobalS8:
387 case IR::Opcode::LoadGlobalU16:
388 case IR::Opcode::LoadGlobalS16:
389 case IR::Opcode::LoadGlobal32:
390 case IR::Opcode::LoadGlobal64:
391 case IR::Opcode::LoadGlobal128:
392 case IR::Opcode::WriteGlobalU8:
393 case IR::Opcode::WriteGlobalS8:
394 case IR::Opcode::WriteGlobalU16:
395 case IR::Opcode::WriteGlobalS16:
396 case IR::Opcode::WriteGlobal32:
397 case IR::Opcode::WriteGlobal64:
398 case IR::Opcode::WriteGlobal128:
399 case IR::Opcode::GlobalAtomicIAdd32:
400 case IR::Opcode::GlobalAtomicSMin32:
401 case IR::Opcode::GlobalAtomicUMin32:
402 case IR::Opcode::GlobalAtomicSMax32:
403 case IR::Opcode::GlobalAtomicUMax32:
404 case IR::Opcode::GlobalAtomicInc32:
405 case IR::Opcode::GlobalAtomicDec32:
406 case IR::Opcode::GlobalAtomicAnd32:
407 case IR::Opcode::GlobalAtomicOr32:
408 case IR::Opcode::GlobalAtomicXor32:
409 case IR::Opcode::GlobalAtomicExchange32:
410 case IR::Opcode::GlobalAtomicIAdd64:
411 case IR::Opcode::GlobalAtomicSMin64:
412 case IR::Opcode::GlobalAtomicUMin64:
413 case IR::Opcode::GlobalAtomicSMax64:
414 case IR::Opcode::GlobalAtomicUMax64:
415 case IR::Opcode::GlobalAtomicAnd64:
416 case IR::Opcode::GlobalAtomicOr64:
417 case IR::Opcode::GlobalAtomicXor64:
418 case IR::Opcode::GlobalAtomicExchange64:
419 case IR::Opcode::GlobalAtomicAddF32:
420 case IR::Opcode::GlobalAtomicAddF16x2:
421 case IR::Opcode::GlobalAtomicAddF32x2:
422 case IR::Opcode::GlobalAtomicMinF16x2:
423 case IR::Opcode::GlobalAtomicMinF32x2:
424 case IR::Opcode::GlobalAtomicMaxF16x2:
425 case IR::Opcode::GlobalAtomicMaxF32x2:
376 info.uses_int64 = true; 426 info.uses_int64 = true;
427 info.uses_global_memory = true;
428 info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
429 info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4;
377 break; 430 break;
378 default: 431 default:
379 break; 432 break;
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index 378a3a915..f294d297f 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -11,6 +11,7 @@
11#include <boost/container/flat_set.hpp> 11#include <boost/container/flat_set.hpp>
12#include <boost/container/small_vector.hpp> 12#include <boost/container/small_vector.hpp>
13 13
14#include "common/alignment.h"
14#include "shader_recompiler/frontend/ir/basic_block.h" 15#include "shader_recompiler/frontend/ir/basic_block.h"
15#include "shader_recompiler/frontend/ir/breadth_first_search.h" 16#include "shader_recompiler/frontend/ir/breadth_first_search.h"
16#include "shader_recompiler/frontend/ir/ir_emitter.h" 17#include "shader_recompiler/frontend/ir/ir_emitter.h"
@@ -244,39 +245,6 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
244 storage_buffer.offset < bias.offset_end; 245 storage_buffer.offset < bias.offset_end;
245} 246}
246 247
247/// Discards a global memory operation, reads return zero and writes are ignored
248void DiscardGlobalMemory(IR::Block& block, IR::Inst& inst) {
249 IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
250 const IR::Value zero{u32{0}};
251 switch (inst.GetOpcode()) {
252 case IR::Opcode::LoadGlobalS8:
253 case IR::Opcode::LoadGlobalU8:
254 case IR::Opcode::LoadGlobalS16:
255 case IR::Opcode::LoadGlobalU16:
256 case IR::Opcode::LoadGlobal32:
257 inst.ReplaceUsesWith(zero);
258 break;
259 case IR::Opcode::LoadGlobal64:
260 inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero)});
261 break;
262 case IR::Opcode::LoadGlobal128:
263 inst.ReplaceUsesWith(IR::Value{ir.CompositeConstruct(zero, zero, zero, zero)});
264 break;
265 case IR::Opcode::WriteGlobalS8:
266 case IR::Opcode::WriteGlobalU8:
267 case IR::Opcode::WriteGlobalS16:
268 case IR::Opcode::WriteGlobalU16:
269 case IR::Opcode::WriteGlobal32:
270 case IR::Opcode::WriteGlobal64:
271 case IR::Opcode::WriteGlobal128:
272 inst.Invalidate();
273 break;
274 default:
275 throw LogicError("Invalid opcode to discard its global memory operation {}",
276 inst.GetOpcode());
277 }
278}
279
280struct LowAddrInfo { 248struct LowAddrInfo {
281 IR::U32 value; 249 IR::U32 value;
282 s32 imm_offset; 250 s32 imm_offset;
@@ -350,6 +318,10 @@ std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias)
350 .index{index.U32()}, 318 .index{index.U32()},
351 .offset{offset.U32()}, 319 .offset{offset.U32()},
352 }; 320 };
321 if (!Common::IsAligned(storage_buffer.offset, 16)) {
322 // The SSBO pointer has to be aligned
323 return std::nullopt;
324 }
353 if (bias && !MeetsBias(storage_buffer, *bias)) { 325 if (bias && !MeetsBias(storage_buffer, *bias)) {
354 // We have to blacklist some addresses in case we wrongly 326 // We have to blacklist some addresses in case we wrongly
355 // point to them 327 // point to them
@@ -372,19 +344,17 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info)
372 // Track the low address of the instruction 344 // Track the low address of the instruction
373 const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)}; 345 const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
374 if (!low_addr_info) { 346 if (!low_addr_info) {
375 DiscardGlobalMemory(block, inst); 347 // Failed to track the low address, use NVN fallbacks
376 return; 348 return;
377 } 349 }
378 // First try to find storage buffers in the NVN address 350 // First try to find storage buffers in the NVN address
379 const IR::U32 low_addr{low_addr_info->value}; 351 const IR::U32 low_addr{low_addr_info->value};
380 std::optional storage_buffer{Track(low_addr, &nvn_bias)}; 352 std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
381 if (!storage_buffer) { 353 if (!storage_buffer) {
382 // If it fails, track without a bias 354 // If it fails, track without a bias
383 storage_buffer = Track(low_addr, nullptr); 355 storage_buffer = Track(low_addr, nullptr);
384 if (!storage_buffer) { 356 if (!storage_buffer) {
385 // If that also failed, drop the global memory usage 357 // If that also fails, use NVN fallbacks
386 // LOG_ERROR
387 DiscardGlobalMemory(block, inst);
388 return; 358 return;
389 } 359 }
390 } 360 }
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index f808adeba..50b4d1c05 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -162,6 +162,7 @@ struct Info {
162 bool uses_atomic_f32x2_min{}; 162 bool uses_atomic_f32x2_min{};
163 bool uses_atomic_f32x2_max{}; 163 bool uses_atomic_f32x2_max{};
164 bool uses_int64_bit_atomics{}; 164 bool uses_int64_bit_atomics{};
165 bool uses_global_memory{};
165 166
166 IR::Type used_constant_buffer_types{}; 167 IR::Type used_constant_buffer_types{};
167 IR::Type used_storage_buffer_types{}; 168 IR::Type used_storage_buffer_types{};