summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/shader_recompiler/CMakeLists.txt3
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.cpp158
-rw-r--r--src/shader_recompiler/backend/spirv/emit_context.h20
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp3
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.h95
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp528
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.cpp200
-rw-r--r--src/shader_recompiler/frontend/ir/ir_emitter.h39
-rw-r--r--src/shader_recompiler/frontend/ir/microinstruction.cpp66
-rw-r--r--src/shader_recompiler/frontend/ir/opcodes.inc70
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp222
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp12
-rw-r--r--src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp70
-rw-r--r--src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp121
-rw-r--r--src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp12
-rw-r--r--src/shader_recompiler/profile.h1
-rw-r--r--src/shader_recompiler/shader_info.h13
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp1
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp14
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h6
21 files changed, 1745 insertions, 19 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 8e1d37373..7b9f08aa0 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC
3 backend/spirv/emit_context.h 3 backend/spirv/emit_context.h
4 backend/spirv/emit_spirv.cpp 4 backend/spirv/emit_spirv.cpp
5 backend/spirv/emit_spirv.h 5 backend/spirv/emit_spirv.h
6 backend/spirv/emit_spirv_atomic.cpp
6 backend/spirv/emit_spirv_barriers.cpp 7 backend/spirv/emit_spirv_barriers.cpp
7 backend/spirv/emit_spirv_bitwise_conversion.cpp 8 backend/spirv/emit_spirv_bitwise_conversion.cpp
8 backend/spirv/emit_spirv_composite.cpp 9 backend/spirv/emit_spirv_composite.cpp
@@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC
65 frontend/maxwell/program.h 66 frontend/maxwell/program.h
66 frontend/maxwell/structured_control_flow.cpp 67 frontend/maxwell/structured_control_flow.cpp
67 frontend/maxwell/structured_control_flow.h 68 frontend/maxwell/structured_control_flow.h
69 frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
70 frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
68 frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp 71 frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
69 frontend/maxwell/translate/impl/barrier_operations.cpp 72 frontend/maxwell/translate/impl/barrier_operations.cpp
70 frontend/maxwell/translate/impl/bitfield_extract.cpp 73 frontend/maxwell/translate/impl/bitfield_extract.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp
index 32f8c4508..e5d83e9b4 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_context.cpp
@@ -15,6 +15,53 @@
15 15
16namespace Shader::Backend::SPIRV { 16namespace Shader::Backend::SPIRV {
17namespace { 17namespace {
// Selects which operation the helper function generated by CasFunction applies to its two
// operands (op_a, op_b) to produce the value written back by a compare-and-swap loop.
18enum class CasFunctionType {
19 Increment, // op_a >= op_b ? 0 : op_a + 1 (unsigned compare)
20 Decrement, // (op_a == 0 || op_a > op_b) ? op_b : op_a - 1 (unsigned compare)
21 FPAdd, // OpFAdd(op_a, op_b)
22 FPMin, // OpFMin(op_a, op_b)
23 FPMax, // OpFMax(op_a, op_b)
24};
25
26Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) {
27 const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)};
28 const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
29 const Id op_a{ctx.OpFunctionParameter(value_type)};
30 const Id op_b{ctx.OpFunctionParameter(value_type)};
31 ctx.AddLabel();
32 Id result{};
33 switch (function_type) {
34 case CasFunctionType::Increment: {
35 const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)};
36 const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))};
37 result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr);
38 break;
39 }
40 case CasFunctionType::Decrement: {
41 const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))};
42 const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)};
43 const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)};
44 const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))};
45 result = ctx.OpSelect(value_type, pred, op_b, decr);
46 break;
47 }
48 case CasFunctionType::FPAdd:
49 result = ctx.OpFAdd(value_type, op_a, op_b);
50 break;
51 case CasFunctionType::FPMin:
52 result = ctx.OpFMin(value_type, op_a, op_b);
53 break;
54 case CasFunctionType::FPMax:
55 result = ctx.OpFMax(value_type, op_a, op_b);
56 break;
57 default:
58 break;
59 }
60 ctx.OpReturnValue(result);
61 ctx.OpFunctionEnd();
62 return func;
63}
64
18Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { 65Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
19 const spv::ImageFormat format{spv::ImageFormat::Unknown}; 66 const spv::ImageFormat format{spv::ImageFormat::Unknown};
20 const Id type{ctx.F32[1]}; 67 const Id type{ctx.F32[1]};
@@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) {
196 } 243 }
197} 244}
198 245
246Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) {
247 const Id loop_header{OpLabel()};
248 const Id continue_block{OpLabel()};
249 const Id merge_block{OpLabel()};
250 const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type
251 : storage_memory_u32};
252 const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)};
253 const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)};
254 const Id index{OpFunctionParameter(U32[1])};
255 const Id op_b{OpFunctionParameter(value_type)};
256 const Id base{OpFunctionParameter(storage_type)};
257 AddLabel();
258 const Id one{Constant(U32[1], 1)};
259 OpBranch(loop_header);
260 AddLabel(loop_header);
261 OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone);
262 OpBranch(continue_block);
263
264 AddLabel(continue_block);
265 const Id word_pointer{pointer_type == CasPointerType::Shared
266 ? OpAccessChain(shared_u32, base, index)
267 : OpAccessChain(storage_u32, base, u32_zero_value, index)};
268 if (value_type.value == F32[2].value) {
269 const Id u32_value{OpLoad(U32[1], word_pointer)};
270 const Id value{OpUnpackHalf2x16(F32[2], u32_value)};
271 const Id new_value{OpFunctionCall(value_type, function, value, op_b)};
272 const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)};
273 const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value,
274 u32_zero_value, u32_new_value, u32_value)};
275 const Id success{OpIEqual(U1, atomic_res, u32_value)};
276 OpBranchConditional(success, merge_block, loop_header);
277
278 AddLabel(merge_block);
279 OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res));
280 } else {
281 const Id value{OpLoad(U32[1], word_pointer)};
282 const Id new_value{OpBitcast(
283 U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))};
284 const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value,
285 u32_zero_value, new_value, value)};
286 const Id success{OpIEqual(U1, atomic_res, value)};
287 OpBranchConditional(success, merge_block, loop_header);
288
289 AddLabel(merge_block);
290 OpReturnValue(OpBitcast(value_type, atomic_res));
291 }
292 OpFunctionEnd();
293 return func;
294}
295
199void EmitContext::DefineCommonTypes(const Info& info) { 296void EmitContext::DefineCommonTypes(const Info& info) {
200 void_id = TypeVoid(); 297 void_id = TypeVoid();
201 298
@@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
300 } 397 }
301 const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; 398 const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)};
302 const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; 399 const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))};
303 const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; 400 shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type);
304 shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); 401 shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]);
305 shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); 402 shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup);
306 interfaces.push_back(shared_memory_u32); 403 interfaces.push_back(shared_memory_u32);
307 404
308 const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; 405 const Id func_type{TypeFunction(void_id, U32[1], U32[1])};
@@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) {
346 if (program.info.uses_int16) { 443 if (program.info.uses_int16) {
347 shared_store_u16_func = make_function(16, 16); 444 shared_store_u16_func = make_function(16, 16);
348 } 445 }
446 if (program.info.uses_shared_increment) {
447 const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])};
448 increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]);
449 }
450 if (program.info.uses_shared_decrement) {
451 const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])};
452 decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]);
453 }
349} 454}
350 455
351void EmitContext::DefineAttributeMemAccess(const Info& info) { 456void EmitContext::DefineAttributeMemAccess(const Info& info) {
@@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
530 MemberName(struct_type, 0, "data"); 635 MemberName(struct_type, 0, "data");
531 MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); 636 MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U);
532 637
533 const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; 638 storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type);
534 storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); 639 storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
535 640
536 u32 index{}; 641 u32 index{};
537 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { 642 for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
538 const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; 643 const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)};
539 Decorate(id, spv::Decoration::Binding, binding); 644 Decorate(id, spv::Decoration::Binding, binding);
540 Decorate(id, spv::Decoration::DescriptorSet, 0U); 645 Decorate(id, spv::Decoration::DescriptorSet, 0U);
541 Name(id, fmt::format("ssbo{}", index)); 646 Name(id, fmt::format("ssbo{}", index));
@@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
546 index += desc.count; 651 index += desc.count;
547 binding += desc.count; 652 binding += desc.count;
548 } 653 }
654 if (info.uses_global_increment) {
655 AddCapability(spv::Capability::VariablePointersStorageBuffer);
656 const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])};
657 increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]);
658 }
659 if (info.uses_global_decrement) {
660 AddCapability(spv::Capability::VariablePointersStorageBuffer);
661 const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])};
662 decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]);
663 }
664 if (info.uses_atomic_f32_add) {
665 AddCapability(spv::Capability::VariablePointersStorageBuffer);
666 const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])};
667 f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]);
668 }
669 if (info.uses_atomic_f16x2_add) {
670 AddCapability(spv::Capability::VariablePointersStorageBuffer);
671 const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])};
672 f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]);
673 }
674 if (info.uses_atomic_f16x2_min) {
675 AddCapability(spv::Capability::VariablePointersStorageBuffer);
676 const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])};
677 f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]);
678 }
679 if (info.uses_atomic_f16x2_max) {
680 AddCapability(spv::Capability::VariablePointersStorageBuffer);
681 const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])};
682 f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]);
683 }
684 if (info.uses_atomic_f32x2_add) {
685 AddCapability(spv::Capability::VariablePointersStorageBuffer);
686 const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])};
687 f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]);
688 }
689 if (info.uses_atomic_f32x2_min) {
690 AddCapability(spv::Capability::VariablePointersStorageBuffer);
691 const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])};
692 f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]);
693 }
694 if (info.uses_atomic_f32x2_max) {
695 AddCapability(spv::Capability::VariablePointersStorageBuffer);
696 const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])};
697 f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]);
698 }
549} 699}
550 700
551void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { 701void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) {
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h
index e70f3458c..34f38454f 100644
--- a/src/shader_recompiler/backend/spirv/emit_context.h
+++ b/src/shader_recompiler/backend/spirv/emit_context.h
@@ -94,6 +94,7 @@ public:
94 Id output_f32{}; 94 Id output_f32{};
95 95
96 Id storage_u32{}; 96 Id storage_u32{};
97 Id storage_memory_u32{};
97 98
98 Id image_buffer_type{}; 99 Id image_buffer_type{};
99 Id sampled_texture_buffer_type{}; 100 Id sampled_texture_buffer_type{};
@@ -136,9 +137,21 @@ public:
136 Id shared_memory_u32{}; 137 Id shared_memory_u32{};
137 Id shared_memory_u32x2{}; 138 Id shared_memory_u32x2{};
138 Id shared_memory_u32x4{}; 139 Id shared_memory_u32x4{};
140 Id shared_memory_u32_type{};
139 141
140 Id shared_store_u8_func{}; 142 Id shared_store_u8_func{};
141 Id shared_store_u16_func{}; 143 Id shared_store_u16_func{};
144 Id increment_cas_shared{};
145 Id increment_cas_ssbo{};
146 Id decrement_cas_shared{};
147 Id decrement_cas_ssbo{};
148 Id f32_add_cas{};
149 Id f16x2_add_cas{};
150 Id f16x2_min_cas{};
151 Id f16x2_max_cas{};
152 Id f32x2_add_cas{};
153 Id f32x2_min_cas{};
154 Id f32x2_max_cas{};
142 155
143 Id input_position{}; 156 Id input_position{};
144 std::array<Id, 32> input_generics{}; 157 std::array<Id, 32> input_generics{};
@@ -153,6 +166,11 @@ public:
153 std::vector<Id> interfaces; 166 std::vector<Id> interfaces;
154 167
155private: 168private:
169 enum class CasPointerType {
170 Shared,
171 Ssbo,
172 };
173
156 void DefineCommonTypes(const Info& info); 174 void DefineCommonTypes(const Info& info);
157 void DefineCommonConstants(); 175 void DefineCommonConstants();
158 void DefineInterfaces(const Info& info); 176 void DefineInterfaces(const Info& info);
@@ -171,6 +189,8 @@ private:
171 189
172 void DefineInputs(const Info& info); 190 void DefineInputs(const Info& info);
173 void DefineOutputs(const Info& info); 191 void DefineOutputs(const Info& info);
192
193 [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type);
174}; 194};
175 195
176} // namespace Shader::Backend::SPIRV 196} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 5a1ffd61c..9248bd78b 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
238 ctx.AddCapability(spv::Capability::SubgroupVoteKHR); 238 ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
239 } 239 }
240 } 240 }
241 if (info.uses_64_bit_atomics && profile.support_int64_atomics) {
242 ctx.AddCapability(spv::Capability::Int64Atomics);
243 }
241 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { 244 if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) {
242 ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); 245 ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat);
243 } 246 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h
index 12b7993ae..a3398a605 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.h
@@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs);
306Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); 306Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs);
307Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); 307Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
308Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); 308Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs);
309Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value);
310Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value);
311Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value);
312Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value);
313Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value);
314Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value);
315Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value);
316Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value);
317Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value);
318Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value);
319Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value);
320Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value);
321Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
322 Id value);
323Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
324 Id value);
325Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
326 Id value);
327Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
328 Id value);
329Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
330 Id value);
331Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
332 Id value);
333Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
334 Id value);
335Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
336 Id value);
337Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
338 Id value);
339Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
340 Id value);
341Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
342 Id value);
343Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
344 Id value);
345Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
346 Id value);
347Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
348 Id value);
349Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
350 Id value);
351Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
352 Id value);
353Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
354 Id value);
355Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
356 Id value);
357Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
358 Id value);
359Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
360 Id value);
361Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
362 Id value);
363Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
364 Id value);
365Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
366 Id value);
367Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
368 Id value);
369Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
370 Id value);
371Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
372 Id value);
373Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
374 Id value);
375Id EmitGlobalAtomicIAdd32(EmitContext& ctx);
376Id EmitGlobalAtomicSMin32(EmitContext& ctx);
377Id EmitGlobalAtomicUMin32(EmitContext& ctx);
378Id EmitGlobalAtomicSMax32(EmitContext& ctx);
379Id EmitGlobalAtomicUMax32(EmitContext& ctx);
380Id EmitGlobalAtomicInc32(EmitContext& ctx);
381Id EmitGlobalAtomicDec32(EmitContext& ctx);
382Id EmitGlobalAtomicAnd32(EmitContext& ctx);
383Id EmitGlobalAtomicOr32(EmitContext& ctx);
384Id EmitGlobalAtomicXor32(EmitContext& ctx);
385Id EmitGlobalAtomicExchange32(EmitContext& ctx);
386Id EmitGlobalAtomicIAdd64(EmitContext& ctx);
387Id EmitGlobalAtomicSMin64(EmitContext& ctx);
388Id EmitGlobalAtomicUMin64(EmitContext& ctx);
389Id EmitGlobalAtomicSMax64(EmitContext& ctx);
390Id EmitGlobalAtomicUMax64(EmitContext& ctx);
391Id EmitGlobalAtomicInc64(EmitContext& ctx);
392Id EmitGlobalAtomicDec64(EmitContext& ctx);
393Id EmitGlobalAtomicAnd64(EmitContext& ctx);
394Id EmitGlobalAtomicOr64(EmitContext& ctx);
395Id EmitGlobalAtomicXor64(EmitContext& ctx);
396Id EmitGlobalAtomicExchange64(EmitContext& ctx);
397Id EmitGlobalAtomicAddF32(EmitContext& ctx);
398Id EmitGlobalAtomicAddF16x2(EmitContext& ctx);
399Id EmitGlobalAtomicAddF32x2(EmitContext& ctx);
400Id EmitGlobalAtomicMinF16x2(EmitContext& ctx);
401Id EmitGlobalAtomicMinF32x2(EmitContext& ctx);
402Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx);
403Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx);
309Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); 404Id EmitLogicalOr(EmitContext& ctx, Id a, Id b);
310Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); 405Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b);
311Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); 406Id EmitLogicalXor(EmitContext& ctx, Id a, Id b);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
new file mode 100644
index 000000000..03d891419
--- /dev/null
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -0,0 +1,528 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/backend/spirv/emit_spirv.h"
6
7namespace Shader::Backend::SPIRV {
8namespace {
9
10Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) {
11 const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
12 const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
13 const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))};
14 return ctx.profile.support_explicit_workgroup_layout
15 ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index)
16 : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index);
17}
18
19Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) {
20 if (offset.IsImmediate()) {
21 const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)};
22 return ctx.Constant(ctx.U32[1], imm_offset);
23 }
24 const u32 shift{static_cast<u32>(std::countr_zero(element_size))};
25 const Id index{ctx.Def(offset)};
26 if (shift == 0) {
27 return index;
28 }
29 const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
30 return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id);
31}
32
33Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
34 u32 index_offset = 0) {
35 // TODO: Support reinterpreting bindings, guaranteed to be aligned
36 if (!binding.IsImmediate()) {
37 throw NotImplementedException("Dynamic storage buffer indexing");
38 }
39 const Id ssbo{ctx.ssbos[binding.U32()]};
40 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
41 const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))};
42 return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index);
43}
44
45std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) {
46 const Id scope{ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device))};
47 const Id semantics{ctx.u32_zero_value};
48 return {scope, semantics};
49}
50
51Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) {
52 const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)};
53 const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)};
54 const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)};
55 return ctx.OpBitcast(ctx.U64, original_composite);
56}
57
58void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) {
59 const Id composite{ctx.OpBitcast(ctx.U32[2], result)};
60 ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0));
61 ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1));
62}
63} // Anonymous namespace
64
65Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) {
66 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
67 const auto [scope, semantics]{GetAtomicArgs(ctx)};
68 return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value);
69}
70
71Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) {
72 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
73 const auto [scope, semantics]{GetAtomicArgs(ctx)};
74 return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value);
75}
76
77Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) {
78 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
79 const auto [scope, semantics]{GetAtomicArgs(ctx)};
80 return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value);
81}
82
83Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) {
84 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
85 const auto [scope, semantics]{GetAtomicArgs(ctx)};
86 return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value);
87}
88
89Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) {
90 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
91 const auto [scope, semantics]{GetAtomicArgs(ctx)};
92 return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value);
93}
94
95Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) {
96 const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
97 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)};
98 return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value,
99 ctx.shared_memory_u32);
100}
101
102Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) {
103 const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
104 const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)};
105 return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value,
106 ctx.shared_memory_u32);
107}
108
109Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) {
110 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
111 const auto [scope, semantics]{GetAtomicArgs(ctx)};
112 return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value);
113}
114
115Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) {
116 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
117 const auto [scope, semantics]{GetAtomicArgs(ctx)};
118 return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value);
119}
120
121Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) {
122 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
123 const auto [scope, semantics]{GetAtomicArgs(ctx)};
124 return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value);
125}
126
127Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) {
128 const Id pointer{GetSharedPointer(ctx, pointer_offset)};
129 const auto [scope, semantics]{GetAtomicArgs(ctx)};
130 return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value);
131}
132
133Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) {
134 const Id pointer_1{GetSharedPointer(ctx, pointer_offset)};
135 if (ctx.profile.support_int64_atomics) {
136 const auto [scope, semantics]{GetAtomicArgs(ctx)};
137 return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value);
138 }
139 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
140 const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)};
141 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
142 StoreResult(ctx, pointer_1, pointer_2, value);
143 return original_value;
144}
145
146Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
147 Id value) {
148 const Id pointer{GetStoragePointer(ctx, binding, offset)};
149 const auto [scope, semantics]{GetAtomicArgs(ctx)};
150 return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value);
151}
152
153Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
154 Id value) {
155 const Id pointer{GetStoragePointer(ctx, binding, offset)};
156 const auto [scope, semantics]{GetAtomicArgs(ctx)};
157 return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value);
158}
159
160Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
161 Id value) {
162 const Id pointer{GetStoragePointer(ctx, binding, offset)};
163 const auto [scope, semantics]{GetAtomicArgs(ctx)};
164 return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value);
165}
166
167Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
168 Id value) {
169 const Id pointer{GetStoragePointer(ctx, binding, offset)};
170 const auto [scope, semantics]{GetAtomicArgs(ctx)};
171 return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value);
172}
173
174Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
175 Id value) {
176 const Id pointer{GetStoragePointer(ctx, binding, offset)};
177 const auto [scope, semantics]{GetAtomicArgs(ctx)};
178 return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value);
179}
180
181Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
182 Id value) {
183 const Id ssbo{ctx.ssbos[binding.U32()]};
184 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
185 return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo);
186}
187
188Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
189 Id value) {
190 const Id ssbo{ctx.ssbos[binding.U32()]};
191 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
192 return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo);
193}
194
195Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
196 Id value) {
197 const Id pointer{GetStoragePointer(ctx, binding, offset)};
198 const auto [scope, semantics]{GetAtomicArgs(ctx)};
199 return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value);
200}
201
202Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
203 Id value) {
204 const Id pointer{GetStoragePointer(ctx, binding, offset)};
205 const auto [scope, semantics]{GetAtomicArgs(ctx)};
206 return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value);
207}
208
209Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
210 Id value) {
211 const Id pointer{GetStoragePointer(ctx, binding, offset)};
212 const auto [scope, semantics]{GetAtomicArgs(ctx)};
213 return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value);
214}
215
216Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
217 Id value) {
218 const Id pointer{GetStoragePointer(ctx, binding, offset)};
219 const auto [scope, semantics]{GetAtomicArgs(ctx)};
220 return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value);
221}
222
223Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
224 Id value) {
225 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
226 if (ctx.profile.support_int64_atomics) {
227 const auto [scope, semantics]{GetAtomicArgs(ctx)};
228 return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value);
229 }
230 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
231 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
232 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
233 const Id result{ctx.OpIAdd(ctx.U64, value, original_value)};
234 StoreResult(ctx, pointer_1, pointer_2, result);
235 return original_value;
236}
237
238Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
239 Id value) {
240 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
241 if (ctx.profile.support_int64_atomics) {
242 const auto [scope, semantics]{GetAtomicArgs(ctx)};
243 return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value);
244 }
245 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
246 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
247 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
248 const Id result{ctx.OpSMin(ctx.U64, value, original_value)};
249 StoreResult(ctx, pointer_1, pointer_2, result);
250 return original_value;
251}
252
253Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
254 Id value) {
255 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
256 if (ctx.profile.support_int64_atomics) {
257 const auto [scope, semantics]{GetAtomicArgs(ctx)};
258 return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value);
259 }
260 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
261 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
262 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
263 const Id result{ctx.OpUMin(ctx.U64, value, original_value)};
264 StoreResult(ctx, pointer_1, pointer_2, result);
265 return original_value;
266}
267
268Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
269 Id value) {
270 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
271 if (ctx.profile.support_int64_atomics) {
272 const auto [scope, semantics]{GetAtomicArgs(ctx)};
273 return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value);
274 }
275 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
276 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
277 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
278 const Id result{ctx.OpSMax(ctx.U64, value, original_value)};
279 StoreResult(ctx, pointer_1, pointer_2, result);
280 return original_value;
281}
282
283Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
284 Id value) {
285 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
286 if (ctx.profile.support_int64_atomics) {
287 const auto [scope, semantics]{GetAtomicArgs(ctx)};
288 return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value);
289 }
290 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
291 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
292 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
293 const Id result{ctx.OpUMax(ctx.U64, value, original_value)};
294 StoreResult(ctx, pointer_1, pointer_2, result);
295 return original_value;
296}
297
298Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
299 Id value) {
300 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
301 if (ctx.profile.support_int64_atomics) {
302 const auto [scope, semantics]{GetAtomicArgs(ctx)};
303 return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value);
304 }
305 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
306 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
307 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
308 const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)};
309 StoreResult(ctx, pointer_1, pointer_2, result);
310 return original_value;
311}
312
313Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
314 Id value) {
315 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
316 if (ctx.profile.support_int64_atomics) {
317 const auto [scope, semantics]{GetAtomicArgs(ctx)};
318 return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value);
319 }
320 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
321 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
322 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
323 const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)};
324 StoreResult(ctx, pointer_1, pointer_2, result);
325 return original_value;
326}
327
328Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
329 Id value) {
330 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
331 if (ctx.profile.support_int64_atomics) {
332 const auto [scope, semantics]{GetAtomicArgs(ctx)};
333 return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value);
334 }
335 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
336 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
337 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
338 const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)};
339 StoreResult(ctx, pointer_1, pointer_2, result);
340 return original_value;
341}
342
343Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
344 Id value) {
345 const Id pointer_1{GetStoragePointer(ctx, binding, offset)};
346 if (ctx.profile.support_int64_atomics) {
347 const auto [scope, semantics]{GetAtomicArgs(ctx)};
348 return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value);
349 }
350 // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic");
351 const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)};
352 const Id original_value{LoadU64(ctx, pointer_1, pointer_2)};
353 StoreResult(ctx, pointer_1, pointer_2, value);
354 return original_value;
355}
356
357Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
358 Id value) {
359 const Id ssbo{ctx.ssbos[binding.U32()]};
360 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
361 return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo);
362}
363
364Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
365 Id value) {
366 const Id ssbo{ctx.ssbos[binding.U32()]};
367 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
368 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)};
369 return ctx.OpBitcast(ctx.U32[1], result);
370}
371
372Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
373 Id value) {
374 const Id ssbo{ctx.ssbos[binding.U32()]};
375 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
376 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)};
377 return ctx.OpPackHalf2x16(ctx.U32[1], result);
378}
379
380Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
381 Id value) {
382 const Id ssbo{ctx.ssbos[binding.U32()]};
383 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
384 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)};
385 return ctx.OpBitcast(ctx.U32[1], result);
386}
387
388Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
389 Id value) {
390 const Id ssbo{ctx.ssbos[binding.U32()]};
391 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
392 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)};
393 return ctx.OpPackHalf2x16(ctx.U32[1], result);
394}
395
396Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
397 Id value) {
398 const Id ssbo{ctx.ssbos[binding.U32()]};
399 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
400 const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)};
401 return ctx.OpBitcast(ctx.U32[1], result);
402}
403
404Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
405 Id value) {
406 const Id ssbo{ctx.ssbos[binding.U32()]};
407 const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
408 const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)};
409 return ctx.OpPackHalf2x16(ctx.U32[1], result);
410}
411
// Global-memory atomic emitters.
// Every function in this group is a stub: it ignores the context and unconditionally throws
// NotImplementedException("SPIR-V Instruction").
// NOTE(review): presumably these opcodes are expected to be rewritten into the StorageAtomic*
// forms before the SPIR-V backend runs, so reaching one of these stubs would indicate a shader
// whose global access could not be converted - confirm against the storage-buffer pass.
412Id EmitGlobalAtomicIAdd32(EmitContext&) {
413 throw NotImplementedException("SPIR-V Instruction");
414}
415
416Id EmitGlobalAtomicSMin32(EmitContext&) {
417 throw NotImplementedException("SPIR-V Instruction");
418}
419
420Id EmitGlobalAtomicUMin32(EmitContext&) {
421 throw NotImplementedException("SPIR-V Instruction");
422}
423
424Id EmitGlobalAtomicSMax32(EmitContext&) {
425 throw NotImplementedException("SPIR-V Instruction");
426}
427
428Id EmitGlobalAtomicUMax32(EmitContext&) {
429 throw NotImplementedException("SPIR-V Instruction");
430}
431
432Id EmitGlobalAtomicInc32(EmitContext&) {
433 throw NotImplementedException("SPIR-V Instruction");
434}
435
436Id EmitGlobalAtomicDec32(EmitContext&) {
437 throw NotImplementedException("SPIR-V Instruction");
438}
439
440Id EmitGlobalAtomicAnd32(EmitContext&) {
441 throw NotImplementedException("SPIR-V Instruction");
442}
443
444Id EmitGlobalAtomicOr32(EmitContext&) {
445 throw NotImplementedException("SPIR-V Instruction");
446}
447
448Id EmitGlobalAtomicXor32(EmitContext&) {
449 throw NotImplementedException("SPIR-V Instruction");
450}
451
452Id EmitGlobalAtomicExchange32(EmitContext&) {
453 throw NotImplementedException("SPIR-V Instruction");
454}
455
// 64-bit integer variants (same stub behavior).
456Id EmitGlobalAtomicIAdd64(EmitContext&) {
457 throw NotImplementedException("SPIR-V Instruction");
458}
459
460Id EmitGlobalAtomicSMin64(EmitContext&) {
461 throw NotImplementedException("SPIR-V Instruction");
462}
463
464Id EmitGlobalAtomicUMin64(EmitContext&) {
465 throw NotImplementedException("SPIR-V Instruction");
466}
467
468Id EmitGlobalAtomicSMax64(EmitContext&) {
469 throw NotImplementedException("SPIR-V Instruction");
470}
471
472Id EmitGlobalAtomicUMax64(EmitContext&) {
473 throw NotImplementedException("SPIR-V Instruction");
474}
475
476Id EmitGlobalAtomicInc64(EmitContext&) {
477 throw NotImplementedException("SPIR-V Instruction");
478}
479
480Id EmitGlobalAtomicDec64(EmitContext&) {
481 throw NotImplementedException("SPIR-V Instruction");
482}
483
484Id EmitGlobalAtomicAnd64(EmitContext&) {
485 throw NotImplementedException("SPIR-V Instruction");
486}
487
488Id EmitGlobalAtomicOr64(EmitContext&) {
489 throw NotImplementedException("SPIR-V Instruction");
490}
491
492Id EmitGlobalAtomicXor64(EmitContext&) {
493 throw NotImplementedException("SPIR-V Instruction");
494}
495
496Id EmitGlobalAtomicExchange64(EmitContext&) {
497 throw NotImplementedException("SPIR-V Instruction");
498}
499
// Floating-point variants (same stub behavior).
500Id EmitGlobalAtomicAddF32(EmitContext&) {
501 throw NotImplementedException("SPIR-V Instruction");
502}
503
504Id EmitGlobalAtomicAddF16x2(EmitContext&) {
505 throw NotImplementedException("SPIR-V Instruction");
506}
507
508Id EmitGlobalAtomicAddF32x2(EmitContext&) {
509 throw NotImplementedException("SPIR-V Instruction");
510}
511
512Id EmitGlobalAtomicMinF16x2(EmitContext&) {
513 throw NotImplementedException("SPIR-V Instruction");
514}
515
516Id EmitGlobalAtomicMinF32x2(EmitContext&) {
517 throw NotImplementedException("SPIR-V Instruction");
518}
519
520Id EmitGlobalAtomicMaxF16x2(EmitContext&) {
521 throw NotImplementedException("SPIR-V Instruction");
522}
523
524Id EmitGlobalAtomicMaxF32x2(EmitContext&) {
525 throw NotImplementedException("SPIR-V Instruction");
526}
527
528} // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 17be0c639..a3339f624 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed)
1284 return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); 1284 return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
1285} 1285}
1286 1286
// Inserts a SharedAtomicIAdd32 instruction taking (pointer_offset, value) and returns its U32
// result.
1287U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
1288 return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
1289}
1290
// Inserts a SharedAtomicSMin32 instruction taking (pointer_offset, value) and returns its U32
// result.
1291U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
1292 return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
1293}
1294
// Inserts a SharedAtomicUMin32 instruction taking (pointer_offset, value) and returns its U32
// result.
1295U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
1296 return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
1297}
1298
1299U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
1300 return is_signed ? SharedAtomicSMin(pointer_offset, value)
1301 : SharedAtomicUMin(pointer_offset, value);
1302}
1303
// Inserts a SharedAtomicSMax32 instruction taking (pointer_offset, value) and returns its U32
// result.
1304U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
1305 return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
1306}
1307
// Inserts a SharedAtomicUMax32 instruction taking (pointer_offset, value) and returns its U32
// result.
1308U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
1309 return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
1310}
1311
1312U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
1313 return is_signed ? SharedAtomicSMax(pointer_offset, value)
1314 : SharedAtomicUMax(pointer_offset, value);
1315}
1316
// Inserts a SharedAtomicInc32 instruction taking (pointer_offset, value) and returns its U32
// result.
1317U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
1318 return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
1319}
1320
// Inserts a SharedAtomicDec32 instruction taking (pointer_offset, value) and returns its U32
// result.
1321U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
1322 return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
1323}
1324
// Inserts a SharedAtomicAnd32 instruction taking (pointer_offset, value) and returns its U32
// result.
1325U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
1326 return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
1327}
1328
// Inserts a SharedAtomicOr32 instruction taking (pointer_offset, value) and returns its U32
// result.
1329U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
1330 return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
1331}
1332
// Inserts a SharedAtomicXor32 instruction taking (pointer_offset, value) and returns its U32
// result.
1333U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
1334 return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
1335}
1336
1337U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
1338 switch (value.Type()) {
1339 case Type::U32:
1340 return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
1341 case Type::U64:
1342 return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
1343 default:
1344 ThrowInvalidType(pointer_offset.Type());
1345 }
1346}
1347
1348U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
1349 switch (value.Type()) {
1350 case Type::U32:
1351 return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
1352 case Type::U64:
1353 return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
1354 default:
1355 ThrowInvalidType(value.Type());
1356 }
1357}
1358
1359U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
1360 switch (value.Type()) {
1361 case Type::U32:
1362 return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
1363 case Type::U64:
1364 return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
1365 default:
1366 ThrowInvalidType(value.Type());
1367 }
1368}
1369
1370U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
1371 switch (value.Type()) {
1372 case Type::U32:
1373 return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
1374 case Type::U64:
1375 return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
1376 default:
1377 ThrowInvalidType(value.Type());
1378 }
1379}
1380
1381U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1382 return is_signed ? GlobalAtomicSMin(pointer_offset, value)
1383 : GlobalAtomicUMin(pointer_offset, value);
1384}
1385
1386U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
1387 switch (value.Type()) {
1388 case Type::U32:
1389 return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
1390 case Type::U64:
1391 return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
1392 default:
1393 ThrowInvalidType(value.Type());
1394 }
1395}
1396
1397U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
1398 switch (value.Type()) {
1399 case Type::U32:
1400 return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
1401 case Type::U64:
1402 return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
1403 default:
1404 ThrowInvalidType(value.Type());
1405 }
1406}
1407
1408U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1409 return is_signed ? GlobalAtomicSMax(pointer_offset, value)
1410 : GlobalAtomicUMax(pointer_offset, value);
1411}
1412
// Inserts a GlobalAtomicInc32 instruction taking (pointer_offset, value) and returns its U32
// result. Only a 32-bit form is emitted here.
1413U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
1414 return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
1415}
1416
// Inserts a GlobalAtomicDec32 instruction taking (pointer_offset, value) and returns its U32
// result. Only a 32-bit form is emitted here.
1417U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
1418 return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
1419}
1420
1421U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
1422 switch (value.Type()) {
1423 case Type::U32:
1424 return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
1425 case Type::U64:
1426 return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
1427 default:
1428 ThrowInvalidType(value.Type());
1429 }
1430}
1431
1432U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
1433 switch (value.Type()) {
1434 case Type::U32:
1435 return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
1436 case Type::U64:
1437 return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
1438 default:
1439 ThrowInvalidType(value.Type());
1440 }
1441}
1442
1443U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
1444 switch (value.Type()) {
1445 case Type::U32:
1446 return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
1447 case Type::U64:
1448 return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
1449 default:
1450 ThrowInvalidType(value.Type());
1451 }
1452}
1453
1454U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
1455 switch (value.Type()) {
1456 case Type::U32:
1457 return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
1458 case Type::U64:
1459 return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
1460 default:
1461 ThrowInvalidType(pointer_offset.Type());
1462 }
1463}
1464
// Inserts a GlobalAtomicAddF32 instruction taking (pointer_offset, value), attaching `control`
// as the instruction flags, and returns its F32 result.
1465F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
1466 const FpControl control) {
1467 return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
1468}
1469
// Inserts a GlobalAtomicAddF16x2 instruction taking (pointer_offset, value), attaching `control`
// as the instruction flags, and returns its result as an untyped Value.
1470Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
1471 const FpControl control) {
1472 return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
1473}
1474
// Inserts a GlobalAtomicMinF16x2 instruction taking (pointer_offset, value), attaching `control`
// as the instruction flags, and returns its result as an untyped Value.
1475Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
1476 const FpControl control) {
1477 return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
1478}
1479
// Inserts a GlobalAtomicMaxF16x2 instruction taking (pointer_offset, value), attaching `control`
// as the instruction flags, and returns its result as an untyped Value.
1480Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
1481 const FpControl control) {
1482 return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
1483}
1484
// Inserts a LogicalOr instruction over the two U1 operands and returns its U1 result.
1287U1 IREmitter::LogicalOr(const U1& a, const U1& b) { 1485U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
1288 return Inst<U1>(Opcode::LogicalOr, a, b); 1486 return Inst<U1>(Opcode::LogicalOr, a, b);
1289} 1487}
@@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst
1626} 1824}
1627 1825
// Inserts an image write of `color` at `coords`, attaching `info` as the instruction flags.
// An immediate handle selects the BoundImageWrite opcode; any other handle selects
// BindlessImageWrite. The instruction's result is discarded.
1628void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, 1826void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
1629 TextureInstInfo info) { 1827 TextureInstInfo info) {
1630 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; 1828 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
1631 Inst(op, Flags{info}, handle, coords, color); 1829 Inst(op, Flags{info}, handle, coords, color);
1632} 1830}
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index ec60070ef..f9cbf1304 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -228,6 +228,45 @@ public:
228 [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); 228 [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
229 [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); 229 [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
230 230
// Shared-memory atomics. Each takes a U32 pointer offset plus the operand value and returns the
// result of the inserted instruction.
231 [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
232 [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
233 [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
// Selects the signed or unsigned minimum according to is_signed.
234 [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
235 [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
236 [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
// Selects the signed or unsigned maximum according to is_signed.
237 [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
238 [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
239 [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
240 [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
241 [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
242 [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
// The only shared atomic here that accepts either a 32- or a 64-bit value.
243 [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
244
// Global-memory atomics. Each takes a U64 pointer plus the operand value; the U32U64 overloads
// pick the 32- or 64-bit opcode from the type of `value`.
245 [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
246 [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
247 [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
// Selects the signed or unsigned minimum according to is_signed.
248 [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
249 bool is_signed);
250 [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
251 [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
// Selects the signed or unsigned maximum according to is_signed.
252 [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
253 bool is_signed);
// Increment and decrement are declared for 32-bit values only.
254 [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
255 [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
256 [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
257 [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
258 [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
259 [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
260
// Floating-point global-memory atomics. `control` is attached to the inserted instruction as its
// flags and defaults to a value-initialized FpControl.
261 [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
262 const FpControl control = {});
263 [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
264 const FpControl control = {});
265 [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
266 const FpControl control = {});
267 [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
268 const FpControl control = {});
269
231 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); 270 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
232 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); 271 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
233 [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); 272 [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
index 2df631791..0f66c5627 100644
--- a/src/shader_recompiler/frontend/ir/microinstruction.cpp
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept {
93 case Opcode::WriteSharedU32: 93 case Opcode::WriteSharedU32:
94 case Opcode::WriteSharedU64: 94 case Opcode::WriteSharedU64:
95 case Opcode::WriteSharedU128: 95 case Opcode::WriteSharedU128:
96 case Opcode::SharedAtomicIAdd32:
97 case Opcode::SharedAtomicSMin32:
98 case Opcode::SharedAtomicUMin32:
99 case Opcode::SharedAtomicSMax32:
100 case Opcode::SharedAtomicUMax32:
101 case Opcode::SharedAtomicInc32:
102 case Opcode::SharedAtomicDec32:
103 case Opcode::SharedAtomicAnd32:
104 case Opcode::SharedAtomicOr32:
105 case Opcode::SharedAtomicXor32:
106 case Opcode::SharedAtomicExchange32:
107 case Opcode::SharedAtomicExchange64:
108 case Opcode::GlobalAtomicIAdd32:
109 case Opcode::GlobalAtomicSMin32:
110 case Opcode::GlobalAtomicUMin32:
111 case Opcode::GlobalAtomicSMax32:
112 case Opcode::GlobalAtomicUMax32:
113 case Opcode::GlobalAtomicInc32:
114 case Opcode::GlobalAtomicDec32:
115 case Opcode::GlobalAtomicAnd32:
116 case Opcode::GlobalAtomicOr32:
117 case Opcode::GlobalAtomicXor32:
118 case Opcode::GlobalAtomicExchange32:
119 case Opcode::GlobalAtomicIAdd64:
120 case Opcode::GlobalAtomicSMin64:
121 case Opcode::GlobalAtomicUMin64:
122 case Opcode::GlobalAtomicSMax64:
123 case Opcode::GlobalAtomicUMax64:
124 case Opcode::GlobalAtomicAnd64:
125 case Opcode::GlobalAtomicOr64:
126 case Opcode::GlobalAtomicXor64:
127 case Opcode::GlobalAtomicExchange64:
128 case Opcode::GlobalAtomicAddF32:
129 case Opcode::GlobalAtomicAddF16x2:
130 case Opcode::GlobalAtomicAddF32x2:
131 case Opcode::GlobalAtomicMinF16x2:
132 case Opcode::GlobalAtomicMinF32x2:
133 case Opcode::GlobalAtomicMaxF16x2:
134 case Opcode::GlobalAtomicMaxF32x2:
135 case Opcode::StorageAtomicIAdd32:
136 case Opcode::StorageAtomicSMin32:
137 case Opcode::StorageAtomicUMin32:
138 case Opcode::StorageAtomicSMax32:
139 case Opcode::StorageAtomicUMax32:
140 case Opcode::StorageAtomicInc32:
141 case Opcode::StorageAtomicDec32:
142 case Opcode::StorageAtomicAnd32:
143 case Opcode::StorageAtomicOr32:
144 case Opcode::StorageAtomicXor32:
145 case Opcode::StorageAtomicExchange32:
146 case Opcode::StorageAtomicIAdd64:
147 case Opcode::StorageAtomicSMin64:
148 case Opcode::StorageAtomicUMin64:
149 case Opcode::StorageAtomicSMax64:
150 case Opcode::StorageAtomicUMax64:
151 case Opcode::StorageAtomicAnd64:
152 case Opcode::StorageAtomicOr64:
153 case Opcode::StorageAtomicXor64:
154 case Opcode::StorageAtomicExchange64:
155 case Opcode::StorageAtomicAddF32:
156 case Opcode::StorageAtomicAddF16x2:
157 case Opcode::StorageAtomicAddF32x2:
158 case Opcode::StorageAtomicMinF16x2:
159 case Opcode::StorageAtomicMinF32x2:
160 case Opcode::StorageAtomicMaxF16x2:
161 case Opcode::StorageAtomicMaxF32x2:
96 case Opcode::BindlessImageWrite: 162 case Opcode::BindlessImageWrite:
97 case Opcode::BoundImageWrite: 163 case Opcode::BoundImageWrite:
98 case Opcode::ImageWrite: 164 case Opcode::ImageWrite:
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
index 86ea02560..dc776a73e 100644
--- a/src/shader_recompiler/frontend/ir/opcodes.inc
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32,
321OPCODE(SGreaterThanEqual, U1, U32, U32, ) 321OPCODE(SGreaterThanEqual, U1, U32, U32, )
322OPCODE(UGreaterThanEqual, U1, U32, U32, ) 322OPCODE(UGreaterThanEqual, U1, U32, U32, )
323 323
324// Atomic operations
// NOTE(review): the columns below appear to be (opcode name, result type, argument types...);
// confirm against the OPCODE macro definition, which is not part of this hunk.
// Shared memory atomics: arguments look like (U32 offset, value).
325OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
326OPCODE(SharedAtomicSMin32, U32, U32, U32, )
327OPCODE(SharedAtomicUMin32, U32, U32, U32, )
328OPCODE(SharedAtomicSMax32, U32, U32, U32, )
329OPCODE(SharedAtomicUMax32, U32, U32, U32, )
330OPCODE(SharedAtomicInc32, U32, U32, U32, )
331OPCODE(SharedAtomicDec32, U32, U32, U32, )
332OPCODE(SharedAtomicAnd32, U32, U32, U32, )
333OPCODE(SharedAtomicOr32, U32, U32, U32, )
334OPCODE(SharedAtomicXor32, U32, U32, U32, )
335OPCODE(SharedAtomicExchange32, U32, U32, U32, )
336OPCODE(SharedAtomicExchange64, U64, U32, U64, )
337
// Global memory atomics: arguments look like (U64 address, value). The F16x2/F32x2 variants
// declare a U32 result while taking a two-component float vector as the value operand.
338OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
339OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
340OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
341OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
342OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
343OPCODE(GlobalAtomicInc32, U32, U64, U32, )
344OPCODE(GlobalAtomicDec32, U32, U64, U32, )
345OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
346OPCODE(GlobalAtomicOr32, U32, U64, U32, )
347OPCODE(GlobalAtomicXor32, U32, U64, U32, )
348OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
349OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
350OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
351OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
352OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
353OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
354OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
355OPCODE(GlobalAtomicOr64, U64, U64, U64, )
356OPCODE(GlobalAtomicXor64, U64, U64, U64, )
357OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
358OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
359OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
360OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
361OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
362OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
363OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
364OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
365
// Storage buffer atomics: one-to-one counterparts of the global atomics above, with the U64
// address replaced by two U32 operands (presumably storage buffer index and offset — confirm
// against the global-memory-to-storage-buffer pass).
366OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
367OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
368OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
369OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
370OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
371OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
372OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
373OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
374OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
375OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
376OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
377OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
378OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
379OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
380OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
381OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
382OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
383OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
384OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
385OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
386OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
387OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
388OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
389OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
390OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
391OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
392OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
393
324// Logical operations 394// Logical operations
325OPCODE(LogicalOr, U1, U1, U1, ) 395OPCODE(LogicalOr, U1, U1, U1, )
326OPCODE(LogicalAnd, U1, U1, U1, ) 396OPCODE(LogicalAnd, U1, U1, U1, )
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..7a32c5eb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,222 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21 SAFEADD,
22};
23
24enum class AtomSize : u64 {
25 U32,
26 S32,
27 U64,
28 F32,
29 F16x2,
30 S64,
31};
32
33IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
34 AtomOp op, bool is_signed) {
35 switch (op) {
36 case AtomOp::ADD:
37 return ir.GlobalAtomicIAdd(offset, op_b);
38 case AtomOp::MIN:
39 return ir.GlobalAtomicIMin(offset, op_b, is_signed);
40 case AtomOp::MAX:
41 return ir.GlobalAtomicIMax(offset, op_b, is_signed);
42 case AtomOp::INC:
43 return ir.GlobalAtomicInc(offset, op_b);
44 case AtomOp::DEC:
45 return ir.GlobalAtomicDec(offset, op_b);
46 case AtomOp::AND:
47 return ir.GlobalAtomicAnd(offset, op_b);
48 case AtomOp::OR:
49 return ir.GlobalAtomicOr(offset, op_b);
50 case AtomOp::XOR:
51 return ir.GlobalAtomicXor(offset, op_b);
52 case AtomOp::EXCH:
53 return ir.GlobalAtomicExchange(offset, op_b);
54 default:
55 throw NotImplementedException("Integer Atom Operation {}", op);
56 }
57}
58
59IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
60 AtomSize size) {
61 static constexpr IR::FpControl f16_control{
62 .no_contraction{false},
63 .rounding{IR::FpRounding::RN},
64 .fmz_mode{IR::FmzMode::DontCare},
65 };
66 static constexpr IR::FpControl f32_control{
67 .no_contraction{false},
68 .rounding{IR::FpRounding::RN},
69 .fmz_mode{IR::FmzMode::FTZ},
70 };
71 switch (op) {
72 case AtomOp::ADD:
73 return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
74 : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
75 case AtomOp::MIN:
76 return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
77 case AtomOp::MAX:
78 return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
79 default:
80 throw NotImplementedException("FP Atom Operation {}", op);
81 }
82}
83
84IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
85 union {
86 u64 raw;
87 BitField<8, 8, IR::Reg> addr_reg;
88 BitField<28, 20, s64> addr_offset;
89 BitField<28, 20, u64> rz_addr_offset;
90 BitField<48, 1, u64> e;
91 } const mem{insn};
92
93 const IR::U64 address{[&]() -> IR::U64 {
94 if (mem.e == 0) {
95 return v.ir.UConvert(64, v.X(mem.addr_reg));
96 }
97 return v.L(mem.addr_reg);
98 }()};
99 const u64 addr_offset{[&]() -> u64 {
100 if (mem.addr_reg == IR::Reg::RZ) {
101 // When RZ is used, the address is an absolute address
102 return static_cast<u64>(mem.rz_addr_offset.Value());
103 } else {
104 return static_cast<u64>(mem.addr_offset.Value());
105 }
106 }()};
107 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
108}
109
110bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
111 // TODO: SAFEADD
112 switch (size) {
113 case AtomSize::S32:
114 case AtomSize::U64:
115 return (op == AtomOp::INC || op == AtomOp::DEC);
116 case AtomSize::S64:
117 return !(op == AtomOp::MIN || op == AtomOp::MAX);
118 case AtomSize::F32:
119 return op != AtomOp::ADD;
120 case AtomSize::F16x2:
121 return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
122 default:
123 return false;
124 }
125}
126
127IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
128 switch (size) {
129 case AtomSize::U32:
130 case AtomSize::S32:
131 case AtomSize::F32:
132 case AtomSize::F16x2:
133 return ir.LoadGlobal32(offset);
134 case AtomSize::U64:
135 case AtomSize::S64:
136 return ir.PackUint2x32(ir.LoadGlobal64(offset));
137 default:
138 throw NotImplementedException("Atom Size {}", size);
139 }
140}
141
142void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
143 switch (size) {
144 case AtomSize::U32:
145 case AtomSize::S32:
146 case AtomSize::F16x2:
147 return v.X(dest_reg, IR::U32{result});
148 case AtomSize::U64:
149 case AtomSize::S64:
150 return v.L(dest_reg, IR::U64{result});
151 case AtomSize::F32:
152 return v.F(dest_reg, IR::F32{result});
153 default:
154 break;
155 }
156}
157} // Anonymous namespace
158
159void TranslatorVisitor::ATOM(u64 insn) {
160 union {
161 u64 raw;
162 BitField<0, 8, IR::Reg> dest_reg;
163 BitField<8, 8, IR::Reg> addr_reg;
164 BitField<20, 8, IR::Reg> src_reg_b;
165 BitField<49, 3, AtomSize> size;
166 BitField<52, 4, AtomOp> op;
167 } const atom{insn};
168
169 const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64};
170 const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64};
171 const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2};
172 const IR::U64 offset{AtomOffset(*this, insn)};
173 IR::Value result;
174
175 if (AtomOpNotApplicable(atom.size, atom.op)) {
176 result = LoadGlobal(ir, offset, atom.size);
177 } else if (!is_integer) {
178 if (atom.size == AtomSize::F32) {
179 result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size);
180 } else {
181 const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))};
182 result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size);
183 }
184 } else if (size_64) {
185 result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed);
186 } else {
187 result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed);
188 }
189 StoreResult(*this, atom.dest_reg, result, atom.size);
190}
191
192void TranslatorVisitor::RED(u64 insn) {
193 union {
194 u64 raw;
195 BitField<0, 8, IR::Reg> src_reg_b;
196 BitField<8, 8, IR::Reg> addr_reg;
197 BitField<20, 3, AtomSize> size;
198 BitField<23, 3, AtomOp> op;
199 } const red{insn};
200
201 if (AtomOpNotApplicable(red.size, red.op)) {
202 return;
203 }
204 const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64};
205 const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64};
206 const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2};
207 const IR::U64 offset{AtomOffset(*this, insn)};
208 if (!is_integer) {
209 if (red.size == AtomSize::F32) {
210 ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size);
211 } else {
212 const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))};
213 ApplyFpAtomOp(ir, offset, src_b, red.op, red.size);
214 }
215 } else if (size_64) {
216 ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed);
217 } else {
218 ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed);
219 }
220}
221
222} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21};
22
23enum class AtomsSize : u64 {
24 U32,
25 S32,
26 U64,
27};
28
29IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
30 bool is_signed) {
31 switch (op) {
32 case AtomOp::ADD:
33 return ir.SharedAtomicIAdd(offset, op_b);
34 case AtomOp::MIN:
35 return ir.SharedAtomicIMin(offset, op_b, is_signed);
36 case AtomOp::MAX:
37 return ir.SharedAtomicIMax(offset, op_b, is_signed);
38 case AtomOp::INC:
39 return ir.SharedAtomicInc(offset, op_b);
40 case AtomOp::DEC:
41 return ir.SharedAtomicDec(offset, op_b);
42 case AtomOp::AND:
43 return ir.SharedAtomicAnd(offset, op_b);
44 case AtomOp::OR:
45 return ir.SharedAtomicOr(offset, op_b);
46 case AtomOp::XOR:
47 return ir.SharedAtomicXor(offset, op_b);
48 case AtomOp::EXCH:
49 return ir.SharedAtomicExchange(offset, op_b);
50 default:
51 throw NotImplementedException("Integer Atoms Operation {}", op);
52 }
53}
54
55IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
56 union {
57 u64 raw;
58 BitField<8, 8, IR::Reg> offset_reg;
59 BitField<30, 22, u64> absolute_offset;
60 BitField<30, 22, s64> relative_offset;
61 } const encoding{insn};
62
63 if (encoding.offset_reg == IR::Reg::RZ) {
64 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
65 } else {
66 const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
67 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
68 }
69}
70
71void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
72 switch (size) {
73 case AtomsSize::U32:
74 case AtomsSize::S32:
75 return v.X(dest_reg, IR::U32{result});
76 case AtomsSize::U64:
77 return v.L(dest_reg, IR::U64{result});
78 default:
79 break;
80 }
81}
82} // Anonymous namespace
83
84void TranslatorVisitor::ATOMS(u64 insn) {
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<8, 8, IR::Reg> addr_reg;
89 BitField<20, 8, IR::Reg> src_reg_b;
90 BitField<28, 2, AtomsSize> size;
91 BitField<52, 4, AtomOp> op;
92 } const atoms{insn};
93
94 const bool size_64{atoms.size == AtomsSize::U64};
95 if (size_64 && atoms.op != AtomOp::EXCH) {
96 throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
97 }
98 const bool is_signed{atoms.size == AtomsSize::S32};
99 const IR::U32 offset{AtomsOffset(*this, insn)};
100
101 IR::Value result;
102 if (size_64) {
103 result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
104 } else {
105 result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
106 }
107 StoreResult(*this, atoms.dest_reg, result, atoms.size);
108}
109
110} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 327941223..aebe3072a 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) {
17 ThrowNotImplemented(Opcode::ATOM_cas); 17 ThrowNotImplemented(Opcode::ATOM_cas);
18} 18}
19 19
20void TranslatorVisitor::ATOM(u64) {
21 ThrowNotImplemented(Opcode::ATOM);
22}
23
24void TranslatorVisitor::ATOMS_cas(u64) { 20void TranslatorVisitor::ATOMS_cas(u64) {
25 ThrowNotImplemented(Opcode::ATOMS_cas); 21 ThrowNotImplemented(Opcode::ATOMS_cas);
26} 22}
27 23
28void TranslatorVisitor::ATOMS(u64) {
29 ThrowNotImplemented(Opcode::ATOMS);
30}
31
32void TranslatorVisitor::B2R(u64) { 24void TranslatorVisitor::B2R(u64) {
33 ThrowNotImplemented(Opcode::B2R); 25 ThrowNotImplemented(Opcode::B2R);
34} 26}
@@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) {
241 ThrowNotImplemented(Opcode::RAM); 233 ThrowNotImplemented(Opcode::RAM);
242} 234}
243 235
244void TranslatorVisitor::RED(u64) {
245 ThrowNotImplemented(Opcode::RED);
246}
247
248void TranslatorVisitor::RET(u64) { 236void TranslatorVisitor::RET(u64) {
249 ThrowNotImplemented(Opcode::RET); 237 ThrowNotImplemented(Opcode::RET);
250} 238}
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index 9ef8688c9..73373576b 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) {
145 case IR::Opcode::FPOrdGreaterThanEqual16: 145 case IR::Opcode::FPOrdGreaterThanEqual16:
146 case IR::Opcode::FPUnordGreaterThanEqual16: 146 case IR::Opcode::FPUnordGreaterThanEqual16:
147 case IR::Opcode::FPIsNan16: 147 case IR::Opcode::FPIsNan16:
148 case IR::Opcode::GlobalAtomicAddF16x2:
149 case IR::Opcode::StorageAtomicAddF16x2:
150 case IR::Opcode::StorageAtomicMinF16x2:
151 case IR::Opcode::StorageAtomicMaxF16x2:
148 info.uses_fp16 = true; 152 info.uses_fp16 = true;
149 break; 153 break;
150 case IR::Opcode::CompositeConstructF64x2: 154 case IR::Opcode::CompositeConstructF64x2:
@@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
310 case IR::Opcode::ConvertF16U64: 314 case IR::Opcode::ConvertF16U64:
311 case IR::Opcode::ConvertF32U64: 315 case IR::Opcode::ConvertF32U64:
312 case IR::Opcode::ConvertF64U64: 316 case IR::Opcode::ConvertF64U64:
317 case IR::Opcode::SharedAtomicExchange64:
313 info.uses_int64 = true; 318 info.uses_int64 = true;
314 break; 319 break;
315 default: 320 default:
@@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) {
444 case IR::Opcode::FSwizzleAdd: 449 case IR::Opcode::FSwizzleAdd:
445 info.uses_fswzadd = true; 450 info.uses_fswzadd = true;
446 break; 451 break;
452 case IR::Opcode::SharedAtomicInc32:
453 info.uses_shared_increment = true;
454 break;
455 case IR::Opcode::SharedAtomicDec32:
456 info.uses_shared_decrement = true;
457 break;
458 case IR::Opcode::GlobalAtomicInc32:
459 case IR::Opcode::StorageAtomicInc32:
460 info.uses_global_increment = true;
461 break;
462 case IR::Opcode::GlobalAtomicDec32:
463 case IR::Opcode::StorageAtomicDec32:
464 info.uses_global_decrement = true;
465 break;
466 case IR::Opcode::GlobalAtomicAddF32:
467 case IR::Opcode::StorageAtomicAddF32:
468 info.uses_atomic_f32_add = true;
469 break;
470 case IR::Opcode::GlobalAtomicAddF16x2:
471 case IR::Opcode::StorageAtomicAddF16x2:
472 info.uses_atomic_f16x2_add = true;
473 break;
474 case IR::Opcode::GlobalAtomicAddF32x2:
475 case IR::Opcode::StorageAtomicAddF32x2:
476 info.uses_atomic_f32x2_add = true;
477 break;
478 case IR::Opcode::GlobalAtomicMinF16x2:
479 case IR::Opcode::StorageAtomicMinF16x2:
480 info.uses_atomic_f16x2_min = true;
481 break;
482 case IR::Opcode::GlobalAtomicMinF32x2:
483 case IR::Opcode::StorageAtomicMinF32x2:
484 info.uses_atomic_f32x2_min = true;
485 break;
486 case IR::Opcode::GlobalAtomicMaxF16x2:
487 case IR::Opcode::StorageAtomicMaxF16x2:
488 info.uses_atomic_f16x2_max = true;
489 break;
490 case IR::Opcode::GlobalAtomicMaxF32x2:
491 case IR::Opcode::StorageAtomicMaxF32x2:
492 info.uses_atomic_f32x2_max = true;
493 break;
494 case IR::Opcode::GlobalAtomicIAdd64:
495 case IR::Opcode::GlobalAtomicSMin64:
496 case IR::Opcode::GlobalAtomicUMin64:
497 case IR::Opcode::GlobalAtomicSMax64:
498 case IR::Opcode::GlobalAtomicUMax64:
499 case IR::Opcode::GlobalAtomicAnd64:
500 case IR::Opcode::GlobalAtomicOr64:
501 case IR::Opcode::GlobalAtomicXor64:
502 case IR::Opcode::GlobalAtomicExchange64:
503 case IR::Opcode::StorageAtomicIAdd64:
504 case IR::Opcode::StorageAtomicSMin64:
505 case IR::Opcode::StorageAtomicUMin64:
506 case IR::Opcode::StorageAtomicSMax64:
507 case IR::Opcode::StorageAtomicUMax64:
508 case IR::Opcode::StorageAtomicAnd64:
509 case IR::Opcode::StorageAtomicOr64:
510 case IR::Opcode::StorageAtomicXor64:
511 info.uses_64_bit_atomics = true;
512 break;
513 case IR::Opcode::SharedAtomicExchange64:
514 info.uses_64_bit_atomics = true;
515 info.uses_shared_memory_u32x2 = true;
516 break;
447 default: 517 default:
448 break; 518 break;
449 } 519 }
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
index afe871505..0d4f266c3 100644
--- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
+++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp
@@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) {
72 case IR::Opcode::WriteGlobal32: 72 case IR::Opcode::WriteGlobal32:
73 case IR::Opcode::WriteGlobal64: 73 case IR::Opcode::WriteGlobal64:
74 case IR::Opcode::WriteGlobal128: 74 case IR::Opcode::WriteGlobal128:
75 case IR::Opcode::GlobalAtomicIAdd32:
76 case IR::Opcode::GlobalAtomicSMin32:
77 case IR::Opcode::GlobalAtomicUMin32:
78 case IR::Opcode::GlobalAtomicSMax32:
79 case IR::Opcode::GlobalAtomicUMax32:
80 case IR::Opcode::GlobalAtomicInc32:
81 case IR::Opcode::GlobalAtomicDec32:
82 case IR::Opcode::GlobalAtomicAnd32:
83 case IR::Opcode::GlobalAtomicOr32:
84 case IR::Opcode::GlobalAtomicXor32:
85 case IR::Opcode::GlobalAtomicExchange32:
86 case IR::Opcode::GlobalAtomicIAdd64:
87 case IR::Opcode::GlobalAtomicSMin64:
88 case IR::Opcode::GlobalAtomicUMin64:
89 case IR::Opcode::GlobalAtomicSMax64:
90 case IR::Opcode::GlobalAtomicUMax64:
91 case IR::Opcode::GlobalAtomicAnd64:
92 case IR::Opcode::GlobalAtomicOr64:
93 case IR::Opcode::GlobalAtomicXor64:
94 case IR::Opcode::GlobalAtomicExchange64:
95 case IR::Opcode::GlobalAtomicAddF32:
96 case IR::Opcode::GlobalAtomicAddF16x2:
97 case IR::Opcode::GlobalAtomicAddF32x2:
98 case IR::Opcode::GlobalAtomicMinF16x2:
99 case IR::Opcode::GlobalAtomicMinF32x2:
100 case IR::Opcode::GlobalAtomicMaxF16x2:
101 case IR::Opcode::GlobalAtomicMaxF32x2:
75 return true; 102 return true;
76 default: 103 default:
77 return false; 104 return false;
@@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) {
125 return IR::Opcode::WriteStorage64; 152 return IR::Opcode::WriteStorage64;
126 case IR::Opcode::WriteGlobal128: 153 case IR::Opcode::WriteGlobal128:
127 return IR::Opcode::WriteStorage128; 154 return IR::Opcode::WriteStorage128;
155 case IR::Opcode::GlobalAtomicIAdd32:
156 return IR::Opcode::StorageAtomicIAdd32;
157 case IR::Opcode::GlobalAtomicSMin32:
158 return IR::Opcode::StorageAtomicSMin32;
159 case IR::Opcode::GlobalAtomicUMin32:
160 return IR::Opcode::StorageAtomicUMin32;
161 case IR::Opcode::GlobalAtomicSMax32:
162 return IR::Opcode::StorageAtomicSMax32;
163 case IR::Opcode::GlobalAtomicUMax32:
164 return IR::Opcode::StorageAtomicUMax32;
165 case IR::Opcode::GlobalAtomicInc32:
166 return IR::Opcode::StorageAtomicInc32;
167 case IR::Opcode::GlobalAtomicDec32:
168 return IR::Opcode::StorageAtomicDec32;
169 case IR::Opcode::GlobalAtomicAnd32:
170 return IR::Opcode::StorageAtomicAnd32;
171 case IR::Opcode::GlobalAtomicOr32:
172 return IR::Opcode::StorageAtomicOr32;
173 case IR::Opcode::GlobalAtomicXor32:
174 return IR::Opcode::StorageAtomicXor32;
175 case IR::Opcode::GlobalAtomicIAdd64:
176 return IR::Opcode::StorageAtomicIAdd64;
177 case IR::Opcode::GlobalAtomicSMin64:
178 return IR::Opcode::StorageAtomicSMin64;
179 case IR::Opcode::GlobalAtomicUMin64:
180 return IR::Opcode::StorageAtomicUMin64;
181 case IR::Opcode::GlobalAtomicSMax64:
182 return IR::Opcode::StorageAtomicSMax64;
183 case IR::Opcode::GlobalAtomicUMax64:
184 return IR::Opcode::StorageAtomicUMax64;
185 case IR::Opcode::GlobalAtomicAnd64:
186 return IR::Opcode::StorageAtomicAnd64;
187 case IR::Opcode::GlobalAtomicOr64:
188 return IR::Opcode::StorageAtomicOr64;
189 case IR::Opcode::GlobalAtomicXor64:
190 return IR::Opcode::StorageAtomicXor64;
191 case IR::Opcode::GlobalAtomicExchange32:
192 return IR::Opcode::StorageAtomicExchange32;
193 case IR::Opcode::GlobalAtomicExchange64:
194 return IR::Opcode::StorageAtomicExchange64;
195 case IR::Opcode::GlobalAtomicAddF32:
196 return IR::Opcode::StorageAtomicAddF32;
197 case IR::Opcode::GlobalAtomicAddF16x2:
198 return IR::Opcode::StorageAtomicAddF16x2;
199 case IR::Opcode::GlobalAtomicMinF16x2:
200 return IR::Opcode::StorageAtomicMinF16x2;
201 case IR::Opcode::GlobalAtomicMaxF16x2:
202 return IR::Opcode::StorageAtomicMaxF16x2;
203 case IR::Opcode::GlobalAtomicAddF32x2:
204 return IR::Opcode::StorageAtomicAddF32x2;
205 case IR::Opcode::GlobalAtomicMinF32x2:
206 return IR::Opcode::StorageAtomicMinF32x2;
207 case IR::Opcode::GlobalAtomicMaxF32x2:
208 return IR::Opcode::StorageAtomicMaxF32x2;
128 default: 209 default:
129 throw InvalidArgument("Invalid global memory opcode {}", opcode); 210 throw InvalidArgument("Invalid global memory opcode {}", opcode);
130 } 211 }
@@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index
328 inst.Invalidate(); 409 inst.Invalidate();
329} 410}
330 411
412/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
413void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
414 const IR::U32& offset) {
415 const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
416 const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
417 const IR::Value value{
418 &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
419 inst.ReplaceUsesWith(value);
420}
421
331/// Replace a global memory instruction with its storage buffer equivalent 422/// Replace a global memory instruction with its storage buffer equivalent
332void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, 423void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
333 const IR::U32& offset) { 424 const IR::U32& offset) {
@@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
348 case IR::Opcode::WriteGlobal64: 439 case IR::Opcode::WriteGlobal64:
349 case IR::Opcode::WriteGlobal128: 440 case IR::Opcode::WriteGlobal128:
350 return ReplaceWrite(block, inst, storage_index, offset); 441 return ReplaceWrite(block, inst, storage_index, offset);
442 case IR::Opcode::GlobalAtomicIAdd32:
443 case IR::Opcode::GlobalAtomicSMin32:
444 case IR::Opcode::GlobalAtomicUMin32:
445 case IR::Opcode::GlobalAtomicSMax32:
446 case IR::Opcode::GlobalAtomicUMax32:
447 case IR::Opcode::GlobalAtomicInc32:
448 case IR::Opcode::GlobalAtomicDec32:
449 case IR::Opcode::GlobalAtomicAnd32:
450 case IR::Opcode::GlobalAtomicOr32:
451 case IR::Opcode::GlobalAtomicXor32:
452 case IR::Opcode::GlobalAtomicExchange32:
453 case IR::Opcode::GlobalAtomicIAdd64:
454 case IR::Opcode::GlobalAtomicSMin64:
455 case IR::Opcode::GlobalAtomicUMin64:
456 case IR::Opcode::GlobalAtomicSMax64:
457 case IR::Opcode::GlobalAtomicUMax64:
458 case IR::Opcode::GlobalAtomicAnd64:
459 case IR::Opcode::GlobalAtomicOr64:
460 case IR::Opcode::GlobalAtomicXor64:
461 case IR::Opcode::GlobalAtomicExchange64:
462 case IR::Opcode::GlobalAtomicAddF32:
463 case IR::Opcode::GlobalAtomicAddF16x2:
464 case IR::Opcode::GlobalAtomicAddF32x2:
465 case IR::Opcode::GlobalAtomicMinF16x2:
466 case IR::Opcode::GlobalAtomicMinF32x2:
467 case IR::Opcode::GlobalAtomicMaxF16x2:
468 case IR::Opcode::GlobalAtomicMaxF32x2:
469 return ReplaceAtomic(block, inst, storage_index, offset);
351 default: 470 default:
352 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); 471 throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
353 } 472 }
@@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
364 CollectStorageBuffers(*block, inst, info); 483 CollectStorageBuffers(*block, inst, info);
365 } 484 }
366 } 485 }
367 u32 storage_index{};
368 for (const StorageBufferAddr& storage_buffer : info.set) { 486 for (const StorageBufferAddr& storage_buffer : info.set) {
369 program.info.storage_buffers_descriptors.push_back({ 487 program.info.storage_buffers_descriptors.push_back({
370 .cbuf_index = storage_buffer.index, 488 .cbuf_index = storage_buffer.index,
@@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) {
372 .count = 1, 490 .count = 1,
373 .is_written{info.writes.contains(storage_buffer)}, 491 .is_written{info.writes.contains(storage_buffer)},
374 }); 492 });
375 ++storage_index;
376 } 493 }
377 for (const StorageInst& storage_inst : info.to_replace) { 494 for (const StorageInst& storage_inst : info.to_replace) {
378 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; 495 const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
index 52576b07f..62e73d52d 100644
--- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) {
114 return IR::Opcode::ConvertF32U32; 114 return IR::Opcode::ConvertF32U32;
115 case IR::Opcode::ConvertF16U64: 115 case IR::Opcode::ConvertF16U64:
116 return IR::Opcode::ConvertF32U64; 116 return IR::Opcode::ConvertF32U64;
117 case IR::Opcode::GlobalAtomicAddF16x2:
118 return IR::Opcode::GlobalAtomicAddF32x2;
119 case IR::Opcode::StorageAtomicAddF16x2:
120 return IR::Opcode::StorageAtomicAddF32x2;
121 case IR::Opcode::GlobalAtomicMinF16x2:
122 return IR::Opcode::GlobalAtomicMinF32x2;
123 case IR::Opcode::StorageAtomicMinF16x2:
124 return IR::Opcode::StorageAtomicMinF32x2;
125 case IR::Opcode::GlobalAtomicMaxF16x2:
126 return IR::Opcode::GlobalAtomicMaxF32x2;
127 case IR::Opcode::StorageAtomicMaxF16x2:
128 return IR::Opcode::StorageAtomicMaxF32x2;
117 default: 129 default:
118 return op; 130 return op;
119 } 131 }
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index f0d68d516..a4e41bda1 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -38,6 +38,7 @@ struct Profile {
38 bool support_viewport_index_layer_non_geometry{}; 38 bool support_viewport_index_layer_non_geometry{};
39 bool support_typeless_image_loads{}; 39 bool support_typeless_image_loads{};
40 bool warp_size_potentially_larger_than_guest{}; 40 bool warp_size_potentially_larger_than_guest{};
41 bool support_int64_atomics{};
41 42
42 // FClamp is broken and OpFMax + OpFMin should be used instead 43 // FClamp is broken and OpFMax + OpFMin should be used instead
43 bool has_broken_spirv_clamp{}; 44 bool has_broken_spirv_clamp{};
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h
index 3fbe99268..7bcecf554 100644
--- a/src/shader_recompiler/shader_info.h
+++ b/src/shader_recompiler/shader_info.h
@@ -128,6 +128,19 @@ struct Info {
128 bool uses_subgroup_mask{}; 128 bool uses_subgroup_mask{};
129 bool uses_fswzadd{}; 129 bool uses_fswzadd{};
130 bool uses_typeless_image_reads{}; 130 bool uses_typeless_image_reads{};
131 bool uses_shared_increment{};
132 bool uses_shared_decrement{};
133 bool uses_global_increment{};
134 bool uses_global_decrement{};
135 bool uses_atomic_f32_add{};
136 bool uses_atomic_f16x2_add{};
137 bool uses_atomic_f16x2_min{};
138 bool uses_atomic_f16x2_max{};
139 bool uses_atomic_f32x2_add{};
140 bool uses_atomic_f32x2_min{};
141 bool uses_atomic_f32x2_max{};
142 bool uses_64_bit_atomics{};
143 bool uses_shared_memory_u32x2{};
131 144
132 IR::Type used_constant_buffer_types{}; 145 IR::Type used_constant_buffer_types{};
133 146
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f699a9bdf..b953d694b 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
637 device.IsExtShaderViewportIndexLayerSupported(), 637 device.IsExtShaderViewportIndexLayerSupported(),
638 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), 638 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
639 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), 639 .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
640 .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
640 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, 641 .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
641 .generic_input_types{}, 642 .generic_input_types{},
642 .fixed_state_point_size{}, 643 .fixed_state_point_size{},
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 78bb741bc..911dfed44 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -681,6 +681,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
681 bool has_ext_transform_feedback{}; 681 bool has_ext_transform_feedback{};
682 bool has_ext_custom_border_color{}; 682 bool has_ext_custom_border_color{};
683 bool has_ext_extended_dynamic_state{}; 683 bool has_ext_extended_dynamic_state{};
684 bool has_ext_shader_atomic_int64{};
684 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { 685 for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
685 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, 686 const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
686 bool push) { 687 bool push) {
@@ -710,6 +711,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
710 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); 711 test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
711 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); 712 test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
712 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); 713 test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
714 test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false);
713 test(has_khr_workgroup_memory_explicit_layout, 715 test(has_khr_workgroup_memory_explicit_layout,
714 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); 716 VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false);
715 if (Settings::values.renderer_debug) { 717 if (Settings::values.renderer_debug) {
@@ -760,6 +762,18 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
760 } else { 762 } else {
761 is_warp_potentially_bigger = true; 763 is_warp_potentially_bigger = true;
762 } 764 }
765 if (has_ext_shader_atomic_int64) {
766 VkPhysicalDeviceShaderAtomicInt64Features atomic_int64;
767 atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES;
768 atomic_int64.pNext = nullptr;
769 features.pNext = &atomic_int64;
770 physical.GetFeatures2KHR(features);
771 // Enable the extension only when both buffer and shared 64-bit atomics are reported.
772 if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) {
773 extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
774 ext_shader_atomic_int64 = true;
775 }
776 }
763 if (has_ext_transform_feedback) { 777 if (has_ext_transform_feedback) {
764 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; 778 VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features;
765 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; 779 tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index adf62a707..4e6d13308 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -229,6 +229,11 @@ public:
229 return ext_shader_stencil_export; 229 return ext_shader_stencil_export;
230 } 230 }
231 231
232 /// Returns true if VK_KHR_shader_atomic_int64 is usable (buffer and shared 64-bit atomics).
233 bool IsExtShaderAtomicInt64Supported() const {
234 return ext_shader_atomic_int64;
235 }
236
232 /// Returns true when a known debugging tool is attached. 237 /// Returns true when a known debugging tool is attached.
233 bool HasDebuggingToolAttached() const { 238 bool HasDebuggingToolAttached() const {
234 return has_renderdoc || has_nsight_graphics; 239 return has_renderdoc || has_nsight_graphics;
@@ -320,6 +325,7 @@ private:
320 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. 325 bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
321 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. 326 bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
322 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. 327 bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
328 bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64.
323 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. 329 bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
324 bool has_renderdoc{}; ///< Has RenderDoc attached 330 bool has_renderdoc{}; ///< Has RenderDoc attached
325 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached 331 bool has_nsight_graphics{}; ///< Has Nsight Graphics attached