diff options
Diffstat (limited to 'src')
21 files changed, 1745 insertions, 19 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 8e1d37373..7b9f08aa0 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt | |||
| @@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC | |||
| 3 | backend/spirv/emit_context.h | 3 | backend/spirv/emit_context.h |
| 4 | backend/spirv/emit_spirv.cpp | 4 | backend/spirv/emit_spirv.cpp |
| 5 | backend/spirv/emit_spirv.h | 5 | backend/spirv/emit_spirv.h |
| 6 | backend/spirv/emit_spirv_atomic.cpp | ||
| 6 | backend/spirv/emit_spirv_barriers.cpp | 7 | backend/spirv/emit_spirv_barriers.cpp |
| 7 | backend/spirv/emit_spirv_bitwise_conversion.cpp | 8 | backend/spirv/emit_spirv_bitwise_conversion.cpp |
| 8 | backend/spirv/emit_spirv_composite.cpp | 9 | backend/spirv/emit_spirv_composite.cpp |
| @@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC | |||
| 65 | frontend/maxwell/program.h | 66 | frontend/maxwell/program.h |
| 66 | frontend/maxwell/structured_control_flow.cpp | 67 | frontend/maxwell/structured_control_flow.cpp |
| 67 | frontend/maxwell/structured_control_flow.h | 68 | frontend/maxwell/structured_control_flow.h |
| 69 | frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | ||
| 70 | frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | ||
| 68 | frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | 71 | frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp |
| 69 | frontend/maxwell/translate/impl/barrier_operations.cpp | 72 | frontend/maxwell/translate/impl/barrier_operations.cpp |
| 70 | frontend/maxwell/translate/impl/bitfield_extract.cpp | 73 | frontend/maxwell/translate/impl/bitfield_extract.cpp |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.cpp b/src/shader_recompiler/backend/spirv/emit_context.cpp index 32f8c4508..e5d83e9b4 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/emit_context.cpp | |||
| @@ -15,6 +15,53 @@ | |||
| 15 | 15 | ||
| 16 | namespace Shader::Backend::SPIRV { | 16 | namespace Shader::Backend::SPIRV { |
| 17 | namespace { | 17 | namespace { |
// Identifies which read-modify-write operation CasFunction() emits as the body
// of the generated two-operand SPIR-V helper function (operands a and b).
| 18 | enum class CasFunctionType { | ||
// Wrapping increment: yields 0 when a >= b (unsigned compare), otherwise a + 1.
| 19 | Increment, | ||
// Wrapping decrement: yields b when a == 0 or a > b (unsigned compare), otherwise a - 1.
| 20 | Decrement, | ||
// Floating-point a + b (emitted with OpFAdd).
| 21 | FPAdd, | ||
// Floating-point minimum of a and b (emitted with OpFMin).
| 22 | FPMin, | ||
// Floating-point maximum of a and b (emitted with OpFMax).
| 23 | FPMax, | ||
| 24 | }; | ||
| 25 | |||
// Emits a SPIR-V function `value_type f(value_type a, value_type b)` that computes
// the new value for a compare-and-swap based atomic, and returns the function's id.
//
// ctx           - emit context the function is written into.
// function_type - which operation the function body performs on (a, b).
// value_type    - SPIR-V type id used for both parameters and the return value.
//
// In the callers visible in this file `a` is the value currently in memory and
// `b` is the instruction operand (see EmitContext::CasLoop).
| 26 | Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { | ||
| 27 | const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; | ||
| 28 | const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 29 | const Id op_a{ctx.OpFunctionParameter(value_type)}; | ||
| 30 | const Id op_b{ctx.OpFunctionParameter(value_type)}; | ||
| 31 | ctx.AddLabel(); | ||
| 32 | Id result{}; | ||
| 33 | switch (function_type) { | ||
| 34 | case CasFunctionType::Increment: { | ||
// result = (a >= b) ? 0 : a + 1, using an unsigned comparison.
| 35 | const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; | ||
| 36 | const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; | ||
// The zero operand is built from value_type (as the Decrement case does) so both
// OpSelect operands always have the same type as the result, whatever integer
// value_type the caller passes.
| 37 | result = ctx.OpSelect(value_type, pred, ctx.Constant(value_type, 0u), incr); | ||
| 38 | break; | ||
| 39 | } | ||
| 40 | case CasFunctionType::Decrement: { | ||
// result = (a == 0 || a > b) ? b : a - 1, using an unsigned comparison.
| 41 | const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; | ||
| 42 | const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; | ||
| 43 | const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; | ||
| 44 | const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; | ||
| 45 | result = ctx.OpSelect(value_type, pred, op_b, decr); | ||
| 46 | break; | ||
| 47 | } | ||
| 48 | case CasFunctionType::FPAdd: | ||
| 49 | result = ctx.OpFAdd(value_type, op_a, op_b); | ||
| 50 | break; | ||
| 51 | case CasFunctionType::FPMin: | ||
| 52 | result = ctx.OpFMin(value_type, op_a, op_b); | ||
| 53 | break; | ||
| 54 | case CasFunctionType::FPMax: | ||
| 55 | result = ctx.OpFMax(value_type, op_a, op_b); | ||
| 56 | break; | ||
| 57 | default: | ||
// NOTE(review): an unknown function_type leaves `result` default-constructed and
// it is still passed to OpReturnValue below — consider rejecting it explicitly.
| 58 | break; | ||
| 59 | } | ||
| 60 | ctx.OpReturnValue(result); | ||
| 61 | ctx.OpFunctionEnd(); | ||
| 62 | return func; | ||
| 63 | } | ||
| 64 | |||
| 18 | Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { | 65 | Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { |
| 19 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; | 66 | const spv::ImageFormat format{spv::ImageFormat::Unknown}; |
| 20 | const Id type{ctx.F32[1]}; | 67 | const Id type{ctx.F32[1]}; |
| @@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) { | |||
| 196 | } | 243 | } |
| 197 | } | 244 | } |
| 198 | 245 | ||
// Emits a SPIR-V function implementing an atomic read-modify-write as a
// compare-and-swap retry loop, and returns the id of that function.
//
// function     - id of a two-operand helper (see CasFunction) that computes the new
//                value from (current memory value, op_b).
// pointer_type - selects whether the `base` parameter is typed as the shared-memory
//                array pointer or as the storage-buffer struct pointer.
// value_type   - SPIR-V type of the operand and of the generated function's result.
//
// The generated function takes (U32 index, value_type op_b, pointer base). Each
// iteration loads the 32-bit word at `index`, calls `function`, and tries to swap the
// new word in with OpAtomicCompareExchange; it repeats until the word returned by the
// exchange equals the word that was loaded. The returned value is the exchange result
// converted back to value_type.
| 246 | Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { | ||
// Labels are allocated up front and attached later with AddLabel(label).
| 247 | const Id loop_header{OpLabel()}; | ||
| 248 | const Id continue_block{OpLabel()}; | ||
| 249 | const Id merge_block{OpLabel()}; | ||
| 250 | const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type | ||
| 251 | : storage_memory_u32}; | ||
| 252 | const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; | ||
| 253 | const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||
| 254 | const Id index{OpFunctionParameter(U32[1])}; | ||
| 255 | const Id op_b{OpFunctionParameter(value_type)}; | ||
| 256 | const Id base{OpFunctionParameter(storage_type)}; | ||
| 257 | AddLabel(); | ||
// Passed as the third argument of OpAtomicCompareExchange below; presumably the
// scope operand (1 would be spv::Scope::Device) — TODO confirm against the emitter API.
| 258 | const Id one{Constant(U32[1], 1)}; | ||
| 259 | OpBranch(loop_header); | ||
| 260 | AddLabel(loop_header); | ||
| 261 | OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||
| 262 | OpBranch(continue_block); | ||
| 263 | |||
// All of the loop's work is emitted in the continue block, which branches either to
// the merge block (success) or back to the header (retry).
| 264 | AddLabel(continue_block); | ||
// Storage buffers need an extra zero index to step into the struct's first member.
| 265 | const Id word_pointer{pointer_type == CasPointerType::Shared | ||
| 266 | ? OpAccessChain(shared_u32, base, index) | ||
| 267 | : OpAccessChain(storage_u32, base, u32_zero_value, index)}; | ||
// F32[2] operands are stored in memory as two packed halves in one 32-bit word, so
// the word is unpacked before the call and re-packed afterwards.
| 268 | if (value_type.value == F32[2].value) { | ||
| 269 | const Id u32_value{OpLoad(U32[1], word_pointer)}; | ||
| 270 | const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; | ||
| 271 | const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; | ||
| 272 | const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; | ||
| 273 | const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, | ||
| 274 | u32_zero_value, u32_new_value, u32_value)}; | ||
// The exchange is treated as successful when it returns the word that was loaded.
| 275 | const Id success{OpIEqual(U1, atomic_res, u32_value)}; | ||
| 276 | OpBranchConditional(success, merge_block, loop_header); | ||
| 277 | |||
| 278 | AddLabel(merge_block); | ||
| 279 | OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); | ||
| 280 | } else { | ||
// Every other value_type is reinterpreted to and from the 32-bit word with OpBitcast.
| 281 | const Id value{OpLoad(U32[1], word_pointer)}; | ||
| 282 | const Id new_value{OpBitcast( | ||
| 283 | U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; | ||
| 284 | const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, | ||
| 285 | u32_zero_value, new_value, value)}; | ||
| 286 | const Id success{OpIEqual(U1, atomic_res, value)}; | ||
| 287 | OpBranchConditional(success, merge_block, loop_header); | ||
| 288 | |||
| 289 | AddLabel(merge_block); | ||
| 290 | OpReturnValue(OpBitcast(value_type, atomic_res)); | ||
| 291 | } | ||
| 292 | OpFunctionEnd(); | ||
| 293 | return func; | ||
| 294 | } | ||
| 295 | |||
| 199 | void EmitContext::DefineCommonTypes(const Info& info) { | 296 | void EmitContext::DefineCommonTypes(const Info& info) { |
| 200 | void_id = TypeVoid(); | 297 | void_id = TypeVoid(); |
| 201 | 298 | ||
| @@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { | |||
| 300 | } | 397 | } |
| 301 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; | 398 | const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; |
| 302 | const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; | 399 | const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; |
| 303 | const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; | 400 | shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); |
| 304 | shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); | 401 | shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); |
| 305 | shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); | 402 | shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); |
| 306 | interfaces.push_back(shared_memory_u32); | 403 | interfaces.push_back(shared_memory_u32); |
| 307 | 404 | ||
| 308 | const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; | 405 | const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; |
| @@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { | |||
| 346 | if (program.info.uses_int16) { | 443 | if (program.info.uses_int16) { |
| 347 | shared_store_u16_func = make_function(16, 16); | 444 | shared_store_u16_func = make_function(16, 16); |
| 348 | } | 445 | } |
| 446 | if (program.info.uses_shared_increment) { | ||
| 447 | const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; | ||
| 448 | increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); | ||
| 449 | } | ||
| 450 | if (program.info.uses_shared_decrement) { | ||
| 451 | const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; | ||
| 452 | decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); | ||
| 453 | } | ||
| 349 | } | 454 | } |
| 350 | 455 | ||
| 351 | void EmitContext::DefineAttributeMemAccess(const Info& info) { | 456 | void EmitContext::DefineAttributeMemAccess(const Info& info) { |
| @@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | |||
| 530 | MemberName(struct_type, 0, "data"); | 635 | MemberName(struct_type, 0, "data"); |
| 531 | MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); | 636 | MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); |
| 532 | 637 | ||
| 533 | const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; | 638 | storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); |
| 534 | storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); | 639 | storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); |
| 535 | 640 | ||
| 536 | u32 index{}; | 641 | u32 index{}; |
| 537 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | 642 | for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { |
| 538 | const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; | 643 | const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; |
| 539 | Decorate(id, spv::Decoration::Binding, binding); | 644 | Decorate(id, spv::Decoration::Binding, binding); |
| 540 | Decorate(id, spv::Decoration::DescriptorSet, 0U); | 645 | Decorate(id, spv::Decoration::DescriptorSet, 0U); |
| 541 | Name(id, fmt::format("ssbo{}", index)); | 646 | Name(id, fmt::format("ssbo{}", index)); |
| @@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | |||
| 546 | index += desc.count; | 651 | index += desc.count; |
| 547 | binding += desc.count; | 652 | binding += desc.count; |
| 548 | } | 653 | } |
| 654 | if (info.uses_global_increment) { | ||
| 655 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 656 | const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; | ||
| 657 | increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); | ||
| 658 | } | ||
| 659 | if (info.uses_global_decrement) { | ||
| 660 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 661 | const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; | ||
| 662 | decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); | ||
| 663 | } | ||
| 664 | if (info.uses_atomic_f32_add) { | ||
| 665 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 666 | const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; | ||
| 667 | f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); | ||
| 668 | } | ||
| 669 | if (info.uses_atomic_f16x2_add) { | ||
| 670 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 671 | const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; | ||
| 672 | f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); | ||
| 673 | } | ||
| 674 | if (info.uses_atomic_f16x2_min) { | ||
| 675 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 676 | const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; | ||
| 677 | f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); | ||
| 678 | } | ||
| 679 | if (info.uses_atomic_f16x2_max) { | ||
| 680 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 681 | const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; | ||
| 682 | f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); | ||
| 683 | } | ||
| 684 | if (info.uses_atomic_f32x2_add) { | ||
| 685 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 686 | const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; | ||
| 687 | f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); | ||
| 688 | } | ||
| 689 | if (info.uses_atomic_f32x2_min) { | ||
| 690 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 691 | const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; | ||
| 692 | f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); | ||
| 693 | } | ||
| 694 | if (info.uses_atomic_f32x2_max) { | ||
| 695 | AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||
| 696 | const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; | ||
| 697 | f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); | ||
| 698 | } | ||
| 549 | } | 699 | } |
| 550 | 700 | ||
| 551 | void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { | 701 | void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { |
diff --git a/src/shader_recompiler/backend/spirv/emit_context.h b/src/shader_recompiler/backend/spirv/emit_context.h index e70f3458c..34f38454f 100644 --- a/src/shader_recompiler/backend/spirv/emit_context.h +++ b/src/shader_recompiler/backend/spirv/emit_context.h | |||
| @@ -94,6 +94,7 @@ public: | |||
| 94 | Id output_f32{}; | 94 | Id output_f32{}; |
| 95 | 95 | ||
| 96 | Id storage_u32{}; | 96 | Id storage_u32{}; |
| 97 | Id storage_memory_u32{}; | ||
| 97 | 98 | ||
| 98 | Id image_buffer_type{}; | 99 | Id image_buffer_type{}; |
| 99 | Id sampled_texture_buffer_type{}; | 100 | Id sampled_texture_buffer_type{}; |
| @@ -136,9 +137,21 @@ public: | |||
| 136 | Id shared_memory_u32{}; | 137 | Id shared_memory_u32{}; |
| 137 | Id shared_memory_u32x2{}; | 138 | Id shared_memory_u32x2{}; |
| 138 | Id shared_memory_u32x4{}; | 139 | Id shared_memory_u32x4{}; |
| 140 | Id shared_memory_u32_type{}; | ||
| 139 | 141 | ||
| 140 | Id shared_store_u8_func{}; | 142 | Id shared_store_u8_func{}; |
| 141 | Id shared_store_u16_func{}; | 143 | Id shared_store_u16_func{}; |
| 144 | Id increment_cas_shared{}; | ||
| 145 | Id increment_cas_ssbo{}; | ||
| 146 | Id decrement_cas_shared{}; | ||
| 147 | Id decrement_cas_ssbo{}; | ||
| 148 | Id f32_add_cas{}; | ||
| 149 | Id f16x2_add_cas{}; | ||
| 150 | Id f16x2_min_cas{}; | ||
| 151 | Id f16x2_max_cas{}; | ||
| 152 | Id f32x2_add_cas{}; | ||
| 153 | Id f32x2_min_cas{}; | ||
| 154 | Id f32x2_max_cas{}; | ||
| 142 | 155 | ||
| 143 | Id input_position{}; | 156 | Id input_position{}; |
| 144 | std::array<Id, 32> input_generics{}; | 157 | std::array<Id, 32> input_generics{}; |
| @@ -153,6 +166,11 @@ public: | |||
| 153 | std::vector<Id> interfaces; | 166 | std::vector<Id> interfaces; |
| 154 | 167 | ||
| 155 | private: | 168 | private: |
// Selects the kind of memory a CasLoop() helper operates on, which decides the
// pointer type of its `base` parameter and how the word pointer is formed.
| 169 | enum class CasPointerType { | ||
// Workgroup shared memory (base typed as shared_memory_u32_type).
| 170 | Shared, | ||
// Storage buffer (base typed as storage_memory_u32).
| 171 | Ssbo, | ||
| 172 | }; | ||
| 173 | |||
| 156 | void DefineCommonTypes(const Info& info); | 174 | void DefineCommonTypes(const Info& info); |
| 157 | void DefineCommonConstants(); | 175 | void DefineCommonConstants(); |
| 158 | void DefineInterfaces(const Info& info); | 176 | void DefineInterfaces(const Info& info); |
| @@ -171,6 +189,8 @@ private: | |||
| 171 | 189 | ||
| 172 | void DefineInputs(const Info& info); | 190 | void DefineInputs(const Info& info); |
| 173 | void DefineOutputs(const Info& info); | 191 | void DefineOutputs(const Info& info); |
| 192 | |||
| 193 | [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); | ||
| 174 | }; | 194 | }; |
| 175 | 195 | ||
| 176 | } // namespace Shader::Backend::SPIRV | 196 | } // namespace Shader::Backend::SPIRV |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 5a1ffd61c..9248bd78b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp | |||
| @@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | |||
| 238 | ctx.AddCapability(spv::Capability::SubgroupVoteKHR); | 238 | ctx.AddCapability(spv::Capability::SubgroupVoteKHR); |
| 239 | } | 239 | } |
| 240 | } | 240 | } |
| 241 | if (info.uses_64_bit_atomics && profile.support_int64_atomics) { | ||
| 242 | ctx.AddCapability(spv::Capability::Int64Atomics); | ||
| 243 | } | ||
| 241 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | 244 | if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { |
| 242 | ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); | 245 | ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); |
| 243 | } | 246 | } |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.h b/src/shader_recompiler/backend/spirv/emit_spirv.h index 12b7993ae..a3398a605 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv.h | |||
| @@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | |||
| 306 | Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); | 306 | Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); |
| 307 | Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | 307 | Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); |
| 308 | Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | 308 | Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); |
| 309 | Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 310 | Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 311 | Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 312 | Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 313 | Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 314 | Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 315 | Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 316 | Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 317 | Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 318 | Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 319 | Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 320 | Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); | ||
| 321 | Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 322 | Id value); | ||
| 323 | Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 324 | Id value); | ||
| 325 | Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 326 | Id value); | ||
| 327 | Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 328 | Id value); | ||
| 329 | Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 330 | Id value); | ||
| 331 | Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 332 | Id value); | ||
| 333 | Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 334 | Id value); | ||
| 335 | Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 336 | Id value); | ||
| 337 | Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 338 | Id value); | ||
| 339 | Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 340 | Id value); | ||
| 341 | Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 342 | Id value); | ||
| 343 | Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 344 | Id value); | ||
| 345 | Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 346 | Id value); | ||
| 347 | Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 348 | Id value); | ||
| 349 | Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 350 | Id value); | ||
| 351 | Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 352 | Id value); | ||
| 353 | Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 354 | Id value); | ||
| 355 | Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 356 | Id value); | ||
| 357 | Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 358 | Id value); | ||
| 359 | Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 360 | Id value); | ||
| 361 | Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 362 | Id value); | ||
| 363 | Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 364 | Id value); | ||
| 365 | Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 366 | Id value); | ||
| 367 | Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 368 | Id value); | ||
| 369 | Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 370 | Id value); | ||
| 371 | Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 372 | Id value); | ||
| 373 | Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 374 | Id value); | ||
| 375 | Id EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||
| 376 | Id EmitGlobalAtomicSMin32(EmitContext& ctx); | ||
| 377 | Id EmitGlobalAtomicUMin32(EmitContext& ctx); | ||
| 378 | Id EmitGlobalAtomicSMax32(EmitContext& ctx); | ||
| 379 | Id EmitGlobalAtomicUMax32(EmitContext& ctx); | ||
| 380 | Id EmitGlobalAtomicInc32(EmitContext& ctx); | ||
| 381 | Id EmitGlobalAtomicDec32(EmitContext& ctx); | ||
| 382 | Id EmitGlobalAtomicAnd32(EmitContext& ctx); | ||
| 383 | Id EmitGlobalAtomicOr32(EmitContext& ctx); | ||
| 384 | Id EmitGlobalAtomicXor32(EmitContext& ctx); | ||
| 385 | Id EmitGlobalAtomicExchange32(EmitContext& ctx); | ||
| 386 | Id EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||
| 387 | Id EmitGlobalAtomicSMin64(EmitContext& ctx); | ||
| 388 | Id EmitGlobalAtomicUMin64(EmitContext& ctx); | ||
| 389 | Id EmitGlobalAtomicSMax64(EmitContext& ctx); | ||
| 390 | Id EmitGlobalAtomicUMax64(EmitContext& ctx); | ||
| 391 | Id EmitGlobalAtomicInc64(EmitContext& ctx); | ||
| 392 | Id EmitGlobalAtomicDec64(EmitContext& ctx); | ||
| 393 | Id EmitGlobalAtomicAnd64(EmitContext& ctx); | ||
| 394 | Id EmitGlobalAtomicOr64(EmitContext& ctx); | ||
| 395 | Id EmitGlobalAtomicXor64(EmitContext& ctx); | ||
| 396 | Id EmitGlobalAtomicExchange64(EmitContext& ctx); | ||
| 397 | Id EmitGlobalAtomicAddF32(EmitContext& ctx); | ||
| 398 | Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||
| 399 | Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||
| 400 | Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||
| 401 | Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||
| 402 | Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||
| 403 | Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||
| 309 | Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); | 404 | Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); |
| 310 | Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); | 405 | Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); |
| 311 | Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); | 406 | Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); |
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp new file mode 100644 index 000000000..03d891419 --- /dev/null +++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp | |||
| @@ -0,0 +1,528 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||
| 6 | |||
| 7 | namespace Shader::Backend::SPIRV { | ||
| 8 | namespace { | ||
| 9 | |||
// Returns a pointer (type ctx.shared_u32) to a 32-bit word of shared memory.
// The word index is (offset >> 2) + index_offset; offset is presumably a byte
// offset — TODO confirm with the IR producer. index_offset selects a following
// word (e.g. 1 for the second half of a 64-bit value).
// NOTE(review): this uses an arithmetic shift on a U32 value while StorageIndex
// uses a logical shift; the results differ only when the top bit of offset is set.
| 10 | Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { | ||
| 11 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 12 | const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||
| 13 | const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; | ||
// With explicit workgroup layout the access chain takes an extra leading zero index.
| 14 | return ctx.profile.support_explicit_workgroup_layout | ||
| 15 | ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) | ||
| 16 | : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); | ||
| 17 | } | ||
| 18 | |||
// Converts `offset` into an element index for elements of `element_size` bytes.
// Immediate offsets are divided at emit time and returned as a constant; dynamic
// offsets are shifted right by countr_zero(element_size) in the emitted code, which
// matches the division only when element_size is a power of two.
| 19 | Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { | ||
| 20 | if (offset.IsImmediate()) { | ||
| 21 | const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; | ||
| 22 | return ctx.Constant(ctx.U32[1], imm_offset); | ||
| 23 | } | ||
| 24 | const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; | ||
| 25 | const Id index{ctx.Def(offset)}; | ||
// element_size == 1: the offset already is the index, no shift needs to be emitted.
| 26 | if (shift == 0) { | ||
| 27 | return index; | ||
| 28 | } | ||
| 29 | const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; | ||
| 30 | return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); | ||
| 31 | } | ||
| 32 | |||
// Returns a pointer (type ctx.storage_u32) to a 32-bit word of the storage buffer
// selected by `binding`. The word index is StorageIndex(offset, sizeof(u32)) plus
// index_offset. Throws NotImplementedException when `binding` is not an immediate.
| 33 | Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 34 | u32 index_offset = 0) { | ||
| 35 | // TODO: Support reinterpreting bindings, guaranteed to be aligned | ||
| 36 | if (!binding.IsImmediate()) { | ||
| 37 | throw NotImplementedException("Dynamic storage buffer indexing"); | ||
| 38 | } | ||
| 39 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 40 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 41 | const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; | ||
// The leading zero index steps into the first member of the buffer struct.
| 42 | return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); | ||
| 43 | } | ||
| 44 | |||
// Returns the {scope, semantics} operand pair shared by the atomic instructions in
// this file: scope is a constant holding spv::Scope::Device and semantics is the
// zero constant (presumably "no memory semantics", i.e. relaxed — TODO confirm).
| 45 | std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) { | ||
| 46 | const Id scope{ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device))}; | ||
| 47 | const Id semantics{ctx.u32_zero_value}; | ||
| 48 | return {scope, semantics}; | ||
| 49 | } | ||
| 50 | |||
// Loads two 32-bit words with plain (non-atomic) OpLoad and combines them into one
// U64 value: the word at pointer_1 becomes component 0 and the word at pointer_2
// component 1 of a U32[2] vector, which is then bitcast to U64.
| 51 | Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { | ||
| 52 | const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; | ||
| 53 | const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; | ||
| 54 | const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; | ||
| 55 | return ctx.OpBitcast(ctx.U64, original_composite); | ||
| 56 | } | ||
| 57 | |||
// Counterpart of LoadU64: bitcasts `result` to a U32[2] vector and writes component 0
// to pointer_1 and component 1 to pointer_2 using plain (non-atomic) OpStore.
| 58 | void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { | ||
| 59 | const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; | ||
| 60 | ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); | ||
| 61 | ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); | ||
| 62 | } | ||
| 63 | } // Anonymous namespace | ||
| 64 | |||
// Emits OpAtomicIAdd of `value` on the 32-bit shared-memory word addressed by
// pointer_offset and returns the id of the instruction's U32 result.
| 65 | Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 66 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 67 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 68 | return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); | ||
| 69 | } | ||
| 70 | |||
// Emits OpAtomicSMin (signed minimum) of `value` on the 32-bit shared-memory word
// addressed by pointer_offset and returns the id of the instruction's U32 result.
| 71 | Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 72 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 73 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 74 | return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); | ||
| 75 | } | ||
| 76 | |||
// Emits OpAtomicUMin (unsigned minimum) of `value` on the 32-bit shared-memory word
// addressed by pointer_offset and returns the id of the instruction's U32 result.
| 77 | Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 78 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 79 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 80 | return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); | ||
| 81 | } | ||
| 82 | |||
// Emits OpAtomicSMax (signed maximum) of `value` on the 32-bit shared-memory word
// addressed by pointer_offset and returns the id of the instruction's U32 result.
| 83 | Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 84 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 85 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 86 | return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); | ||
| 87 | } | ||
| 88 | |||
// Emits OpAtomicUMax (unsigned maximum) of `value` on the 32-bit shared-memory word
// addressed by pointer_offset and returns the id of the instruction's U32 result.
| 89 | Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 90 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 91 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 92 | return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); | ||
| 93 | } | ||
| 94 | |||
// Emits a call to the generated helper ctx.increment_cas_shared with the arguments
// (word index, value, shared-memory array) and returns the id of the call's U32
// result. The word index is pointer_offset >> 2 (arithmetic shift).
// NOTE(review): unlike GetSharedPointer, this path does not look at
// support_explicit_workgroup_layout — confirm the helper handles that layout.
| 95 | Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 96 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 97 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; | ||
| 98 | return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, | ||
| 99 | ctx.shared_memory_u32); | ||
| 100 | } | ||
| 101 | |||
// Emits a call to the generated helper ctx.decrement_cas_shared with the arguments
// (word index, value, shared-memory array) and returns the id of the call's U32
// result. The word index is pointer_offset >> 2 (arithmetic shift).
// NOTE(review): unlike GetSharedPointer, this path does not look at
// support_explicit_workgroup_layout — confirm the helper handles that layout.
| 102 | Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 103 | const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||
| 104 | const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; | ||
| 105 | return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, | ||
| 106 | ctx.shared_memory_u32); | ||
| 107 | } | ||
| 108 | |||
// Emits OpAtomicAnd (bitwise AND) of `value` on the 32-bit shared-memory word
// addressed by pointer_offset and returns the id of the instruction's U32 result.
| 109 | Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 110 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 111 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 112 | return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); | ||
| 113 | } | ||
| 114 | |||
// Emits OpAtomicOr (bitwise OR) of `value` on the 32-bit shared-memory word
// addressed by pointer_offset and returns the id of the instruction's U32 result.
| 115 | Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 116 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 117 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 118 | return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); | ||
| 119 | } | ||
| 120 | |||
| 121 | Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 122 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 123 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 124 | return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); | ||
| 125 | } | ||
| 126 | |||
| 127 | Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 128 | const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||
| 129 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 130 | return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); | ||
| 131 | } | ||
| 132 | |||
| 133 | Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { | ||
| 134 | const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; | ||
| 135 | if (ctx.profile.support_int64_atomics) { | ||
| 136 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 137 | return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); | ||
| 138 | } | ||
| 139 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 140 | const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; | ||
| 141 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 142 | StoreResult(ctx, pointer_1, pointer_2, value); | ||
| 143 | return original_value; | ||
| 144 | } | ||
| 145 | |||
| 146 | Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 147 | Id value) { | ||
| 148 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 149 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 150 | return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); | ||
| 151 | } | ||
| 152 | |||
| 153 | Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 154 | Id value) { | ||
| 155 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 156 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 157 | return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); | ||
| 158 | } | ||
| 159 | |||
| 160 | Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 161 | Id value) { | ||
| 162 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 163 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 164 | return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); | ||
| 165 | } | ||
| 166 | |||
| 167 | Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 168 | Id value) { | ||
| 169 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 170 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 171 | return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); | ||
| 172 | } | ||
| 173 | |||
| 174 | Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 175 | Id value) { | ||
| 176 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 177 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 178 | return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); | ||
| 179 | } | ||
| 180 | |||
| 181 | Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 182 | Id value) { | ||
| 183 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 184 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 185 | return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); | ||
| 186 | } | ||
| 187 | |||
| 188 | Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 189 | Id value) { | ||
| 190 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 191 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 192 | return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); | ||
| 193 | } | ||
| 194 | |||
| 195 | Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 196 | Id value) { | ||
| 197 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 198 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 199 | return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); | ||
| 200 | } | ||
| 201 | |||
| 202 | Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 203 | Id value) { | ||
| 204 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 205 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 206 | return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); | ||
| 207 | } | ||
| 208 | |||
| 209 | Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 210 | Id value) { | ||
| 211 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 212 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 213 | return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); | ||
| 214 | } | ||
| 215 | |||
| 216 | Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 217 | Id value) { | ||
| 218 | const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||
| 219 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 220 | return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); | ||
| 221 | } | ||
| 222 | |||
| 223 | Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 224 | Id value) { | ||
| 225 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 226 | if (ctx.profile.support_int64_atomics) { | ||
| 227 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 228 | return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); | ||
| 229 | } | ||
| 230 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 231 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 232 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 233 | const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; | ||
| 234 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 235 | return original_value; | ||
| 236 | } | ||
| 237 | |||
| 238 | Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 239 | Id value) { | ||
| 240 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 241 | if (ctx.profile.support_int64_atomics) { | ||
| 242 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 243 | return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); | ||
| 244 | } | ||
| 245 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 246 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 247 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 248 | const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; | ||
| 249 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 250 | return original_value; | ||
| 251 | } | ||
| 252 | |||
| 253 | Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 254 | Id value) { | ||
| 255 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 256 | if (ctx.profile.support_int64_atomics) { | ||
| 257 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 258 | return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); | ||
| 259 | } | ||
| 260 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 261 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 262 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 263 | const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; | ||
| 264 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 265 | return original_value; | ||
| 266 | } | ||
| 267 | |||
| 268 | Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 269 | Id value) { | ||
| 270 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 271 | if (ctx.profile.support_int64_atomics) { | ||
| 272 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 273 | return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); | ||
| 274 | } | ||
| 275 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 276 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 277 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 278 | const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; | ||
| 279 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 280 | return original_value; | ||
| 281 | } | ||
| 282 | |||
| 283 | Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 284 | Id value) { | ||
| 285 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 286 | if (ctx.profile.support_int64_atomics) { | ||
| 287 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 288 | return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); | ||
| 289 | } | ||
| 290 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 291 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 292 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 293 | const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; | ||
| 294 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 295 | return original_value; | ||
| 296 | } | ||
| 297 | |||
| 298 | Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 299 | Id value) { | ||
| 300 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 301 | if (ctx.profile.support_int64_atomics) { | ||
| 302 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 303 | return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); | ||
| 304 | } | ||
| 305 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 306 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 307 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 308 | const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; | ||
| 309 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 310 | return original_value; | ||
| 311 | } | ||
| 312 | |||
| 313 | Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 314 | Id value) { | ||
| 315 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 316 | if (ctx.profile.support_int64_atomics) { | ||
| 317 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 318 | return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); | ||
| 319 | } | ||
| 320 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 321 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 322 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 323 | const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; | ||
| 324 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 325 | return original_value; | ||
| 326 | } | ||
| 327 | |||
| 328 | Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 329 | Id value) { | ||
| 330 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 331 | if (ctx.profile.support_int64_atomics) { | ||
| 332 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 333 | return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); | ||
| 334 | } | ||
| 335 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 336 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 337 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 338 | const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; | ||
| 339 | StoreResult(ctx, pointer_1, pointer_2, result); | ||
| 340 | return original_value; | ||
| 341 | } | ||
| 342 | |||
| 343 | Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 344 | Id value) { | ||
| 345 | const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||
| 346 | if (ctx.profile.support_int64_atomics) { | ||
| 347 | const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||
| 348 | return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); | ||
| 349 | } | ||
| 350 | // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||
| 351 | const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||
| 352 | const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||
| 353 | StoreResult(ctx, pointer_1, pointer_2, value); | ||
| 354 | return original_value; | ||
| 355 | } | ||
| 356 | |||
| 357 | Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 358 | Id value) { | ||
| 359 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 360 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 361 | return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); | ||
| 362 | } | ||
| 363 | |||
| 364 | Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 365 | Id value) { | ||
| 366 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 367 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 368 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; | ||
| 369 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 370 | } | ||
| 371 | |||
| 372 | Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 373 | Id value) { | ||
| 374 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 375 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 376 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; | ||
| 377 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 378 | } | ||
| 379 | |||
| 380 | Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 381 | Id value) { | ||
| 382 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 383 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 384 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; | ||
| 385 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 386 | } | ||
| 387 | |||
| 388 | Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 389 | Id value) { | ||
| 390 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 391 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 392 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; | ||
| 393 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 394 | } | ||
| 395 | |||
| 396 | Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 397 | Id value) { | ||
| 398 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 399 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 400 | const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; | ||
| 401 | return ctx.OpBitcast(ctx.U32[1], result); | ||
| 402 | } | ||
| 403 | |||
| 404 | Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||
| 405 | Id value) { | ||
| 406 | const Id ssbo{ctx.ssbos[binding.U32()]}; | ||
| 407 | const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||
| 408 | const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; | ||
| 409 | return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||
| 410 | } | ||
| 411 | |||
| 412 | Id EmitGlobalAtomicIAdd32(EmitContext&) { | ||
| 413 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 414 | } | ||
| 415 | |||
| 416 | Id EmitGlobalAtomicSMin32(EmitContext&) { | ||
| 417 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 418 | } | ||
| 419 | |||
| 420 | Id EmitGlobalAtomicUMin32(EmitContext&) { | ||
| 421 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 422 | } | ||
| 423 | |||
| 424 | Id EmitGlobalAtomicSMax32(EmitContext&) { | ||
| 425 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 426 | } | ||
| 427 | |||
| 428 | Id EmitGlobalAtomicUMax32(EmitContext&) { | ||
| 429 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 430 | } | ||
| 431 | |||
| 432 | Id EmitGlobalAtomicInc32(EmitContext&) { | ||
| 433 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 434 | } | ||
| 435 | |||
| 436 | Id EmitGlobalAtomicDec32(EmitContext&) { | ||
| 437 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 438 | } | ||
| 439 | |||
| 440 | Id EmitGlobalAtomicAnd32(EmitContext&) { | ||
| 441 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 442 | } | ||
| 443 | |||
| 444 | Id EmitGlobalAtomicOr32(EmitContext&) { | ||
| 445 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 446 | } | ||
| 447 | |||
| 448 | Id EmitGlobalAtomicXor32(EmitContext&) { | ||
| 449 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 450 | } | ||
| 451 | |||
| 452 | Id EmitGlobalAtomicExchange32(EmitContext&) { | ||
| 453 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 454 | } | ||
| 455 | |||
| 456 | Id EmitGlobalAtomicIAdd64(EmitContext&) { | ||
| 457 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 458 | } | ||
| 459 | |||
| 460 | Id EmitGlobalAtomicSMin64(EmitContext&) { | ||
| 461 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 462 | } | ||
| 463 | |||
| 464 | Id EmitGlobalAtomicUMin64(EmitContext&) { | ||
| 465 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 466 | } | ||
| 467 | |||
| 468 | Id EmitGlobalAtomicSMax64(EmitContext&) { | ||
| 469 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 470 | } | ||
| 471 | |||
| 472 | Id EmitGlobalAtomicUMax64(EmitContext&) { | ||
| 473 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 474 | } | ||
| 475 | |||
| 476 | Id EmitGlobalAtomicInc64(EmitContext&) { | ||
| 477 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 478 | } | ||
| 479 | |||
| 480 | Id EmitGlobalAtomicDec64(EmitContext&) { | ||
| 481 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 482 | } | ||
| 483 | |||
| 484 | Id EmitGlobalAtomicAnd64(EmitContext&) { | ||
| 485 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 486 | } | ||
| 487 | |||
| 488 | Id EmitGlobalAtomicOr64(EmitContext&) { | ||
| 489 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 490 | } | ||
| 491 | |||
| 492 | Id EmitGlobalAtomicXor64(EmitContext&) { | ||
| 493 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 494 | } | ||
| 495 | |||
| 496 | Id EmitGlobalAtomicExchange64(EmitContext&) { | ||
| 497 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 498 | } | ||
| 499 | |||
| 500 | Id EmitGlobalAtomicAddF32(EmitContext&) { | ||
| 501 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 502 | } | ||
| 503 | |||
| 504 | Id EmitGlobalAtomicAddF16x2(EmitContext&) { | ||
| 505 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 506 | } | ||
| 507 | |||
| 508 | Id EmitGlobalAtomicAddF32x2(EmitContext&) { | ||
| 509 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 510 | } | ||
| 511 | |||
| 512 | Id EmitGlobalAtomicMinF16x2(EmitContext&) { | ||
| 513 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 514 | } | ||
| 515 | |||
| 516 | Id EmitGlobalAtomicMinF32x2(EmitContext&) { | ||
| 517 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 518 | } | ||
| 519 | |||
| 520 | Id EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||
| 521 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 522 | } | ||
| 523 | |||
| 524 | Id EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||
| 525 | throw NotImplementedException("SPIR-V Instruction"); | ||
| 526 | } | ||
| 527 | |||
| 528 | } // namespace Shader::Backend::SPIRV | ||
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp index 17be0c639..a3339f624 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp +++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp | |||
| @@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) | |||
| 1284 | return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); | 1284 | return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); |
| 1285 | } | 1285 | } |
| 1286 | 1286 | ||
| 1287 | U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { | ||
| 1288 | return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); | ||
| 1289 | } | ||
| 1290 | |||
| 1291 | U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { | ||
| 1292 | return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); | ||
| 1293 | } | ||
| 1294 | |||
| 1295 | U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { | ||
| 1296 | return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { | ||
| 1300 | return is_signed ? SharedAtomicSMin(pointer_offset, value) | ||
| 1301 | : SharedAtomicUMin(pointer_offset, value); | ||
| 1302 | } | ||
| 1303 | |||
| 1304 | U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { | ||
| 1305 | return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { | ||
| 1309 | return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); | ||
| 1310 | } | ||
| 1311 | |||
| 1312 | U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { | ||
| 1313 | return is_signed ? SharedAtomicSMax(pointer_offset, value) | ||
| 1314 | : SharedAtomicUMax(pointer_offset, value); | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { | ||
| 1318 | return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { | ||
| 1322 | return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { | ||
| 1326 | return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); | ||
| 1327 | } | ||
| 1328 | |||
| 1329 | U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { | ||
| 1330 | return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { | ||
| 1334 | return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); | ||
| 1335 | } | ||
| 1336 | |||
| 1337 | U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { | ||
| 1338 | switch (value.Type()) { | ||
| 1339 | case Type::U32: | ||
| 1340 | return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); | ||
| 1341 | case Type::U64: | ||
| 1342 | return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); | ||
| 1343 | default: | ||
| 1344 | ThrowInvalidType(pointer_offset.Type()); | ||
| 1345 | } | ||
| 1346 | } | ||
| 1347 | |||
| 1348 | U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { | ||
| 1349 | switch (value.Type()) { | ||
| 1350 | case Type::U32: | ||
| 1351 | return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); | ||
| 1352 | case Type::U64: | ||
| 1353 | return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); | ||
| 1354 | default: | ||
| 1355 | ThrowInvalidType(value.Type()); | ||
| 1356 | } | ||
| 1357 | } | ||
| 1358 | |||
| 1359 | U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { | ||
| 1360 | switch (value.Type()) { | ||
| 1361 | case Type::U32: | ||
| 1362 | return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); | ||
| 1363 | case Type::U64: | ||
| 1364 | return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); | ||
| 1365 | default: | ||
| 1366 | ThrowInvalidType(value.Type()); | ||
| 1367 | } | ||
| 1368 | } | ||
| 1369 | |||
| 1370 | U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { | ||
| 1371 | switch (value.Type()) { | ||
| 1372 | case Type::U32: | ||
| 1373 | return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); | ||
| 1374 | case Type::U64: | ||
| 1375 | return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); | ||
| 1376 | default: | ||
| 1377 | ThrowInvalidType(value.Type()); | ||
| 1378 | } | ||
| 1379 | } | ||
| 1380 | |||
| 1381 | U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||
| 1382 | return is_signed ? GlobalAtomicSMin(pointer_offset, value) | ||
| 1383 | : GlobalAtomicUMin(pointer_offset, value); | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { | ||
| 1387 | switch (value.Type()) { | ||
| 1388 | case Type::U32: | ||
| 1389 | return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); | ||
| 1390 | case Type::U64: | ||
| 1391 | return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); | ||
| 1392 | default: | ||
| 1393 | ThrowInvalidType(value.Type()); | ||
| 1394 | } | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { | ||
| 1398 | switch (value.Type()) { | ||
| 1399 | case Type::U32: | ||
| 1400 | return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); | ||
| 1401 | case Type::U64: | ||
| 1402 | return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); | ||
| 1403 | default: | ||
| 1404 | ThrowInvalidType(value.Type()); | ||
| 1405 | } | ||
| 1406 | } | ||
| 1407 | |||
| 1408 | U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||
| 1409 | return is_signed ? GlobalAtomicSMax(pointer_offset, value) | ||
| 1410 | : GlobalAtomicUMax(pointer_offset, value); | ||
| 1411 | } | ||
| 1412 | |||
| 1413 | U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { | ||
| 1414 | return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { | ||
| 1418 | return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); | ||
| 1419 | } | ||
| 1420 | |||
| 1421 | U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { | ||
| 1422 | switch (value.Type()) { | ||
| 1423 | case Type::U32: | ||
| 1424 | return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); | ||
| 1425 | case Type::U64: | ||
| 1426 | return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); | ||
| 1427 | default: | ||
| 1428 | ThrowInvalidType(value.Type()); | ||
| 1429 | } | ||
| 1430 | } | ||
| 1431 | |||
| 1432 | U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { | ||
| 1433 | switch (value.Type()) { | ||
| 1434 | case Type::U32: | ||
| 1435 | return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); | ||
| 1436 | case Type::U64: | ||
| 1437 | return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); | ||
| 1438 | default: | ||
| 1439 | ThrowInvalidType(value.Type()); | ||
| 1440 | } | ||
| 1441 | } | ||
| 1442 | |||
| 1443 | U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { | ||
| 1444 | switch (value.Type()) { | ||
| 1445 | case Type::U32: | ||
| 1446 | return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); | ||
| 1447 | case Type::U64: | ||
| 1448 | return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); | ||
| 1449 | default: | ||
| 1450 | ThrowInvalidType(value.Type()); | ||
| 1451 | } | ||
| 1452 | } | ||
| 1453 | |||
| 1454 | U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { | ||
| 1455 | switch (value.Type()) { | ||
| 1456 | case Type::U32: | ||
| 1457 | return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); | ||
| 1458 | case Type::U64: | ||
| 1459 | return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); | ||
| 1460 | default: | ||
| 1461 | ThrowInvalidType(pointer_offset.Type()); | ||
| 1462 | } | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||
| 1466 | const FpControl control) { | ||
| 1467 | return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); | ||
| 1468 | } | ||
| 1469 | |||
| 1470 | Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||
| 1471 | const FpControl control) { | ||
| 1472 | return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); | ||
| 1473 | } | ||
| 1474 | |||
| 1475 | Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||
| 1476 | const FpControl control) { | ||
| 1477 | return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); | ||
| 1478 | } | ||
| 1479 | |||
| 1480 | Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||
| 1481 | const FpControl control) { | ||
| 1482 | return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); | ||
| 1483 | } | ||
| 1484 | |||
| 1287 | U1 IREmitter::LogicalOr(const U1& a, const U1& b) { | 1485 | U1 IREmitter::LogicalOr(const U1& a, const U1& b) { |
| 1288 | return Inst<U1>(Opcode::LogicalOr, a, b); | 1486 | return Inst<U1>(Opcode::LogicalOr, a, b); |
| 1289 | } | 1487 | } |
| @@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst | |||
| 1626 | } | 1824 | } |
| 1627 | 1825 | ||
| 1628 | void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, | 1826 | void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, |
| 1629 | TextureInstInfo info) { | 1827 | TextureInstInfo info) { |
| 1630 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; | 1828 | const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; |
| 1631 | Inst(op, Flags{info}, handle, coords, color); | 1829 | Inst(op, Flags{info}, handle, coords, color); |
| 1632 | } | 1830 | } |
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h index ec60070ef..f9cbf1304 100644 --- a/src/shader_recompiler/frontend/ir/ir_emitter.h +++ b/src/shader_recompiler/frontend/ir/ir_emitter.h | |||
| @@ -228,6 +228,45 @@ public: | |||
| 228 | [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); | 228 | [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); |
| 229 | [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | 229 | [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); |
| 230 | 230 | ||
| 231 | [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); | ||
| 232 | [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); | ||
| 233 | [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); | ||
| 234 | [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); | ||
| 235 | [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); | ||
| 236 | [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); | ||
| 237 | [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); | ||
| 238 | [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); | ||
| 239 | [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); | ||
| 240 | [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); | ||
| 241 | [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); | ||
| 242 | [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); | ||
| 243 | [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); | ||
| 244 | |||
| 245 | [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); | ||
| 246 | [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); | ||
| 247 | [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); | ||
| 248 | [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, | ||
| 249 | bool is_signed); | ||
| 250 | [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); | ||
| 251 | [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); | ||
| 252 | [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, | ||
| 253 | bool is_signed); | ||
| 254 | [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); | ||
| 255 | [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); | ||
| 256 | [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); | ||
| 257 | [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); | ||
| 258 | [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); | ||
| 259 | [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); | ||
| 260 | |||
| 261 | [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||
| 262 | const FpControl control = {}); | ||
| 263 | [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||
| 264 | const FpControl control = {}); | ||
| 265 | [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||
| 266 | const FpControl control = {}); | ||
| 267 | [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||
| 268 | const FpControl control = {}); | ||
| 269 | |||
| 231 | [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); | 270 | [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); |
| 232 | [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); | 271 | [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); |
| 233 | [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); | 272 | [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); |
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp index 2df631791..0f66c5627 100644 --- a/src/shader_recompiler/frontend/ir/microinstruction.cpp +++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp | |||
| @@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept { | |||
| 93 | case Opcode::WriteSharedU32: | 93 | case Opcode::WriteSharedU32: |
| 94 | case Opcode::WriteSharedU64: | 94 | case Opcode::WriteSharedU64: |
| 95 | case Opcode::WriteSharedU128: | 95 | case Opcode::WriteSharedU128: |
| 96 | case Opcode::SharedAtomicIAdd32: | ||
| 97 | case Opcode::SharedAtomicSMin32: | ||
| 98 | case Opcode::SharedAtomicUMin32: | ||
| 99 | case Opcode::SharedAtomicSMax32: | ||
| 100 | case Opcode::SharedAtomicUMax32: | ||
| 101 | case Opcode::SharedAtomicInc32: | ||
| 102 | case Opcode::SharedAtomicDec32: | ||
| 103 | case Opcode::SharedAtomicAnd32: | ||
| 104 | case Opcode::SharedAtomicOr32: | ||
| 105 | case Opcode::SharedAtomicXor32: | ||
| 106 | case Opcode::SharedAtomicExchange32: | ||
| 107 | case Opcode::SharedAtomicExchange64: | ||
| 108 | case Opcode::GlobalAtomicIAdd32: | ||
| 109 | case Opcode::GlobalAtomicSMin32: | ||
| 110 | case Opcode::GlobalAtomicUMin32: | ||
| 111 | case Opcode::GlobalAtomicSMax32: | ||
| 112 | case Opcode::GlobalAtomicUMax32: | ||
| 113 | case Opcode::GlobalAtomicInc32: | ||
| 114 | case Opcode::GlobalAtomicDec32: | ||
| 115 | case Opcode::GlobalAtomicAnd32: | ||
| 116 | case Opcode::GlobalAtomicOr32: | ||
| 117 | case Opcode::GlobalAtomicXor32: | ||
| 118 | case Opcode::GlobalAtomicExchange32: | ||
| 119 | case Opcode::GlobalAtomicIAdd64: | ||
| 120 | case Opcode::GlobalAtomicSMin64: | ||
| 121 | case Opcode::GlobalAtomicUMin64: | ||
| 122 | case Opcode::GlobalAtomicSMax64: | ||
| 123 | case Opcode::GlobalAtomicUMax64: | ||
| 124 | case Opcode::GlobalAtomicAnd64: | ||
| 125 | case Opcode::GlobalAtomicOr64: | ||
| 126 | case Opcode::GlobalAtomicXor64: | ||
| 127 | case Opcode::GlobalAtomicExchange64: | ||
| 128 | case Opcode::GlobalAtomicAddF32: | ||
| 129 | case Opcode::GlobalAtomicAddF16x2: | ||
| 130 | case Opcode::GlobalAtomicAddF32x2: | ||
| 131 | case Opcode::GlobalAtomicMinF16x2: | ||
| 132 | case Opcode::GlobalAtomicMinF32x2: | ||
| 133 | case Opcode::GlobalAtomicMaxF16x2: | ||
| 134 | case Opcode::GlobalAtomicMaxF32x2: | ||
| 135 | case Opcode::StorageAtomicIAdd32: | ||
| 136 | case Opcode::StorageAtomicSMin32: | ||
| 137 | case Opcode::StorageAtomicUMin32: | ||
| 138 | case Opcode::StorageAtomicSMax32: | ||
| 139 | case Opcode::StorageAtomicUMax32: | ||
| 140 | case Opcode::StorageAtomicInc32: | ||
| 141 | case Opcode::StorageAtomicDec32: | ||
| 142 | case Opcode::StorageAtomicAnd32: | ||
| 143 | case Opcode::StorageAtomicOr32: | ||
| 144 | case Opcode::StorageAtomicXor32: | ||
| 145 | case Opcode::StorageAtomicExchange32: | ||
| 146 | case Opcode::StorageAtomicIAdd64: | ||
| 147 | case Opcode::StorageAtomicSMin64: | ||
| 148 | case Opcode::StorageAtomicUMin64: | ||
| 149 | case Opcode::StorageAtomicSMax64: | ||
| 150 | case Opcode::StorageAtomicUMax64: | ||
| 151 | case Opcode::StorageAtomicAnd64: | ||
| 152 | case Opcode::StorageAtomicOr64: | ||
| 153 | case Opcode::StorageAtomicXor64: | ||
| 154 | case Opcode::StorageAtomicExchange64: | ||
| 155 | case Opcode::StorageAtomicAddF32: | ||
| 156 | case Opcode::StorageAtomicAddF16x2: | ||
| 157 | case Opcode::StorageAtomicAddF32x2: | ||
| 158 | case Opcode::StorageAtomicMinF16x2: | ||
| 159 | case Opcode::StorageAtomicMinF32x2: | ||
| 160 | case Opcode::StorageAtomicMaxF16x2: | ||
| 161 | case Opcode::StorageAtomicMaxF32x2: | ||
| 96 | case Opcode::BindlessImageWrite: | 162 | case Opcode::BindlessImageWrite: |
| 97 | case Opcode::BoundImageWrite: | 163 | case Opcode::BoundImageWrite: |
| 98 | case Opcode::ImageWrite: | 164 | case Opcode::ImageWrite: |
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc index 86ea02560..dc776a73e 100644 --- a/src/shader_recompiler/frontend/ir/opcodes.inc +++ b/src/shader_recompiler/frontend/ir/opcodes.inc | |||
| @@ -321,6 +321,76 @@ OPCODE(INotEqual, U1, U32, | |||
| 321 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) | 321 | OPCODE(SGreaterThanEqual, U1, U32, U32, ) |
| 322 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) | 322 | OPCODE(UGreaterThanEqual, U1, U32, U32, ) |
| 323 | 323 | ||
| 324 | // Atomic operations | ||
| 325 | OPCODE(SharedAtomicIAdd32, U32, U32, U32, ) | ||
| 326 | OPCODE(SharedAtomicSMin32, U32, U32, U32, ) | ||
| 327 | OPCODE(SharedAtomicUMin32, U32, U32, U32, ) | ||
| 328 | OPCODE(SharedAtomicSMax32, U32, U32, U32, ) | ||
| 329 | OPCODE(SharedAtomicUMax32, U32, U32, U32, ) | ||
| 330 | OPCODE(SharedAtomicInc32, U32, U32, U32, ) | ||
| 331 | OPCODE(SharedAtomicDec32, U32, U32, U32, ) | ||
| 332 | OPCODE(SharedAtomicAnd32, U32, U32, U32, ) | ||
| 333 | OPCODE(SharedAtomicOr32, U32, U32, U32, ) | ||
| 334 | OPCODE(SharedAtomicXor32, U32, U32, U32, ) | ||
| 335 | OPCODE(SharedAtomicExchange32, U32, U32, U32, ) | ||
| 336 | OPCODE(SharedAtomicExchange64, U64, U32, U64, ) | ||
| 337 | |||
| 338 | OPCODE(GlobalAtomicIAdd32, U32, U64, U32, ) | ||
| 339 | OPCODE(GlobalAtomicSMin32, U32, U64, U32, ) | ||
| 340 | OPCODE(GlobalAtomicUMin32, U32, U64, U32, ) | ||
| 341 | OPCODE(GlobalAtomicSMax32, U32, U64, U32, ) | ||
| 342 | OPCODE(GlobalAtomicUMax32, U32, U64, U32, ) | ||
| 343 | OPCODE(GlobalAtomicInc32, U32, U64, U32, ) | ||
| 344 | OPCODE(GlobalAtomicDec32, U32, U64, U32, ) | ||
| 345 | OPCODE(GlobalAtomicAnd32, U32, U64, U32, ) | ||
| 346 | OPCODE(GlobalAtomicOr32, U32, U64, U32, ) | ||
| 347 | OPCODE(GlobalAtomicXor32, U32, U64, U32, ) | ||
| 348 | OPCODE(GlobalAtomicExchange32, U32, U64, U32, ) | ||
| 349 | OPCODE(GlobalAtomicIAdd64, U64, U64, U64, ) | ||
| 350 | OPCODE(GlobalAtomicSMin64, U64, U64, U64, ) | ||
| 351 | OPCODE(GlobalAtomicUMin64, U64, U64, U64, ) | ||
| 352 | OPCODE(GlobalAtomicSMax64, U64, U64, U64, ) | ||
| 353 | OPCODE(GlobalAtomicUMax64, U64, U64, U64, ) | ||
| 354 | OPCODE(GlobalAtomicAnd64, U64, U64, U64, ) | ||
| 355 | OPCODE(GlobalAtomicOr64, U64, U64, U64, ) | ||
| 356 | OPCODE(GlobalAtomicXor64, U64, U64, U64, ) | ||
| 357 | OPCODE(GlobalAtomicExchange64, U64, U64, U64, ) | ||
| 358 | OPCODE(GlobalAtomicAddF32, F32, U64, F32, ) | ||
| 359 | OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, ) | ||
| 360 | OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, ) | ||
| 361 | OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, ) | ||
| 362 | OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, ) | ||
| 363 | OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, ) | ||
| 364 | OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, ) | ||
| 365 | |||
| 366 | OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, ) | ||
| 367 | OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, ) | ||
| 368 | OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, ) | ||
| 369 | OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, ) | ||
| 370 | OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, ) | ||
| 371 | OPCODE(StorageAtomicInc32, U32, U32, U32, U32, ) | ||
| 372 | OPCODE(StorageAtomicDec32, U32, U32, U32, U32, ) | ||
| 373 | OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, ) | ||
| 374 | OPCODE(StorageAtomicOr32, U32, U32, U32, U32, ) | ||
| 375 | OPCODE(StorageAtomicXor32, U32, U32, U32, U32, ) | ||
| 376 | OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, ) | ||
| 377 | OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, ) | ||
| 378 | OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, ) | ||
| 379 | OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, ) | ||
| 380 | OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, ) | ||
| 381 | OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, ) | ||
| 382 | OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, ) | ||
| 383 | OPCODE(StorageAtomicOr64, U64, U32, U32, U64, ) | ||
| 384 | OPCODE(StorageAtomicXor64, U64, U32, U32, U64, ) | ||
| 385 | OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, ) | ||
| 386 | OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, ) | ||
| 387 | OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, ) | ||
| 388 | OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, ) | ||
| 389 | OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, ) | ||
| 390 | OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, ) | ||
| 391 | OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, ) | ||
| 392 | OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, ) | ||
| 393 | |||
| 324 | // Logical operations | 394 | // Logical operations |
| 325 | OPCODE(LogicalOr, U1, U1, U1, ) | 395 | OPCODE(LogicalOr, U1, U1, U1, ) |
| 326 | OPCODE(LogicalAnd, U1, U1, U1, ) | 396 | OPCODE(LogicalAnd, U1, U1, U1, ) |
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..7a32c5eb3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | |||
| @@ -0,0 +1,222 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | SAFEADD, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class AtomSize : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | F32, | ||
| 29 | F16x2, | ||
| 30 | S64, | ||
| 31 | }; | ||
| 32 | |||
| 33 | IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||
| 34 | AtomOp op, bool is_signed) { | ||
| 35 | switch (op) { | ||
| 36 | case AtomOp::ADD: | ||
| 37 | return ir.GlobalAtomicIAdd(offset, op_b); | ||
| 38 | case AtomOp::MIN: | ||
| 39 | return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||
| 40 | case AtomOp::MAX: | ||
| 41 | return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||
| 42 | case AtomOp::INC: | ||
| 43 | return ir.GlobalAtomicInc(offset, op_b); | ||
| 44 | case AtomOp::DEC: | ||
| 45 | return ir.GlobalAtomicDec(offset, op_b); | ||
| 46 | case AtomOp::AND: | ||
| 47 | return ir.GlobalAtomicAnd(offset, op_b); | ||
| 48 | case AtomOp::OR: | ||
| 49 | return ir.GlobalAtomicOr(offset, op_b); | ||
| 50 | case AtomOp::XOR: | ||
| 51 | return ir.GlobalAtomicXor(offset, op_b); | ||
| 52 | case AtomOp::EXCH: | ||
| 53 | return ir.GlobalAtomicExchange(offset, op_b); | ||
| 54 | default: | ||
| 55 | throw NotImplementedException("Integer Atom Operation {}", op); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||
| 60 | AtomSize size) { | ||
| 61 | static constexpr IR::FpControl f16_control{ | ||
| 62 | .no_contraction{false}, | ||
| 63 | .rounding{IR::FpRounding::RN}, | ||
| 64 | .fmz_mode{IR::FmzMode::DontCare}, | ||
| 65 | }; | ||
| 66 | static constexpr IR::FpControl f32_control{ | ||
| 67 | .no_contraction{false}, | ||
| 68 | .rounding{IR::FpRounding::RN}, | ||
| 69 | .fmz_mode{IR::FmzMode::FTZ}, | ||
| 70 | }; | ||
| 71 | switch (op) { | ||
| 72 | case AtomOp::ADD: | ||
| 73 | return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||
| 74 | : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||
| 75 | case AtomOp::MIN: | ||
| 76 | return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||
| 77 | case AtomOp::MAX: | ||
| 78 | return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||
| 79 | default: | ||
| 80 | throw NotImplementedException("FP Atom Operation {}", op); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 88 | BitField<28, 20, s64> addr_offset; | ||
| 89 | BitField<28, 20, u64> rz_addr_offset; | ||
| 90 | BitField<48, 1, u64> e; | ||
| 91 | } const mem{insn}; | ||
| 92 | |||
| 93 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 94 | if (mem.e == 0) { | ||
| 95 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 96 | } | ||
| 97 | return v.L(mem.addr_reg); | ||
| 98 | }()}; | ||
| 99 | const u64 addr_offset{[&]() -> u64 { | ||
| 100 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 101 | // When RZ is used, the address is an absolute address | ||
| 102 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 103 | } else { | ||
| 104 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 105 | } | ||
| 106 | }()}; | ||
| 107 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 108 | } | ||
| 109 | |||
| 110 | bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||
| 111 | // TODO: SAFEADD | ||
| 112 | switch (size) { | ||
| 113 | case AtomSize::S32: | ||
| 114 | case AtomSize::U64: | ||
| 115 | return (op == AtomOp::INC || op == AtomOp::DEC); | ||
| 116 | case AtomSize::S64: | ||
| 117 | return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 118 | case AtomSize::F32: | ||
| 119 | return op != AtomOp::ADD; | ||
| 120 | case AtomSize::F16x2: | ||
| 121 | return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 122 | default: | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||
| 128 | switch (size) { | ||
| 129 | case AtomSize::U32: | ||
| 130 | case AtomSize::S32: | ||
| 131 | case AtomSize::F32: | ||
| 132 | case AtomSize::F16x2: | ||
| 133 | return ir.LoadGlobal32(offset); | ||
| 134 | case AtomSize::U64: | ||
| 135 | case AtomSize::S64: | ||
| 136 | return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||
| 137 | default: | ||
| 138 | throw NotImplementedException("Atom Size {}", size); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||
| 143 | switch (size) { | ||
| 144 | case AtomSize::U32: | ||
| 145 | case AtomSize::S32: | ||
| 146 | case AtomSize::F16x2: | ||
| 147 | return v.X(dest_reg, IR::U32{result}); | ||
| 148 | case AtomSize::U64: | ||
| 149 | case AtomSize::S64: | ||
| 150 | return v.L(dest_reg, IR::U64{result}); | ||
| 151 | case AtomSize::F32: | ||
| 152 | return v.F(dest_reg, IR::F32{result}); | ||
| 153 | default: | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | } // Anonymous namespace | ||
| 158 | |||
| 159 | void TranslatorVisitor::ATOM(u64 insn) { | ||
| 160 | union { | ||
| 161 | u64 raw; | ||
| 162 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 163 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 164 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 165 | BitField<49, 3, AtomSize> size; | ||
| 166 | BitField<52, 4, AtomOp> op; | ||
| 167 | } const atom{insn}; | ||
| 168 | |||
| 169 | const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; | ||
| 170 | const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; | ||
| 171 | const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; | ||
| 172 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 173 | IR::Value result; | ||
| 174 | |||
| 175 | if (AtomOpNotApplicable(atom.size, atom.op)) { | ||
| 176 | result = LoadGlobal(ir, offset, atom.size); | ||
| 177 | } else if (!is_integer) { | ||
| 178 | if (atom.size == AtomSize::F32) { | ||
| 179 | result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); | ||
| 180 | } else { | ||
| 181 | const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; | ||
| 182 | result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); | ||
| 183 | } | ||
| 184 | } else if (size_64) { | ||
| 185 | result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); | ||
| 186 | } else { | ||
| 187 | result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); | ||
| 188 | } | ||
| 189 | StoreResult(*this, atom.dest_reg, result, atom.size); | ||
| 190 | } | ||
| 191 | |||
| 192 | void TranslatorVisitor::RED(u64 insn) { | ||
| 193 | union { | ||
| 194 | u64 raw; | ||
| 195 | BitField<0, 8, IR::Reg> src_reg_b; | ||
| 196 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 197 | BitField<20, 3, AtomSize> size; | ||
| 198 | BitField<23, 3, AtomOp> op; | ||
| 199 | } const red{insn}; | ||
| 200 | |||
| 201 | if (AtomOpNotApplicable(red.size, red.op)) { | ||
| 202 | return; | ||
| 203 | } | ||
| 204 | const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; | ||
| 205 | const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; | ||
| 206 | const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; | ||
| 207 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 208 | if (!is_integer) { | ||
| 209 | if (red.size == AtomSize::F32) { | ||
| 210 | ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); | ||
| 211 | } else { | ||
| 212 | const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; | ||
| 213 | ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); | ||
| 214 | } | ||
| 215 | } else if (size_64) { | ||
| 216 | ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); | ||
| 217 | } else { | ||
| 218 | ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum class AtomsSize : u64 { | ||
| 24 | U32, | ||
| 25 | S32, | ||
| 26 | U64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||
| 30 | bool is_signed) { | ||
| 31 | switch (op) { | ||
| 32 | case AtomOp::ADD: | ||
| 33 | return ir.SharedAtomicIAdd(offset, op_b); | ||
| 34 | case AtomOp::MIN: | ||
| 35 | return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||
| 36 | case AtomOp::MAX: | ||
| 37 | return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||
| 38 | case AtomOp::INC: | ||
| 39 | return ir.SharedAtomicInc(offset, op_b); | ||
| 40 | case AtomOp::DEC: | ||
| 41 | return ir.SharedAtomicDec(offset, op_b); | ||
| 42 | case AtomOp::AND: | ||
| 43 | return ir.SharedAtomicAnd(offset, op_b); | ||
| 44 | case AtomOp::OR: | ||
| 45 | return ir.SharedAtomicOr(offset, op_b); | ||
| 46 | case AtomOp::XOR: | ||
| 47 | return ir.SharedAtomicXor(offset, op_b); | ||
| 48 | case AtomOp::EXCH: | ||
| 49 | return ir.SharedAtomicExchange(offset, op_b); | ||
| 50 | default: | ||
| 51 | throw NotImplementedException("Integer Atoms Operation {}", op); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 59 | BitField<30, 22, u64> absolute_offset; | ||
| 60 | BitField<30, 22, s64> relative_offset; | ||
| 61 | } const encoding{insn}; | ||
| 62 | |||
| 63 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 64 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||
| 65 | } else { | ||
| 66 | const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||
| 67 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||
| 72 | switch (size) { | ||
| 73 | case AtomsSize::U32: | ||
| 74 | case AtomsSize::S32: | ||
| 75 | return v.X(dest_reg, IR::U32{result}); | ||
| 76 | case AtomsSize::U64: | ||
| 77 | return v.L(dest_reg, IR::U64{result}); | ||
| 78 | default: | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } // Anonymous namespace | ||
| 83 | |||
| 84 | void TranslatorVisitor::ATOMS(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 89 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 90 | BitField<28, 2, AtomsSize> size; | ||
| 91 | BitField<52, 4, AtomOp> op; | ||
| 92 | } const atoms{insn}; | ||
| 93 | |||
| 94 | const bool size_64{atoms.size == AtomsSize::U64}; | ||
| 95 | if (size_64 && atoms.op != AtomOp::EXCH) { | ||
| 96 | throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||
| 97 | } | ||
| 98 | const bool is_signed{atoms.size == AtomsSize::S32}; | ||
| 99 | const IR::U32 offset{AtomsOffset(*this, insn)}; | ||
| 100 | |||
| 101 | IR::Value result; | ||
| 102 | if (size_64) { | ||
| 103 | result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||
| 104 | } else { | ||
| 105 | result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||
| 106 | } | ||
| 107 | StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 327941223..aebe3072a 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { | |||
| 17 | ThrowNotImplemented(Opcode::ATOM_cas); | 17 | ThrowNotImplemented(Opcode::ATOM_cas); |
| 18 | } | 18 | } |
| 19 | 19 | ||
| 20 | void TranslatorVisitor::ATOM(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ATOM); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::ATOMS_cas(u64) { | 20 | void TranslatorVisitor::ATOMS_cas(u64) { |
| 25 | ThrowNotImplemented(Opcode::ATOMS_cas); | 21 | ThrowNotImplemented(Opcode::ATOMS_cas); |
| 26 | } | 22 | } |
| 27 | 23 | ||
| 28 | void TranslatorVisitor::ATOMS(u64) { | ||
| 29 | ThrowNotImplemented(Opcode::ATOMS); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::B2R(u64) { | 24 | void TranslatorVisitor::B2R(u64) { |
| 33 | ThrowNotImplemented(Opcode::B2R); | 25 | ThrowNotImplemented(Opcode::B2R); |
| 34 | } | 26 | } |
| @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { | |||
| 241 | ThrowNotImplemented(Opcode::RAM); | 233 | ThrowNotImplemented(Opcode::RAM); |
| 242 | } | 234 | } |
| 243 | 235 | ||
| 244 | void TranslatorVisitor::RED(u64) { | ||
| 245 | ThrowNotImplemented(Opcode::RED); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::RET(u64) { | 236 | void TranslatorVisitor::RET(u64) { |
| 249 | ThrowNotImplemented(Opcode::RET); | 237 | ThrowNotImplemented(Opcode::RET); |
| 250 | } | 238 | } |
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 9ef8688c9..73373576b 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 145 | case IR::Opcode::FPOrdGreaterThanEqual16: | 145 | case IR::Opcode::FPOrdGreaterThanEqual16: |
| 146 | case IR::Opcode::FPUnordGreaterThanEqual16: | 146 | case IR::Opcode::FPUnordGreaterThanEqual16: |
| 147 | case IR::Opcode::FPIsNan16: | 147 | case IR::Opcode::FPIsNan16: |
| 148 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 149 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 150 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 151 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 148 | info.uses_fp16 = true; | 152 | info.uses_fp16 = true; |
| 149 | break; | 153 | break; |
| 150 | case IR::Opcode::CompositeConstructF64x2: | 154 | case IR::Opcode::CompositeConstructF64x2: |
| @@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 310 | case IR::Opcode::ConvertF16U64: | 314 | case IR::Opcode::ConvertF16U64: |
| 311 | case IR::Opcode::ConvertF32U64: | 315 | case IR::Opcode::ConvertF32U64: |
| 312 | case IR::Opcode::ConvertF64U64: | 316 | case IR::Opcode::ConvertF64U64: |
| 317 | case IR::Opcode::SharedAtomicExchange64: | ||
| 313 | info.uses_int64 = true; | 318 | info.uses_int64 = true; |
| 314 | break; | 319 | break; |
| 315 | default: | 320 | default: |
| @@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) { | |||
| 444 | case IR::Opcode::FSwizzleAdd: | 449 | case IR::Opcode::FSwizzleAdd: |
| 445 | info.uses_fswzadd = true; | 450 | info.uses_fswzadd = true; |
| 446 | break; | 451 | break; |
| 452 | case IR::Opcode::SharedAtomicInc32: | ||
| 453 | info.uses_shared_increment = true; | ||
| 454 | break; | ||
| 455 | case IR::Opcode::SharedAtomicDec32: | ||
| 456 | info.uses_shared_decrement = true; | ||
| 457 | break; | ||
| 458 | case IR::Opcode::GlobalAtomicInc32: | ||
| 459 | case IR::Opcode::StorageAtomicInc32: | ||
| 460 | info.uses_global_increment = true; | ||
| 461 | break; | ||
| 462 | case IR::Opcode::GlobalAtomicDec32: | ||
| 463 | case IR::Opcode::StorageAtomicDec32: | ||
| 464 | info.uses_global_decrement = true; | ||
| 465 | break; | ||
| 466 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 467 | case IR::Opcode::StorageAtomicAddF32: | ||
| 468 | info.uses_atomic_f32_add = true; | ||
| 469 | break; | ||
| 470 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 471 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 472 | info.uses_atomic_f16x2_add = true; | ||
| 473 | break; | ||
| 474 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 475 | case IR::Opcode::StorageAtomicAddF32x2: | ||
| 476 | info.uses_atomic_f32x2_add = true; | ||
| 477 | break; | ||
| 478 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 479 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 480 | info.uses_atomic_f16x2_min = true; | ||
| 481 | break; | ||
| 482 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 483 | case IR::Opcode::StorageAtomicMinF32x2: | ||
| 484 | info.uses_atomic_f32x2_min = true; | ||
| 485 | break; | ||
| 486 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 487 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 488 | info.uses_atomic_f16x2_max = true; | ||
| 489 | break; | ||
| 490 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 491 | case IR::Opcode::StorageAtomicMaxF32x2: | ||
| 492 | info.uses_atomic_f32x2_max = true; | ||
| 493 | break; | ||
| 494 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 495 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 496 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 497 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 498 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 499 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 500 | case IR::Opcode::GlobalAtomicOr64: | ||
| 501 | case IR::Opcode::GlobalAtomicXor64: | ||
| 502 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 503 | case IR::Opcode::StorageAtomicIAdd64: | ||
| 504 | case IR::Opcode::StorageAtomicSMin64: | ||
| 505 | case IR::Opcode::StorageAtomicUMin64: | ||
| 506 | case IR::Opcode::StorageAtomicSMax64: | ||
| 507 | case IR::Opcode::StorageAtomicUMax64: | ||
| 508 | case IR::Opcode::StorageAtomicAnd64: | ||
| 509 | case IR::Opcode::StorageAtomicOr64: | ||
| 510 | case IR::Opcode::StorageAtomicXor64: | ||
| 511 | info.uses_64_bit_atomics = true; | ||
| 512 | break; | ||
| 513 | case IR::Opcode::SharedAtomicExchange64: | ||
| 514 | info.uses_64_bit_atomics = true; | ||
| 515 | info.uses_shared_memory_u32x2 = true; | ||
| 516 | break; | ||
| 447 | default: | 517 | default: |
| 448 | break; | 518 | break; |
| 449 | } | 519 | } |
diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index afe871505..0d4f266c3 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp | |||
| @@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) { | |||
| 72 | case IR::Opcode::WriteGlobal32: | 72 | case IR::Opcode::WriteGlobal32: |
| 73 | case IR::Opcode::WriteGlobal64: | 73 | case IR::Opcode::WriteGlobal64: |
| 74 | case IR::Opcode::WriteGlobal128: | 74 | case IR::Opcode::WriteGlobal128: |
| 75 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 76 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 77 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 78 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 79 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 80 | case IR::Opcode::GlobalAtomicInc32: | ||
| 81 | case IR::Opcode::GlobalAtomicDec32: | ||
| 82 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 83 | case IR::Opcode::GlobalAtomicOr32: | ||
| 84 | case IR::Opcode::GlobalAtomicXor32: | ||
| 85 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 86 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 87 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 88 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 89 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 90 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 91 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 92 | case IR::Opcode::GlobalAtomicOr64: | ||
| 93 | case IR::Opcode::GlobalAtomicXor64: | ||
| 94 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 95 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 96 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 97 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 98 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 99 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 100 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 101 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 75 | return true; | 102 | return true; |
| 76 | default: | 103 | default: |
| 77 | return false; | 104 | return false; |
| @@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { | |||
| 125 | return IR::Opcode::WriteStorage64; | 152 | return IR::Opcode::WriteStorage64; |
| 126 | case IR::Opcode::WriteGlobal128: | 153 | case IR::Opcode::WriteGlobal128: |
| 127 | return IR::Opcode::WriteStorage128; | 154 | return IR::Opcode::WriteStorage128; |
| 155 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 156 | return IR::Opcode::StorageAtomicIAdd32; | ||
| 157 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 158 | return IR::Opcode::StorageAtomicSMin32; | ||
| 159 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 160 | return IR::Opcode::StorageAtomicUMin32; | ||
| 161 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 162 | return IR::Opcode::StorageAtomicSMax32; | ||
| 163 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 164 | return IR::Opcode::StorageAtomicUMax32; | ||
| 165 | case IR::Opcode::GlobalAtomicInc32: | ||
| 166 | return IR::Opcode::StorageAtomicInc32; | ||
| 167 | case IR::Opcode::GlobalAtomicDec32: | ||
| 168 | return IR::Opcode::StorageAtomicDec32; | ||
| 169 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 170 | return IR::Opcode::StorageAtomicAnd32; | ||
| 171 | case IR::Opcode::GlobalAtomicOr32: | ||
| 172 | return IR::Opcode::StorageAtomicOr32; | ||
| 173 | case IR::Opcode::GlobalAtomicXor32: | ||
| 174 | return IR::Opcode::StorageAtomicXor32; | ||
| 175 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 176 | return IR::Opcode::StorageAtomicIAdd64; | ||
| 177 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 178 | return IR::Opcode::StorageAtomicSMin64; | ||
| 179 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 180 | return IR::Opcode::StorageAtomicUMin64; | ||
| 181 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 182 | return IR::Opcode::StorageAtomicSMax64; | ||
| 183 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 184 | return IR::Opcode::StorageAtomicUMax64; | ||
| 185 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 186 | return IR::Opcode::StorageAtomicAnd64; | ||
| 187 | case IR::Opcode::GlobalAtomicOr64: | ||
| 188 | return IR::Opcode::StorageAtomicOr64; | ||
| 189 | case IR::Opcode::GlobalAtomicXor64: | ||
| 190 | return IR::Opcode::StorageAtomicXor64; | ||
| 191 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 192 | return IR::Opcode::StorageAtomicExchange32; | ||
| 193 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 194 | return IR::Opcode::StorageAtomicExchange64; | ||
| 195 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 196 | return IR::Opcode::StorageAtomicAddF32; | ||
| 197 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 198 | return IR::Opcode::StorageAtomicAddF16x2; | ||
| 199 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 200 | return IR::Opcode::StorageAtomicMinF16x2; | ||
| 201 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 202 | return IR::Opcode::StorageAtomicMaxF16x2; | ||
| 203 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 204 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 205 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 206 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 207 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 208 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 128 | default: | 209 | default: |
| 129 | throw InvalidArgument("Invalid global memory opcode {}", opcode); | 210 | throw InvalidArgument("Invalid global memory opcode {}", opcode); |
| 130 | } | 211 | } |
| @@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index | |||
| 328 | inst.Invalidate(); | 409 | inst.Invalidate(); |
| 329 | } | 410 | } |
| 330 | 411 | ||
| 412 | /// Replace a global memory atomic instruction with its storage buffer equivalent | ||
| 413 | void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||
| 414 | const IR::U32& offset) { | ||
| 415 | const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||
| 416 | const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||
| 417 | const IR::Value value{ | ||
| 418 | &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; | ||
| 419 | inst.ReplaceUsesWith(value); | ||
| 420 | } | ||
| 421 | |||
| 331 | /// Replace a global memory instruction with its storage buffer equivalent | 422 | /// Replace a global memory instruction with its storage buffer equivalent |
| 332 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | 423 | void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, |
| 333 | const IR::U32& offset) { | 424 | const IR::U32& offset) { |
| @@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | |||
| 348 | case IR::Opcode::WriteGlobal64: | 439 | case IR::Opcode::WriteGlobal64: |
| 349 | case IR::Opcode::WriteGlobal128: | 440 | case IR::Opcode::WriteGlobal128: |
| 350 | return ReplaceWrite(block, inst, storage_index, offset); | 441 | return ReplaceWrite(block, inst, storage_index, offset); |
| 442 | case IR::Opcode::GlobalAtomicIAdd32: | ||
| 443 | case IR::Opcode::GlobalAtomicSMin32: | ||
| 444 | case IR::Opcode::GlobalAtomicUMin32: | ||
| 445 | case IR::Opcode::GlobalAtomicSMax32: | ||
| 446 | case IR::Opcode::GlobalAtomicUMax32: | ||
| 447 | case IR::Opcode::GlobalAtomicInc32: | ||
| 448 | case IR::Opcode::GlobalAtomicDec32: | ||
| 449 | case IR::Opcode::GlobalAtomicAnd32: | ||
| 450 | case IR::Opcode::GlobalAtomicOr32: | ||
| 451 | case IR::Opcode::GlobalAtomicXor32: | ||
| 452 | case IR::Opcode::GlobalAtomicExchange32: | ||
| 453 | case IR::Opcode::GlobalAtomicIAdd64: | ||
| 454 | case IR::Opcode::GlobalAtomicSMin64: | ||
| 455 | case IR::Opcode::GlobalAtomicUMin64: | ||
| 456 | case IR::Opcode::GlobalAtomicSMax64: | ||
| 457 | case IR::Opcode::GlobalAtomicUMax64: | ||
| 458 | case IR::Opcode::GlobalAtomicAnd64: | ||
| 459 | case IR::Opcode::GlobalAtomicOr64: | ||
| 460 | case IR::Opcode::GlobalAtomicXor64: | ||
| 461 | case IR::Opcode::GlobalAtomicExchange64: | ||
| 462 | case IR::Opcode::GlobalAtomicAddF32: | ||
| 463 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 464 | case IR::Opcode::GlobalAtomicAddF32x2: | ||
| 465 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 466 | case IR::Opcode::GlobalAtomicMinF32x2: | ||
| 467 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 468 | case IR::Opcode::GlobalAtomicMaxF32x2: | ||
| 469 | return ReplaceAtomic(block, inst, storage_index, offset); | ||
| 351 | default: | 470 | default: |
| 352 | throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); | 471 | throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); |
| 353 | } | 472 | } |
| @@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 364 | CollectStorageBuffers(*block, inst, info); | 483 | CollectStorageBuffers(*block, inst, info); |
| 365 | } | 484 | } |
| 366 | } | 485 | } |
| 367 | u32 storage_index{}; | ||
| 368 | for (const StorageBufferAddr& storage_buffer : info.set) { | 486 | for (const StorageBufferAddr& storage_buffer : info.set) { |
| 369 | program.info.storage_buffers_descriptors.push_back({ | 487 | program.info.storage_buffers_descriptors.push_back({ |
| 370 | .cbuf_index = storage_buffer.index, | 488 | .cbuf_index = storage_buffer.index, |
| @@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||
| 372 | .count = 1, | 490 | .count = 1, |
| 373 | .is_written{info.writes.contains(storage_buffer)}, | 491 | .is_written{info.writes.contains(storage_buffer)}, |
| 374 | }); | 492 | }); |
| 375 | ++storage_index; | ||
| 376 | } | 493 | } |
| 377 | for (const StorageInst& storage_inst : info.to_replace) { | 494 | for (const StorageInst& storage_inst : info.to_replace) { |
| 378 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; | 495 | const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; |
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index 52576b07f..62e73d52d 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) { | |||
| 114 | return IR::Opcode::ConvertF32U32; | 114 | return IR::Opcode::ConvertF32U32; |
| 115 | case IR::Opcode::ConvertF16U64: | 115 | case IR::Opcode::ConvertF16U64: |
| 116 | return IR::Opcode::ConvertF32U64; | 116 | return IR::Opcode::ConvertF32U64; |
| 117 | case IR::Opcode::GlobalAtomicAddF16x2: | ||
| 118 | return IR::Opcode::GlobalAtomicAddF32x2; | ||
| 119 | case IR::Opcode::StorageAtomicAddF16x2: | ||
| 120 | return IR::Opcode::StorageAtomicAddF32x2; | ||
| 121 | case IR::Opcode::GlobalAtomicMinF16x2: | ||
| 122 | return IR::Opcode::GlobalAtomicMinF32x2; | ||
| 123 | case IR::Opcode::StorageAtomicMinF16x2: | ||
| 124 | return IR::Opcode::StorageAtomicMinF32x2; | ||
| 125 | case IR::Opcode::GlobalAtomicMaxF16x2: | ||
| 126 | return IR::Opcode::GlobalAtomicMaxF32x2; | ||
| 127 | case IR::Opcode::StorageAtomicMaxF16x2: | ||
| 128 | return IR::Opcode::StorageAtomicMaxF32x2; | ||
| 117 | default: | 129 | default: |
| 118 | return op; | 130 | return op; |
| 119 | } | 131 | } |
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index f0d68d516..a4e41bda1 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h | |||
| @@ -38,6 +38,7 @@ struct Profile { | |||
| 38 | bool support_viewport_index_layer_non_geometry{}; | 38 | bool support_viewport_index_layer_non_geometry{}; |
| 39 | bool support_typeless_image_loads{}; | 39 | bool support_typeless_image_loads{}; |
| 40 | bool warp_size_potentially_larger_than_guest{}; | 40 | bool warp_size_potentially_larger_than_guest{}; |
| 41 | bool support_int64_atomics{}; | ||
| 41 | 42 | ||
| 42 | // FClamp is broken and OpFMax + OpFMin should be used instead | 43 | // FClamp is broken and OpFMax + OpFMin should be used instead |
| 43 | bool has_broken_spirv_clamp{}; | 44 | bool has_broken_spirv_clamp{}; |
diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index 3fbe99268..7bcecf554 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h | |||
| @@ -128,6 +128,19 @@ struct Info { | |||
| 128 | bool uses_subgroup_mask{}; | 128 | bool uses_subgroup_mask{}; |
| 129 | bool uses_fswzadd{}; | 129 | bool uses_fswzadd{}; |
| 130 | bool uses_typeless_image_reads{}; | 130 | bool uses_typeless_image_reads{}; |
| 131 | bool uses_shared_increment{}; | ||
| 132 | bool uses_shared_decrement{}; | ||
| 133 | bool uses_global_increment{}; | ||
| 134 | bool uses_global_decrement{}; | ||
| 135 | bool uses_atomic_f32_add{}; | ||
| 136 | bool uses_atomic_f16x2_add{}; | ||
| 137 | bool uses_atomic_f16x2_min{}; | ||
| 138 | bool uses_atomic_f16x2_max{}; | ||
| 139 | bool uses_atomic_f32x2_add{}; | ||
| 140 | bool uses_atomic_f32x2_min{}; | ||
| 141 | bool uses_atomic_f32x2_max{}; | ||
| 142 | bool uses_64_bit_atomics{}; | ||
| 143 | bool uses_shared_memory_u32x2{}; | ||
| 131 | 144 | ||
| 132 | IR::Type used_constant_buffer_types{}; | 145 | IR::Type used_constant_buffer_types{}; |
| 133 | 146 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f699a9bdf..b953d694b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | |||
| 637 | device.IsExtShaderViewportIndexLayerSupported(), | 637 | device.IsExtShaderViewportIndexLayerSupported(), |
| 638 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), | 638 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), |
| 639 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), | 639 | .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), |
| 640 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), | ||
| 640 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, | 641 | .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, |
| 641 | .generic_input_types{}, | 642 | .generic_input_types{}, |
| 642 | .fixed_state_point_size{}, | 643 | .fixed_state_point_size{}, |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 78bb741bc..911dfed44 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -681,6 +681,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 681 | bool has_ext_transform_feedback{}; | 681 | bool has_ext_transform_feedback{}; |
| 682 | bool has_ext_custom_border_color{}; | 682 | bool has_ext_custom_border_color{}; |
| 683 | bool has_ext_extended_dynamic_state{}; | 683 | bool has_ext_extended_dynamic_state{}; |
| 684 | bool has_ext_shader_atomic_int64{}; | ||
| 684 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { | 685 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { |
| 685 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, | 686 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 686 | bool push) { | 687 | bool push) { |
| @@ -710,6 +711,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 710 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | 711 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); |
| 711 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | 712 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); |
| 712 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | 713 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); |
| 714 | test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); | ||
| 713 | test(has_khr_workgroup_memory_explicit_layout, | 715 | test(has_khr_workgroup_memory_explicit_layout, |
| 714 | VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); | 716 | VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); |
| 715 | if (Settings::values.renderer_debug) { | 717 | if (Settings::values.renderer_debug) { |
| @@ -760,6 +762,18 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 760 | } else { | 762 | } else { |
| 761 | is_warp_potentially_bigger = true; | 763 | is_warp_potentially_bigger = true; |
| 762 | } | 764 | } |
| 765 | if (has_ext_shader_atomic_int64) { | ||
| 766 | VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; | ||
| 767 | atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES; | ||
| 768 | atomic_int64.pNext = nullptr; | ||
| 769 | features.pNext = &atomic_int64; | ||
| 770 | physical.GetFeatures2KHR(features); | ||
| 771 | // Both buffer and shared 64-bit atomics are required before enabling the extension | ||
| 772 | if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { | ||
| 773 | extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); | ||
| 774 | ext_shader_atomic_int64 = true; | ||
| 775 | } | ||
| 776 | } | ||
| 763 | if (has_ext_transform_feedback) { | 777 | if (has_ext_transform_feedback) { |
| 764 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; | 778 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; |
| 765 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; | 779 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index adf62a707..4e6d13308 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -229,6 +229,11 @@ public: | |||
| 229 | return ext_shader_stencil_export; | 229 | return ext_shader_stencil_export; |
| 230 | } | 230 | } |
| 231 | 231 | ||
| 232 | /// Returns true if VK_KHR_shader_atomic_int64 is enabled (buffer and shared 64-bit atomics). | ||
| 233 | bool IsExtShaderAtomicInt64Supported() const { | ||
| 234 | return ext_shader_atomic_int64; | ||
| 235 | } | ||
| 236 | |||
| 232 | /// Returns true when a known debugging tool is attached. | 237 | /// Returns true when a known debugging tool is attached. |
| 233 | bool HasDebuggingToolAttached() const { | 238 | bool HasDebuggingToolAttached() const { |
| 234 | return has_renderdoc || has_nsight_graphics; | 239 | return has_renderdoc || has_nsight_graphics; |
| @@ -320,6 +325,7 @@ private: | |||
| 320 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | 325 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. |
| 321 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. | 326 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. |
| 322 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | 327 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. |
| 328 | bool ext_shader_atomic_int64{}; ///< Support for VK_KHR_shader_atomic_int64. | ||
| 323 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 329 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 324 | bool has_renderdoc{}; ///< Has RenderDoc attached | 330 | bool has_renderdoc{}; ///< Has RenderDoc attached |
| 325 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | 331 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached |