diff options
| author | 2020-01-29 16:49:54 -0500 | |
|---|---|---|
| committer | 2020-01-29 16:49:54 -0500 | |
| commit | 2db7adc42a3d72fe7b02fbf4902b98d69c777b2f (patch) | |
| tree | 197f2b968aa80756e329d9c76cfe72575e2f2ac5 | |
| parent | Merge pull request #3355 from ReinUsesLisp/break-down (diff) | |
| parent | shader/memory: Implement ATOM.ADD (diff) | |
| download | yuzu-2db7adc42a3d72fe7b02fbf4902b98d69c777b2f.tar.gz yuzu-2db7adc42a3d72fe7b02fbf4902b98d69c777b2f.tar.xz yuzu-2db7adc42a3d72fe7b02fbf4902b98d69c777b2f.zip | |
Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 30 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 66 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 2 |
5 files changed, 86 insertions, 39 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6f98bd827..f443ec0fe 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
| @@ -227,6 +227,28 @@ enum class AtomicOp : u64 { | |||
| 227 | Exch = 8, | 227 | Exch = 8, |
| 228 | }; | 228 | }; |
| 229 | 229 | ||
| 230 | enum class GlobalAtomicOp : u64 { | ||
| 231 | Add = 0, | ||
| 232 | Min = 1, | ||
| 233 | Max = 2, | ||
| 234 | Inc = 3, | ||
| 235 | Dec = 4, | ||
| 236 | And = 5, | ||
| 237 | Or = 6, | ||
| 238 | Xor = 7, | ||
| 239 | Exch = 8, | ||
| 240 | SafeAdd = 10, | ||
| 241 | }; | ||
| 242 | |||
| 243 | enum class GlobalAtomicType : u64 { | ||
| 244 | U32 = 0, | ||
| 245 | S32 = 1, | ||
| 246 | U64 = 2, | ||
| 247 | F32_FTZ_RN = 3, | ||
| 248 | F16x2_FTZ_RN = 4, | ||
| 249 | S64 = 5, | ||
| 250 | }; | ||
| 251 | |||
| 230 | enum class UniformType : u64 { | 252 | enum class UniformType : u64 { |
| 231 | UnsignedByte = 0, | 253 | UnsignedByte = 0, |
| 232 | SignedByte = 1, | 254 | SignedByte = 1, |
| @@ -958,6 +980,12 @@ union Instruction { | |||
| 958 | } stg; | 980 | } stg; |
| 959 | 981 | ||
| 960 | union { | 982 | union { |
| 983 | BitField<52, 4, GlobalAtomicOp> operation; | ||
| 984 | BitField<49, 3, GlobalAtomicType> type; | ||
| 985 | BitField<28, 20, s64> offset; | ||
| 986 | } atom; | ||
| 987 | |||
| 988 | union { | ||
| 961 | BitField<52, 4, AtomicOp> operation; | 989 | BitField<52, 4, AtomicOp> operation; |
| 962 | BitField<28, 2, AtomicType> type; | 990 | BitField<28, 2, AtomicType> type; |
| 963 | BitField<30, 22, s64> offset; | 991 | BitField<30, 22, s64> offset; |
| @@ -1690,6 +1718,7 @@ public: | |||
| 1690 | ST_S, | 1718 | ST_S, |
| 1691 | ST, // Store in generic memory | 1719 | ST, // Store in generic memory |
| 1692 | STG, // Store in global memory | 1720 | STG, // Store in global memory |
| 1721 | ATOM, // Atomic operation on global memory | ||
| 1693 | ATOMS, // Atomic operation on shared memory | 1722 | ATOMS, // Atomic operation on shared memory |
| 1694 | AL2P, // Transforms attribute memory into physical memory | 1723 | AL2P, // Transforms attribute memory into physical memory |
| 1695 | TEX, | 1724 | TEX, |
| @@ -1994,6 +2023,7 @@ private: | |||
| 1994 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | 2023 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |
| 1995 | INST("101-------------", Id::ST, Type::Memory, "ST"), | 2024 | INST("101-------------", Id::ST, Type::Memory, "ST"), |
| 1996 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 2025 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 2026 | INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), | ||
| 1997 | INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | 2027 | INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), |
| 1998 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | 2028 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), |
| 1999 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | 2029 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6531dfe9b..a1ac3d7a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
| @@ -1857,10 +1857,7 @@ private: | |||
| 1857 | 1857 | ||
| 1858 | template <const std::string_view& opname, Type type> | 1858 | template <const std::string_view& opname, Type type> |
| 1859 | Expression Atomic(Operation operation) { | 1859 | Expression Atomic(Operation operation) { |
| 1860 | ASSERT(stage == ShaderType::Compute); | 1860 | return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), |
| 1861 | auto& smem = std::get<SmemNode>(*operation[0]); | ||
| 1862 | |||
| 1863 | return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), | ||
| 1864 | Visit(operation[1]).As(type)), | 1861 | Visit(operation[1]).As(type)), |
| 1865 | type}; | 1862 | type}; |
| 1866 | } | 1863 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b53078721..1ab22251e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
| @@ -1123,15 +1123,7 @@ private: | |||
| 1123 | } | 1123 | } |
| 1124 | 1124 | ||
| 1125 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 1125 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| 1126 | const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); | 1126 | return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; |
| 1127 | const Id real = AsUint(Visit(gmem->GetRealAddress())); | ||
| 1128 | const Id base = AsUint(Visit(gmem->GetBaseAddress())); | ||
| 1129 | |||
| 1130 | Id offset = OpISub(t_uint, real, base); | ||
| 1131 | offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); | ||
| 1132 | return {OpLoad(t_float, | ||
| 1133 | OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), | ||
| 1134 | Type::Float}; | ||
| 1135 | } | 1127 | } |
| 1136 | 1128 | ||
| 1137 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 1129 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| @@ -1142,10 +1134,7 @@ private: | |||
| 1142 | } | 1134 | } |
| 1143 | 1135 | ||
| 1144 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | 1136 | if (const auto smem = std::get_if<SmemNode>(&*node)) { |
| 1145 | Id address = AsUint(Visit(smem->GetAddress())); | 1137 | return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; |
| 1146 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 1147 | const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); | ||
| 1148 | return {OpLoad(t_uint, pointer), Type::Uint}; | ||
| 1149 | } | 1138 | } |
| 1150 | 1139 | ||
| 1151 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | 1140 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { |
| @@ -1339,20 +1328,10 @@ private: | |||
| 1339 | target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; | 1328 | target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; |
| 1340 | 1329 | ||
| 1341 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | 1330 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { |
| 1342 | ASSERT(stage == ShaderType::Compute); | 1331 | target = {GetSharedMemoryPointer(*smem), Type::Uint}; |
| 1343 | Id address = AsUint(Visit(smem->GetAddress())); | ||
| 1344 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 1345 | target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; | ||
| 1346 | 1332 | ||
| 1347 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1333 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1348 | const Id real = AsUint(Visit(gmem->GetRealAddress())); | 1334 | target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; |
| 1349 | const Id base = AsUint(Visit(gmem->GetBaseAddress())); | ||
| 1350 | const Id diff = OpISub(t_uint, real, base); | ||
| 1351 | const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||
| 1352 | |||
| 1353 | const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); | ||
| 1354 | target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), | ||
| 1355 | Type::Float}; | ||
| 1356 | 1335 | ||
| 1357 | } else { | 1336 | } else { |
| 1358 | UNIMPLEMENTED(); | 1337 | UNIMPLEMENTED(); |
| @@ -1804,11 +1783,16 @@ private: | |||
| 1804 | return {}; | 1783 | return {}; |
| 1805 | } | 1784 | } |
| 1806 | 1785 | ||
| 1807 | Expression UAtomicAdd(Operation operation) { | 1786 | Expression AtomicAdd(Operation operation) { |
| 1808 | const auto& smem = std::get<SmemNode>(*operation[0]); | 1787 | Id pointer; |
| 1809 | Id address = AsUint(Visit(smem.GetAddress())); | 1788 | if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { |
| 1810 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | 1789 | pointer = GetSharedMemoryPointer(*smem); |
| 1811 | const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); | 1790 | } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { |
| 1791 | pointer = GetGlobalMemoryPointer(*gmem); | ||
| 1792 | } else { | ||
| 1793 | UNREACHABLE(); | ||
| 1794 | return {Constant(t_uint, 0), Type::Uint}; | ||
| 1795 | } | ||
| 1812 | 1796 | ||
| 1813 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | 1797 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |
| 1814 | const Id semantics = Constant(t_uint, 0U); | 1798 | const Id semantics = Constant(t_uint, 0U); |
| @@ -2243,6 +2227,22 @@ private: | |||
| 2243 | return {}; | 2227 | return {}; |
| 2244 | } | 2228 | } |
| 2245 | 2229 | ||
| 2230 | Id GetGlobalMemoryPointer(const GmemNode& gmem) { | ||
| 2231 | const Id real = AsUint(Visit(gmem.GetRealAddress())); | ||
| 2232 | const Id base = AsUint(Visit(gmem.GetBaseAddress())); | ||
| 2233 | const Id diff = OpISub(t_uint, real, base); | ||
| 2234 | const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||
| 2235 | const Id buffer = global_buffers.at(gmem.GetDescriptor()); | ||
| 2236 | return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); | ||
| 2237 | } | ||
| 2238 | |||
| 2239 | Id GetSharedMemoryPointer(const SmemNode& smem) { | ||
| 2240 | ASSERT(stage == ShaderType::Compute); | ||
| 2241 | Id address = AsUint(Visit(smem.GetAddress())); | ||
| 2242 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 2243 | return OpAccessChain(t_smem_uint, shared_memory, address); | ||
| 2244 | } | ||
| 2245 | |||
| 2246 | static constexpr std::array operation_decompilers = { | 2246 | static constexpr std::array operation_decompilers = { |
| 2247 | &SPIRVDecompiler::Assign, | 2247 | &SPIRVDecompiler::Assign, |
| 2248 | 2248 | ||
| @@ -2389,7 +2389,7 @@ private: | |||
| 2389 | &SPIRVDecompiler::AtomicImageXor, | 2389 | &SPIRVDecompiler::AtomicImageXor, |
| 2390 | &SPIRVDecompiler::AtomicImageExchange, | 2390 | &SPIRVDecompiler::AtomicImageExchange, |
| 2391 | 2391 | ||
| 2392 | &SPIRVDecompiler::UAtomicAdd, | 2392 | &SPIRVDecompiler::AtomicAdd, |
| 2393 | 2393 | ||
| 2394 | &SPIRVDecompiler::Branch, | 2394 | &SPIRVDecompiler::Branch, |
| 2395 | &SPIRVDecompiler::BranchIndirect, | 2395 | &SPIRVDecompiler::BranchIndirect, |
| @@ -2485,9 +2485,9 @@ private: | |||
| 2485 | 2485 | ||
| 2486 | Id t_smem_uint{}; | 2486 | Id t_smem_uint{}; |
| 2487 | 2487 | ||
| 2488 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | 2488 | const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); |
| 2489 | const Id t_gmem_array = | 2489 | const Id t_gmem_array = |
| 2490 | Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); | 2490 | Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); |
| 2491 | const Id t_gmem_struct = MemberDecorate( | 2491 | const Id t_gmem_struct = MemberDecorate( |
| 2492 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 2492 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); |
| 2493 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | 2493 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7591a715f..3da833e81 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
| @@ -19,6 +19,8 @@ namespace VideoCommon::Shader { | |||
| 19 | using Tegra::Shader::AtomicOp; | 19 | using Tegra::Shader::AtomicOp; |
| 20 | using Tegra::Shader::AtomicType; | 20 | using Tegra::Shader::AtomicType; |
| 21 | using Tegra::Shader::Attribute; | 21 | using Tegra::Shader::Attribute; |
| 22 | using Tegra::Shader::GlobalAtomicOp; | ||
| 23 | using Tegra::Shader::GlobalAtomicType; | ||
| 22 | using Tegra::Shader::Instruction; | 24 | using Tegra::Shader::Instruction; |
| 23 | using Tegra::Shader::OpCode; | 25 | using Tegra::Shader::OpCode; |
| 24 | using Tegra::Shader::Register; | 26 | using Tegra::Shader::Register; |
| @@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 335 | } | 337 | } |
| 336 | break; | 338 | break; |
| 337 | } | 339 | } |
| 340 | case OpCode::Id::ATOM: { | ||
| 341 | UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", | ||
| 342 | static_cast<int>(instr.atom.operation.Value())); | ||
| 343 | UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", | ||
| 344 | static_cast<int>(instr.atom.type.Value())); | ||
| 345 | |||
| 346 | const auto [real_address, base_address, descriptor] = | ||
| 347 | TrackGlobalMemory(bb, instr, true, true); | ||
| 348 | if (!real_address || !base_address) { | ||
| 349 | // Tracking failed, skip atomic. | ||
| 350 | break; | ||
| 351 | } | ||
| 352 | |||
| 353 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 354 | Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); | ||
| 355 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 356 | break; | ||
| 357 | } | ||
| 338 | case OpCode::Id::ATOMS: { | 358 | case OpCode::Id::ATOMS: { |
| 339 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", | 359 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", |
| 340 | static_cast<int>(instr.atoms.operation.Value())); | 360 | static_cast<int>(instr.atoms.operation.Value())); |
| @@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 348 | Node memory = GetSharedMemory(std::move(address)); | 368 | Node memory = GetSharedMemory(std::move(address)); |
| 349 | Node data = GetRegister(instr.gpr20); | 369 | Node data = GetRegister(instr.gpr20); |
| 350 | 370 | ||
| 351 | Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); | 371 | Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); |
| 352 | SetRegister(bb, instr.gpr0, std::move(value)); | 372 | SetRegister(bb, instr.gpr0, std::move(value)); |
| 353 | break; | 373 | break; |
| 354 | } | 374 | } |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 075c7d07c..9af1f0228 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
| @@ -162,7 +162,7 @@ enum class OperationCode { | |||
| 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void |
| 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void |
| 164 | 164 | ||
| 165 | UAtomicAdd, /// (smem, uint) -> uint | 165 | AtomicAdd, /// (memory, {u}int) -> {u}int |
| 166 | 166 | ||
| 167 | Branch, /// (uint branch_target) -> void | 167 | Branch, /// (uint branch_target) -> void |
| 168 | BranchIndirect, /// (uint branch_target) -> void | 168 | BranchIndirect, /// (uint branch_target) -> void |