diff options
| author | 2020-01-25 21:03:02 -0300 | |
|---|---|---|
| committer | 2020-01-26 01:54:24 -0300 | |
| commit | d95d4ac84396973d76985dc96249d4714f58b6a2 (patch) | |
| tree | 63083efff309ea993e1de84e4b93fd6d348dfce2 /src | |
| parent | Merge pull request #3343 from FearlessTobi/ui-tab (diff) | |
| download | yuzu-d95d4ac84396973d76985dc96249d4714f58b6a2.tar.gz yuzu-d95d4ac84396973d76985dc96249d4714f58b6a2.tar.xz yuzu-d95d4ac84396973d76985dc96249d4714f58b6a2.zip | |
shader/memory: Implement ATOM.ADD
ATOM operates atomically on global memory. For now, only ATOM.ADD is
implemented, since that's what was found in commercial games.
Only ATOM.ADD.S32 is accepted for now (the other types are flagged as
unimplemented), although ATOM.ADD.U32 shouldn't behave any differently.
This change forces us to change the default type on SPIR-V storage
buffers from float to uint. We could also alias the buffers, but it's
simpler for now to just use uint. While we are at it, factor the shared and
global memory pointer computation into helper functions to avoid repetition.
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 30 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 66 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 22 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 2 |
5 files changed, 86 insertions, 39 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 6f98bd827..f443ec0fe 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -227,6 +227,28 @@ enum class AtomicOp : u64 { | |||
| 227 | Exch = 8, | 227 | Exch = 8, |
| 228 | }; | 228 | }; |
| 229 | 229 | ||
| 230 | enum class GlobalAtomicOp : u64 { | ||
| 231 | Add = 0, | ||
| 232 | Min = 1, | ||
| 233 | Max = 2, | ||
| 234 | Inc = 3, | ||
| 235 | Dec = 4, | ||
| 236 | And = 5, | ||
| 237 | Or = 6, | ||
| 238 | Xor = 7, | ||
| 239 | Exch = 8, | ||
| 240 | SafeAdd = 10, | ||
| 241 | }; | ||
| 242 | |||
| 243 | enum class GlobalAtomicType : u64 { | ||
| 244 | U32 = 0, | ||
| 245 | S32 = 1, | ||
| 246 | U64 = 2, | ||
| 247 | F32_FTZ_RN = 3, | ||
| 248 | F16x2_FTZ_RN = 4, | ||
| 249 | S64 = 5, | ||
| 250 | }; | ||
| 251 | |||
| 230 | enum class UniformType : u64 { | 252 | enum class UniformType : u64 { |
| 231 | UnsignedByte = 0, | 253 | UnsignedByte = 0, |
| 232 | SignedByte = 1, | 254 | SignedByte = 1, |
| @@ -958,6 +980,12 @@ union Instruction { | |||
| 958 | } stg; | 980 | } stg; |
| 959 | 981 | ||
| 960 | union { | 982 | union { |
| 983 | BitField<52, 4, GlobalAtomicOp> operation; | ||
| 984 | BitField<49, 3, GlobalAtomicType> type; | ||
| 985 | BitField<28, 20, s64> offset; | ||
| 986 | } atom; | ||
| 987 | |||
| 988 | union { | ||
| 961 | BitField<52, 4, AtomicOp> operation; | 989 | BitField<52, 4, AtomicOp> operation; |
| 962 | BitField<28, 2, AtomicType> type; | 990 | BitField<28, 2, AtomicType> type; |
| 963 | BitField<30, 22, s64> offset; | 991 | BitField<30, 22, s64> offset; |
| @@ -1690,6 +1718,7 @@ public: | |||
| 1690 | ST_S, | 1718 | ST_S, |
| 1691 | ST, // Store in generic memory | 1719 | ST, // Store in generic memory |
| 1692 | STG, // Store in global memory | 1720 | STG, // Store in global memory |
| 1721 | ATOM, // Atomic operation on global memory | ||
| 1693 | ATOMS, // Atomic operation on shared memory | 1722 | ATOMS, // Atomic operation on shared memory |
| 1694 | AL2P, // Transforms attribute memory into physical memory | 1723 | AL2P, // Transforms attribute memory into physical memory |
| 1695 | TEX, | 1724 | TEX, |
| @@ -1994,6 +2023,7 @@ private: | |||
| 1994 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | 2023 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |
| 1995 | INST("101-------------", Id::ST, Type::Memory, "ST"), | 2024 | INST("101-------------", Id::ST, Type::Memory, "ST"), |
| 1996 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 2025 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 2026 | INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), | ||
| 1997 | INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | 2027 | INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), |
| 1998 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | 2028 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), |
| 1999 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | 2029 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 2996aaf08..7b2a3a00b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -1858,10 +1858,7 @@ private: | |||
| 1858 | 1858 | ||
| 1859 | template <const std::string_view& opname, Type type> | 1859 | template <const std::string_view& opname, Type type> |
| 1860 | Expression Atomic(Operation operation) { | 1860 | Expression Atomic(Operation operation) { |
| 1861 | ASSERT(stage == ShaderType::Compute); | 1861 | return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), |
| 1862 | auto& smem = std::get<SmemNode>(*operation[0]); | ||
| 1863 | |||
| 1864 | return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), | ||
| 1865 | Visit(operation[1]).As(type)), | 1862 | Visit(operation[1]).As(type)), |
| 1866 | type}; | 1863 | type}; |
| 1867 | } | 1864 | } |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b53078721..1ab22251e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -1123,15 +1123,7 @@ private: | |||
| 1123 | } | 1123 | } |
| 1124 | 1124 | ||
| 1125 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { | 1125 | if (const auto gmem = std::get_if<GmemNode>(&*node)) { |
| 1126 | const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); | 1126 | return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; |
| 1127 | const Id real = AsUint(Visit(gmem->GetRealAddress())); | ||
| 1128 | const Id base = AsUint(Visit(gmem->GetBaseAddress())); | ||
| 1129 | |||
| 1130 | Id offset = OpISub(t_uint, real, base); | ||
| 1131 | offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); | ||
| 1132 | return {OpLoad(t_float, | ||
| 1133 | OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), | ||
| 1134 | Type::Float}; | ||
| 1135 | } | 1127 | } |
| 1136 | 1128 | ||
| 1137 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { | 1129 | if (const auto lmem = std::get_if<LmemNode>(&*node)) { |
| @@ -1142,10 +1134,7 @@ private: | |||
| 1142 | } | 1134 | } |
| 1143 | 1135 | ||
| 1144 | if (const auto smem = std::get_if<SmemNode>(&*node)) { | 1136 | if (const auto smem = std::get_if<SmemNode>(&*node)) { |
| 1145 | Id address = AsUint(Visit(smem->GetAddress())); | 1137 | return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; |
| 1146 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 1147 | const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); | ||
| 1148 | return {OpLoad(t_uint, pointer), Type::Uint}; | ||
| 1149 | } | 1138 | } |
| 1150 | 1139 | ||
| 1151 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | 1140 | if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { |
| @@ -1339,20 +1328,10 @@ private: | |||
| 1339 | target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; | 1328 | target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; |
| 1340 | 1329 | ||
| 1341 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | 1330 | } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { |
| 1342 | ASSERT(stage == ShaderType::Compute); | 1331 | target = {GetSharedMemoryPointer(*smem), Type::Uint}; |
| 1343 | Id address = AsUint(Visit(smem->GetAddress())); | ||
| 1344 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 1345 | target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; | ||
| 1346 | 1332 | ||
| 1347 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1333 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1348 | const Id real = AsUint(Visit(gmem->GetRealAddress())); | 1334 | target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; |
| 1349 | const Id base = AsUint(Visit(gmem->GetBaseAddress())); | ||
| 1350 | const Id diff = OpISub(t_uint, real, base); | ||
| 1351 | const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||
| 1352 | |||
| 1353 | const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); | ||
| 1354 | target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), | ||
| 1355 | Type::Float}; | ||
| 1356 | 1335 | ||
| 1357 | } else { | 1336 | } else { |
| 1358 | UNIMPLEMENTED(); | 1337 | UNIMPLEMENTED(); |
| @@ -1804,11 +1783,16 @@ private: | |||
| 1804 | return {}; | 1783 | return {}; |
| 1805 | } | 1784 | } |
| 1806 | 1785 | ||
| 1807 | Expression UAtomicAdd(Operation operation) { | 1786 | Expression AtomicAdd(Operation operation) { |
| 1808 | const auto& smem = std::get<SmemNode>(*operation[0]); | 1787 | Id pointer; |
| 1809 | Id address = AsUint(Visit(smem.GetAddress())); | 1788 | if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { |
| 1810 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | 1789 | pointer = GetSharedMemoryPointer(*smem); |
| 1811 | const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); | 1790 | } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { |
| 1791 | pointer = GetGlobalMemoryPointer(*gmem); | ||
| 1792 | } else { | ||
| 1793 | UNREACHABLE(); | ||
| 1794 | return {Constant(t_uint, 0), Type::Uint}; | ||
| 1795 | } | ||
| 1812 | 1796 | ||
| 1813 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | 1797 | const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |
| 1814 | const Id semantics = Constant(t_uint, 0U); | 1798 | const Id semantics = Constant(t_uint, 0U); |
| @@ -2243,6 +2227,22 @@ private: | |||
| 2243 | return {}; | 2227 | return {}; |
| 2244 | } | 2228 | } |
| 2245 | 2229 | ||
| 2230 | Id GetGlobalMemoryPointer(const GmemNode& gmem) { | ||
| 2231 | const Id real = AsUint(Visit(gmem.GetRealAddress())); | ||
| 2232 | const Id base = AsUint(Visit(gmem.GetBaseAddress())); | ||
| 2233 | const Id diff = OpISub(t_uint, real, base); | ||
| 2234 | const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||
| 2235 | const Id buffer = global_buffers.at(gmem.GetDescriptor()); | ||
| 2236 | return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); | ||
| 2237 | } | ||
| 2238 | |||
| 2239 | Id GetSharedMemoryPointer(const SmemNode& smem) { | ||
| 2240 | ASSERT(stage == ShaderType::Compute); | ||
| 2241 | Id address = AsUint(Visit(smem.GetAddress())); | ||
| 2242 | address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||
| 2243 | return OpAccessChain(t_smem_uint, shared_memory, address); | ||
| 2244 | } | ||
| 2245 | |||
| 2246 | static constexpr std::array operation_decompilers = { | 2246 | static constexpr std::array operation_decompilers = { |
| 2247 | &SPIRVDecompiler::Assign, | 2247 | &SPIRVDecompiler::Assign, |
| 2248 | 2248 | ||
| @@ -2389,7 +2389,7 @@ private: | |||
| 2389 | &SPIRVDecompiler::AtomicImageXor, | 2389 | &SPIRVDecompiler::AtomicImageXor, |
| 2390 | &SPIRVDecompiler::AtomicImageExchange, | 2390 | &SPIRVDecompiler::AtomicImageExchange, |
| 2391 | 2391 | ||
| 2392 | &SPIRVDecompiler::UAtomicAdd, | 2392 | &SPIRVDecompiler::AtomicAdd, |
| 2393 | 2393 | ||
| 2394 | &SPIRVDecompiler::Branch, | 2394 | &SPIRVDecompiler::Branch, |
| 2395 | &SPIRVDecompiler::BranchIndirect, | 2395 | &SPIRVDecompiler::BranchIndirect, |
| @@ -2485,9 +2485,9 @@ private: | |||
| 2485 | 2485 | ||
| 2486 | Id t_smem_uint{}; | 2486 | Id t_smem_uint{}; |
| 2487 | 2487 | ||
| 2488 | const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | 2488 | const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); |
| 2489 | const Id t_gmem_array = | 2489 | const Id t_gmem_array = |
| 2490 | Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); | 2490 | Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); |
| 2491 | const Id t_gmem_struct = MemberDecorate( | 2491 | const Id t_gmem_struct = MemberDecorate( |
| 2492 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | 2492 | Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); |
| 2493 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | 2493 | const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 7591a715f..3da833e81 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -19,6 +19,8 @@ namespace VideoCommon::Shader { | |||
| 19 | using Tegra::Shader::AtomicOp; | 19 | using Tegra::Shader::AtomicOp; |
| 20 | using Tegra::Shader::AtomicType; | 20 | using Tegra::Shader::AtomicType; |
| 21 | using Tegra::Shader::Attribute; | 21 | using Tegra::Shader::Attribute; |
| 22 | using Tegra::Shader::GlobalAtomicOp; | ||
| 23 | using Tegra::Shader::GlobalAtomicType; | ||
| 22 | using Tegra::Shader::Instruction; | 24 | using Tegra::Shader::Instruction; |
| 23 | using Tegra::Shader::OpCode; | 25 | using Tegra::Shader::OpCode; |
| 24 | using Tegra::Shader::Register; | 26 | using Tegra::Shader::Register; |
| @@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 335 | } | 337 | } |
| 336 | break; | 338 | break; |
| 337 | } | 339 | } |
| 340 | case OpCode::Id::ATOM: { | ||
| 341 | UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", | ||
| 342 | static_cast<int>(instr.atom.operation.Value())); | ||
| 343 | UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", | ||
| 344 | static_cast<int>(instr.atom.type.Value())); | ||
| 345 | |||
| 346 | const auto [real_address, base_address, descriptor] = | ||
| 347 | TrackGlobalMemory(bb, instr, true, true); | ||
| 348 | if (!real_address || !base_address) { | ||
| 349 | // Tracking failed, skip atomic. | ||
| 350 | break; | ||
| 351 | } | ||
| 352 | |||
| 353 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 354 | Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); | ||
| 355 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 356 | break; | ||
| 357 | } | ||
| 338 | case OpCode::Id::ATOMS: { | 358 | case OpCode::Id::ATOMS: { |
| 339 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", | 359 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", |
| 340 | static_cast<int>(instr.atoms.operation.Value())); | 360 | static_cast<int>(instr.atoms.operation.Value())); |
| @@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 348 | Node memory = GetSharedMemory(std::move(address)); | 368 | Node memory = GetSharedMemory(std::move(address)); |
| 349 | Node data = GetRegister(instr.gpr20); | 369 | Node data = GetRegister(instr.gpr20); |
| 350 | 370 | ||
| 351 | Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); | 371 | Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); |
| 352 | SetRegister(bb, instr.gpr0, std::move(value)); | 372 | SetRegister(bb, instr.gpr0, std::move(value)); |
| 353 | break; | 373 | break; |
| 354 | } | 374 | } |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 075c7d07c..9af1f0228 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -162,7 +162,7 @@ enum class OperationCode { | |||
| 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | 162 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void |
| 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | 163 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void |
| 164 | 164 | ||
| 165 | UAtomicAdd, /// (smem, uint) -> uint | 165 | AtomicAdd, /// (memory, {u}int) -> {u}int |
| 166 | 166 | ||
| 167 | Branch, /// (uint branch_target) -> void | 167 | Branch, /// (uint branch_target) -> void |
| 168 | BranchIndirect, /// (uint branch_target) -> void | 168 | BranchIndirect, /// (uint branch_target) -> void |