summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-01-25 21:03:02 -0300
committerGravatar ReinUsesLisp2020-01-26 01:54:24 -0300
commitd95d4ac84396973d76985dc96249d4714f58b6a2 (patch)
tree63083efff309ea993e1de84e4b93fd6d348dfce2 /src
parentMerge pull request #3343 from FearlessTobi/ui-tab (diff)
downloadyuzu-d95d4ac84396973d76985dc96249d4714f58b6a2.tar.gz
yuzu-d95d4ac84396973d76985dc96249d4714f58b6a2.tar.xz
yuzu-d95d4ac84396973d76985dc96249d4714f58b6a2.zip
shader/memory: Implement ATOM.ADD
ATOM operates atomically on global memory. For now, only ATOM.ADD is implemented, since that's what was found in commercial games. The decoder accepts only ATOM.ADD.S32 and reports every other operation or type as unimplemented, although ATOM.ADD.U32 shouldn't behave any differently. This change forces us to change the default type of SPIR-V storage buffers from float to uint. We could also alias the buffers, but for now it's simpler to just use uint. While we are at it, abstract the memory pointer code to avoid repetition.
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/shader_bytecode.h30
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp5
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp66
-rw-r--r--src/video_core/shader/decode/memory.cpp22
-rw-r--r--src/video_core/shader/node.h2
5 files changed, 86 insertions, 39 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6f98bd827..f443ec0fe 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
227 Exch = 8, 227 Exch = 8,
228}; 228};
229 229
230enum class GlobalAtomicOp : u64 {
231 Add = 0,
232 Min = 1,
233 Max = 2,
234 Inc = 3,
235 Dec = 4,
236 And = 5,
237 Or = 6,
238 Xor = 7,
239 Exch = 8,
240 SafeAdd = 10,
241};
242
243enum class GlobalAtomicType : u64 {
244 U32 = 0,
245 S32 = 1,
246 U64 = 2,
247 F32_FTZ_RN = 3,
248 F16x2_FTZ_RN = 4,
249 S64 = 5,
250};
251
230enum class UniformType : u64 { 252enum class UniformType : u64 {
231 UnsignedByte = 0, 253 UnsignedByte = 0,
232 SignedByte = 1, 254 SignedByte = 1,
@@ -958,6 +980,12 @@ union Instruction {
958 } stg; 980 } stg;
959 981
960 union { 982 union {
983 BitField<52, 4, GlobalAtomicOp> operation;
984 BitField<49, 3, GlobalAtomicType> type;
985 BitField<28, 20, s64> offset;
986 } atom;
987
988 union {
961 BitField<52, 4, AtomicOp> operation; 989 BitField<52, 4, AtomicOp> operation;
962 BitField<28, 2, AtomicType> type; 990 BitField<28, 2, AtomicType> type;
963 BitField<30, 22, s64> offset; 991 BitField<30, 22, s64> offset;
@@ -1690,6 +1718,7 @@ public:
1690 ST_S, 1718 ST_S,
1691 ST, // Store in generic memory 1719 ST, // Store in generic memory
1692 STG, // Store in global memory 1720 STG, // Store in global memory
1721 ATOM, // Atomic operation on global memory
1693 ATOMS, // Atomic operation on shared memory 1722 ATOMS, // Atomic operation on shared memory
1694 AL2P, // Transforms attribute memory into physical memory 1723 AL2P, // Transforms attribute memory into physical memory
1695 TEX, 1724 TEX,
@@ -1994,6 +2023,7 @@ private:
1994 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 2023 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1995 INST("101-------------", Id::ST, Type::Memory, "ST"), 2024 INST("101-------------", Id::ST, Type::Memory, "ST"),
1996 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 2025 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
2026 INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
1997 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), 2027 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
1998 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), 2028 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
1999 INST("110000----111---", Id::TEX, Type::Texture, "TEX"), 2029 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2996aaf08..7b2a3a00b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1858,10 +1858,7 @@ private:
1858 1858
1859 template <const std::string_view& opname, Type type> 1859 template <const std::string_view& opname, Type type>
1860 Expression Atomic(Operation operation) { 1860 Expression Atomic(Operation operation) {
1861 ASSERT(stage == ShaderType::Compute); 1861 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
1862 auto& smem = std::get<SmemNode>(*operation[0]);
1863
1864 return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
1865 Visit(operation[1]).As(type)), 1862 Visit(operation[1]).As(type)),
1866 type}; 1863 type};
1867 } 1864 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b53078721..1ab22251e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1123,15 +1123,7 @@ private:
1123 } 1123 }
1124 1124
1125 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1125 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1126 const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); 1126 return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
1127 const Id real = AsUint(Visit(gmem->GetRealAddress()));
1128 const Id base = AsUint(Visit(gmem->GetBaseAddress()));
1129
1130 Id offset = OpISub(t_uint, real, base);
1131 offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
1132 return {OpLoad(t_float,
1133 OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
1134 Type::Float};
1135 } 1127 }
1136 1128
1137 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 1129 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
@@ -1142,10 +1134,7 @@ private:
1142 } 1134 }
1143 1135
1144 if (const auto smem = std::get_if<SmemNode>(&*node)) { 1136 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1145 Id address = AsUint(Visit(smem->GetAddress())); 1137 return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
1146 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1147 const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
1148 return {OpLoad(t_uint, pointer), Type::Uint};
1149 } 1138 }
1150 1139
1151 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { 1140 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1339,20 +1328,10 @@ private:
1339 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; 1328 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
1340 1329
1341 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { 1330 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1342 ASSERT(stage == ShaderType::Compute); 1331 target = {GetSharedMemoryPointer(*smem), Type::Uint};
1343 Id address = AsUint(Visit(smem->GetAddress()));
1344 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1345 target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
1346 1332
1347 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1333 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1348 const Id real = AsUint(Visit(gmem->GetRealAddress())); 1334 target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
1349 const Id base = AsUint(Visit(gmem->GetBaseAddress()));
1350 const Id diff = OpISub(t_uint, real, base);
1351 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
1352
1353 const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
1354 target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
1355 Type::Float};
1356 1335
1357 } else { 1336 } else {
1358 UNIMPLEMENTED(); 1337 UNIMPLEMENTED();
@@ -1804,11 +1783,16 @@ private:
1804 return {}; 1783 return {};
1805 } 1784 }
1806 1785
1807 Expression UAtomicAdd(Operation operation) { 1786 Expression AtomicAdd(Operation operation) {
1808 const auto& smem = std::get<SmemNode>(*operation[0]); 1787 Id pointer;
1809 Id address = AsUint(Visit(smem.GetAddress())); 1788 if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
1810 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); 1789 pointer = GetSharedMemoryPointer(*smem);
1811 const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); 1790 } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
1791 pointer = GetGlobalMemoryPointer(*gmem);
1792 } else {
1793 UNREACHABLE();
1794 return {Constant(t_uint, 0), Type::Uint};
1795 }
1812 1796
1813 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); 1797 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
1814 const Id semantics = Constant(t_uint, 0U); 1798 const Id semantics = Constant(t_uint, 0U);
@@ -2243,6 +2227,22 @@ private:
2243 return {}; 2227 return {};
2244 } 2228 }
2245 2229
2230 Id GetGlobalMemoryPointer(const GmemNode& gmem) {
2231 const Id real = AsUint(Visit(gmem.GetRealAddress()));
2232 const Id base = AsUint(Visit(gmem.GetBaseAddress()));
2233 const Id diff = OpISub(t_uint, real, base);
2234 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
2235 const Id buffer = global_buffers.at(gmem.GetDescriptor());
2236 return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
2237 }
2238
2239 Id GetSharedMemoryPointer(const SmemNode& smem) {
2240 ASSERT(stage == ShaderType::Compute);
2241 Id address = AsUint(Visit(smem.GetAddress()));
2242 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
2243 return OpAccessChain(t_smem_uint, shared_memory, address);
2244 }
2245
2246 static constexpr std::array operation_decompilers = { 2246 static constexpr std::array operation_decompilers = {
2247 &SPIRVDecompiler::Assign, 2247 &SPIRVDecompiler::Assign,
2248 2248
@@ -2389,7 +2389,7 @@ private:
2389 &SPIRVDecompiler::AtomicImageXor, 2389 &SPIRVDecompiler::AtomicImageXor,
2390 &SPIRVDecompiler::AtomicImageExchange, 2390 &SPIRVDecompiler::AtomicImageExchange,
2391 2391
2392 &SPIRVDecompiler::UAtomicAdd, 2392 &SPIRVDecompiler::AtomicAdd,
2393 2393
2394 &SPIRVDecompiler::Branch, 2394 &SPIRVDecompiler::Branch,
2395 &SPIRVDecompiler::BranchIndirect, 2395 &SPIRVDecompiler::BranchIndirect,
@@ -2485,9 +2485,9 @@ private:
2485 2485
2486 Id t_smem_uint{}; 2486 Id t_smem_uint{};
2487 2487
2488 const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); 2488 const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
2489 const Id t_gmem_array = 2489 const Id t_gmem_array =
2490 Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); 2490 Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
2491 const Id t_gmem_struct = MemberDecorate( 2491 const Id t_gmem_struct = MemberDecorate(
2492 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); 2492 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2493 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); 2493 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7591a715f..3da833e81 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,6 +19,8 @@ namespace VideoCommon::Shader {
19using Tegra::Shader::AtomicOp; 19using Tegra::Shader::AtomicOp;
20using Tegra::Shader::AtomicType; 20using Tegra::Shader::AtomicType;
21using Tegra::Shader::Attribute; 21using Tegra::Shader::Attribute;
22using Tegra::Shader::GlobalAtomicOp;
23using Tegra::Shader::GlobalAtomicType;
22using Tegra::Shader::Instruction; 24using Tegra::Shader::Instruction;
23using Tegra::Shader::OpCode; 25using Tegra::Shader::OpCode;
24using Tegra::Shader::Register; 26using Tegra::Shader::Register;
@@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
335 } 337 }
336 break; 338 break;
337 } 339 }
340 case OpCode::Id::ATOM: {
341 UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
342 static_cast<int>(instr.atom.operation.Value()));
343 UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
344 static_cast<int>(instr.atom.type.Value()));
345
346 const auto [real_address, base_address, descriptor] =
347 TrackGlobalMemory(bb, instr, true, true);
348 if (!real_address || !base_address) {
349 // Tracking failed, skip atomic.
350 break;
351 }
352
353 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
354 Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
355 SetRegister(bb, instr.gpr0, std::move(value));
356 break;
357 }
338 case OpCode::Id::ATOMS: { 358 case OpCode::Id::ATOMS: {
339 UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", 359 UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
340 static_cast<int>(instr.atoms.operation.Value())); 360 static_cast<int>(instr.atoms.operation.Value()));
@@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
348 Node memory = GetSharedMemory(std::move(address)); 368 Node memory = GetSharedMemory(std::move(address));
349 Node data = GetRegister(instr.gpr20); 369 Node data = GetRegister(instr.gpr20);
350 370
351 Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); 371 Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
352 SetRegister(bb, instr.gpr0, std::move(value)); 372 SetRegister(bb, instr.gpr0, std::move(value));
353 break; 373 break;
354 } 374 }
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 075c7d07c..9af1f0228 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,7 @@ enum class OperationCode {
162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void 162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void 163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
164 164
165 UAtomicAdd, /// (smem, uint) -> uint 165 AtomicAdd, /// (memory, {u}int) -> {u}int
166 166
167 Branch, /// (uint branch_target) -> void 167 Branch, /// (uint branch_target) -> void
168 BranchIndirect, /// (uint branch_target) -> void 168 BranchIndirect, /// (uint branch_target) -> void