summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2020-01-29 16:49:54 -0500
committer: GitHub, 2020-01-29 16:49:54 -0500
commit 2db7adc42a3d72fe7b02fbf4902b98d69c777b2f (patch)
tree 197f2b968aa80756e329d9c76cfe72575e2f2ac5 /src
parent Merge pull request #3355 from ReinUsesLisp/break-down (diff)
parent shader/memory: Implement ATOM.ADD (diff)
download yuzu-2db7adc42a3d72fe7b02fbf4902b98d69c777b2f.tar.gz
yuzu-2db7adc42a3d72fe7b02fbf4902b98d69c777b2f.tar.xz
yuzu-2db7adc42a3d72fe7b02fbf4902b98d69c777b2f.zip
Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/shader_bytecode.h 30
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 5
-rw-r--r-- src/video_core/renderer_vulkan/vk_shader_decompiler.cpp 66
-rw-r--r-- src/video_core/shader/decode/memory.cpp 22
-rw-r--r-- src/video_core/shader/node.h 2
5 files changed, 86 insertions, 39 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 6f98bd827..f443ec0fe 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
227 Exch = 8, 227 Exch = 8,
228}; 228};
229 229
230enum class GlobalAtomicOp : u64 {
231 Add = 0,
232 Min = 1,
233 Max = 2,
234 Inc = 3,
235 Dec = 4,
236 And = 5,
237 Or = 6,
238 Xor = 7,
239 Exch = 8,
240 SafeAdd = 10,
241};
242
243enum class GlobalAtomicType : u64 {
244 U32 = 0,
245 S32 = 1,
246 U64 = 2,
247 F32_FTZ_RN = 3,
248 F16x2_FTZ_RN = 4,
249 S64 = 5,
250};
251
230enum class UniformType : u64 { 252enum class UniformType : u64 {
231 UnsignedByte = 0, 253 UnsignedByte = 0,
232 SignedByte = 1, 254 SignedByte = 1,
@@ -958,6 +980,12 @@ union Instruction {
958 } stg; 980 } stg;
959 981
960 union { 982 union {
983 BitField<52, 4, GlobalAtomicOp> operation;
984 BitField<49, 3, GlobalAtomicType> type;
985 BitField<28, 20, s64> offset;
986 } atom;
987
988 union {
961 BitField<52, 4, AtomicOp> operation; 989 BitField<52, 4, AtomicOp> operation;
962 BitField<28, 2, AtomicType> type; 990 BitField<28, 2, AtomicType> type;
963 BitField<30, 22, s64> offset; 991 BitField<30, 22, s64> offset;
@@ -1690,6 +1718,7 @@ public:
1690 ST_S, 1718 ST_S,
1691 ST, // Store in generic memory 1719 ST, // Store in generic memory
1692 STG, // Store in global memory 1720 STG, // Store in global memory
1721 ATOM, // Atomic operation on global memory
1693 ATOMS, // Atomic operation on shared memory 1722 ATOMS, // Atomic operation on shared memory
1694 AL2P, // Transforms attribute memory into physical memory 1723 AL2P, // Transforms attribute memory into physical memory
1695 TEX, 1724 TEX,
@@ -1994,6 +2023,7 @@ private:
1994 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 2023 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1995 INST("101-------------", Id::ST, Type::Memory, "ST"), 2024 INST("101-------------", Id::ST, Type::Memory, "ST"),
1996 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 2025 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
2026 INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
1997 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), 2027 INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
1998 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), 2028 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
1999 INST("110000----111---", Id::TEX, Type::Texture, "TEX"), 2029 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6531dfe9b..a1ac3d7a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1857,10 +1857,7 @@ private:
1857 1857
1858 template <const std::string_view& opname, Type type> 1858 template <const std::string_view& opname, Type type>
1859 Expression Atomic(Operation operation) { 1859 Expression Atomic(Operation operation) {
1860 ASSERT(stage == ShaderType::Compute); 1860 return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
1861 auto& smem = std::get<SmemNode>(*operation[0]);
1862
1863 return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
1864 Visit(operation[1]).As(type)), 1861 Visit(operation[1]).As(type)),
1865 type}; 1862 type};
1866 } 1863 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index b53078721..1ab22251e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1123,15 +1123,7 @@ private:
1123 } 1123 }
1124 1124
1125 if (const auto gmem = std::get_if<GmemNode>(&*node)) { 1125 if (const auto gmem = std::get_if<GmemNode>(&*node)) {
1126 const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); 1126 return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
1127 const Id real = AsUint(Visit(gmem->GetRealAddress()));
1128 const Id base = AsUint(Visit(gmem->GetBaseAddress()));
1129
1130 Id offset = OpISub(t_uint, real, base);
1131 offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
1132 return {OpLoad(t_float,
1133 OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
1134 Type::Float};
1135 } 1127 }
1136 1128
1137 if (const auto lmem = std::get_if<LmemNode>(&*node)) { 1129 if (const auto lmem = std::get_if<LmemNode>(&*node)) {
@@ -1142,10 +1134,7 @@ private:
1142 } 1134 }
1143 1135
1144 if (const auto smem = std::get_if<SmemNode>(&*node)) { 1136 if (const auto smem = std::get_if<SmemNode>(&*node)) {
1145 Id address = AsUint(Visit(smem->GetAddress())); 1137 return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
1146 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1147 const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
1148 return {OpLoad(t_uint, pointer), Type::Uint};
1149 } 1138 }
1150 1139
1151 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { 1140 if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@@ -1339,20 +1328,10 @@ private:
1339 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; 1328 target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
1340 1329
1341 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { 1330 } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
1342 ASSERT(stage == ShaderType::Compute); 1331 target = {GetSharedMemoryPointer(*smem), Type::Uint};
1343 Id address = AsUint(Visit(smem->GetAddress()));
1344 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
1345 target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
1346 1332
1347 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1333 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1348 const Id real = AsUint(Visit(gmem->GetRealAddress())); 1334 target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
1349 const Id base = AsUint(Visit(gmem->GetBaseAddress()));
1350 const Id diff = OpISub(t_uint, real, base);
1351 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
1352
1353 const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
1354 target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
1355 Type::Float};
1356 1335
1357 } else { 1336 } else {
1358 UNIMPLEMENTED(); 1337 UNIMPLEMENTED();
@@ -1804,11 +1783,16 @@ private:
1804 return {}; 1783 return {};
1805 } 1784 }
1806 1785
1807 Expression UAtomicAdd(Operation operation) { 1786 Expression AtomicAdd(Operation operation) {
1808 const auto& smem = std::get<SmemNode>(*operation[0]); 1787 Id pointer;
1809 Id address = AsUint(Visit(smem.GetAddress())); 1788 if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
1810 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); 1789 pointer = GetSharedMemoryPointer(*smem);
1811 const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); 1790 } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
1791 pointer = GetGlobalMemoryPointer(*gmem);
1792 } else {
1793 UNREACHABLE();
1794 return {Constant(t_uint, 0), Type::Uint};
1795 }
1812 1796
1813 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); 1797 const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
1814 const Id semantics = Constant(t_uint, 0U); 1798 const Id semantics = Constant(t_uint, 0U);
@@ -2243,6 +2227,22 @@ private:
2243 return {}; 2227 return {};
2244 } 2228 }
2245 2229
2230 Id GetGlobalMemoryPointer(const GmemNode& gmem) {
2231 const Id real = AsUint(Visit(gmem.GetRealAddress()));
2232 const Id base = AsUint(Visit(gmem.GetBaseAddress()));
2233 const Id diff = OpISub(t_uint, real, base);
2234 const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
2235 const Id buffer = global_buffers.at(gmem.GetDescriptor());
2236 return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
2237 }
2238
2239 Id GetSharedMemoryPointer(const SmemNode& smem) {
2240 ASSERT(stage == ShaderType::Compute);
2241 Id address = AsUint(Visit(smem.GetAddress()));
2242 address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
2243 return OpAccessChain(t_smem_uint, shared_memory, address);
2244 }
2245
2246 static constexpr std::array operation_decompilers = { 2246 static constexpr std::array operation_decompilers = {
2247 &SPIRVDecompiler::Assign, 2247 &SPIRVDecompiler::Assign,
2248 2248
@@ -2389,7 +2389,7 @@ private:
2389 &SPIRVDecompiler::AtomicImageXor, 2389 &SPIRVDecompiler::AtomicImageXor,
2390 &SPIRVDecompiler::AtomicImageExchange, 2390 &SPIRVDecompiler::AtomicImageExchange,
2391 2391
2392 &SPIRVDecompiler::UAtomicAdd, 2392 &SPIRVDecompiler::AtomicAdd,
2393 2393
2394 &SPIRVDecompiler::Branch, 2394 &SPIRVDecompiler::Branch,
2395 &SPIRVDecompiler::BranchIndirect, 2395 &SPIRVDecompiler::BranchIndirect,
@@ -2485,9 +2485,9 @@ private:
2485 2485
2486 Id t_smem_uint{}; 2486 Id t_smem_uint{};
2487 2487
2488 const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); 2488 const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
2489 const Id t_gmem_array = 2489 const Id t_gmem_array =
2490 Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); 2490 Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
2491 const Id t_gmem_struct = MemberDecorate( 2491 const Id t_gmem_struct = MemberDecorate(
2492 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); 2492 Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
2493 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); 2493 const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7591a715f..3da833e81 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -19,6 +19,8 @@ namespace VideoCommon::Shader {
19using Tegra::Shader::AtomicOp; 19using Tegra::Shader::AtomicOp;
20using Tegra::Shader::AtomicType; 20using Tegra::Shader::AtomicType;
21using Tegra::Shader::Attribute; 21using Tegra::Shader::Attribute;
22using Tegra::Shader::GlobalAtomicOp;
23using Tegra::Shader::GlobalAtomicType;
22using Tegra::Shader::Instruction; 24using Tegra::Shader::Instruction;
23using Tegra::Shader::OpCode; 25using Tegra::Shader::OpCode;
24using Tegra::Shader::Register; 26using Tegra::Shader::Register;
@@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
335 } 337 }
336 break; 338 break;
337 } 339 }
340 case OpCode::Id::ATOM: {
341 UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
342 static_cast<int>(instr.atom.operation.Value()));
343 UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
344 static_cast<int>(instr.atom.type.Value()));
345
346 const auto [real_address, base_address, descriptor] =
347 TrackGlobalMemory(bb, instr, true, true);
348 if (!real_address || !base_address) {
349 // Tracking failed, skip atomic.
350 break;
351 }
352
353 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
354 Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
355 SetRegister(bb, instr.gpr0, std::move(value));
356 break;
357 }
338 case OpCode::Id::ATOMS: { 358 case OpCode::Id::ATOMS: {
339 UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", 359 UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
340 static_cast<int>(instr.atoms.operation.Value())); 360 static_cast<int>(instr.atoms.operation.Value()));
@@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
348 Node memory = GetSharedMemory(std::move(address)); 368 Node memory = GetSharedMemory(std::move(address));
349 Node data = GetRegister(instr.gpr20); 369 Node data = GetRegister(instr.gpr20);
350 370
351 Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); 371 Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
352 SetRegister(bb, instr.gpr0, std::move(value)); 372 SetRegister(bb, instr.gpr0, std::move(value));
353 break; 373 break;
354 } 374 }
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 075c7d07c..9af1f0228 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -162,7 +162,7 @@ enum class OperationCode {
162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void 162 AtomicImageXor, /// (MetaImage, int[N] coords) -> void
163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void 163 AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
164 164
165 UAtomicAdd, /// (smem, uint) -> uint 165 AtomicAdd, /// (memory, {u}int) -> {u}int
166 166
167 Branch, /// (uint branch_target) -> void 167 Branch, /// (uint branch_target) -> void
168 BranchIndirect, /// (uint branch_target) -> void 168 BranchIndirect, /// (uint branch_target) -> void