diff options
| author | 2019-07-30 00:21:46 -0300 | |
|---|---|---|
| committer | 2019-09-05 01:38:37 -0300 | |
| commit | f17415d431777bb234714a3c6a97072872b2cc71 (patch) | |
| tree | cb6d6fea921dc9609d2537976a8cbb4c4cb7811c | |
| parent | Merge pull request #2797 from FearlessTobi/port-4877 (diff) | |
| download | yuzu-f17415d431777bb234714a3c6a97072872b2cc71.tar.gz yuzu-f17415d431777bb234714a3c6a97072872b2cc71.tar.xz yuzu-f17415d431777bb234714a3c6a97072872b2cc71.zip | |
shader_ir: Implement ST_S
This instruction writes to a memory buffer that is shared among the threads
of the same work group. This buffer is known as "shared" memory in GLSL.
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 25 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 16 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 6 |
4 files changed, 45 insertions, 11 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ed108bea8..8f74fa7d8 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -35,7 +35,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { | |||
| 35 | return 1; | 35 | return 1; |
| 36 | } | 36 | } |
| 37 | } | 37 | } |
| 38 | } // namespace | 38 | } // Anonymous namespace |
| 39 | 39 | ||
| 40 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | 40 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
| 41 | const Instruction instr = {program_code[pc]}; | 41 | const Instruction instr = {program_code[pc]}; |
| @@ -209,27 +209,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 209 | 209 | ||
| 210 | break; | 210 | break; |
| 211 | } | 211 | } |
| 212 | case OpCode::Id::ST_L: { | 212 | case OpCode::Id::ST_L: |
| 213 | LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", | 213 | LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", |
| 214 | static_cast<u64>(instr.st_l.cache_management.Value())); | 214 | static_cast<u64>(instr.st_l.cache_management.Value())); |
| 215 | 215 | [[fallthrough]]; | |
| 216 | const auto GetLmemAddr = [&](s32 offset) { | 216 | case OpCode::Id::ST_S: { |
| 217 | const auto GetAddress = [&](s32 offset) { | ||
| 217 | ASSERT(offset % 4 == 0); | 218 | ASSERT(offset % 4 == 0); |
| 218 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); | 219 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); |
| 219 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | 220 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); |
| 220 | }; | 221 | }; |
| 221 | 222 | ||
| 223 | const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L | ||
| 224 | ? &ShaderIR::SetLocalMemory | ||
| 225 | : &ShaderIR::SetSharedMemory; | ||
| 226 | |||
| 222 | switch (instr.ldst_sl.type.Value()) { | 227 | switch (instr.ldst_sl.type.Value()) { |
| 223 | case Tegra::Shader::StoreType::Bits128: | 228 | case Tegra::Shader::StoreType::Bits128: |
| 224 | SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3)); | 229 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); |
| 225 | SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2)); | 230 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); |
| 231 | [[fallthrough]]; | ||
| 226 | case Tegra::Shader::StoreType::Bits64: | 232 | case Tegra::Shader::StoreType::Bits64: |
| 227 | SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1)); | 233 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); |
| 234 | [[fallthrough]]; | ||
| 228 | case Tegra::Shader::StoreType::Bits32: | 235 | case Tegra::Shader::StoreType::Bits32: |
| 229 | SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0)); | 236 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); |
| 230 | break; | 237 | break; |
| 231 | default: | 238 | default: |
| 232 | UNIMPLEMENTED_MSG("ST_L Unhandled type: {}", | 239 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), |
| 233 | static_cast<u32>(instr.ldst_sl.type.Value())); | 240 | static_cast<u32>(instr.ldst_sl.type.Value())); |
| 234 | } | 241 | } |
| 235 | break; | 242 | break; |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5db9313c4..e0d1979fa 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -198,12 +198,13 @@ class PredicateNode; | |||
| 198 | class AbufNode; | 198 | class AbufNode; |
| 199 | class CbufNode; | 199 | class CbufNode; |
| 200 | class LmemNode; | 200 | class LmemNode; |
| 201 | class SmemNode; | ||
| 201 | class GmemNode; | 202 | class GmemNode; |
| 202 | class CommentNode; | 203 | class CommentNode; |
| 203 | 204 | ||
| 204 | using NodeData = | 205 | using NodeData = |
| 205 | std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, | 206 | std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, |
| 206 | PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; | 207 | PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>; |
| 207 | using Node = std::shared_ptr<NodeData>; | 208 | using Node = std::shared_ptr<NodeData>; |
| 208 | using Node4 = std::array<Node, 4>; | 209 | using Node4 = std::array<Node, 4>; |
| 209 | using NodeBlock = std::vector<Node>; | 210 | using NodeBlock = std::vector<Node>; |
| @@ -536,6 +537,19 @@ private: | |||
| 536 | Node address; | 537 | Node address; |
| 537 | }; | 538 | }; |
| 538 | 539 | ||
| 540 | /// Shared memory node | ||
| 541 | class SmemNode final { | ||
| 542 | public: | ||
| 543 | explicit SmemNode(Node address) : address{std::move(address)} {} | ||
| 544 | |||
| 545 | const Node& GetAddress() const { | ||
| 546 | return address; | ||
| 547 | } | ||
| 548 | |||
| 549 | private: | ||
| 550 | Node address; | ||
| 551 | }; | ||
| 552 | |||
| 539 | /// Global memory node | 553 | /// Global memory node |
| 540 | class GmemNode final { | 554 | class GmemNode final { |
| 541 | public: | 555 | public: |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 1e5c7f660..bbbab0bca 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -137,6 +137,10 @@ Node ShaderIR::GetLocalMemory(Node address) { | |||
| 137 | return MakeNode<LmemNode>(std::move(address)); | 137 | return MakeNode<LmemNode>(std::move(address)); |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | Node ShaderIR::GetSharedMemory(Node address) { | ||
| 141 | return MakeNode<SmemNode>(std::move(address)); | ||
| 142 | } | ||
| 143 | |||
| 140 | Node ShaderIR::GetTemporary(u32 id) { | 144 | Node ShaderIR::GetTemporary(u32 id) { |
| 141 | return GetRegister(Register::ZeroIndex + 1 + id); | 145 | return GetRegister(Register::ZeroIndex + 1 + id); |
| 142 | } | 146 | } |
| @@ -378,6 +382,11 @@ void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | |||
| 378 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | 382 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); |
| 379 | } | 383 | } |
| 380 | 384 | ||
| 385 | void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { | ||
| 386 | bb.push_back( | ||
| 387 | Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); | ||
| 388 | } | ||
| 389 | |||
| 381 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { | 390 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { |
| 382 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); | 391 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); |
| 383 | } | 392 | } |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index bcc9b79b6..ab57388ed 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -208,6 +208,8 @@ private: | |||
| 208 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 208 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 209 | /// Generates a node representing a local memory address | 209 | /// Generates a node representing a local memory address |
| 210 | Node GetLocalMemory(Node address); | 210 | Node GetLocalMemory(Node address); |
| 211 | /// Generates a node representing a shared memory address | ||
| 212 | Node GetSharedMemory(Node address); | ||
| 211 | /// Generates a temporary, internally it uses a post-RZ register | 213 | /// Generates a temporary, internally it uses a post-RZ register |
| 212 | Node GetTemporary(u32 id); | 214 | Node GetTemporary(u32 id); |
| 213 | 215 | ||
| @@ -217,8 +219,10 @@ private: | |||
| 217 | void SetPredicate(NodeBlock& bb, u64 dest, Node src); | 219 | void SetPredicate(NodeBlock& bb, u64 dest, Node src); |
| 218 | /// Sets an internal flag. src value must be a bool-evaluated node | 220 | /// Sets an internal flag. src value must be a bool-evaluated node |
| 219 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | 221 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); |
| 220 | /// Sets a local memory address. address and value must be a number-evaluated node | 222 | /// Sets a local memory address with a value. |
| 221 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | 223 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); |
| 224 | /// Sets a shared memory address with a value. | ||
| 225 | void SetSharedMemory(NodeBlock& bb, Node address, Node value); | ||
| 222 | /// Sets a temporary. Internally it uses a post-RZ register | 226 | /// Sets a temporary. Internally it uses a post-RZ register |
| 223 | void SetTemporary(NodeBlock& bb, u32 id, Node value); | 227 | void SetTemporary(NodeBlock& bb, u32 id, Node value); |
| 224 | 228 | ||