diff options
| author | 2019-05-24 18:24:26 -0400 | |
|---|---|---|
| committer | 2019-05-24 18:24:26 -0400 | |
| commit | 1a2d90ab0936bc53cc856bacbf73e88c52f926fd (patch) | |
| tree | af1eeca043ab4842afc5066a657eb3302a5be660 | |
| parent | Merge pull request #2504 from lioncash/config (diff) | |
| parent | shader/memory: Implement ST (generic memory) (diff) | |
| download | yuzu-1a2d90ab0936bc53cc856bacbf73e88c52f926fd.tar.gz yuzu-1a2d90ab0936bc53cc856bacbf73e88c52f926fd.tar.xz yuzu-1a2d90ab0936bc53cc856bacbf73e88c52f926fd.zip | |
Merge pull request #2485 from ReinUsesLisp/generic-memory
shader/memory: Implement generic memory stores and loads (ST and LD)
| -rw-r--r-- | src/video_core/engines/shader_bytecode.h | 20 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 82 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 6 |
3 files changed, 73 insertions, 35 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7bbc556da..e83f25fa1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h | |||
| @@ -530,6 +530,11 @@ union Instruction { | |||
| 530 | BitField<48, 16, u64> opcode; | 530 | BitField<48, 16, u64> opcode; |
| 531 | 531 | ||
| 532 | union { | 532 | union { |
| 533 | BitField<8, 8, Register> gpr; | ||
| 534 | BitField<20, 24, s64> offset; | ||
| 535 | } gmem; | ||
| 536 | |||
| 537 | union { | ||
| 533 | BitField<20, 16, u64> imm20_16; | 538 | BitField<20, 16, u64> imm20_16; |
| 534 | BitField<20, 19, u64> imm20_19; | 539 | BitField<20, 19, u64> imm20_19; |
| 535 | BitField<20, 32, s64> imm20_32; | 540 | BitField<20, 32, s64> imm20_32; |
| @@ -812,13 +817,11 @@ union Instruction { | |||
| 812 | union { | 817 | union { |
| 813 | BitField<48, 3, UniformType> type; | 818 | BitField<48, 3, UniformType> type; |
| 814 | BitField<46, 2, u64> cache_mode; | 819 | BitField<46, 2, u64> cache_mode; |
| 815 | BitField<20, 24, s64> immediate_offset; | ||
| 816 | } ldg; | 820 | } ldg; |
| 817 | 821 | ||
| 818 | union { | 822 | union { |
| 819 | BitField<48, 3, UniformType> type; | 823 | BitField<48, 3, UniformType> type; |
| 820 | BitField<46, 2, u64> cache_mode; | 824 | BitField<46, 2, u64> cache_mode; |
| 821 | BitField<20, 24, s64> immediate_offset; | ||
| 822 | } stg; | 825 | } stg; |
| 823 | 826 | ||
| 824 | union { | 827 | union { |
| @@ -828,6 +831,11 @@ union Instruction { | |||
| 828 | } al2p; | 831 | } al2p; |
| 829 | 832 | ||
| 830 | union { | 833 | union { |
| 834 | BitField<53, 3, UniformType> type; | ||
| 835 | BitField<52, 1, u64> extended; | ||
| 836 | } generic; | ||
| 837 | |||
| 838 | union { | ||
| 831 | BitField<0, 3, u64> pred0; | 839 | BitField<0, 3, u64> pred0; |
| 832 | BitField<3, 3, u64> pred3; | 840 | BitField<3, 3, u64> pred3; |
| 833 | BitField<7, 1, u64> abs_a; | 841 | BitField<7, 1, u64> abs_a; |
| @@ -1387,10 +1395,12 @@ public: | |||
| 1387 | LD_L, | 1395 | LD_L, |
| 1388 | LD_S, | 1396 | LD_S, |
| 1389 | LD_C, | 1397 | LD_C, |
| 1398 | LD, // Load from generic memory | ||
| 1399 | LDG, // Load from global memory | ||
| 1390 | ST_A, | 1400 | ST_A, |
| 1391 | ST_L, | 1401 | ST_L, |
| 1392 | ST_S, | 1402 | ST_S, |
| 1393 | LDG, // Load from global memory | 1403 | ST, // Store in generic memory |
| 1394 | STG, // Store in global memory | 1404 | STG, // Store in global memory |
| 1395 | AL2P, // Transforms attribute memory into physical memory | 1405 | AL2P, // Transforms attribute memory into physical memory |
| 1396 | TEX, | 1406 | TEX, |
| @@ -1658,10 +1668,12 @@ private: | |||
| 1658 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), | 1668 | INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), |
| 1659 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), | 1669 | INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), |
| 1660 | INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), | 1670 | INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), |
| 1671 | INST("100-------------", Id::LD, Type::Memory, "LD"), | ||
| 1672 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | ||
| 1661 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), | 1673 | INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), |
| 1662 | INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), | 1674 | INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), |
| 1663 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | 1675 | INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |
| 1664 | INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), | 1676 | INST("101-------------", Id::ST, Type::Memory, "ST"), |
| 1665 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), | 1677 | INST("1110111011011---", Id::STG, Type::Memory, "STG"), |
| 1666 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | 1678 | INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), |
| 1667 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | 1679 | INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 165c2b41b..e6a010a7d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -146,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 146 | } | 146 | } |
| 147 | break; | 147 | break; |
| 148 | } | 148 | } |
| 149 | case OpCode::Id::LD: | ||
| 149 | case OpCode::Id::LDG: { | 150 | case OpCode::Id::LDG: { |
| 151 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 152 | switch (opcode->get().GetId()) { | ||
| 153 | case OpCode::Id::LD: | ||
| 154 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); | ||
| 155 | return instr.generic.type; | ||
| 156 | case OpCode::Id::LDG: | ||
| 157 | return instr.ldg.type; | ||
| 158 | default: | ||
| 159 | UNREACHABLE(); | ||
| 160 | return {}; | ||
| 161 | } | ||
| 162 | }(); | ||
| 163 | |||
| 150 | const auto [real_address_base, base_address, descriptor] = | 164 | const auto [real_address_base, base_address, descriptor] = |
| 151 | TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), | 165 | TrackAndGetGlobalMemory(bb, instr, false); |
| 152 | static_cast<u32>(instr.ldg.immediate_offset.Value()), false); | ||
| 153 | 166 | ||
| 154 | const u32 count = GetUniformTypeElementsCount(instr.ldg.type); | 167 | const u32 count = GetUniformTypeElementsCount(type); |
| 155 | for (u32 i = 0; i < count; ++i) { | 168 | for (u32 i = 0; i < count; ++i) { |
| 156 | const Node it_offset = Immediate(i * 4); | 169 | const Node it_offset = Immediate(i * 4); |
| 157 | const Node real_address = | 170 | const Node real_address = |
| @@ -165,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 165 | } | 178 | } |
| 166 | break; | 179 | break; |
| 167 | } | 180 | } |
| 168 | case OpCode::Id::STG: { | ||
| 169 | const auto [real_address_base, base_address, descriptor] = | ||
| 170 | TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), | ||
| 171 | static_cast<u32>(instr.stg.immediate_offset.Value()), true); | ||
| 172 | |||
| 173 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | ||
| 174 | SetTemporal(bb, 0, real_address_base); | ||
| 175 | |||
| 176 | const u32 count = GetUniformTypeElementsCount(instr.stg.type); | ||
| 177 | for (u32 i = 0; i < count; ++i) { | ||
| 178 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | ||
| 179 | } | ||
| 180 | for (u32 i = 0; i < count; ++i) { | ||
| 181 | const Node it_offset = Immediate(i * 4); | ||
| 182 | const Node real_address = | ||
| 183 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | ||
| 184 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | ||
| 185 | |||
| 186 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | ||
| 187 | } | ||
| 188 | break; | ||
| 189 | } | ||
| 190 | case OpCode::Id::ST_A: { | 181 | case OpCode::Id::ST_A: { |
| 191 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | 182 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, |
| 192 | "Indirect attribute loads are not supported"); | 183 | "Indirect attribute loads are not supported"); |
| @@ -242,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 242 | } | 233 | } |
| 243 | break; | 234 | break; |
| 244 | } | 235 | } |
| 236 | case OpCode::Id::ST: | ||
| 237 | case OpCode::Id::STG: { | ||
| 238 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 239 | switch (opcode->get().GetId()) { | ||
| 240 | case OpCode::Id::ST: | ||
| 241 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); | ||
| 242 | return instr.generic.type; | ||
| 243 | case OpCode::Id::STG: | ||
| 244 | return instr.stg.type; | ||
| 245 | default: | ||
| 246 | UNREACHABLE(); | ||
| 247 | return {}; | ||
| 248 | } | ||
| 249 | }(); | ||
| 250 | |||
| 251 | const auto [real_address_base, base_address, descriptor] = | ||
| 252 | TrackAndGetGlobalMemory(bb, instr, true); | ||
| 253 | |||
| 254 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | ||
| 255 | SetTemporal(bb, 0, real_address_base); | ||
| 256 | |||
| 257 | const u32 count = GetUniformTypeElementsCount(type); | ||
| 258 | for (u32 i = 0; i < count; ++i) { | ||
| 259 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | ||
| 260 | } | ||
| 261 | for (u32 i = 0; i < count; ++i) { | ||
| 262 | const Node it_offset = Immediate(i * 4); | ||
| 263 | const Node real_address = | ||
| 264 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | ||
| 265 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | ||
| 266 | |||
| 267 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | ||
| 268 | } | ||
| 269 | break; | ||
| 270 | } | ||
| 245 | case OpCode::Id::AL2P: { | 271 | case OpCode::Id::AL2P: { |
| 246 | // Ignore al2p.direction since we don't care about it. | 272 | // Ignore al2p.direction since we don't care about it. |
| 247 | 273 | ||
| @@ -265,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 265 | } | 291 | } |
| 266 | 292 | ||
| 267 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, | 293 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, |
| 268 | Node addr_register, | 294 | Instruction instr, |
| 269 | u32 immediate_offset, | ||
| 270 | bool is_write) { | 295 | bool is_write) { |
| 296 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | ||
| 297 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | ||
| 298 | |||
| 271 | const Node base_address{ | 299 | const Node base_address{ |
| 272 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | 300 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; |
| 273 | const auto cbuf = std::get_if<CbufNode>(base_address); | 301 | const auto cbuf = std::get_if<CbufNode>(base_address); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 34d183ec7..35f72bddb 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -818,10 +818,8 @@ private: | |||
| 818 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | 818 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, |
| 819 | s64 cursor) const; | 819 | s64 cursor) const; |
| 820 | 820 | ||
| 821 | std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, | 821 | std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory( |
| 822 | Node addr_register, | 822 | NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); |
| 823 | u32 immediate_offset, | ||
| 824 | bool is_write); | ||
| 825 | 823 | ||
| 826 | template <typename... T> | 824 | template <typename... T> |
| 827 | Node Operation(OperationCode code, const T*... operands) { | 825 | Node Operation(OperationCode code, const T*... operands) { |