summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/engines/shader_bytecode.h20
-rw-r--r--src/video_core/shader/decode/memory.cpp82
-rw-r--r--src/video_core/shader/shader_ir.h6
3 files changed, 73 insertions, 35 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7bbc556da..e83f25fa1 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -530,6 +530,11 @@ union Instruction {
530 BitField<48, 16, u64> opcode; 530 BitField<48, 16, u64> opcode;
531 531
532 union { 532 union {
533 BitField<8, 8, Register> gpr;
534 BitField<20, 24, s64> offset;
535 } gmem;
536
537 union {
533 BitField<20, 16, u64> imm20_16; 538 BitField<20, 16, u64> imm20_16;
534 BitField<20, 19, u64> imm20_19; 539 BitField<20, 19, u64> imm20_19;
535 BitField<20, 32, s64> imm20_32; 540 BitField<20, 32, s64> imm20_32;
@@ -812,13 +817,11 @@ union Instruction {
812 union { 817 union {
813 BitField<48, 3, UniformType> type; 818 BitField<48, 3, UniformType> type;
814 BitField<46, 2, u64> cache_mode; 819 BitField<46, 2, u64> cache_mode;
815 BitField<20, 24, s64> immediate_offset;
816 } ldg; 820 } ldg;
817 821
818 union { 822 union {
819 BitField<48, 3, UniformType> type; 823 BitField<48, 3, UniformType> type;
820 BitField<46, 2, u64> cache_mode; 824 BitField<46, 2, u64> cache_mode;
821 BitField<20, 24, s64> immediate_offset;
822 } stg; 825 } stg;
823 826
824 union { 827 union {
@@ -828,6 +831,11 @@ union Instruction {
828 } al2p; 831 } al2p;
829 832
830 union { 833 union {
834 BitField<53, 3, UniformType> type;
835 BitField<52, 1, u64> extended;
836 } generic;
837
838 union {
831 BitField<0, 3, u64> pred0; 839 BitField<0, 3, u64> pred0;
832 BitField<3, 3, u64> pred3; 840 BitField<3, 3, u64> pred3;
833 BitField<7, 1, u64> abs_a; 841 BitField<7, 1, u64> abs_a;
@@ -1387,10 +1395,12 @@ public:
1387 LD_L, 1395 LD_L,
1388 LD_S, 1396 LD_S,
1389 LD_C, 1397 LD_C,
1398 LD, // Load from generic memory
1399 LDG, // Load from global memory
1390 ST_A, 1400 ST_A,
1391 ST_L, 1401 ST_L,
1392 ST_S, 1402 ST_S,
1393 LDG, // Load from global memory 1403 ST, // Store in generic memory
1394 STG, // Store in global memory 1404 STG, // Store in global memory
1395 AL2P, // Transforms attribute memory into physical memory 1405 AL2P, // Transforms attribute memory into physical memory
1396 TEX, 1406 TEX,
@@ -1658,10 +1668,12 @@ private:
1658 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), 1668 INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
1659 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), 1669 INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
1660 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), 1670 INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
1671 INST("100-------------", Id::LD, Type::Memory, "LD"),
1672 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1661 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), 1673 INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
1662 INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), 1674 INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
1663 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1675 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1664 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1676 INST("101-------------", Id::ST, Type::Memory, "ST"),
1665 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1677 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1666 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), 1678 INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
1667 INST("110000----111---", Id::TEX, Type::Texture, "TEX"), 1679 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 165c2b41b..e6a010a7d 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -146,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
146 } 146 }
147 break; 147 break;
148 } 148 }
149 case OpCode::Id::LD:
149 case OpCode::Id::LDG: { 150 case OpCode::Id::LDG: {
151 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
152 switch (opcode->get().GetId()) {
153 case OpCode::Id::LD:
154 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
155 return instr.generic.type;
156 case OpCode::Id::LDG:
157 return instr.ldg.type;
158 default:
159 UNREACHABLE();
160 return {};
161 }
162 }();
163
150 const auto [real_address_base, base_address, descriptor] = 164 const auto [real_address_base, base_address, descriptor] =
151 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), 165 TrackAndGetGlobalMemory(bb, instr, false);
152 static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
153 166
154 const u32 count = GetUniformTypeElementsCount(instr.ldg.type); 167 const u32 count = GetUniformTypeElementsCount(type);
155 for (u32 i = 0; i < count; ++i) { 168 for (u32 i = 0; i < count; ++i) {
156 const Node it_offset = Immediate(i * 4); 169 const Node it_offset = Immediate(i * 4);
157 const Node real_address = 170 const Node real_address =
@@ -165,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
165 } 178 }
166 break; 179 break;
167 } 180 }
168 case OpCode::Id::STG: {
169 const auto [real_address_base, base_address, descriptor] =
170 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
171 static_cast<u32>(instr.stg.immediate_offset.Value()), true);
172
173 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
174 SetTemporal(bb, 0, real_address_base);
175
176 const u32 count = GetUniformTypeElementsCount(instr.stg.type);
177 for (u32 i = 0; i < count; ++i) {
178 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
179 }
180 for (u32 i = 0; i < count; ++i) {
181 const Node it_offset = Immediate(i * 4);
182 const Node real_address =
183 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
184 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
185
186 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
187 }
188 break;
189 }
190 case OpCode::Id::ST_A: { 181 case OpCode::Id::ST_A: {
191 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, 182 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
192 "Indirect attribute loads are not supported"); 183 "Indirect attribute loads are not supported");
@@ -242,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
242 } 233 }
243 break; 234 break;
244 } 235 }
236 case OpCode::Id::ST:
237 case OpCode::Id::STG: {
238 const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
239 switch (opcode->get().GetId()) {
240 case OpCode::Id::ST:
241 UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
242 return instr.generic.type;
243 case OpCode::Id::STG:
244 return instr.stg.type;
245 default:
246 UNREACHABLE();
247 return {};
248 }
249 }();
250
251 const auto [real_address_base, base_address, descriptor] =
252 TrackAndGetGlobalMemory(bb, instr, true);
253
254 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
255 SetTemporal(bb, 0, real_address_base);
256
257 const u32 count = GetUniformTypeElementsCount(type);
258 for (u32 i = 0; i < count; ++i) {
259 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
260 }
261 for (u32 i = 0; i < count; ++i) {
262 const Node it_offset = Immediate(i * 4);
263 const Node real_address =
264 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
265 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
266
267 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
268 }
269 break;
270 }
245 case OpCode::Id::AL2P: { 271 case OpCode::Id::AL2P: {
246 // Ignore al2p.direction since we don't care about it. 272 // Ignore al2p.direction since we don't care about it.
247 273
@@ -265,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
265} 291}
266 292
267std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, 293std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
268 Node addr_register, 294 Instruction instr,
269 u32 immediate_offset,
270 bool is_write) { 295 bool is_write) {
296 const auto addr_register{GetRegister(instr.gmem.gpr)};
297 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
298
271 const Node base_address{ 299 const Node base_address{
272 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; 300 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
273 const auto cbuf = std::get_if<CbufNode>(base_address); 301 const auto cbuf = std::get_if<CbufNode>(base_address);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 34d183ec7..35f72bddb 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -818,10 +818,8 @@ private:
818 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, 818 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
819 s64 cursor) const; 819 s64 cursor) const;
820 820
821 std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb, 821 std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
822 Node addr_register, 822 NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
823 u32 immediate_offset,
824 bool is_write);
825 823
826 template <typename... T> 824 template <typename... T>
827 Node Operation(OperationCode code, const T*... operands) { 825 Node Operation(OperationCode code, const T*... operands) {