summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
author: ReinUsesLisp, 2019-02-07 00:05:41 -0300
committer: ReinUsesLisp, 2019-04-14 00:25:32 -0300
commit: 5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80 (patch)
tree: d3f411c5b0c15539bd36e86944cfdc28972fb98a /src/video_core/shader
parent: Merge pull request #2378 from lioncash/ro (diff)
download: yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.tar.gz
yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.tar.xz
yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.zip
shader_ir: Implement STG, keep track of global memory usage and flush
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- src/video_core/shader/decode/memory.cpp | 109
-rw-r--r-- src/video_core/shader/shader_ir.h | 16
2 files changed, 87 insertions, 38 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea3c71eed..ff19ada55 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -18,6 +18,23 @@ using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20 20
21namespace {
/// Maps a UniformType to the number of elements it covers.
/// Single -> 1, Double -> 2, Quad and UnsignedQuad -> 4.
/// Any other value is reported through UNIMPLEMENTED_MSG and falls back to 1.
/// The LDG and STG decoders in this file use the result as their per-element loop count.
22u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
23 switch (uniform_type) {
24 case Tegra::Shader::UniformType::Single:
25 return 1;
26 case Tegra::Shader::UniformType::Double:
27 return 2;
28 case Tegra::Shader::UniformType::Quad:
29 case Tegra::Shader::UniformType::UnsignedQuad:
30 return 4;
31 default:
// Unknown size: log the raw enum value and fall back to a single element.
32 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
33 return 1;
34 }
35}
36} // namespace
37
21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 38u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]}; 39 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr); 40 const auto opcode = OpCode::Decode(instr);
@@ -126,45 +143,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
126 break; 143 break;
127 } 144 }
128 case OpCode::Id::LDG: { 145 case OpCode::Id::LDG: {
129 const u32 count = [&]() { 146 const auto [real_address_base, base_address, descriptor] =
130 switch (instr.ldg.type) { 147 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
131 case Tegra::Shader::UniformType::Single: 148 static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
132 return 1;
133 case Tegra::Shader::UniformType::Double:
134 return 2;
135 case Tegra::Shader::UniformType::Quad:
136 case Tegra::Shader::UniformType::UnsignedQuad:
137 return 4;
138 default:
139 UNIMPLEMENTED_MSG("Unimplemented LDG size!");
140 return 1;
141 }
142 }();
143
144 const Node addr_register = GetRegister(instr.gpr8);
145 const Node base_address =
146 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
147 const auto cbuf = std::get_if<CbufNode>(base_address);
148 ASSERT(cbuf != nullptr);
149 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
150 ASSERT(cbuf_offset_imm != nullptr);
151 const auto cbuf_offset = cbuf_offset_imm->GetValue();
152
153 bb.push_back(Comment(
154 fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
155
156 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
157 used_global_memory_bases.insert(descriptor);
158
159 const Node immediate_offset =
160 Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
161 const Node base_real_address =
162 Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
163 149
150 const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
164 for (u32 i = 0; i < count; ++i) { 151 for (u32 i = 0; i < count; ++i) {
165 const Node it_offset = Immediate(i * 4); 152 const Node it_offset = Immediate(i * 4);
166 const Node real_address = 153 const Node real_address =
167 Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); 154 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
168 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); 155 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
169 156
170 SetTemporal(bb, i, gmem); 157 SetTemporal(bb, i, gmem);
@@ -174,6 +161,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
174 } 161 }
175 break; 162 break;
176 } 163 }
164 case OpCode::Id::STG: {
// Global memory store. The address register is gpr8 and the immediate offset comes from
// instr.stg; passing `true` marks the tracked global memory region as written.
165 const auto [real_address_base, base_address, descriptor] =
166 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
167 static_cast<u32>(instr.stg.immediate_offset.Value()), true);
168
169 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
// NOTE(review): temporal 0 is written here, but the store loop below uses real_address_base
// directly rather than GetTemporal(0) - confirm whether that is intended.
170 SetTemporal(bb, 0, real_address_base);
171
172 const u32 count = GetUniformTypeElementsCount(instr.stg.type);
// Copy the `count` consecutive source registers, starting at gpr0, into temporals 1..count.
173 for (u32 i = 0; i < count; ++i) {
174 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
175 }
// Emit one Assign per element: element i is written to real_address_base + i * 4 and takes
// its value from temporal i + 1.
176 for (u32 i = 0; i < count; ++i) {
177 const Node it_offset = Immediate(i * 4);
178 const Node real_address =
179 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
180 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
181
182 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
183 }
184 break;
185 }
177 case OpCode::Id::ST_A: { 186 case OpCode::Id::ST_A: {
178 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, 187 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
179 "Indirect attribute loads are not supported"); 188 "Indirect attribute loads are not supported");
@@ -236,4 +245,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
236 return pc; 245 return pc;
237} 246}
238 247
/// Resolves the constant buffer entry that a global memory address is based on, records the
/// access in used_global_memory, and builds the node for the effective address.
/// @param bb               Block that receives a Comment node describing the base address.
/// @param addr_register    Node of the register holding the address; tracked back through
///                         global_code with TrackCbuf.
/// @param immediate_offset Immediate offset added to addr_register.
/// @param is_write         When true the region is flagged as written, otherwise as read.
/// @return Tuple of (immediate_offset + addr_register node, tracked base address node,
///         GlobalMemoryBase descriptor built from the cbuf index and offset).
248std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
249 Node addr_register,
250 u32 immediate_offset,
251 bool is_write) {
// Track the address register starting from the end of global_code.
252 const Node base_address{
253 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
// The tracked node is required to be a CbufNode whose offset is an ImmediateNode.
254 const auto cbuf = std::get_if<CbufNode>(base_address);
255 ASSERT(cbuf != nullptr);
256 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
257 ASSERT(cbuf_offset_imm != nullptr);
258 const auto cbuf_offset = cbuf_offset_imm->GetValue();
259
260 bb.push_back(
261 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
262
263 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
// try_emplace keeps an already existing entry untouched, so the read/written flags
// accumulate across every access to the same descriptor.
264 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
265 auto& usage = entry->second;
266 if (is_write) {
267 usage.is_written = true;
268 } else {
269 usage.is_read = true;
270 }
271
272 const auto real_address =
273 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
274
275 return {real_address, base_address, descriptor};
276}
277
239} // namespace VideoCommon::Shader 278} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 4888998d3..1afab08c0 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -276,6 +276,11 @@ struct GlobalMemoryBase {
276 } 276 }
277}; 277};
278 278
/// Access flags kept per GlobalMemoryBase in ShaderIR::used_global_memory.
/// Both flags are value-initialized (false) and are set by ShaderIR::TrackAndGetGlobalMemory.
279struct GlobalMemoryUsage {
// Set when the region is tracked with is_write == false.
280 bool is_read{};
// Set when the region is tracked with is_write == true.
281 bool is_written{};
282};
283
279struct MetaArithmetic { 284struct MetaArithmetic {
280 bool precise{}; 285 bool precise{};
281}; 286};
@@ -578,8 +583,8 @@ public:
578 return used_clip_distances; 583 return used_clip_distances;
579 } 584 }
580 585
581 const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const { 586 const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
582 return used_global_memory_bases; 587 return used_global_memory;
583 } 588 }
584 589
585 std::size_t GetLength() const { 590 std::size_t GetLength() const {
@@ -781,6 +786,11 @@ private:
781 786
782 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); 787 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
783 788
/// Tracks the constant buffer base of a global memory access, records whether the region is
/// read or written, and returns (effective address node, base address node, descriptor).
/// @param bb               Block that receives a comment describing the base address.
/// @param addr_register    Node of the register holding the address.
/// @param immediate_offset Immediate offset added to the address register.
/// @param is_write         True to flag the region as written, false to flag it as read.
789 std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
790 Node addr_register,
791 u32 immediate_offset,
792 bool is_write);
793
784 template <typename... T> 794 template <typename... T>
785 Node Operation(OperationCode code, const T*... operands) { 795 Node Operation(OperationCode code, const T*... operands) {
786 return StoreNode(OperationNode(code, operands...)); 796 return StoreNode(OperationNode(code, operands...));
@@ -834,7 +844,7 @@ private:
834 std::map<u32, ConstBuffer> used_cbufs; 844 std::map<u32, ConstBuffer> used_cbufs;
835 std::set<Sampler> used_samplers; 845 std::set<Sampler> used_samplers;
836 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; 846 std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
837 std::set<GlobalMemoryBase> used_global_memory_bases; 847 std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
838 848
839 Tegra::Shader::Header header; 849 Tegra::Shader::Header header;
840}; 850};