summary | refs | log | tree | commit | diff
path: root/src/video_core/shader/decode
diff options
context:
space:
mode:
author: ReinUsesLisp, 2019-02-07 00:05:41 -0300
committer: ReinUsesLisp, 2019-04-14 00:25:32 -0300
commit: 5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80 (patch)
tree: d3f411c5b0c15539bd36e86944cfdc28972fb98a /src/video_core/shader/decode
parent: Merge pull request #2378 from lioncash/ro (diff)
download: yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.tar.gz
yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.tar.xz
yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.zip
shader_ir: Implement STG, keep track of global memory usage and flush
Diffstat (limited to 'src/video_core/shader/decode')
-rw-r--r-- src/video_core/shader/decode/memory.cpp 109
1 files changed, 74 insertions, 35 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index ea3c71eed..ff19ada55 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -18,6 +18,23 @@ using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20 20
21namespace {
// Maps a Tegra::Shader::UniformType to an element count:
//   Single -> 1, Double -> 2, Quad / UnsignedQuad -> 4.
// Any other value is reported through UNIMPLEMENTED_MSG and treated as 1.
// The LDG and STG cases visible in this diff use the result as the number of
// loop iterations, each one offsetting the address by i * 4.
22u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
23 switch (uniform_type) {
24 case Tegra::Shader::UniformType::Single:
25 return 1;
26 case Tegra::Shader::UniformType::Double:
27 return 2;
28 case Tegra::Shader::UniformType::Quad:
29 case Tegra::Shader::UniformType::UnsignedQuad:
30 return 4;
31 default:
// Unhandled enumerators are logged with their numeric value; the function
// then falls back to a single element.
32 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
33 return 1;
34 }
35}
36} // namespace
37
21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 38u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
22 const Instruction instr = {program_code[pc]}; 39 const Instruction instr = {program_code[pc]};
23 const auto opcode = OpCode::Decode(instr); 40 const auto opcode = OpCode::Decode(instr);
@@ -126,45 +143,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
126 break; 143 break;
127 } 144 }
128 case OpCode::Id::LDG: { 145 case OpCode::Id::LDG: {
129 const u32 count = [&]() { 146 const auto [real_address_base, base_address, descriptor] =
130 switch (instr.ldg.type) { 147 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
131 case Tegra::Shader::UniformType::Single: 148 static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
132 return 1;
133 case Tegra::Shader::UniformType::Double:
134 return 2;
135 case Tegra::Shader::UniformType::Quad:
136 case Tegra::Shader::UniformType::UnsignedQuad:
137 return 4;
138 default:
139 UNIMPLEMENTED_MSG("Unimplemented LDG size!");
140 return 1;
141 }
142 }();
143
144 const Node addr_register = GetRegister(instr.gpr8);
145 const Node base_address =
146 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
147 const auto cbuf = std::get_if<CbufNode>(base_address);
148 ASSERT(cbuf != nullptr);
149 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
150 ASSERT(cbuf_offset_imm != nullptr);
151 const auto cbuf_offset = cbuf_offset_imm->GetValue();
152
153 bb.push_back(Comment(
154 fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
155
156 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
157 used_global_memory_bases.insert(descriptor);
158
159 const Node immediate_offset =
160 Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
161 const Node base_real_address =
162 Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
163 149
150 const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
164 for (u32 i = 0; i < count; ++i) { 151 for (u32 i = 0; i < count; ++i) {
165 const Node it_offset = Immediate(i * 4); 152 const Node it_offset = Immediate(i * 4);
166 const Node real_address = 153 const Node real_address =
167 Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); 154 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
168 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); 155 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
169 156
170 SetTemporal(bb, i, gmem); 157 SetTemporal(bb, i, gmem);
@@ -174,6 +161,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
174 } 161 }
175 break; 162 break;
176 } 163 }
164 case OpCode::Id::STG: {
165 const auto [real_address_base, base_address, descriptor] =
166 TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
167 static_cast<u32>(instr.stg.immediate_offset.Value()), true);
168
169 // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
170 SetTemporal(bb, 0, real_address_base);
171
172 const u32 count = GetUniformTypeElementsCount(instr.stg.type);
173 for (u32 i = 0; i < count; ++i) {
174 SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
175 }
176 for (u32 i = 0; i < count; ++i) {
177 const Node it_offset = Immediate(i * 4);
178 const Node real_address =
179 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
180 const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
181
182 bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
183 }
184 break;
185 }
177 case OpCode::Id::ST_A: { 186 case OpCode::Id::ST_A: {
178 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, 187 UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
179 "Indirect attribute loads are not supported"); 188 "Indirect attribute loads are not supported");
@@ -236,4 +245,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
236 return pc; 245 return pc;
237} 246}
238 247
// Shared helper for the LDG and STG cases: resolves the base of a global
// memory access and records how that base is used.
//
// Steps performed:
//   1. Calls TrackCbuf on addr_register over global_code. The result must be
//      a CbufNode whose offset is an ImmediateNode (both checked with ASSERT).
//   2. Appends a Comment node to bb describing the constant buffer index and
//      offset that were found.
//   3. Builds a GlobalMemoryBase descriptor from that index/offset, inserts it
//      into used_global_memory if absent, and sets is_written or is_read on
//      its entry depending on is_write.
//   4. Builds a UAdd operation of Immediate(immediate_offset) and addr_register.
//
// Returns {real_address, base_address, descriptor}: the UAdd node from step 4,
// the node returned by TrackCbuf, and the descriptor from step 3.
248std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
249 Node addr_register,
250 u32 immediate_offset,
251 bool is_write) {
252 const Node base_address{
253 TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
254 const auto cbuf = std::get_if<CbufNode>(base_address);
// NOTE(review): cbuf is dereferenced right after this ASSERT; whether a failed
// ASSERT stops execution is not visible here - confirm before relying on it.
255 ASSERT(cbuf != nullptr);
256 const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
257 ASSERT(cbuf_offset_imm != nullptr);
258 const auto cbuf_offset = cbuf_offset_imm->GetValue();
259
260 bb.push_back(
261 Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
262
263 const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
// try_emplace leaves an existing entry untouched, so read/written flags set by
// earlier calls for the same descriptor are kept.
// NOTE(review): is_new is bound but never used in this function.
264 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
265 auto& usage = entry->second;
266 if (is_write) {
267 usage.is_written = true;
268 } else {
269 usage.is_read = true;
270 }
271
272 const auto real_address =
273 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
274
275 return {real_address, base_address, descriptor};
276}
277
239} // namespace VideoCommon::Shader 278} // namespace VideoCommon::Shader