diff options
| author | 2019-02-07 00:05:41 -0300 | |
|---|---|---|
| committer | 2019-04-14 00:25:32 -0300 | |
| commit | 5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80 (patch) | |
| tree | d3f411c5b0c15539bd36e86944cfdc28972fb98a /src/video_core/shader/decode | |
| parent | Merge pull request #2378 from lioncash/ro (diff) | |
| download | yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.tar.gz yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.tar.xz yuzu-5c280e6ff04ae36e8cd7ba81cce4ae89e0a49b80.zip | |
shader_ir: Implement STG, keep track of global memory usage and flush
Diffstat (limited to 'src/video_core/shader/decode')
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 109 |
1 files changed, 74 insertions, 35 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ea3c71eed..ff19ada55 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -18,6 +18,23 @@ using Tegra::Shader::Instruction; | |||
| 18 | using Tegra::Shader::OpCode; | 18 | using Tegra::Shader::OpCode; |
| 19 | using Tegra::Shader::Register; | 19 | using Tegra::Shader::Register; |
| 20 | 20 | ||
| 21 | namespace { | ||
| 22 | u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { | ||
| 23 | switch (uniform_type) { | ||
| 24 | case Tegra::Shader::UniformType::Single: | ||
| 25 | return 1; | ||
| 26 | case Tegra::Shader::UniformType::Double: | ||
| 27 | return 2; | ||
| 28 | case Tegra::Shader::UniformType::Quad: | ||
| 29 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 30 | return 4; | ||
| 31 | default: | ||
| 32 | UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); | ||
| 33 | return 1; | ||
| 34 | } | ||
| 35 | } | ||
| 36 | } // namespace | ||
| 37 | |||
| 21 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | 38 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
| 22 | const Instruction instr = {program_code[pc]}; | 39 | const Instruction instr = {program_code[pc]}; |
| 23 | const auto opcode = OpCode::Decode(instr); | 40 | const auto opcode = OpCode::Decode(instr); |
| @@ -126,45 +143,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 126 | break; | 143 | break; |
| 127 | } | 144 | } |
| 128 | case OpCode::Id::LDG: { | 145 | case OpCode::Id::LDG: { |
| 129 | const u32 count = [&]() { | 146 | const auto [real_address_base, base_address, descriptor] = |
| 130 | switch (instr.ldg.type) { | 147 | TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), |
| 131 | case Tegra::Shader::UniformType::Single: | 148 | static_cast<u32>(instr.ldg.immediate_offset.Value()), false); |
| 132 | return 1; | ||
| 133 | case Tegra::Shader::UniformType::Double: | ||
| 134 | return 2; | ||
| 135 | case Tegra::Shader::UniformType::Quad: | ||
| 136 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 137 | return 4; | ||
| 138 | default: | ||
| 139 | UNIMPLEMENTED_MSG("Unimplemented LDG size!"); | ||
| 140 | return 1; | ||
| 141 | } | ||
| 142 | }(); | ||
| 143 | |||
| 144 | const Node addr_register = GetRegister(instr.gpr8); | ||
| 145 | const Node base_address = | ||
| 146 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | ||
| 147 | const auto cbuf = std::get_if<CbufNode>(base_address); | ||
| 148 | ASSERT(cbuf != nullptr); | ||
| 149 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||
| 150 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 151 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 152 | |||
| 153 | bb.push_back(Comment( | ||
| 154 | fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 155 | |||
| 156 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | ||
| 157 | used_global_memory_bases.insert(descriptor); | ||
| 158 | |||
| 159 | const Node immediate_offset = | ||
| 160 | Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value())); | ||
| 161 | const Node base_real_address = | ||
| 162 | Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register); | ||
| 163 | 149 | ||
| 150 | const u32 count = GetUniformTypeElementsCount(instr.ldg.type); | ||
| 164 | for (u32 i = 0; i < count; ++i) { | 151 | for (u32 i = 0; i < count; ++i) { |
| 165 | const Node it_offset = Immediate(i * 4); | 152 | const Node it_offset = Immediate(i * 4); |
| 166 | const Node real_address = | 153 | const Node real_address = |
| 167 | Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset); | 154 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); |
| 168 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | 155 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); |
| 169 | 156 | ||
| 170 | SetTemporal(bb, i, gmem); | 157 | SetTemporal(bb, i, gmem); |
| @@ -174,6 +161,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 174 | } | 161 | } |
| 175 | break; | 162 | break; |
| 176 | } | 163 | } |
| 164 | case OpCode::Id::STG: { | ||
| 165 | const auto [real_address_base, base_address, descriptor] = | ||
| 166 | TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), | ||
| 167 | static_cast<u32>(instr.stg.immediate_offset.Value()), true); | ||
| 168 | |||
| 169 | // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} | ||
| 170 | SetTemporal(bb, 0, real_address_base); | ||
| 171 | |||
| 172 | const u32 count = GetUniformTypeElementsCount(instr.stg.type); | ||
| 173 | for (u32 i = 0; i < count; ++i) { | ||
| 174 | SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); | ||
| 175 | } | ||
| 176 | for (u32 i = 0; i < count; ++i) { | ||
| 177 | const Node it_offset = Immediate(i * 4); | ||
| 178 | const Node real_address = | ||
| 179 | Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); | ||
| 180 | const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); | ||
| 181 | |||
| 182 | bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); | ||
| 183 | } | ||
| 184 | break; | ||
| 185 | } | ||
| 177 | case OpCode::Id::ST_A: { | 186 | case OpCode::Id::ST_A: { |
| 178 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | 187 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, |
| 179 | "Indirect attribute loads are not supported"); | 188 | "Indirect attribute loads are not supported"); |
| @@ -236,4 +245,34 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 236 | return pc; | 245 | return pc; |
| 237 | } | 246 | } |
| 238 | 247 | ||
| 248 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, | ||
| 249 | Node addr_register, | ||
| 250 | u32 immediate_offset, | ||
| 251 | bool is_write) { | ||
| 252 | const Node base_address{ | ||
| 253 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))}; | ||
| 254 | const auto cbuf = std::get_if<CbufNode>(base_address); | ||
| 255 | ASSERT(cbuf != nullptr); | ||
| 256 | const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset()); | ||
| 257 | ASSERT(cbuf_offset_imm != nullptr); | ||
| 258 | const auto cbuf_offset = cbuf_offset_imm->GetValue(); | ||
| 259 | |||
| 260 | bb.push_back( | ||
| 261 | Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset))); | ||
| 262 | |||
| 263 | const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset}; | ||
| 264 | const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); | ||
| 265 | auto& usage = entry->second; | ||
| 266 | if (is_write) { | ||
| 267 | usage.is_written = true; | ||
| 268 | } else { | ||
| 269 | usage.is_read = true; | ||
| 270 | } | ||
| 271 | |||
| 272 | const auto real_address = | ||
| 273 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | ||
| 274 | |||
| 275 | return {real_address, base_address, descriptor}; | ||
| 276 | } | ||
| 277 | |||
| 239 | } // namespace VideoCommon::Shader | 278 | } // namespace VideoCommon::Shader |