diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 85 |
1 files changed, 55 insertions, 30 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 3da833e81..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType; | |||
| 24 | using Tegra::Shader::Instruction; | 24 | using Tegra::Shader::Instruction; |
| 25 | using Tegra::Shader::OpCode; | 25 | using Tegra::Shader::OpCode; |
| 26 | using Tegra::Shader::Register; | 26 | using Tegra::Shader::Register; |
| 27 | using Tegra::Shader::StoreType; | ||
| 27 | 28 | ||
| 28 | namespace { | 29 | namespace { |
| 29 | 30 | ||
| @@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | |||
| 63 | } | 64 | } |
| 64 | } | 65 | } |
| 65 | 66 | ||
| 67 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | ||
| 68 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | ||
| 69 | offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); | ||
| 70 | return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), | ||
| 71 | Immediate(size)); | ||
| 72 | } | ||
| 73 | |||
| 74 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | ||
| 75 | Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); | ||
| 76 | offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); | ||
| 77 | return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), | ||
| 78 | std::move(offset), Immediate(size)); | ||
| 79 | } | ||
| 80 | |||
| 81 | Node Sign16Extend(Node value) { | ||
| 82 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | ||
| 83 | Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); | ||
| 84 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | ||
| 85 | return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); | ||
| 86 | } | ||
| 87 | |||
| 66 | } // Anonymous namespace | 88 | } // Anonymous namespace |
| 67 | 89 | ||
| 68 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | 90 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { |
| @@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 138 | LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); | 160 | LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); |
| 139 | [[fallthrough]]; | 161 | [[fallthrough]]; |
| 140 | case OpCode::Id::LD_S: { | 162 | case OpCode::Id::LD_S: { |
| 141 | const auto GetMemory = [&](s32 offset) { | 163 | const auto GetAddress = [&](s32 offset) { |
| 142 | ASSERT(offset % 4 == 0); | 164 | ASSERT(offset % 4 == 0); |
| 143 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); | 165 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); |
| 144 | const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), | 166 | return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); |
| 145 | immediate_offset); | 167 | }; |
| 146 | return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) | 168 | const auto GetMemory = [&](s32 offset) { |
| 147 | : GetLocalMemory(address); | 169 | return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) |
| 170 | : GetLocalMemory(GetAddress(offset)); | ||
| 148 | }; | 171 | }; |
| 149 | 172 | ||
| 150 | switch (instr.ldst_sl.type.Value()) { | 173 | switch (instr.ldst_sl.type.Value()) { |
| 151 | case Tegra::Shader::StoreType::Bits32: | 174 | case StoreType::Signed16: |
| 152 | case Tegra::Shader::StoreType::Bits64: | 175 | SetRegister(bb, instr.gpr0, |
| 153 | case Tegra::Shader::StoreType::Bits128: { | 176 | Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); |
| 154 | const u32 count = [&]() { | 177 | break; |
| 178 | case StoreType::Bits32: | ||
| 179 | case StoreType::Bits64: | ||
| 180 | case StoreType::Bits128: { | ||
| 181 | const u32 count = [&] { | ||
| 155 | switch (instr.ldst_sl.type.Value()) { | 182 | switch (instr.ldst_sl.type.Value()) { |
| 156 | case Tegra::Shader::StoreType::Bits32: | 183 | case StoreType::Bits32: |
| 157 | return 1; | 184 | return 1; |
| 158 | case Tegra::Shader::StoreType::Bits64: | 185 | case StoreType::Bits64: |
| 159 | return 2; | 186 | return 2; |
| 160 | case Tegra::Shader::StoreType::Bits128: | 187 | case StoreType::Bits128: |
| 161 | return 4; | 188 | return 4; |
| 162 | default: | 189 | default: |
| 163 | UNREACHABLE(); | 190 | UNREACHABLE(); |
| @@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 214 | // To handle unaligned loads get the bytes used to dereference global memory and extract | 241 | // To handle unaligned loads get the bytes used to dereference global memory and extract |
| 215 | // those bytes from the loaded u32. | 242 | // those bytes from the loaded u32. |
| 216 | if (IsUnaligned(type)) { | 243 | if (IsUnaligned(type)) { |
| 217 | Node mask = Immediate(GetUnalignedMask(type)); | 244 | gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); |
| 218 | Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); | ||
| 219 | offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); | ||
| 220 | |||
| 221 | gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), | ||
| 222 | std::move(offset), Immediate(size)); | ||
| 223 | } | 245 | } |
| 224 | 246 | ||
| 225 | SetTemporary(bb, i, gmem); | 247 | SetTemporary(bb, i, gmem); |
| @@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 271 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | 293 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); |
| 272 | }; | 294 | }; |
| 273 | 295 | ||
| 274 | const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L | 296 | const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; |
| 275 | ? &ShaderIR::SetLocalMemory | 297 | const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; |
| 276 | : &ShaderIR::SetSharedMemory; | 298 | const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; |
| 277 | 299 | ||
| 278 | switch (instr.ldst_sl.type.Value()) { | 300 | switch (instr.ldst_sl.type.Value()) { |
| 279 | case Tegra::Shader::StoreType::Bits128: | 301 | case StoreType::Bits128: |
| 280 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); | 302 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); |
| 281 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); | 303 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); |
| 282 | [[fallthrough]]; | 304 | [[fallthrough]]; |
| 283 | case Tegra::Shader::StoreType::Bits64: | 305 | case StoreType::Bits64: |
| 284 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); | 306 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); |
| 285 | [[fallthrough]]; | 307 | [[fallthrough]]; |
| 286 | case Tegra::Shader::StoreType::Bits32: | 308 | case StoreType::Bits32: |
| 287 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); | 309 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); |
| 288 | break; | 310 | break; |
| 311 | case StoreType::Signed16: { | ||
| 312 | Node address = GetAddress(0); | ||
| 313 | Node memory = (this->*get_memory)(address); | ||
| 314 | (this->*set_memory)( | ||
| 315 | bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); | ||
| 316 | break; | ||
| 317 | } | ||
| 289 | default: | 318 | default: |
| 290 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), | 319 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), |
| 291 | static_cast<u32>(instr.ldst_sl.type.Value())); | 320 | static_cast<u32>(instr.ldst_sl.type.Value())); |
| @@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 325 | Node value = GetRegister(instr.gpr0.Value() + i); | 354 | Node value = GetRegister(instr.gpr0.Value() + i); |
| 326 | 355 | ||
| 327 | if (IsUnaligned(type)) { | 356 | if (IsUnaligned(type)) { |
| 328 | Node mask = Immediate(GetUnalignedMask(type)); | 357 | const u32 mask = GetUnalignedMask(type); |
| 329 | Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); | 358 | value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); |
| 330 | offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3)); | ||
| 331 | |||
| 332 | value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset, | ||
| 333 | Immediate(size)); | ||
| 334 | } | 359 | } |
| 335 | 360 | ||
| 336 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | 361 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); |