summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/video_core/shader/decode/memory.cpp85
1 file changed, 55 insertions(+), 30 deletions(-)
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 3da833e81..b5fbc4d58 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType;
24using Tegra::Shader::Instruction; 24using Tegra::Shader::Instruction;
25using Tegra::Shader::OpCode; 25using Tegra::Shader::OpCode;
26using Tegra::Shader::Register; 26using Tegra::Shader::Register;
27using Tegra::Shader::StoreType;
27 28
28namespace { 29namespace {
29 30
@@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
63 } 64 }
64} 65}
65 66
67Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
68 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
69 offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
70 return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
71 Immediate(size));
72}
73
74Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
75 Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
76 offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
77 return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
78 std::move(offset), Immediate(size));
79}
80
81Node Sign16Extend(Node value) {
82 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
83 Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15));
84 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
85 return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
86}
87
66} // Anonymous namespace 88} // Anonymous namespace
67 89
68u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 90u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
138 LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); 160 LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
139 [[fallthrough]]; 161 [[fallthrough]];
140 case OpCode::Id::LD_S: { 162 case OpCode::Id::LD_S: {
141 const auto GetMemory = [&](s32 offset) { 163 const auto GetAddress = [&](s32 offset) {
142 ASSERT(offset % 4 == 0); 164 ASSERT(offset % 4 == 0);
143 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); 165 const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
144 const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), 166 return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
145 immediate_offset); 167 };
146 return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) 168 const auto GetMemory = [&](s32 offset) {
147 : GetLocalMemory(address); 169 return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
170 : GetLocalMemory(GetAddress(offset));
148 }; 171 };
149 172
150 switch (instr.ldst_sl.type.Value()) { 173 switch (instr.ldst_sl.type.Value()) {
151 case Tegra::Shader::StoreType::Bits32: 174 case StoreType::Signed16:
152 case Tegra::Shader::StoreType::Bits64: 175 SetRegister(bb, instr.gpr0,
153 case Tegra::Shader::StoreType::Bits128: { 176 Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
154 const u32 count = [&]() { 177 break;
178 case StoreType::Bits32:
179 case StoreType::Bits64:
180 case StoreType::Bits128: {
181 const u32 count = [&] {
155 switch (instr.ldst_sl.type.Value()) { 182 switch (instr.ldst_sl.type.Value()) {
156 case Tegra::Shader::StoreType::Bits32: 183 case StoreType::Bits32:
157 return 1; 184 return 1;
158 case Tegra::Shader::StoreType::Bits64: 185 case StoreType::Bits64:
159 return 2; 186 return 2;
160 case Tegra::Shader::StoreType::Bits128: 187 case StoreType::Bits128:
161 return 4; 188 return 4;
162 default: 189 default:
163 UNREACHABLE(); 190 UNREACHABLE();
@@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
214 // To handle unaligned loads get the bytes used to dereference global memory and extract 241 // To handle unaligned loads get the bytes used to dereference global memory and extract
215 // those bytes from the loaded u32. 242 // those bytes from the loaded u32.
216 if (IsUnaligned(type)) { 243 if (IsUnaligned(type)) {
217 Node mask = Immediate(GetUnalignedMask(type)); 244 gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
218 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
219 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
220
221 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
222 std::move(offset), Immediate(size));
223 } 245 }
224 246
225 SetTemporary(bb, i, gmem); 247 SetTemporary(bb, i, gmem);
@@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
271 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); 293 return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
272 }; 294 };
273 295
274 const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L 296 const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
275 ? &ShaderIR::SetLocalMemory 297 const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
276 : &ShaderIR::SetSharedMemory; 298 const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
277 299
278 switch (instr.ldst_sl.type.Value()) { 300 switch (instr.ldst_sl.type.Value()) {
279 case Tegra::Shader::StoreType::Bits128: 301 case StoreType::Bits128:
280 (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); 302 (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
281 (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); 303 (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
282 [[fallthrough]]; 304 [[fallthrough]];
283 case Tegra::Shader::StoreType::Bits64: 305 case StoreType::Bits64:
284 (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); 306 (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
285 [[fallthrough]]; 307 [[fallthrough]];
286 case Tegra::Shader::StoreType::Bits32: 308 case StoreType::Bits32:
287 (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); 309 (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
288 break; 310 break;
311 case StoreType::Signed16: {
312 Node address = GetAddress(0);
313 Node memory = (this->*get_memory)(address);
314 (this->*set_memory)(
315 bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
316 break;
317 }
289 default: 318 default:
290 UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), 319 UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
291 static_cast<u32>(instr.ldst_sl.type.Value())); 320 static_cast<u32>(instr.ldst_sl.type.Value()));
@@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
325 Node value = GetRegister(instr.gpr0.Value() + i); 354 Node value = GetRegister(instr.gpr0.Value() + i);
326 355
327 if (IsUnaligned(type)) { 356 if (IsUnaligned(type)) {
328 Node mask = Immediate(GetUnalignedMask(type)); 357 const u32 mask = GetUnalignedMask(type);
329 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); 358 value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
330 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
331
332 value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
333 Immediate(size));
334 } 359 }
335 360
336 bb.push_back(Operation(OperationCode::Assign, gmem, value)); 361 bb.push_back(Operation(OperationCode::Assign, gmem, value));