summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/video_core/shader/decode/memory.cpp84
-rw-r--r--src/video_core/shader/shader_ir.h2
2 files changed, 52 insertions, 34 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c934d0719..8cc84e935 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
6#include <vector> 6#include <vector>
7#include <fmt/format.h> 7#include <fmt/format.h>
8 8
9#include "common/alignment.h"
9#include "common/assert.h" 10#include "common/assert.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
11#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -22,34 +23,39 @@ using Tegra::Shader::Register;
22 23
23namespace { 24namespace {
24 25
25u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { 26bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
27 return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
28 uniform_type == Tegra::Shader::UniformType::UnsignedShort;
29}
30
31u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
26 switch (uniform_type) { 32 switch (uniform_type) {
27 case Tegra::Shader::UniformType::UnsignedByte: 33 case Tegra::Shader::UniformType::UnsignedByte:
28 case Tegra::Shader::UniformType::Single: 34 return 0b11;
29 return 1; 35 case Tegra::Shader::UniformType::UnsignedShort:
30 case Tegra::Shader::UniformType::Double: 36 return 0b10;
31 return 2;
32 case Tegra::Shader::UniformType::Quad:
33 case Tegra::Shader::UniformType::UnsignedQuad:
34 return 4;
35 default: 37 default:
36 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); 38 UNREACHABLE();
37 return 1; 39 return 0;
38 } 40 }
39} 41}
40 42
41u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { 43u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
42 switch (uniform_type) { 44 switch (uniform_type) {
45 case Tegra::Shader::UniformType::UnsignedByte:
46 return 8;
47 case Tegra::Shader::UniformType::UnsignedShort:
48 return 16;
43 case Tegra::Shader::UniformType::Single: 49 case Tegra::Shader::UniformType::Single:
44 return 1; 50 return 32;
45 case Tegra::Shader::UniformType::Double: 51 case Tegra::Shader::UniformType::Double:
46 return 2; 52 return 64;
47 case Tegra::Shader::UniformType::Quad: 53 case Tegra::Shader::UniformType::Quad:
48 case Tegra::Shader::UniformType::UnsignedQuad: 54 case Tegra::Shader::UniformType::UnsignedQuad:
49 return 4; 55 return 128;
50 default: 56 default:
51 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); 57 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
52 return 1; 58 return 32;
53 } 59 }
54} 60}
55 61
@@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
184 }(); 190 }();
185 191
186 const auto [real_address_base, base_address, descriptor] = 192 const auto [real_address_base, base_address, descriptor] =
187 TrackGlobalMemory(bb, instr, false); 193 TrackGlobalMemory(bb, instr, true, false);
188 194
189 const u32 count = GetLdgMemorySize(type); 195 const u32 size = GetMemorySize(type);
196 const u32 count = Common::AlignUp(size, 32) / 32;
190 if (!real_address_base || !base_address) { 197 if (!real_address_base || !base_address) {
191 // Tracking failed, load zeroes. 198 // Tracking failed, load zeroes.
192 for (u32 i = 0; i < count; ++i) { 199 for (u32 i = 0; i < count; ++i) {
@@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
200 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 207 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
201 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 208 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
202 209
203 if (type == Tegra::Shader::UniformType::UnsignedByte) { 210 // To handle unaligned loads get the bytes used to dereference global memory and extract
204 // To handle unaligned loads get the byte used to dereferenced global memory 211 // those bytes from the loaded u32.
205 // and extract that byte from the loaded uint32. 212 if (IsUnaligned(type)) {
206 Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); 213 Node mask = Immediate(GetUnalignedMask(type));
207 byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); 214 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
215 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
208 216
209 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), 217 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
210 Immediate(8)); 218 std::move(offset), Immediate(size));
211 } 219 }
212 220
213 SetTemporary(bb, i, gmem); 221 SetTemporary(bb, i, gmem);
@@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
295 } 303 }
296 }(); 304 }();
297 305
306 // For unaligned reads we have to read memory too.
307 const bool is_read = IsUnaligned(type);
298 const auto [real_address_base, base_address, descriptor] = 308 const auto [real_address_base, base_address, descriptor] =
299 TrackGlobalMemory(bb, instr, true); 309 TrackGlobalMemory(bb, instr, is_read, true);
300 if (!real_address_base || !base_address) { 310 if (!real_address_base || !base_address) {
301 // Tracking failed, skip the store. 311 // Tracking failed, skip the store.
302 break; 312 break;
303 } 313 }
304 314
305 const u32 count = GetStgMemorySize(type); 315 const u32 size = GetMemorySize(type);
316 const u32 count = Common::AlignUp(size, 32) / 32;
306 for (u32 i = 0; i < count; ++i) { 317 for (u32 i = 0; i < count; ++i) {
307 const Node it_offset = Immediate(i * 4); 318 const Node it_offset = Immediate(i * 4);
308 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 319 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
309 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 320 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
310 const Node value = GetRegister(instr.gpr0.Value() + i); 321 Node value = GetRegister(instr.gpr0.Value() + i);
322
323 if (IsUnaligned(type)) {
324 Node mask = Immediate(GetUnalignedMask(type));
325 Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
326 offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
327
328 value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
329 Immediate(size));
330 }
331
311 bb.push_back(Operation(OperationCode::Assign, gmem, value)); 332 bb.push_back(Operation(OperationCode::Assign, gmem, value));
312 } 333 }
313 break; 334 break;
@@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
336 357
337std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, 358std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
338 Instruction instr, 359 Instruction instr,
339 bool is_write) { 360 bool is_read, bool is_write) {
340 const auto addr_register{GetRegister(instr.gmem.gpr)}; 361 const auto addr_register{GetRegister(instr.gmem.gpr)};
341 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; 362 const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
342 363
@@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
351 const GlobalMemoryBase descriptor{index, offset}; 372 const GlobalMemoryBase descriptor{index, offset};
352 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); 373 const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
353 auto& usage = entry->second; 374 auto& usage = entry->second;
354 if (is_write) { 375 usage.is_written |= is_write;
355 usage.is_written = true; 376 usage.is_read |= is_read;
356 } else {
357 usage.is_read = true;
358 }
359 377
360 const auto real_address = 378 const auto real_address =
361 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); 379 Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index aacd0a0da..ba1db4c11 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -394,7 +394,7 @@ private:
394 394
395 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, 395 std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
396 Tegra::Shader::Instruction instr, 396 Tegra::Shader::Instruction instr,
397 bool is_write); 397 bool is_read, bool is_write);
398 398
399 /// Register new amending code and obtain the reference id. 399 /// Register new amending code and obtain the reference id.
400 std::size_t DeclareAmend(Node new_amend); 400 std::size_t DeclareAmend(Node new_amend);