author    ReinUsesLisp 2020-01-09 01:08:55 -0300
committer ReinUsesLisp 2020-01-09 02:12:29 -0300
commit e2a2a556b9713f7c2e8dc20dbdaff80996fa6b91
tree   0a3848d5c87d930611ff955b67b654308e2a97fa /src
parent Merge pull request #3279 from ReinUsesLisp/vk-pipeline-cache
shader_ir/memory: Implement u16 and u8 for STG and LDG
Using the same technique we used for u8 on LDG, implement u16. In the case of STG, load the memory, insert the value we want to store into it with bitfieldInsert, and then write that word back.
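The STG path described above boils down to a read-modify-write of the aligned 32-bit word that backs the sub-word store. The following is a minimal C++ sketch of that arithmetic, not yuzu code: ExtractUnaligned, InsertUnaligned, and the sample values are illustrative names chosen here. It mirrors what the generated UBitwiseAnd / ULogicalShiftLeft / UBitfieldExtract / UBitfieldInsert operations compute: the low address bits select a bit offset inside the word, LDG extracts `size` bits from that offset, and STG inserts the new value into the loaded word before storing it back.

#include <cstdint>
#include <cstdio>

// Hypothetical helpers (not part of yuzu): model the arithmetic the shader IR
// emits for sub-word global memory accesses. 'mask' matches GetUnalignedMask
// (0b11 for u8, 0b10 for u16) and 'size' matches GetMemorySize (8 or 16 bits).

// LDG path: extract 'size' bits from the aligned 32-bit word that was loaded.
static std::uint32_t ExtractUnaligned(std::uint32_t word, std::uint32_t address,
                                      std::uint32_t mask, std::uint32_t size) {
    const std::uint32_t bit_offset = (address & mask) * 8; // UBitwiseAnd + shift left by 3
    return (word >> bit_offset) & ((1u << size) - 1u);     // UBitfieldExtract
}

// STG path: read-modify-write, inserting 'value' into the loaded word
// (what bitfieldInsert does in the emitted shader) before storing it back.
static std::uint32_t InsertUnaligned(std::uint32_t word, std::uint32_t value,
                                     std::uint32_t address, std::uint32_t mask,
                                     std::uint32_t size) {
    const std::uint32_t bit_offset = (address & mask) * 8;
    const std::uint32_t field = ((1u << size) - 1u) << bit_offset;
    return (word & ~field) | ((value << bit_offset) & field); // UBitfieldInsert
}

int main() {
    const std::uint32_t word = 0xAABBCCDD;
    // u16 load at an address with bit 1 set reads the upper half-word: AABB.
    std::printf("%04X\n", ExtractUnaligned(word, 0x1002, 0b10, 16));
    // u8 store of 0xEE at byte offset 1 yields AABBEEDD.
    std::printf("%08X\n", InsertUnaligned(word, 0xEE, 0x1001, 0b11, 8));
}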
Diffstat (limited to 'src')
-rw-r--r--  src/video_core/shader/decode/memory.cpp  84
-rw-r--r--  src/video_core/shader/shader_ir.h          2
2 files changed, 52 insertions(+), 34 deletions(-)
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index c934d0719..8cc84e935 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -6,6 +6,7 @@
 #include <vector>
 #include <fmt/format.h>
 
+#include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
@@ -22,34 +23,39 @@ using Tegra::Shader::Register;
 
 namespace {
 
-u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
+bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
+    return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
+           uniform_type == Tegra::Shader::UniformType::UnsignedShort;
+}
+
+u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
     switch (uniform_type) {
     case Tegra::Shader::UniformType::UnsignedByte:
-    case Tegra::Shader::UniformType::Single:
-        return 1;
-    case Tegra::Shader::UniformType::Double:
-        return 2;
-    case Tegra::Shader::UniformType::Quad:
-    case Tegra::Shader::UniformType::UnsignedQuad:
-        return 4;
+        return 0b11;
+    case Tegra::Shader::UniformType::UnsignedShort:
+        return 0b10;
     default:
-        UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
-        return 1;
+        UNREACHABLE();
+        return 0;
     }
 }
 
-u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
+u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
     switch (uniform_type) {
+    case Tegra::Shader::UniformType::UnsignedByte:
+        return 8;
+    case Tegra::Shader::UniformType::UnsignedShort:
+        return 16;
     case Tegra::Shader::UniformType::Single:
-        return 1;
+        return 32;
     case Tegra::Shader::UniformType::Double:
-        return 2;
+        return 64;
     case Tegra::Shader::UniformType::Quad:
     case Tegra::Shader::UniformType::UnsignedQuad:
-        return 4;
+        return 128;
     default:
         UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
-        return 1;
+        return 32;
     }
 }
 
@@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }();
 
         const auto [real_address_base, base_address, descriptor] =
-            TrackGlobalMemory(bb, instr, false);
+            TrackGlobalMemory(bb, instr, true, false);
 
-        const u32 count = GetLdgMemorySize(type);
+        const u32 size = GetMemorySize(type);
+        const u32 count = Common::AlignUp(size, 32) / 32;
         if (!real_address_base || !base_address) {
             // Tracking failed, load zeroes.
             for (u32 i = 0; i < count; ++i) {
@@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
             Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
 
-            if (type == Tegra::Shader::UniformType::UnsignedByte) {
-                // To handle unaligned loads get the byte used to dereferenced global memory
-                // and extract that byte from the loaded uint32.
-                Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
-                byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
+            // To handle unaligned loads get the bytes used to dereference global memory and extract
+            // those bytes from the loaded u32.
+            if (IsUnaligned(type)) {
+                Node mask = Immediate(GetUnalignedMask(type));
+                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
 
-                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
-                                 Immediate(8));
+                gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
+                                 std::move(offset), Immediate(size));
             }
 
             SetTemporary(bb, i, gmem);
@@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
             }
         }();
 
+        // For unaligned reads we have to read memory too.
+        const bool is_read = IsUnaligned(type);
         const auto [real_address_base, base_address, descriptor] =
-            TrackGlobalMemory(bb, instr, true);
+            TrackGlobalMemory(bb, instr, is_read, true);
         if (!real_address_base || !base_address) {
             // Tracking failed, skip the store.
             break;
         }
 
-        const u32 count = GetStgMemorySize(type);
+        const u32 size = GetMemorySize(type);
+        const u32 count = Common::AlignUp(size, 32) / 32;
         for (u32 i = 0; i < count; ++i) {
             const Node it_offset = Immediate(i * 4);
             const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
             const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-            const Node value = GetRegister(instr.gpr0.Value() + i);
+            Node value = GetRegister(instr.gpr0.Value() + i);
+
+            if (IsUnaligned(type)) {
+                Node mask = Immediate(GetUnalignedMask(type));
+                Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
+                offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
+
+                value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
+                                  Immediate(size));
+            }
+
             bb.push_back(Operation(OperationCode::Assign, gmem, value));
         }
         break;
@@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
 
 std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
                                                                      Instruction instr,
-                                                                     bool is_write) {
+                                                                     bool is_read, bool is_write) {
     const auto addr_register{GetRegister(instr.gmem.gpr)};
     const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
 
@@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
     const GlobalMemoryBase descriptor{index, offset};
     const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
     auto& usage = entry->second;
-    if (is_write) {
-        usage.is_written = true;
-    } else {
-        usage.is_read = true;
-    }
+    usage.is_written |= is_write;
+    usage.is_read |= is_read;
 
     const auto real_address =
         Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index aacd0a0da..ba1db4c11 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -394,7 +394,7 @@ private:
 
     std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
                                                                Tegra::Shader::Instruction instr,
-                                                               bool is_write);
+                                                               bool is_read, bool is_write);
 
     /// Register new amending code and obtain the reference id.
     std::size_t DeclareAmend(Node new_amend);