summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2019-12-18 00:36:21 -0300
committerGravatar ReinUsesLisp2019-12-18 01:21:46 -0300
commitae8d4b6c0c930178d17bb61bb17270aea133754d (patch)
tree48ffb54bd8d8d397431c2c3474b2b02b0378da90 /src
parentMerge pull request #3173 from yuzu-emu/bunnei-spscqueue (diff)
downloadyuzu-ae8d4b6c0c930178d17bb61bb17270aea133754d.tar.gz
yuzu-ae8d4b6c0c930178d17bb61bb17270aea133754d.tar.xz
yuzu-ae8d4b6c0c930178d17bb61bb17270aea133754d.zip
shader/memory: Implement LDG.U8 and unaligned U8 loads
LDG can load single bytes instead of full integers or packs of integers. These have the advantage of loading bytes that are not aligned to 4 bytes. To emulate these, this commit gets the byte being referenced (by doing "address & 3") and then uses that to extract the byte from the loaded integer: result = bitfieldExtract(loaded_integer, (address % 4) * 8, 8)
Diffstat (limited to 'src')
-rw-r--r--src/video_core/shader/decode/memory.cpp38
1 files changed, 32 insertions, 6 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 78e92f52e..c934d0719 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -22,7 +22,23 @@ using Tegra::Shader::Register;
22 22
23namespace { 23namespace {
24 24
25u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) { 25u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
26 switch (uniform_type) {
27 case Tegra::Shader::UniformType::UnsignedByte:
28 case Tegra::Shader::UniformType::Single:
29 return 1;
30 case Tegra::Shader::UniformType::Double:
31 return 2;
32 case Tegra::Shader::UniformType::Quad:
33 case Tegra::Shader::UniformType::UnsignedQuad:
34 return 4;
35 default:
36 UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
37 return 1;
38 }
39}
40
41u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
26 switch (uniform_type) { 42 switch (uniform_type) {
27 case Tegra::Shader::UniformType::Single: 43 case Tegra::Shader::UniformType::Single:
28 return 1; 44 return 1;
@@ -170,7 +186,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
170 const auto [real_address_base, base_address, descriptor] = 186 const auto [real_address_base, base_address, descriptor] =
171 TrackGlobalMemory(bb, instr, false); 187 TrackGlobalMemory(bb, instr, false);
172 188
173 const u32 count = GetUniformTypeElementsCount(type); 189 const u32 count = GetLdgMemorySize(type);
174 if (!real_address_base || !base_address) { 190 if (!real_address_base || !base_address) {
175 // Tracking failed, load zeroes. 191 // Tracking failed, load zeroes.
176 for (u32 i = 0; i < count; ++i) { 192 for (u32 i = 0; i < count; ++i) {
@@ -181,12 +197,22 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
181 197
182 for (u32 i = 0; i < count; ++i) { 198 for (u32 i = 0; i < count; ++i) {
183 const Node it_offset = Immediate(i * 4); 199 const Node it_offset = Immediate(i * 4);
184 const Node real_address = 200 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
185 Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); 201 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
186 const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 202
203 if (type == Tegra::Shader::UniformType::UnsignedByte) {
204 // To handle unaligned loads, get the byte used to dereference global memory
205 // and extract that byte from the loaded uint32.
206 Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
207 byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
208
209 gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
210 Immediate(8));
211 }
187 212
188 SetTemporary(bb, i, gmem); 213 SetTemporary(bb, i, gmem);
189 } 214 }
215
190 for (u32 i = 0; i < count; ++i) { 216 for (u32 i = 0; i < count; ++i) {
191 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); 217 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
192 } 218 }
@@ -276,7 +302,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
276 break; 302 break;
277 } 303 }
278 304
279 const u32 count = GetUniformTypeElementsCount(type); 305 const u32 count = GetStgMemorySize(type);
280 for (u32 i = 0; i < count; ++i) { 306 for (u32 i = 0; i < count; ++i) {
281 const Node it_offset = Immediate(i * 4); 307 const Node it_offset = Immediate(i * 4);
282 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); 308 const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);