summary | refs | log | tree | commit | diff
path: root/src/video_core/shader
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2020-04-15 15:03:49 -0400
committer: GitHub, 2020-04-15 15:03:49 -0400
commit: e33196d4e7687dd29636decd4b52e01b10fe8984 (patch)
tree: b20b84dc47b9ef48c8701951ead117f52252e6e5 /src/video_core/shader
parent: Merge pull request #3670 from lioncash/reorder (diff)
parent: shader/memory: Implement RED.E.ADD (diff)
download: yuzu-e33196d4e7687dd29636decd4b52e01b10fe8984.tar.gz
download: yuzu-e33196d4e7687dd29636decd4b52e01b10fe8984.tar.xz
download: yuzu-e33196d4e7687dd29636decd4b52e01b10fe8984.zip
Merge pull request #3612 from ReinUsesLisp/red
shader/memory: Implement RED.E.ADD and minor changes to ATOM
Diffstat (limited to 'src/video_core/shader')
-rw-r--r-- src/video_core/shader/decode/memory.cpp | 100
-rw-r--r-- src/video_core/shader/node.h | 14
2 files changed, 71 insertions, 43 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b8f63922f..8112ead3e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -3,7 +3,9 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <algorithm> 5#include <algorithm>
6#include <utility>
6#include <vector> 7#include <vector>
8
7#include <fmt/format.h> 9#include <fmt/format.h>
8 10
9#include "common/alignment.h" 11#include "common/alignment.h"
@@ -16,6 +18,7 @@
16 18
17namespace VideoCommon::Shader { 19namespace VideoCommon::Shader {
18 20
21using std::move;
19using Tegra::Shader::AtomicOp; 22using Tegra::Shader::AtomicOp;
20using Tegra::Shader::AtomicType; 23using Tegra::Shader::AtomicType;
21using Tegra::Shader::Attribute; 24using Tegra::Shader::Attribute;
@@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;
27 30
28namespace { 31namespace {
29 32
30Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { 33OperationCode GetAtomOperation(AtomicOp op) {
31 const OperationCode operation_code = [op] { 34 switch (op) {
32 switch (op) { 35 case AtomicOp::Add:
33 case AtomicOp::Add: 36 return OperationCode::AtomicIAdd;
34 return OperationCode::AtomicIAdd; 37 case AtomicOp::Min:
35 case AtomicOp::Min: 38 return OperationCode::AtomicIMin;
36 return OperationCode::AtomicIMin; 39 case AtomicOp::Max:
37 case AtomicOp::Max: 40 return OperationCode::AtomicIMax;
38 return OperationCode::AtomicIMax; 41 case AtomicOp::And:
39 case AtomicOp::And: 42 return OperationCode::AtomicIAnd;
40 return OperationCode::AtomicIAnd; 43 case AtomicOp::Or:
41 case AtomicOp::Or: 44 return OperationCode::AtomicIOr;
42 return OperationCode::AtomicIOr; 45 case AtomicOp::Xor:
43 case AtomicOp::Xor: 46 return OperationCode::AtomicIXor;
44 return OperationCode::AtomicIXor; 47 case AtomicOp::Exch:
45 case AtomicOp::Exch: 48 return OperationCode::AtomicIExchange;
46 return OperationCode::AtomicIExchange; 49 default:
47 default: 50 UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
48 UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); 51 return OperationCode::AtomicIAdd;
49 return OperationCode::AtomicIAdd; 52 }
50 }
51 }();
52 return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
53} 53}
54 54
55bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { 55bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
@@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
90 90
91Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { 91Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
92 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); 92 Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
93 offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); 93 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
94 return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), 94 return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
95 Immediate(size));
96} 95}
97 96
98Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { 97Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
99 Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); 98 Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
100 offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); 99 offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
101 return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), 100 return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
102 std::move(offset), Immediate(size)); 101 Immediate(size));
103} 102}
104 103
105Node Sign16Extend(Node value) { 104Node Sign16Extend(Node value) {
106 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); 105 Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
107 Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); 106 Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
108 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); 107 Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
109 return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); 108 return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
110} 109}
111 110
112} // Anonymous namespace 111} // Anonymous namespace
@@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
379 378
380 if (IsUnaligned(type)) { 379 if (IsUnaligned(type)) {
381 const u32 mask = GetUnalignedMask(type); 380 const u32 mask = GetUnalignedMask(type);
382 value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); 381 value = InsertUnaligned(gmem, move(value), real_address, mask, size);
383 } 382 }
384 383
385 bb.push_back(Operation(OperationCode::Assign, gmem, value)); 384 bb.push_back(Operation(OperationCode::Assign, gmem, value));
386 } 385 }
387 break; 386 break;
388 } 387 }
388 case OpCode::Id::RED: {
389 UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
390 UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add);
391 const auto [real_address, base_address, descriptor] =
392 TrackGlobalMemory(bb, instr, true, true);
393 if (!real_address || !base_address) {
394 // Tracking failed, skip atomic.
395 break;
396 }
397 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
398 Node value = GetRegister(instr.gpr0);
399 bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
400 break;
401 }
389 case OpCode::Id::ATOM: { 402 case OpCode::Id::ATOM: {
390 UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || 403 UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
391 instr.atom.operation == AtomicOp::Dec || 404 instr.atom.operation == AtomicOp::Dec ||
392 instr.atom.operation == AtomicOp::SafeAdd, 405 instr.atom.operation == AtomicOp::SafeAdd,
393 "operation={}", static_cast<int>(instr.atom.operation.Value())); 406 "operation={}", static_cast<int>(instr.atom.operation.Value()));
394 UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || 407 UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
395 instr.atom.type == GlobalAtomicType::U64, 408 instr.atom.type == GlobalAtomicType::U64 ||
409 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
410 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
396 "type={}", static_cast<int>(instr.atom.type.Value())); 411 "type={}", static_cast<int>(instr.atom.type.Value()));
397 412
398 const auto [real_address, base_address, descriptor] = 413 const auto [real_address, base_address, descriptor] =
@@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
403 } 418 }
404 419
405 const bool is_signed = 420 const bool is_signed =
406 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; 421 instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
407 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); 422 Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
408 Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, 423 SetRegister(bb, instr.gpr0,
409 GetRegister(instr.gpr20)); 424 SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
410 SetRegister(bb, instr.gpr0, std::move(value)); 425 GetRegister(instr.gpr20)));
411 break; 426 break;
412 } 427 }
413 case OpCode::Id::ATOMS: { 428 case OpCode::Id::ATOMS: {
@@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
421 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; 436 instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
422 const s32 offset = instr.atoms.GetImmediateOffset(); 437 const s32 offset = instr.atoms.GetImmediateOffset();
423 Node address = GetRegister(instr.gpr8); 438 Node address = GetRegister(instr.gpr8);
424 address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); 439 address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
425 Node value = 440 SetRegister(bb, instr.gpr0,
426 GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, 441 SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
427 GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); 442 GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
428 SetRegister(bb, instr.gpr0, std::move(value));
429 break; 443 break;
430 } 444 }
431 case OpCode::Id::AL2P: { 445 case OpCode::Id::AL2P: {
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5fcc9da60..3eee961f5 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -178,6 +178,20 @@ enum class OperationCode {
178 AtomicIOr, /// (memory, int) -> int 178 AtomicIOr, /// (memory, int) -> int
179 AtomicIXor, /// (memory, int) -> int 179 AtomicIXor, /// (memory, int) -> int
180 180
181 ReduceUAdd, /// (memory, uint) -> void
182 ReduceUMin, /// (memory, uint) -> void
183 ReduceUMax, /// (memory, uint) -> void
184 ReduceUAnd, /// (memory, uint) -> void
185 ReduceUOr, /// (memory, uint) -> void
186 ReduceUXor, /// (memory, uint) -> void
187
188 ReduceIAdd, /// (memory, int) -> void
189 ReduceIMin, /// (memory, int) -> void
190 ReduceIMax, /// (memory, int) -> void
191 ReduceIAnd, /// (memory, int) -> void
192 ReduceIOr, /// (memory, int) -> void
193 ReduceIXor, /// (memory, int) -> void
194
181 Branch, /// (uint branch_target) -> void 195 Branch, /// (uint branch_target) -> void
182 BranchIndirect, /// (uint branch_target) -> void 196 BranchIndirect, /// (uint branch_target) -> void
183 PushFlowStack, /// (uint branch_target) -> void 197 PushFlowStack, /// (uint branch_target) -> void