diff options
| author | 2020-04-15 15:03:49 -0400 | |
|---|---|---|
| committer | 2020-04-15 15:03:49 -0400 | |
| commit | e33196d4e7687dd29636decd4b52e01b10fe8984 (patch) | |
| tree | b20b84dc47b9ef48c8701951ead117f52252e6e5 /src/video_core/shader | |
| parent | Merge pull request #3670 from lioncash/reorder (diff) | |
| parent | shader/memory: Implement RED.E.ADD (diff) | |
| download | yuzu-e33196d4e7687dd29636decd4b52e01b10fe8984.tar.gz yuzu-e33196d4e7687dd29636decd4b52e01b10fe8984.tar.xz yuzu-e33196d4e7687dd29636decd4b52e01b10fe8984.zip | |
Merge pull request #3612 from ReinUsesLisp/red
shader/memory: Implement RED.E.ADD and minor changes to ATOM
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 100 | ||||
| -rw-r--r-- | src/video_core/shader/node.h | 14 |
2 files changed, 71 insertions, 43 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index b8f63922f..8112ead3e 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
| @@ -3,7 +3,9 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <utility> | ||
| 6 | #include <vector> | 7 | #include <vector> |
| 8 | |||
| 7 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 8 | 10 | ||
| 9 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| @@ -16,6 +18,7 @@ | |||
| 16 | 18 | ||
| 17 | namespace VideoCommon::Shader { | 19 | namespace VideoCommon::Shader { |
| 18 | 20 | ||
| 21 | using std::move; | ||
| 19 | using Tegra::Shader::AtomicOp; | 22 | using Tegra::Shader::AtomicOp; |
| 20 | using Tegra::Shader::AtomicType; | 23 | using Tegra::Shader::AtomicType; |
| 21 | using Tegra::Shader::Attribute; | 24 | using Tegra::Shader::Attribute; |
| @@ -27,29 +30,26 @@ using Tegra::Shader::StoreType; | |||
| 27 | 30 | ||
| 28 | namespace { | 31 | namespace { |
| 29 | 32 | ||
| 30 | Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { | 33 | OperationCode GetAtomOperation(AtomicOp op) { |
| 31 | const OperationCode operation_code = [op] { | 34 | switch (op) { |
| 32 | switch (op) { | 35 | case AtomicOp::Add: |
| 33 | case AtomicOp::Add: | 36 | return OperationCode::AtomicIAdd; |
| 34 | return OperationCode::AtomicIAdd; | 37 | case AtomicOp::Min: |
| 35 | case AtomicOp::Min: | 38 | return OperationCode::AtomicIMin; |
| 36 | return OperationCode::AtomicIMin; | 39 | case AtomicOp::Max: |
| 37 | case AtomicOp::Max: | 40 | return OperationCode::AtomicIMax; |
| 38 | return OperationCode::AtomicIMax; | 41 | case AtomicOp::And: |
| 39 | case AtomicOp::And: | 42 | return OperationCode::AtomicIAnd; |
| 40 | return OperationCode::AtomicIAnd; | 43 | case AtomicOp::Or: |
| 41 | case AtomicOp::Or: | 44 | return OperationCode::AtomicIOr; |
| 42 | return OperationCode::AtomicIOr; | 45 | case AtomicOp::Xor: |
| 43 | case AtomicOp::Xor: | 46 | return OperationCode::AtomicIXor; |
| 44 | return OperationCode::AtomicIXor; | 47 | case AtomicOp::Exch: |
| 45 | case AtomicOp::Exch: | 48 | return OperationCode::AtomicIExchange; |
| 46 | return OperationCode::AtomicIExchange; | 49 | default: |
| 47 | default: | 50 | UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); |
| 48 | UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); | 51 | return OperationCode::AtomicIAdd; |
| 49 | return OperationCode::AtomicIAdd; | 52 | } |
| 50 | } | ||
| 51 | }(); | ||
| 52 | return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); | ||
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { | 55 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { |
| @@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | |||
| 90 | 90 | ||
| 91 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | 91 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { |
| 92 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | 92 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); |
| 93 | offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); | 93 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); |
| 94 | return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), | 94 | return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); |
| 95 | Immediate(size)); | ||
| 96 | } | 95 | } |
| 97 | 96 | ||
| 98 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | 97 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { |
| 99 | Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); | 98 | Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); |
| 100 | offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); | 99 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); |
| 101 | return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), | 100 | return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), |
| 102 | std::move(offset), Immediate(size)); | 101 | Immediate(size)); |
| 103 | } | 102 | } |
| 104 | 103 | ||
| 105 | Node Sign16Extend(Node value) { | 104 | Node Sign16Extend(Node value) { |
| 106 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | 105 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); |
| 107 | Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); | 106 | Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); |
| 108 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | 107 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); |
| 109 | return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); | 108 | return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); |
| 110 | } | 109 | } |
| 111 | 110 | ||
| 112 | } // Anonymous namespace | 111 | } // Anonymous namespace |
| @@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 379 | 378 | ||
| 380 | if (IsUnaligned(type)) { | 379 | if (IsUnaligned(type)) { |
| 381 | const u32 mask = GetUnalignedMask(type); | 380 | const u32 mask = GetUnalignedMask(type); |
| 382 | value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); | 381 | value = InsertUnaligned(gmem, move(value), real_address, mask, size); |
| 383 | } | 382 | } |
| 384 | 383 | ||
| 385 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | 384 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); |
| 386 | } | 385 | } |
| 387 | break; | 386 | break; |
| 388 | } | 387 | } |
| 388 | case OpCode::Id::RED: { | ||
| 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); | ||
| 390 | UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); | ||
| 391 | const auto [real_address, base_address, descriptor] = | ||
| 392 | TrackGlobalMemory(bb, instr, true, true); | ||
| 393 | if (!real_address || !base_address) { | ||
| 394 | // Tracking failed, skip atomic. | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 398 | Node value = GetRegister(instr.gpr0); | ||
| 399 | bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); | ||
| 400 | break; | ||
| 401 | } | ||
| 389 | case OpCode::Id::ATOM: { | 402 | case OpCode::Id::ATOM: { |
| 390 | UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || | 403 | UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || |
| 391 | instr.atom.operation == AtomicOp::Dec || | 404 | instr.atom.operation == AtomicOp::Dec || |
| 392 | instr.atom.operation == AtomicOp::SafeAdd, | 405 | instr.atom.operation == AtomicOp::SafeAdd, |
| 393 | "operation={}", static_cast<int>(instr.atom.operation.Value())); | 406 | "operation={}", static_cast<int>(instr.atom.operation.Value())); |
| 394 | UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || | 407 | UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || |
| 395 | instr.atom.type == GlobalAtomicType::U64, | 408 | instr.atom.type == GlobalAtomicType::U64 || |
| 409 | instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || | ||
| 410 | instr.atom.type == GlobalAtomicType::F32_FTZ_RN, | ||
| 396 | "type={}", static_cast<int>(instr.atom.type.Value())); | 411 | "type={}", static_cast<int>(instr.atom.type.Value())); |
| 397 | 412 | ||
| 398 | const auto [real_address, base_address, descriptor] = | 413 | const auto [real_address, base_address, descriptor] = |
| @@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 403 | } | 418 | } |
| 404 | 419 | ||
| 405 | const bool is_signed = | 420 | const bool is_signed = |
| 406 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | 421 | instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; |
| 407 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | 422 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); |
| 408 | Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, | 423 | SetRegister(bb, instr.gpr0, |
| 409 | GetRegister(instr.gpr20)); | 424 | SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, |
| 410 | SetRegister(bb, instr.gpr0, std::move(value)); | 425 | GetRegister(instr.gpr20))); |
| 411 | break; | 426 | break; |
| 412 | } | 427 | } |
| 413 | case OpCode::Id::ATOMS: { | 428 | case OpCode::Id::ATOMS: { |
| @@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 421 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | 436 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; |
| 422 | const s32 offset = instr.atoms.GetImmediateOffset(); | 437 | const s32 offset = instr.atoms.GetImmediateOffset(); |
| 423 | Node address = GetRegister(instr.gpr8); | 438 | Node address = GetRegister(instr.gpr8); |
| 424 | address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); | 439 | address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); |
| 425 | Node value = | 440 | SetRegister(bb, instr.gpr0, |
| 426 | GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, | 441 | SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, |
| 427 | GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); | 442 | GetSharedMemory(move(address)), GetRegister(instr.gpr20))); |
| 428 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 429 | break; | 443 | break; |
| 430 | } | 444 | } |
| 431 | case OpCode::Id::AL2P: { | 445 | case OpCode::Id::AL2P: { |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5fcc9da60..3eee961f5 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
| @@ -178,6 +178,20 @@ enum class OperationCode { | |||
| 178 | AtomicIOr, /// (memory, int) -> int | 178 | AtomicIOr, /// (memory, int) -> int |
| 179 | AtomicIXor, /// (memory, int) -> int | 179 | AtomicIXor, /// (memory, int) -> int |
| 180 | 180 | ||
| 181 | ReduceUAdd, /// (memory, uint) -> void | ||
| 182 | ReduceUMin, /// (memory, uint) -> void | ||
| 183 | ReduceUMax, /// (memory, uint) -> void | ||
| 184 | ReduceUAnd, /// (memory, uint) -> void | ||
| 185 | ReduceUOr, /// (memory, uint) -> void | ||
| 186 | ReduceUXor, /// (memory, uint) -> void | ||
| 187 | |||
| 188 | ReduceIAdd, /// (memory, int) -> void | ||
| 189 | ReduceIMin, /// (memory, int) -> void | ||
| 190 | ReduceIMax, /// (memory, int) -> void | ||
| 191 | ReduceIAnd, /// (memory, int) -> void | ||
| 192 | ReduceIOr, /// (memory, int) -> void | ||
| 193 | ReduceIXor, /// (memory, int) -> void | ||
| 194 | |||
| 181 | Branch, /// (uint branch_target) -> void | 195 | Branch, /// (uint branch_target) -> void |
| 182 | BranchIndirect, /// (uint branch_target) -> void | 196 | BranchIndirect, /// (uint branch_target) -> void |
| 183 | PushFlowStack, /// (uint branch_target) -> void | 197 | PushFlowStack, /// (uint branch_target) -> void |