summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
authorGravatar bunnei2019-09-20 17:10:42 -0400
committerGravatar GitHub2019-09-20 17:10:42 -0400
commit88d857499b6168d7bcea9b91fa5bdd8b0144c07a (patch)
tree1efa1eddb43c0afd3003b449675997abc9a7637f /src/video_core/shader
parentMerge pull request #2784 from ReinUsesLisp/smem (diff)
parentshader_ir/warp: Implement SHFL (diff)
downloadyuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.tar.gz
yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.tar.xz
yuzu-88d857499b6168d7bcea9b91fa5bdd8b0144c07a.zip
Merge pull request #2855 from ReinUsesLisp/shfl
shader_ir/warp: Implement SHFL for Nvidia devices
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/decode/warp.cpp47
-rw-r--r--src/video_core/shader/node.h10
2 files changed, 57 insertions, 0 deletions
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index 04ca74f46..a8e481b3c 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -13,6 +13,7 @@ namespace VideoCommon::Shader {
13using Tegra::Shader::Instruction; 13using Tegra::Shader::Instruction;
14using Tegra::Shader::OpCode; 14using Tegra::Shader::OpCode;
15using Tegra::Shader::Pred; 15using Tegra::Shader::Pred;
16using Tegra::Shader::ShuffleOperation;
16using Tegra::Shader::VoteOperation; 17using Tegra::Shader::VoteOperation;
17 18
18namespace { 19namespace {
@@ -44,6 +45,52 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
44 SetPredicate(bb, instr.vote.dest_pred, vote); 45 SetPredicate(bb, instr.vote.dest_pred, vote);
45 break; 46 break;
46 } 47 }
48 case OpCode::Id::SHFL: {
49 Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
50 : GetRegister(instr.gpr39);
51 Node width = [&] {
52 // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This was
53 // derived by reversing Nvidia's math. It won't work in all cases due to SHFL having
54 // different parameters that don't properly map to GLSL's interface, but it should work
55 // for cases emitted by Nvidia's compiler.
56 if (instr.shfl.operation == ShuffleOperation::Up) {
57 return Operation(
58 OperationCode::ILogicalShiftRight,
59 Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
60 Immediate(8));
61 } else {
62 return Operation(OperationCode::ILogicalShiftRight,
63 Operation(OperationCode::IAdd, Immediate(0x201F),
64 Operation(OperationCode::INegate, std::move(mask))),
65 Immediate(8));
66 }
67 }();
68
69 const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
70 switch (instr.shfl.operation) {
71 case ShuffleOperation::Idx:
72 return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
73 case ShuffleOperation::Up:
74 return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
75 case ShuffleOperation::Down:
76 return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
77 case ShuffleOperation::Bfly:
78 return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
79 }
80 UNREACHABLE_MSG("Invalid SHFL operation: {}",
81 static_cast<u64>(instr.shfl.operation.Value()));
82 return {};
83 }();
84
85 // Setting the predicate before the register is intentional to avoid overwriting.
86 Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
87 : GetRegister(instr.gpr20);
88 SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
89 SetRegister(
90 bb, instr.gpr0,
91 Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
92 break;
93 }
47 default: 94 default:
48 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); 95 UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
49 break; 96 break;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 425111cc4..abf2cb1ab 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -181,6 +181,16 @@ enum class OperationCode {
181 VoteAny, /// (bool) -> bool 181 VoteAny, /// (bool) -> bool
182 VoteEqual, /// (bool) -> bool 182 VoteEqual, /// (bool) -> bool
183 183
184 ShuffleIndexed, /// (uint value, uint index, uint width) -> uint
185 ShuffleUp, /// (uint value, uint index, uint width) -> uint
186 ShuffleDown, /// (uint value, uint index, uint width) -> uint
187 ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
188
189 InRangeShuffleIndexed, /// (uint index, uint width) -> bool
190 InRangeShuffleUp, /// (uint index, uint width) -> bool
191 InRangeShuffleDown, /// (uint index, uint width) -> bool
192 InRangeShuffleButterfly, /// (uint index, uint width) -> bool
193
184 Amount, 194 Amount,
185}; 195};
186 196