diff options
| author | 2019-11-02 23:44:13 -0300 | |
|---|---|---|
| committer | 2019-11-07 20:08:41 -0300 | |
| commit | 08b2b1080a2e5794c65ebb999c435f6eb9d1aa26 (patch) | |
| tree | 07ff442f13d3d74b7d2cb84a62cc2de222a128eb /src/video_core/shader/decode | |
| parent | Merge pull request #3032 from ReinUsesLisp/simplify-control-flow-brx (diff) | |
| download | yuzu-08b2b1080a2e5794c65ebb999c435f6eb9d1aa26.tar.gz yuzu-08b2b1080a2e5794c65ebb999c435f6eb9d1aa26.tar.xz yuzu-08b2b1080a2e5794c65ebb999c435f6eb9d1aa26.zip | |
gl_shader_decompiler: Reimplement shuffles with platform-agnostic intrinsics
Diffstat (limited to 'src/video_core/shader/decode')
| -rw-r--r-- | src/video_core/shader/decode/warp.cpp | 68 |
1 file changed, 35 insertions, 33 deletions
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index fa8a250cc..c2875eb2b 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp | |||
| @@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation; | |||
| 17 | using Tegra::Shader::VoteOperation; | 17 | using Tegra::Shader::VoteOperation; |
| 18 | 18 | ||
| 19 | namespace { | 19 | namespace { |
| 20 | |||
| 20 | OperationCode GetOperationCode(VoteOperation vote_op) { | 21 | OperationCode GetOperationCode(VoteOperation vote_op) { |
| 21 | switch (vote_op) { | 22 | switch (vote_op) { |
| 22 | case VoteOperation::All: | 23 | case VoteOperation::All: |
| @@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) { | |||
| 30 | return OperationCode::VoteAll; | 31 | return OperationCode::VoteAll; |
| 31 | } | 32 | } |
| 32 | } | 33 | } |
| 34 | |||
| 33 | } // Anonymous namespace | 35 | } // Anonymous namespace |
| 34 | 36 | ||
| 35 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { |
| @@ -46,50 +48,50 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | |||
| 46 | break; | 48 | break; |
| 47 | } | 49 | } |
| 48 | case OpCode::Id::SHFL: { | 50 | case OpCode::Id::SHFL: { |
| 49 | Node width = [this, instr] { | 51 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) |
| 50 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | 52 | : GetRegister(instr.gpr39); |
| 51 | : GetRegister(instr.gpr39); | 53 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) |
| 54 | : GetRegister(instr.gpr20); | ||
| 52 | 55 | ||
| 53 | // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has | 56 | Node thread_id = Operation(OperationCode::ThreadId); |
| 54 | // been done reversing Nvidia's math. It won't work on all cases due to SHFL having | 57 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); |
| 55 | // different parameters that don't properly map to GLSL's interface, but it should work | 58 | Node seg_mask = BitfieldExtract(mask, 8, 16); |
| 56 | // for cases emitted by Nvidia's compiler. | 59 | |
| 57 | if (instr.shfl.operation == ShuffleOperation::Up) { | 60 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); |
| 58 | return Operation( | 61 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); |
| 59 | OperationCode::ILogicalShiftRight, | 62 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, |
| 60 | Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)), | 63 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); |
| 61 | Immediate(8)); | ||
| 62 | } else { | ||
| 63 | return Operation(OperationCode::ILogicalShiftRight, | ||
| 64 | Operation(OperationCode::IAdd, Immediate(0x201F), | ||
| 65 | Operation(OperationCode::INegate, std::move(mask))), | ||
| 66 | Immediate(8)); | ||
| 67 | } | ||
| 68 | }(); | ||
| 69 | 64 | ||
| 70 | const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> { | 65 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { |
| 71 | switch (instr.shfl.operation) { | 66 | switch (instr.shfl.operation) { |
| 72 | case ShuffleOperation::Idx: | 67 | case ShuffleOperation::Idx: |
| 73 | return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed}; | 68 | return Operation(OperationCode::IBitwiseOr, |
| 74 | case ShuffleOperation::Up: | 69 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), |
| 75 | return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp}; | 70 | min_thread_id); |
| 76 | case ShuffleOperation::Down: | 71 | case ShuffleOperation::Down: |
| 77 | return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown}; | 72 | return Operation(OperationCode::IAdd, thread_id, index); |
| 73 | case ShuffleOperation::Up: | ||
| 74 | return Operation(OperationCode::IAdd, thread_id, | ||
| 75 | Operation(OperationCode::INegate, index)); | ||
| 78 | case ShuffleOperation::Bfly: | 76 | case ShuffleOperation::Bfly: |
| 79 | return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly}; | 77 | return Operation(OperationCode::IBitwiseXor, thread_id, index); |
| 80 | } | 78 | } |
| 81 | UNREACHABLE_MSG("Invalid SHFL operation: {}", | 79 | UNREACHABLE(); |
| 82 | static_cast<u64>(instr.shfl.operation.Value())); | 80 | return Immediate(0U); |
| 83 | return {}; | ||
| 84 | }(); | 81 | }(); |
| 85 | 82 | ||
| 86 | // Setting the predicate before the register is intentional to avoid overwriting. | 83 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { |
| 87 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | 84 | if (instr.shfl.operation == ShuffleOperation::Up) { |
| 88 | : GetRegister(instr.gpr20); | 85 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); |
| 89 | SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width)); | 86 | } else { |
| 87 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 88 | } | ||
| 89 | }(); | ||
| 90 | |||
| 91 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 90 | SetRegister( | 92 | SetRegister( |
| 91 | bb, instr.gpr0, | 93 | bb, instr.gpr0, |
| 92 | Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width))); | 94 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); |
| 93 | break; | 95 | break; |
| 94 | } | 96 | } |
| 95 | default: | 97 | default: |