diff options
Diffstat (limited to '')
4 files changed, 103 insertions, 7 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index f2326dea1..f7f102f53 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | |||
| @@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) { | |||
| 26 | case IR::Opcode::LocalInvocationId: | 26 | case IR::Opcode::LocalInvocationId: |
| 27 | info.uses_local_invocation_id = true; | 27 | info.uses_local_invocation_id = true; |
| 28 | break; | 28 | break; |
| 29 | case IR::Opcode::CompositeConstructF16x2: | ||
| 30 | case IR::Opcode::CompositeConstructF16x3: | ||
| 31 | case IR::Opcode::CompositeConstructF16x4: | ||
| 32 | case IR::Opcode::CompositeExtractF16x2: | ||
| 33 | case IR::Opcode::CompositeExtractF16x3: | ||
| 34 | case IR::Opcode::CompositeExtractF16x4: | ||
| 35 | case IR::Opcode::BitCastU16F16: | ||
| 36 | case IR::Opcode::BitCastF16U16: | ||
| 37 | case IR::Opcode::PackFloat2x16: | ||
| 38 | case IR::Opcode::UnpackFloat2x16: | ||
| 39 | case IR::Opcode::ConvertS16F16: | ||
| 40 | case IR::Opcode::ConvertS32F16: | ||
| 41 | case IR::Opcode::ConvertS64F16: | ||
| 42 | case IR::Opcode::ConvertU16F16: | ||
| 43 | case IR::Opcode::ConvertU32F16: | ||
| 44 | case IR::Opcode::ConvertU64F16: | ||
| 29 | case IR::Opcode::FPAbs16: | 45 | case IR::Opcode::FPAbs16: |
| 30 | case IR::Opcode::FPAdd16: | 46 | case IR::Opcode::FPAdd16: |
| 31 | case IR::Opcode::FPCeil16: | 47 | case IR::Opcode::FPCeil16: |
| @@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) { | |||
| 36 | case IR::Opcode::FPRoundEven16: | 52 | case IR::Opcode::FPRoundEven16: |
| 37 | case IR::Opcode::FPSaturate16: | 53 | case IR::Opcode::FPSaturate16: |
| 38 | case IR::Opcode::FPTrunc16: | 54 | case IR::Opcode::FPTrunc16: |
| 39 | info.uses_fp16; | 55 | info.uses_fp16 = true; |
| 40 | break; | 56 | break; |
| 41 | case IR::Opcode::FPAbs64: | 57 | case IR::Opcode::FPAbs64: |
| 42 | case IR::Opcode::FPAdd64: | 58 | case IR::Opcode::FPAdd64: |
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp index 9eb61b54c..4d4e88259 100644 --- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp +++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp | |||
| @@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) { | |||
| 104 | bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { | 104 | bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { |
| 105 | /* | 105 | /* |
| 106 | * We are looking for this pattern: | 106 | * We are looking for this pattern: |
| 107 | * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) | 107 | * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 |
| 108 | * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) | 108 | * %rhs_mul = IMul32 %rhs_bfe, %factor_b |
| 109 | * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) | 109 | * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 |
| 110 | * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) | 110 | * %lhs_mul = IMul32 %lhs_bfe, %factor_b |
| 111 | * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) | 111 | * %lhs_shl = ShiftLeftLogical32 %lhs_mul, #16 |
| 112 | * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) | 112 | * %result = IAdd32 %lhs_shl, %rhs_mul |
| 113 | * | 113 | * |
| 114 | * And replacing it with | 114 | * And replacing it with |
| 115 | * %result = IMul32 %factor_a, %factor_b | 115 | * %result = IMul32 %factor_a, %factor_b |
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp new file mode 100644 index 000000000..c7032f168 --- /dev/null +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | |||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/microinstruction.h" | ||
| 9 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 10 | |||
| 11 | namespace Shader::Optimization { | ||
| 12 | namespace { | ||
| 13 | IR::Opcode Replace(IR::Opcode op) { /* Returns the opcode that should stand in for `op`: each 16-bit float opcode listed below maps to the 32-bit opcode paired with it; anything else is returned as-is. */ | ||
| 14 | switch (op) { | ||
| 15 | case IR::Opcode::FPAbs16: /* Floating-point arithmetic and rounding opcodes: FP*16 -> FP*32. */ | ||
| 16 | return IR::Opcode::FPAbs32; | ||
| 17 | case IR::Opcode::FPAdd16: | ||
| 18 | return IR::Opcode::FPAdd32; | ||
| 19 | case IR::Opcode::FPCeil16: | ||
| 20 | return IR::Opcode::FPCeil32; | ||
| 21 | case IR::Opcode::FPFloor16: | ||
| 22 | return IR::Opcode::FPFloor32; | ||
| 23 | case IR::Opcode::FPFma16: | ||
| 24 | return IR::Opcode::FPFma32; | ||
| 25 | case IR::Opcode::FPMul16: | ||
| 26 | return IR::Opcode::FPMul32; | ||
| 27 | case IR::Opcode::FPNeg16: | ||
| 28 | return IR::Opcode::FPNeg32; | ||
| 29 | case IR::Opcode::FPRoundEven16: | ||
| 30 | return IR::Opcode::FPRoundEven32; | ||
| 31 | case IR::Opcode::FPSaturate16: | ||
| 32 | return IR::Opcode::FPSaturate32; | ||
| 33 | case IR::Opcode::FPTrunc16: | ||
| 34 | return IR::Opcode::FPTrunc32; | ||
| 35 | case IR::Opcode::CompositeConstructF16x2: /* Composite construct/extract opcodes: F16xN -> F32xN, same N. */ | ||
| 36 | return IR::Opcode::CompositeConstructF32x2; | ||
| 37 | case IR::Opcode::CompositeConstructF16x3: | ||
| 38 | return IR::Opcode::CompositeConstructF32x3; | ||
| 39 | case IR::Opcode::CompositeConstructF16x4: | ||
| 40 | return IR::Opcode::CompositeConstructF32x4; | ||
| 41 | case IR::Opcode::CompositeExtractF16x2: | ||
| 42 | return IR::Opcode::CompositeExtractF32x2; | ||
| 43 | case IR::Opcode::CompositeExtractF16x3: | ||
| 44 | return IR::Opcode::CompositeExtractF32x3; | ||
| 45 | case IR::Opcode::CompositeExtractF16x4: | ||
| 46 | return IR::Opcode::CompositeExtractF32x4; | ||
| 47 | case IR::Opcode::ConvertS16F16: /* Conversion opcodes: Convert{S,U}{16,32,64}F16 -> the F32 opcode with the same prefix. */ | ||
| 48 | return IR::Opcode::ConvertS16F32; | ||
| 49 | case IR::Opcode::ConvertS32F16: | ||
| 50 | return IR::Opcode::ConvertS32F32; | ||
| 51 | case IR::Opcode::ConvertS64F16: | ||
| 52 | return IR::Opcode::ConvertS64F32; | ||
| 53 | case IR::Opcode::ConvertU16F16: | ||
| 54 | return IR::Opcode::ConvertU16F32; | ||
| 55 | case IR::Opcode::ConvertU32F16: | ||
| 56 | return IR::Opcode::ConvertU32F32; | ||
| 57 | case IR::Opcode::ConvertU64F16: | ||
| 58 | return IR::Opcode::ConvertU64F32; | ||
| 59 | case IR::Opcode::PackFloat2x16: /* Pack/unpack use the *Half2x16 opcodes rather than a ...32 name; presumably these operate on F32 operands - TODO confirm. */ | ||
| 60 | return IR::Opcode::PackHalf2x16; | ||
| 61 | case IR::Opcode::UnpackFloat2x16: | ||
| 62 | return IR::Opcode::UnpackHalf2x16; | ||
| 63 | default: /* No replacement listed: hand the opcode back untouched. NOTE(review): BitCastU16F16 and BitCastF16U16 have no case here and so pass through unchanged - confirm that is intended. */ | ||
| 64 | return op; | ||
| 65 | } | ||
| 66 | } | ||
| 67 | } // Anonymous namespace | ||
| 68 | |||
| 69 | void LowerFp16ToFp32(IR::Program& program) { /* Rewrites, in place, the opcode of every instruction in `program` using the Replace() mapping. */ | ||
| 70 | for (IR::Function& function : program.functions) { /* Every function in the program... */ | ||
| 71 | for (IR::Block* const block : function.blocks) { /* ...every block of that function... */ | ||
| 72 | for (IR::Inst& inst : block->Instructions()) { /* ...every instruction of that block. */ | ||
| 73 | inst.ReplaceOpcode(Replace(inst.Opcode())); /* Called for every instruction; when Replace() has no mapping it returns the same opcode, so ReplaceOpcode receives the instruction's current opcode. */ | ||
| 74 | } | ||
| 75 | } | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | } // namespace Shader::Optimization | ||
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h index 89e5811d3..38106308c 100644 --- a/src/shader_recompiler/ir_opt/passes.h +++ b/src/shader_recompiler/ir_opt/passes.h | |||
| @@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block); | |||
| 24 | void DeadCodeEliminationPass(IR::Block& block); | 24 | void DeadCodeEliminationPass(IR::Block& block); |
| 25 | void GlobalMemoryToStorageBufferPass(IR::Program& program); | 25 | void GlobalMemoryToStorageBufferPass(IR::Program& program); |
| 26 | void IdentityRemovalPass(IR::Function& function); | 26 | void IdentityRemovalPass(IR::Function& function); |
| 27 | void LowerFp16ToFp32(IR::Program& program); | ||
| 27 | void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); | 28 | void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); |
| 28 | void VerificationPass(const IR::Function& function); | 29 | void VerificationPass(const IR::Function& function); |
| 29 | 30 | ||