summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/ir_opt
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/ir_opt')
-rw-r--r-- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp 18
-rw-r--r-- src/shader_recompiler/ir_opt/constant_propagation_pass.cpp 12
-rw-r--r-- src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp 79
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h 1
4 files changed, 103 insertions, 7 deletions
diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
index f2326dea1..f7f102f53 100644
--- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
+++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
@@ -26,6 +26,22 @@ void Visit(Info& info, IR::Inst& inst) {
26 case IR::Opcode::LocalInvocationId: 26 case IR::Opcode::LocalInvocationId:
27 info.uses_local_invocation_id = true; 27 info.uses_local_invocation_id = true;
28 break; 28 break;
29 case IR::Opcode::CompositeConstructF16x2:
30 case IR::Opcode::CompositeConstructF16x3:
31 case IR::Opcode::CompositeConstructF16x4:
32 case IR::Opcode::CompositeExtractF16x2:
33 case IR::Opcode::CompositeExtractF16x3:
34 case IR::Opcode::CompositeExtractF16x4:
35 case IR::Opcode::BitCastU16F16:
36 case IR::Opcode::BitCastF16U16:
37 case IR::Opcode::PackFloat2x16:
38 case IR::Opcode::UnpackFloat2x16:
39 case IR::Opcode::ConvertS16F16:
40 case IR::Opcode::ConvertS32F16:
41 case IR::Opcode::ConvertS64F16:
42 case IR::Opcode::ConvertU16F16:
43 case IR::Opcode::ConvertU32F16:
44 case IR::Opcode::ConvertU64F16:
29 case IR::Opcode::FPAbs16: 45 case IR::Opcode::FPAbs16:
30 case IR::Opcode::FPAdd16: 46 case IR::Opcode::FPAdd16:
31 case IR::Opcode::FPCeil16: 47 case IR::Opcode::FPCeil16:
@@ -36,7 +52,7 @@ void Visit(Info& info, IR::Inst& inst) {
36 case IR::Opcode::FPRoundEven16: 52 case IR::Opcode::FPRoundEven16:
37 case IR::Opcode::FPSaturate16: 53 case IR::Opcode::FPSaturate16:
38 case IR::Opcode::FPTrunc16: 54 case IR::Opcode::FPTrunc16:
39 info.uses_fp16; 55 info.uses_fp16 = true;
40 break; 56 break;
41 case IR::Opcode::FPAbs64: 57 case IR::Opcode::FPAbs64:
42 case IR::Opcode::FPAdd64: 58 case IR::Opcode::FPAdd64:
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 9eb61b54c..4d4e88259 100644
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -104,12 +104,12 @@ void FoldGetPred(IR::Inst& inst) {
104bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { 104bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
105 /* 105 /*
106 * We are looking for this pattern: 106 * We are looking for this pattern:
107 * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16 (uses: 1) 107 * %rhs_bfe = BitFieldUExtract %factor_a, #0, #16
108 * %rhs_mul = IMul32 %rhs_bfe, %factor_b (uses: 1) 108 * %rhs_mul = IMul32 %rhs_bfe, %factor_b
109 * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16 (uses: 1) 109 * %lhs_bfe = BitFieldUExtract %factor_a, #16, #16
110 * %rhs_mul = IMul32 %lhs_bfe, %factor_b (uses: 1) 110 * %rhs_mul = IMul32 %lhs_bfe, %factor_b
111 * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16 (uses: 1) 111 * %lhs_shl = ShiftLeftLogical32 %rhs_mul, #16
112 * %result = IAdd32 %lhs_shl, %rhs_mul (uses: 10) 112 * %result = IAdd32 %lhs_shl, %rhs_mul
113 * 113 *
114 * And replacing it with 114 * And replacing it with
115 * %result = IMul32 %factor_a, %factor_b 115 * %result = IMul32 %factor_a, %factor_b
diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
new file mode 100644
index 000000000..c7032f168
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp
@@ -0,0 +1,79 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/microinstruction.h"
9#include "shader_recompiler/ir_opt/passes.h"
10
11namespace Shader::Optimization {
12namespace {
13IR::Opcode Replace(IR::Opcode op) {
14 switch (op) {
15 case IR::Opcode::FPAbs16:
16 return IR::Opcode::FPAbs32;
17 case IR::Opcode::FPAdd16:
18 return IR::Opcode::FPAdd32;
19 case IR::Opcode::FPCeil16:
20 return IR::Opcode::FPCeil32;
21 case IR::Opcode::FPFloor16:
22 return IR::Opcode::FPFloor32;
23 case IR::Opcode::FPFma16:
24 return IR::Opcode::FPFma32;
25 case IR::Opcode::FPMul16:
26 return IR::Opcode::FPMul32;
27 case IR::Opcode::FPNeg16:
28 return IR::Opcode::FPNeg32;
29 case IR::Opcode::FPRoundEven16:
30 return IR::Opcode::FPRoundEven32;
31 case IR::Opcode::FPSaturate16:
32 return IR::Opcode::FPSaturate32;
33 case IR::Opcode::FPTrunc16:
34 return IR::Opcode::FPTrunc32;
35 case IR::Opcode::CompositeConstructF16x2:
36 return IR::Opcode::CompositeConstructF32x2;
37 case IR::Opcode::CompositeConstructF16x3:
38 return IR::Opcode::CompositeConstructF32x3;
39 case IR::Opcode::CompositeConstructF16x4:
40 return IR::Opcode::CompositeConstructF32x4;
41 case IR::Opcode::CompositeExtractF16x2:
42 return IR::Opcode::CompositeExtractF32x2;
43 case IR::Opcode::CompositeExtractF16x3:
44 return IR::Opcode::CompositeExtractF32x3;
45 case IR::Opcode::CompositeExtractF16x4:
46 return IR::Opcode::CompositeExtractF32x4;
47 case IR::Opcode::ConvertS16F16:
48 return IR::Opcode::ConvertS16F32;
49 case IR::Opcode::ConvertS32F16:
50 return IR::Opcode::ConvertS32F32;
51 case IR::Opcode::ConvertS64F16:
52 return IR::Opcode::ConvertS64F32;
53 case IR::Opcode::ConvertU16F16:
54 return IR::Opcode::ConvertU16F32;
55 case IR::Opcode::ConvertU32F16:
56 return IR::Opcode::ConvertU32F32;
57 case IR::Opcode::ConvertU64F16:
58 return IR::Opcode::ConvertU64F32;
59 case IR::Opcode::PackFloat2x16:
60 return IR::Opcode::PackHalf2x16;
61 case IR::Opcode::UnpackFloat2x16:
62 return IR::Opcode::UnpackHalf2x16;
63 default:
64 return op;
65 }
66}
67} // Anonymous namespace
68
69void LowerFp16ToFp32(IR::Program& program) {
70 for (IR::Function& function : program.functions) {
71 for (IR::Block* const block : function.blocks) {
72 for (IR::Inst& inst : block->Instructions()) {
73 inst.ReplaceOpcode(Replace(inst.Opcode()));
74 }
75 }
76 }
77}
78
79} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 89e5811d3..38106308c 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -24,6 +24,7 @@ void ConstantPropagationPass(IR::Block& block);
24void DeadCodeEliminationPass(IR::Block& block); 24void DeadCodeEliminationPass(IR::Block& block);
25void GlobalMemoryToStorageBufferPass(IR::Program& program); 25void GlobalMemoryToStorageBufferPass(IR::Program& program);
26void IdentityRemovalPass(IR::Function& function); 26void IdentityRemovalPass(IR::Function& function);
27void LowerFp16ToFp32(IR::Program& program);
27void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); 28void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
28void VerificationPass(const IR::Function& function); 29void VerificationPass(const IR::Function& function);
29 30