summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ReinUsesLisp 2021-06-23 01:33:42 -0300
committer ameerj 2021-07-22 21:51:39 -0400
commit d8d5501459d6c8b4c39307d293b0f40834dce8f3 (patch)
tree 5c44ce2b967f66b1362c8a00b154b7fb1bc2b3ce
parent shader: Teach global memory base tracker to follow vectors (diff)
download yuzu-d8d5501459d6c8b4c39307d293b0f40834dce8f3.tar.gz
yuzu-d8d5501459d6c8b4c39307d293b0f40834dce8f3.tar.xz
yuzu-d8d5501459d6c8b4c39307d293b0f40834dce8f3.zip
shader: Add int64 to int32 lowering pass
Diffstat (limited to '')
-rw-r--r-- src/shader_recompiler/CMakeLists.txt 1
-rw-r--r-- src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp 216
-rw-r--r-- src/shader_recompiler/ir_opt/passes.h 1
3 files changed, 218 insertions, 0 deletions
diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index f6719ad9d..3b5708cb9 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -219,6 +219,7 @@ add_library(shader_recompiler STATIC
219 ir_opt/global_memory_to_storage_buffer_pass.cpp 219 ir_opt/global_memory_to_storage_buffer_pass.cpp
220 ir_opt/identity_removal_pass.cpp 220 ir_opt/identity_removal_pass.cpp
221 ir_opt/lower_fp16_to_fp32.cpp 221 ir_opt/lower_fp16_to_fp32.cpp
222 ir_opt/lower_int64_to_int32.cpp
222 ir_opt/passes.h 223 ir_opt/passes.h
223 ir_opt/ssa_rewrite_pass.cpp 224 ir_opt/ssa_rewrite_pass.cpp
224 ir_opt/texture_pass.cpp 225 ir_opt/texture_pass.cpp
diff --git a/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
new file mode 100644
index 000000000..787a64f93
--- /dev/null
+++ b/src/shader_recompiler/ir_opt/lower_int64_to_int32.cpp
@@ -0,0 +1,216 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <ranges>
6#include <utility>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/ir_emitter.h"
11#include "shader_recompiler/frontend/ir/program.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::Optimization {
15namespace {
16std::pair<IR::U32, IR::U32> Unpack(IR::IREmitter& ir, const IR::Value& packed) {
17 if (packed.IsImmediate()) {
18 const u64 value{packed.U64()};
19 return {
20 ir.Imm32(static_cast<u32>(value)),
21 ir.Imm32(static_cast<u32>(value >> 32)),
22 };
23 } else {
24 return std::pair<IR::U32, IR::U32>{
25 ir.CompositeExtract(packed, 0u),
26 ir.CompositeExtract(packed, 1u),
27 };
28 }
29}
30
31void IAdd64To32(IR::Block& block, IR::Inst& inst) {
32 if (inst.HasAssociatedPseudoOperation()) {
33 throw NotImplementedException("IAdd64 emulation with pseudo instructions");
34 }
35 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
36 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
37 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
38
39 const IR::U32 ret_lo{ir.IAdd(a_lo, b_lo)};
40 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(ret_lo), ir.Imm32(1u), ir.Imm32(0u))};
41
42 const IR::U32 ret_hi{ir.IAdd(ir.IAdd(a_hi, b_hi), carry)};
43 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
44}
45
46void ISub64To32(IR::Block& block, IR::Inst& inst) {
47 if (inst.HasAssociatedPseudoOperation()) {
48 throw NotImplementedException("ISub64 emulation with pseudo instructions");
49 }
50 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
51 const auto [a_lo, a_hi]{Unpack(ir, inst.Arg(0))};
52 const auto [b_lo, b_hi]{Unpack(ir, inst.Arg(1))};
53
54 const IR::U32 ret_lo{ir.ISub(a_lo, b_lo)};
55 const IR::U1 underflow{ir.IGreaterThan(ret_lo, a_lo, false)};
56 const IR::U32 underflow_bit{ir.Select(underflow, ir.Imm32(1u), ir.Imm32(0u))};
57
58 const IR::U32 ret_hi{ir.ISub(ir.ISub(a_hi, b_hi), underflow_bit)};
59 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
60}
61
62void INeg64To32(IR::Block& block, IR::Inst& inst) {
63 if (inst.HasAssociatedPseudoOperation()) {
64 throw NotImplementedException("INeg64 emulation with pseudo instructions");
65 }
66 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
67 auto [lo, hi]{Unpack(ir, inst.Arg(0))};
68 lo = ir.BitwiseNot(lo);
69 hi = ir.BitwiseNot(hi);
70
71 lo = ir.IAdd(lo, ir.Imm32(1));
72
73 const IR::U32 carry{ir.Select(ir.GetCarryFromOp(lo), ir.Imm32(1u), ir.Imm32(0u))};
74 hi = ir.IAdd(hi, carry);
75
76 inst.ReplaceUsesWith(ir.CompositeConstruct(lo, hi));
77}
78
79void ShiftLeftLogical64To32(IR::Block& block, IR::Inst& inst) {
80 if (inst.HasAssociatedPseudoOperation()) {
81 throw NotImplementedException("ShiftLeftLogical64 emulation with pseudo instructions");
82 }
83 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
84 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
85 const IR::U32 shift{inst.Arg(1)};
86
87 const IR::U32 shifted_lo{ir.ShiftLeftLogical(lo, shift)};
88 const IR::U32 shifted_hi{ir.ShiftLeftLogical(hi, shift)};
89
90 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
91 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
92 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
93
94 const IR::U32 long_ret_lo{ir.Imm32(0)};
95 const IR::U32 long_ret_hi{ir.ShiftLeftLogical(lo, inv_shift)};
96
97 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
98 const IR::U32 lo_extract{ir.BitFieldExtract(lo, shift_complement, shift, false)};
99 const IR::U32 short_ret_lo{shifted_lo};
100 const IR::U32 short_ret_hi{ir.BitwiseOr(shifted_hi, lo_extract)};
101
102 const IR::U32 zero_ret_lo{lo};
103 const IR::U32 zero_ret_hi{hi};
104
105 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
106 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
107
108 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
109 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
110 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
111}
112
113void ShiftRightLogical64To32(IR::Block& block, IR::Inst& inst) {
114 if (inst.HasAssociatedPseudoOperation()) {
115 throw NotImplementedException("ShiftRightLogical64 emulation with pseudo instructions");
116 }
117 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
118 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
119 const IR::U32 shift{inst.Arg(1)};
120
121 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
122 const IR::U32 shifted_hi{ir.ShiftRightLogical(hi, shift)};
123
124 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
125 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
126 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
127
128 const IR::U32 long_ret_hi{ir.Imm32(0)};
129 const IR::U32 long_ret_lo{ir.ShiftRightLogical(hi, inv_shift)};
130
131 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
132 const IR::U32 short_hi_extract{ir.BitFieldExtract(hi, ir.Imm32(0), shift)};
133 const IR::U32 short_ret_hi{shifted_hi};
134 const IR::U32 short_ret_lo{
135 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
136
137 const IR::U32 zero_ret_lo{lo};
138 const IR::U32 zero_ret_hi{hi};
139
140 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
141 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
142
143 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
144 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
145 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
146}
147
148void ShiftRightArithmetic64To32(IR::Block& block, IR::Inst& inst) {
149 if (inst.HasAssociatedPseudoOperation()) {
150 throw NotImplementedException("ShiftRightArithmetic64 emulation with pseudo instructions");
151 }
152 IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
153 const auto [lo, hi]{Unpack(ir, inst.Arg(0))};
154 const IR::U32 shift{inst.Arg(1)};
155
156 const IR::U32 shifted_lo{ir.ShiftRightLogical(lo, shift)};
157 const IR::U32 shifted_hi{ir.ShiftRightArithmetic(hi, shift)};
158
159 const IR::U32 sign_extension{ir.ShiftRightArithmetic(hi, ir.Imm32(31))};
160
161 const IR::U32 inv_shift{ir.ISub(shift, ir.Imm32(32))};
162 const IR::U1 is_long{ir.IGreaterThanEqual(inv_shift, ir.Imm32(0), true)};
163 const IR::U1 is_zero{ir.IEqual(shift, ir.Imm32(0))};
164
165 const IR::U32 long_ret_hi{sign_extension};
166 const IR::U32 long_ret_lo{ir.ShiftRightArithmetic(hi, inv_shift)};
167
168 const IR::U32 shift_complement{ir.ISub(ir.Imm32(32), shift)};
169 const IR::U32 short_hi_extract(ir.BitFieldExtract(hi, ir.Imm32(0), shift));
170 const IR::U32 short_ret_hi{shifted_hi};
171 const IR::U32 short_ret_lo{
172 ir.BitFieldInsert(shifted_lo, short_hi_extract, shift_complement, shift)};
173
174 const IR::U32 zero_ret_lo{lo};
175 const IR::U32 zero_ret_hi{hi};
176
177 const IR::U32 non_zero_lo{ir.Select(is_long, long_ret_lo, short_ret_lo)};
178 const IR::U32 non_zero_hi{ir.Select(is_long, long_ret_hi, short_ret_hi)};
179
180 const IR::U32 ret_lo{ir.Select(is_zero, zero_ret_lo, non_zero_lo)};
181 const IR::U32 ret_hi{ir.Select(is_zero, zero_ret_hi, non_zero_hi)};
182 inst.ReplaceUsesWith(ir.CompositeConstruct(ret_lo, ret_hi));
183}
184
185void Lower(IR::Block& block, IR::Inst& inst) {
186 switch (inst.GetOpcode()) {
187 case IR::Opcode::PackUint2x32:
188 case IR::Opcode::UnpackUint2x32:
189 return inst.ReplaceOpcode(IR::Opcode::Identity);
190 case IR::Opcode::IAdd64:
191 return IAdd64To32(block, inst);
192 case IR::Opcode::ISub64:
193 return ISub64To32(block, inst);
194 case IR::Opcode::INeg64:
195 return INeg64To32(block, inst);
196 case IR::Opcode::ShiftLeftLogical64:
197 return ShiftLeftLogical64To32(block, inst);
198 case IR::Opcode::ShiftRightLogical64:
199 return ShiftRightLogical64To32(block, inst);
200 case IR::Opcode::ShiftRightArithmetic64:
201 return ShiftRightArithmetic64To32(block, inst);
202 default:
203 break;
204 }
205}
206} // Anonymous namespace
207
208void LowerInt64ToInt32(IR::Program& program) {
209 for (IR::Block* const block : program.post_order_blocks | std::views::reverse) {
210 for (IR::Inst& inst : block->Instructions()) {
211 Lower(*block, inst);
212 }
213 }
214}
215
216} // namespace Shader::Optimization
diff --git a/src/shader_recompiler/ir_opt/passes.h b/src/shader_recompiler/ir_opt/passes.h
index 5ebde49ea..2f89b1ea0 100644
--- a/src/shader_recompiler/ir_opt/passes.h
+++ b/src/shader_recompiler/ir_opt/passes.h
@@ -18,6 +18,7 @@ void DeadCodeEliminationPass(IR::Program& program);
18void GlobalMemoryToStorageBufferPass(IR::Program& program); 18void GlobalMemoryToStorageBufferPass(IR::Program& program);
19void IdentityRemovalPass(IR::Program& program); 19void IdentityRemovalPass(IR::Program& program);
20void LowerFp16ToFp32(IR::Program& program); 20void LowerFp16ToFp32(IR::Program& program);
21void LowerInt64ToInt32(IR::Program& program);
21void SsaRewritePass(IR::Program& program); 22void SsaRewritePass(IR::Program& program);
22void TexturePass(Environment& env, IR::Program& program); 23void TexturePass(Environment& env, IR::Program& program);
23void VerificationPass(const IR::Program& program); 24void VerificationPass(const IR::Program& program);