Diffstat (limited to 'src/shader_recompiler/frontend')
-rw-r--r--  src/shader_recompiler/frontend/ir/abstract_syntax_list.h  58
-rw-r--r--  src/shader_recompiler/frontend/ir/attribute.cpp  454
-rw-r--r--  src/shader_recompiler/frontend/ir/attribute.h  250
-rw-r--r--  src/shader_recompiler/frontend/ir/basic_block.cpp  149
-rw-r--r--  src/shader_recompiler/frontend/ir/basic_block.h  185
-rw-r--r--  src/shader_recompiler/frontend/ir/breadth_first_search.h  56
-rw-r--r--  src/shader_recompiler/frontend/ir/condition.cpp  29
-rw-r--r--  src/shader_recompiler/frontend/ir/condition.h  60
-rw-r--r--  src/shader_recompiler/frontend/ir/flow_test.cpp  83
-rw-r--r--  src/shader_recompiler/frontend/ir/flow_test.h  62
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.cpp  2017
-rw-r--r--  src/shader_recompiler/frontend/ir/ir_emitter.h  413
-rw-r--r--  src/shader_recompiler/frontend/ir/microinstruction.cpp  411
-rw-r--r--  src/shader_recompiler/frontend/ir/modifiers.h  49
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.cpp  15
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.h  110
-rw-r--r--  src/shader_recompiler/frontend/ir/opcodes.inc  550
-rw-r--r--  src/shader_recompiler/frontend/ir/patch.cpp  28
-rw-r--r--  src/shader_recompiler/frontend/ir/patch.h  149
-rw-r--r--  src/shader_recompiler/frontend/ir/post_order.cpp  46
-rw-r--r--  src/shader_recompiler/frontend/ir/post_order.h  14
-rw-r--r--  src/shader_recompiler/frontend/ir/pred.h  44
-rw-r--r--  src/shader_recompiler/frontend/ir/program.cpp  32
-rw-r--r--  src/shader_recompiler/frontend/ir/program.h  35
-rw-r--r--  src/shader_recompiler/frontend/ir/reg.h  332
-rw-r--r--  src/shader_recompiler/frontend/ir/type.cpp  38
-rw-r--r--  src/shader_recompiler/frontend/ir/type.h  61
-rw-r--r--  src/shader_recompiler/frontend/ir/value.cpp  99
-rw-r--r--  src/shader_recompiler/frontend/ir/value.h  398
-rw-r--r--  src/shader_recompiler/frontend/maxwell/control_flow.cpp  642
-rw-r--r--  src/shader_recompiler/frontend/maxwell/control_flow.h  169
-rw-r--r--  src/shader_recompiler/frontend/maxwell/decode.cpp  149
-rw-r--r--  src/shader_recompiler/frontend/maxwell/decode.h  14
-rw-r--r--  src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp  108
-rw-r--r--  src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h  28
-rw-r--r--  src/shader_recompiler/frontend/maxwell/instruction.h  63
-rw-r--r--  src/shader_recompiler/frontend/maxwell/location.h  112
-rw-r--r--  src/shader_recompiler/frontend/maxwell/maxwell.inc  286
-rw-r--r--  src/shader_recompiler/frontend/maxwell/opcodes.cpp  26
-rw-r--r--  src/shader_recompiler/frontend/maxwell/opcodes.h  30
-rw-r--r--  src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp  883
-rw-r--r--  src/shader_recompiler/frontend/maxwell/structured_control_flow.h  20
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp  214
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp  110
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp  35
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp  96
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp  74
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp  36
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h  57
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp  153
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h  28
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp  66
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp  55
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp  72
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp  58
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp  55
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp  50
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp  54
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp  43
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp  47
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp  82
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp  55
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp  78
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp  214
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp  253
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp  94
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp  71
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp  127
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp  41
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp  60
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp  44
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp  125
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp  169
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h  42
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp  143
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp  117
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp  118
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp  272
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/impl.h  387
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp  105
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp  122
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp  48
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp  80
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp  182
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp  82
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp  64
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp  36
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp  86
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp  58
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp  71
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp  66
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp  135
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp  126
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp  53
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp  62
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h  39
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp  108
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp  196
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp  218
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp  184
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp  116
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp  122
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp  66
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp  44
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp  71
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp  181
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp  283
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp  45
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp  46
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp  38
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp  53
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp  44
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp  205
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp  281
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp  236
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp  266
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp  208
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp  134
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp  182
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp  165
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp  242
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp  131
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp  76
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp  30
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h  23
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp  92
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp  64
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp  92
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp  54
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp  69
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/translate.cpp  52
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate/translate.h  14
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp  223
-rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.h  23
137 files changed, 18830 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/ir/abstract_syntax_list.h b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
new file mode 100644
index 000000000..b61773487
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/abstract_syntax_list.h
@@ -0,0 +1,58 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <vector>

#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::IR {

class Block;

struct AbstractSyntaxNode {
    enum class Type {
        Block,
        If,
        EndIf,
        Loop,
        Repeat,
        Break,
        Return,
        Unreachable,
    };
    union Data {
        Block* block;
        struct {
            U1 cond;
            Block* body;
            Block* merge;
        } if_node;
        struct {
            Block* merge;
        } end_if;
        struct {
            Block* body;
            Block* continue_block;
            Block* merge;
        } loop;
        struct {
            U1 cond;
            Block* loop_header;
            Block* merge;
        } repeat;
        struct {
            U1 cond;
            Block* merge;
            Block* skip;
        } break_node;
    };

    Data data{};
    Type type{};
};
using AbstractSyntaxList = std::vector<AbstractSyntaxNode>;

} // namespace Shader::IR
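A consumer of this structure walks the list linearly and dispatches on `type`, reading the matching union member. A minimal hypothetical sketch (PrintAsl is illustrative, not part of the patch):

// Hypothetical sketch: traverse an AbstractSyntaxList and report each node kind.
#include <fmt/core.h>
#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"

void PrintAsl(const Shader::IR::AbstractSyntaxList& asl) {
    using Type = Shader::IR::AbstractSyntaxNode::Type;
    for (const Shader::IR::AbstractSyntaxNode& node : asl) {
        switch (node.type) {
        case Type::Block:
            fmt::print("block {}\n", fmt::ptr(node.data.block));
            break;
        case Type::If:
            // data.if_node carries the condition plus body and merge blocks
            fmt::print("if -> merge {}\n", fmt::ptr(node.data.if_node.merge));
            break;
        case Type::Loop:
            fmt::print("loop -> continue {}\n", fmt::ptr(node.data.loop.continue_block));
            break;
        default:
            fmt::print("other\n");
            break;
        }
    }
}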
diff --git a/src/shader_recompiler/frontend/ir/attribute.cpp b/src/shader_recompiler/frontend/ir/attribute.cpp
new file mode 100644
index 000000000..4d0b8b8e5
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.cpp
@@ -0,0 +1,454 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <fmt/format.h>

#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/attribute.h"

namespace Shader::IR {

bool IsGeneric(Attribute attribute) noexcept {
    return attribute >= Attribute::Generic0X && attribute <= Attribute::Generic31W;
}

u32 GenericAttributeIndex(Attribute attribute) {
    if (!IsGeneric(attribute)) {
        throw InvalidArgument("Attribute is not generic {}", attribute);
    }
    return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
}

u32 GenericAttributeElement(Attribute attribute) {
    if (!IsGeneric(attribute)) {
        throw InvalidArgument("Attribute is not generic {}", attribute);
    }
    return static_cast<u32>(attribute) % 4;
}

std::string NameOf(Attribute attribute) {
    switch (attribute) {
    case Attribute::PrimitiveId:
        return "PrimitiveId";
    case Attribute::Layer:
        return "Layer";
    case Attribute::ViewportIndex:
        return "ViewportIndex";
    case Attribute::PointSize:
        return "PointSize";
    case Attribute::PositionX:
        return "Position.X";
    case Attribute::PositionY:
        return "Position.Y";
    case Attribute::PositionZ:
        return "Position.Z";
    case Attribute::PositionW:
        return "Position.W";
    case Attribute::Generic0X:
        return "Generic[0].X";
    case Attribute::Generic0Y:
        return "Generic[0].Y";
    case Attribute::Generic0Z:
        return "Generic[0].Z";
    case Attribute::Generic0W:
        return "Generic[0].W";
    case Attribute::Generic1X:
        return "Generic[1].X";
    case Attribute::Generic1Y:
        return "Generic[1].Y";
    case Attribute::Generic1Z:
        return "Generic[1].Z";
    case Attribute::Generic1W:
        return "Generic[1].W";
    case Attribute::Generic2X:
        return "Generic[2].X";
    case Attribute::Generic2Y:
        return "Generic[2].Y";
    case Attribute::Generic2Z:
        return "Generic[2].Z";
    case Attribute::Generic2W:
        return "Generic[2].W";
    case Attribute::Generic3X:
        return "Generic[3].X";
    case Attribute::Generic3Y:
        return "Generic[3].Y";
    case Attribute::Generic3Z:
        return "Generic[3].Z";
    case Attribute::Generic3W:
        return "Generic[3].W";
    case Attribute::Generic4X:
        return "Generic[4].X";
    case Attribute::Generic4Y:
        return "Generic[4].Y";
    case Attribute::Generic4Z:
        return "Generic[4].Z";
    case Attribute::Generic4W:
        return "Generic[4].W";
    case Attribute::Generic5X:
        return "Generic[5].X";
    case Attribute::Generic5Y:
        return "Generic[5].Y";
    case Attribute::Generic5Z:
        return "Generic[5].Z";
    case Attribute::Generic5W:
        return "Generic[5].W";
    case Attribute::Generic6X:
        return "Generic[6].X";
    case Attribute::Generic6Y:
        return "Generic[6].Y";
    case Attribute::Generic6Z:
        return "Generic[6].Z";
    case Attribute::Generic6W:
        return "Generic[6].W";
    case Attribute::Generic7X:
        return "Generic[7].X";
    case Attribute::Generic7Y:
        return "Generic[7].Y";
    case Attribute::Generic7Z:
        return "Generic[7].Z";
    case Attribute::Generic7W:
        return "Generic[7].W";
    case Attribute::Generic8X:
        return "Generic[8].X";
    case Attribute::Generic8Y:
        return "Generic[8].Y";
    case Attribute::Generic8Z:
        return "Generic[8].Z";
    case Attribute::Generic8W:
        return "Generic[8].W";
    case Attribute::Generic9X:
        return "Generic[9].X";
    case Attribute::Generic9Y:
        return "Generic[9].Y";
    case Attribute::Generic9Z:
        return "Generic[9].Z";
    case Attribute::Generic9W:
        return "Generic[9].W";
    case Attribute::Generic10X:
        return "Generic[10].X";
    case Attribute::Generic10Y:
        return "Generic[10].Y";
    case Attribute::Generic10Z:
        return "Generic[10].Z";
    case Attribute::Generic10W:
        return "Generic[10].W";
    case Attribute::Generic11X:
        return "Generic[11].X";
    case Attribute::Generic11Y:
        return "Generic[11].Y";
    case Attribute::Generic11Z:
        return "Generic[11].Z";
    case Attribute::Generic11W:
        return "Generic[11].W";
    case Attribute::Generic12X:
        return "Generic[12].X";
    case Attribute::Generic12Y:
        return "Generic[12].Y";
    case Attribute::Generic12Z:
        return "Generic[12].Z";
    case Attribute::Generic12W:
        return "Generic[12].W";
    case Attribute::Generic13X:
        return "Generic[13].X";
    case Attribute::Generic13Y:
        return "Generic[13].Y";
    case Attribute::Generic13Z:
        return "Generic[13].Z";
    case Attribute::Generic13W:
        return "Generic[13].W";
    case Attribute::Generic14X:
        return "Generic[14].X";
    case Attribute::Generic14Y:
        return "Generic[14].Y";
    case Attribute::Generic14Z:
        return "Generic[14].Z";
    case Attribute::Generic14W:
        return "Generic[14].W";
    case Attribute::Generic15X:
        return "Generic[15].X";
    case Attribute::Generic15Y:
        return "Generic[15].Y";
    case Attribute::Generic15Z:
        return "Generic[15].Z";
    case Attribute::Generic15W:
        return "Generic[15].W";
    case Attribute::Generic16X:
        return "Generic[16].X";
    case Attribute::Generic16Y:
        return "Generic[16].Y";
    case Attribute::Generic16Z:
        return "Generic[16].Z";
    case Attribute::Generic16W:
        return "Generic[16].W";
    case Attribute::Generic17X:
        return "Generic[17].X";
    case Attribute::Generic17Y:
        return "Generic[17].Y";
    case Attribute::Generic17Z:
        return "Generic[17].Z";
    case Attribute::Generic17W:
        return "Generic[17].W";
    case Attribute::Generic18X:
        return "Generic[18].X";
    case Attribute::Generic18Y:
        return "Generic[18].Y";
    case Attribute::Generic18Z:
        return "Generic[18].Z";
    case Attribute::Generic18W:
        return "Generic[18].W";
    case Attribute::Generic19X:
        return "Generic[19].X";
    case Attribute::Generic19Y:
        return "Generic[19].Y";
    case Attribute::Generic19Z:
        return "Generic[19].Z";
    case Attribute::Generic19W:
        return "Generic[19].W";
    case Attribute::Generic20X:
        return "Generic[20].X";
    case Attribute::Generic20Y:
        return "Generic[20].Y";
    case Attribute::Generic20Z:
        return "Generic[20].Z";
    case Attribute::Generic20W:
        return "Generic[20].W";
    case Attribute::Generic21X:
        return "Generic[21].X";
    case Attribute::Generic21Y:
        return "Generic[21].Y";
    case Attribute::Generic21Z:
        return "Generic[21].Z";
    case Attribute::Generic21W:
        return "Generic[21].W";
    case Attribute::Generic22X:
        return "Generic[22].X";
    case Attribute::Generic22Y:
        return "Generic[22].Y";
    case Attribute::Generic22Z:
        return "Generic[22].Z";
    case Attribute::Generic22W:
        return "Generic[22].W";
    case Attribute::Generic23X:
        return "Generic[23].X";
    case Attribute::Generic23Y:
        return "Generic[23].Y";
    case Attribute::Generic23Z:
        return "Generic[23].Z";
    case Attribute::Generic23W:
        return "Generic[23].W";
    case Attribute::Generic24X:
        return "Generic[24].X";
    case Attribute::Generic24Y:
        return "Generic[24].Y";
    case Attribute::Generic24Z:
        return "Generic[24].Z";
    case Attribute::Generic24W:
        return "Generic[24].W";
    case Attribute::Generic25X:
        return "Generic[25].X";
    case Attribute::Generic25Y:
        return "Generic[25].Y";
    case Attribute::Generic25Z:
        return "Generic[25].Z";
    case Attribute::Generic25W:
        return "Generic[25].W";
    case Attribute::Generic26X:
        return "Generic[26].X";
    case Attribute::Generic26Y:
        return "Generic[26].Y";
    case Attribute::Generic26Z:
        return "Generic[26].Z";
    case Attribute::Generic26W:
        return "Generic[26].W";
    case Attribute::Generic27X:
        return "Generic[27].X";
    case Attribute::Generic27Y:
        return "Generic[27].Y";
    case Attribute::Generic27Z:
        return "Generic[27].Z";
    case Attribute::Generic27W:
        return "Generic[27].W";
    case Attribute::Generic28X:
        return "Generic[28].X";
    case Attribute::Generic28Y:
        return "Generic[28].Y";
    case Attribute::Generic28Z:
        return "Generic[28].Z";
    case Attribute::Generic28W:
        return "Generic[28].W";
    case Attribute::Generic29X:
        return "Generic[29].X";
    case Attribute::Generic29Y:
        return "Generic[29].Y";
    case Attribute::Generic29Z:
        return "Generic[29].Z";
    case Attribute::Generic29W:
        return "Generic[29].W";
    case Attribute::Generic30X:
        return "Generic[30].X";
    case Attribute::Generic30Y:
        return "Generic[30].Y";
    case Attribute::Generic30Z:
        return "Generic[30].Z";
    case Attribute::Generic30W:
        return "Generic[30].W";
    case Attribute::Generic31X:
        return "Generic[31].X";
    case Attribute::Generic31Y:
        return "Generic[31].Y";
    case Attribute::Generic31Z:
        return "Generic[31].Z";
    case Attribute::Generic31W:
        return "Generic[31].W";
    case Attribute::ColorFrontDiffuseR:
        return "ColorFrontDiffuse.R";
    case Attribute::ColorFrontDiffuseG:
        return "ColorFrontDiffuse.G";
    case Attribute::ColorFrontDiffuseB:
        return "ColorFrontDiffuse.B";
    case Attribute::ColorFrontDiffuseA:
        return "ColorFrontDiffuse.A";
    case Attribute::ColorFrontSpecularR:
        return "ColorFrontSpecular.R";
    case Attribute::ColorFrontSpecularG:
        return "ColorFrontSpecular.G";
    case Attribute::ColorFrontSpecularB:
        return "ColorFrontSpecular.B";
    case Attribute::ColorFrontSpecularA:
        return "ColorFrontSpecular.A";
    case Attribute::ColorBackDiffuseR:
        return "ColorBackDiffuse.R";
    case Attribute::ColorBackDiffuseG:
        return "ColorBackDiffuse.G";
    case Attribute::ColorBackDiffuseB:
        return "ColorBackDiffuse.B";
    case Attribute::ColorBackDiffuseA:
        return "ColorBackDiffuse.A";
    case Attribute::ColorBackSpecularR:
        return "ColorBackSpecular.R";
    case Attribute::ColorBackSpecularG:
        return "ColorBackSpecular.G";
    case Attribute::ColorBackSpecularB:
        return "ColorBackSpecular.B";
    case Attribute::ColorBackSpecularA:
        return "ColorBackSpecular.A";
    case Attribute::ClipDistance0:
        return "ClipDistance[0]";
    case Attribute::ClipDistance1:
        return "ClipDistance[1]";
    case Attribute::ClipDistance2:
        return "ClipDistance[2]";
    case Attribute::ClipDistance3:
        return "ClipDistance[3]";
    case Attribute::ClipDistance4:
        return "ClipDistance[4]";
    case Attribute::ClipDistance5:
        return "ClipDistance[5]";
    case Attribute::ClipDistance6:
        return "ClipDistance[6]";
    case Attribute::ClipDistance7:
        return "ClipDistance[7]";
    case Attribute::PointSpriteS:
        return "PointSprite.S";
    case Attribute::PointSpriteT:
        return "PointSprite.T";
    case Attribute::FogCoordinate:
        return "FogCoordinate";
    case Attribute::TessellationEvaluationPointU:
        return "TessellationEvaluationPoint.U";
    case Attribute::TessellationEvaluationPointV:
        return "TessellationEvaluationPoint.V";
    case Attribute::InstanceId:
        return "InstanceId";
    case Attribute::VertexId:
        return "VertexId";
    case Attribute::FixedFncTexture0S:
        return "FixedFncTexture[0].S";
    case Attribute::FixedFncTexture0T:
        return "FixedFncTexture[0].T";
    case Attribute::FixedFncTexture0R:
        return "FixedFncTexture[0].R";
    case Attribute::FixedFncTexture0Q:
        return "FixedFncTexture[0].Q";
    case Attribute::FixedFncTexture1S:
        return "FixedFncTexture[1].S";
    case Attribute::FixedFncTexture1T:
        return "FixedFncTexture[1].T";
    case Attribute::FixedFncTexture1R:
        return "FixedFncTexture[1].R";
    case Attribute::FixedFncTexture1Q:
        return "FixedFncTexture[1].Q";
    case Attribute::FixedFncTexture2S:
        return "FixedFncTexture[2].S";
    case Attribute::FixedFncTexture2T:
        return "FixedFncTexture[2].T";
    case Attribute::FixedFncTexture2R:
        return "FixedFncTexture[2].R";
    case Attribute::FixedFncTexture2Q:
        return "FixedFncTexture[2].Q";
    case Attribute::FixedFncTexture3S:
        return "FixedFncTexture[3].S";
    case Attribute::FixedFncTexture3T:
        return "FixedFncTexture[3].T";
    case Attribute::FixedFncTexture3R:
        return "FixedFncTexture[3].R";
    case Attribute::FixedFncTexture3Q:
        return "FixedFncTexture[3].Q";
    case Attribute::FixedFncTexture4S:
        return "FixedFncTexture[4].S";
    case Attribute::FixedFncTexture4T:
        return "FixedFncTexture[4].T";
    case Attribute::FixedFncTexture4R:
        return "FixedFncTexture[4].R";
    case Attribute::FixedFncTexture4Q:
        return "FixedFncTexture[4].Q";
    case Attribute::FixedFncTexture5S:
        return "FixedFncTexture[5].S";
    case Attribute::FixedFncTexture5T:
        return "FixedFncTexture[5].T";
    case Attribute::FixedFncTexture5R:
        return "FixedFncTexture[5].R";
    case Attribute::FixedFncTexture5Q:
        return "FixedFncTexture[5].Q";
    case Attribute::FixedFncTexture6S:
        return "FixedFncTexture[6].S";
    case Attribute::FixedFncTexture6T:
        return "FixedFncTexture[6].T";
    case Attribute::FixedFncTexture6R:
        return "FixedFncTexture[6].R";
    case Attribute::FixedFncTexture6Q:
        return "FixedFncTexture[6].Q";
    case Attribute::FixedFncTexture7S:
        return "FixedFncTexture[7].S";
    case Attribute::FixedFncTexture7T:
        return "FixedFncTexture[7].T";
    case Attribute::FixedFncTexture7R:
        return "FixedFncTexture[7].R";
    case Attribute::FixedFncTexture7Q:
        return "FixedFncTexture[7].Q";
    case Attribute::FixedFncTexture8S:
        return "FixedFncTexture[8].S";
    case Attribute::FixedFncTexture8T:
        return "FixedFncTexture[8].T";
    case Attribute::FixedFncTexture8R:
        return "FixedFncTexture[8].R";
    case Attribute::FixedFncTexture8Q:
        return "FixedFncTexture[8].Q";
    case Attribute::FixedFncTexture9S:
        return "FixedFncTexture[9].S";
    case Attribute::FixedFncTexture9T:
        return "FixedFncTexture[9].T";
    case Attribute::FixedFncTexture9R:
        return "FixedFncTexture[9].R";
    case Attribute::FixedFncTexture9Q:
        return "FixedFncTexture[9].Q";
    case Attribute::ViewportMask:
        return "ViewportMask";
    case Attribute::FrontFace:
        return "FrontFace";
    }
    return fmt::format("<reserved attribute {}>", static_cast<int>(attribute));
}

} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/attribute.h b/src/shader_recompiler/frontend/ir/attribute.h
new file mode 100644
index 000000000..ca1199494
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/attribute.h
@@ -0,0 +1,250 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <fmt/format.h>

#include "common/common_types.h"

namespace Shader::IR {

enum class Attribute : u64 {
    PrimitiveId = 24,
    Layer = 25,
    ViewportIndex = 26,
    PointSize = 27,
    PositionX = 28,
    PositionY = 29,
    PositionZ = 30,
    PositionW = 31,
    Generic0X = 32,
    Generic0Y = 33,
    Generic0Z = 34,
    Generic0W = 35,
    Generic1X = 36,
    Generic1Y = 37,
    Generic1Z = 38,
    Generic1W = 39,
    Generic2X = 40,
    Generic2Y = 41,
    Generic2Z = 42,
    Generic2W = 43,
    Generic3X = 44,
    Generic3Y = 45,
    Generic3Z = 46,
    Generic3W = 47,
    Generic4X = 48,
    Generic4Y = 49,
    Generic4Z = 50,
    Generic4W = 51,
    Generic5X = 52,
    Generic5Y = 53,
    Generic5Z = 54,
    Generic5W = 55,
    Generic6X = 56,
    Generic6Y = 57,
    Generic6Z = 58,
    Generic6W = 59,
    Generic7X = 60,
    Generic7Y = 61,
    Generic7Z = 62,
    Generic7W = 63,
    Generic8X = 64,
    Generic8Y = 65,
    Generic8Z = 66,
    Generic8W = 67,
    Generic9X = 68,
    Generic9Y = 69,
    Generic9Z = 70,
    Generic9W = 71,
    Generic10X = 72,
    Generic10Y = 73,
    Generic10Z = 74,
    Generic10W = 75,
    Generic11X = 76,
    Generic11Y = 77,
    Generic11Z = 78,
    Generic11W = 79,
    Generic12X = 80,
    Generic12Y = 81,
    Generic12Z = 82,
    Generic12W = 83,
    Generic13X = 84,
    Generic13Y = 85,
    Generic13Z = 86,
    Generic13W = 87,
    Generic14X = 88,
    Generic14Y = 89,
    Generic14Z = 90,
    Generic14W = 91,
    Generic15X = 92,
    Generic15Y = 93,
    Generic15Z = 94,
    Generic15W = 95,
    Generic16X = 96,
    Generic16Y = 97,
    Generic16Z = 98,
    Generic16W = 99,
    Generic17X = 100,
    Generic17Y = 101,
    Generic17Z = 102,
    Generic17W = 103,
    Generic18X = 104,
    Generic18Y = 105,
    Generic18Z = 106,
    Generic18W = 107,
    Generic19X = 108,
    Generic19Y = 109,
    Generic19Z = 110,
    Generic19W = 111,
    Generic20X = 112,
    Generic20Y = 113,
    Generic20Z = 114,
    Generic20W = 115,
    Generic21X = 116,
    Generic21Y = 117,
    Generic21Z = 118,
    Generic21W = 119,
    Generic22X = 120,
    Generic22Y = 121,
    Generic22Z = 122,
    Generic22W = 123,
    Generic23X = 124,
    Generic23Y = 125,
    Generic23Z = 126,
    Generic23W = 127,
    Generic24X = 128,
    Generic24Y = 129,
    Generic24Z = 130,
    Generic24W = 131,
    Generic25X = 132,
    Generic25Y = 133,
    Generic25Z = 134,
    Generic25W = 135,
    Generic26X = 136,
    Generic26Y = 137,
    Generic26Z = 138,
    Generic26W = 139,
    Generic27X = 140,
    Generic27Y = 141,
    Generic27Z = 142,
    Generic27W = 143,
    Generic28X = 144,
    Generic28Y = 145,
    Generic28Z = 146,
    Generic28W = 147,
    Generic29X = 148,
    Generic29Y = 149,
    Generic29Z = 150,
    Generic29W = 151,
    Generic30X = 152,
    Generic30Y = 153,
    Generic30Z = 154,
    Generic30W = 155,
    Generic31X = 156,
    Generic31Y = 157,
    Generic31Z = 158,
    Generic31W = 159,
    ColorFrontDiffuseR = 160,
    ColorFrontDiffuseG = 161,
    ColorFrontDiffuseB = 162,
    ColorFrontDiffuseA = 163,
    ColorFrontSpecularR = 164,
    ColorFrontSpecularG = 165,
    ColorFrontSpecularB = 166,
    ColorFrontSpecularA = 167,
    ColorBackDiffuseR = 168,
    ColorBackDiffuseG = 169,
    ColorBackDiffuseB = 170,
    ColorBackDiffuseA = 171,
    ColorBackSpecularR = 172,
    ColorBackSpecularG = 173,
    ColorBackSpecularB = 174,
    ColorBackSpecularA = 175,
    ClipDistance0 = 176,
    ClipDistance1 = 177,
    ClipDistance2 = 178,
    ClipDistance3 = 179,
    ClipDistance4 = 180,
    ClipDistance5 = 181,
    ClipDistance6 = 182,
    ClipDistance7 = 183,
    PointSpriteS = 184,
    PointSpriteT = 185,
    FogCoordinate = 186,
    TessellationEvaluationPointU = 188,
    TessellationEvaluationPointV = 189,
    InstanceId = 190,
    VertexId = 191,
    FixedFncTexture0S = 192,
    FixedFncTexture0T = 193,
    FixedFncTexture0R = 194,
    FixedFncTexture0Q = 195,
    FixedFncTexture1S = 196,
    FixedFncTexture1T = 197,
    FixedFncTexture1R = 198,
    FixedFncTexture1Q = 199,
    FixedFncTexture2S = 200,
    FixedFncTexture2T = 201,
    FixedFncTexture2R = 202,
    FixedFncTexture2Q = 203,
    FixedFncTexture3S = 204,
    FixedFncTexture3T = 205,
    FixedFncTexture3R = 206,
    FixedFncTexture3Q = 207,
    FixedFncTexture4S = 208,
    FixedFncTexture4T = 209,
    FixedFncTexture4R = 210,
    FixedFncTexture4Q = 211,
    FixedFncTexture5S = 212,
    FixedFncTexture5T = 213,
    FixedFncTexture5R = 214,
    FixedFncTexture5Q = 215,
    FixedFncTexture6S = 216,
    FixedFncTexture6T = 217,
    FixedFncTexture6R = 218,
    FixedFncTexture6Q = 219,
    FixedFncTexture7S = 220,
    FixedFncTexture7T = 221,
    FixedFncTexture7R = 222,
    FixedFncTexture7Q = 223,
    FixedFncTexture8S = 224,
    FixedFncTexture8T = 225,
    FixedFncTexture8R = 226,
    FixedFncTexture8Q = 227,
    FixedFncTexture9S = 228,
    FixedFncTexture9T = 229,
    FixedFncTexture9R = 230,
    FixedFncTexture9Q = 231,
    ViewportMask = 232,
    FrontFace = 255,
};

constexpr size_t NUM_GENERICS = 32;

[[nodiscard]] bool IsGeneric(Attribute attribute) noexcept;

[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);

[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);

[[nodiscard]] std::string NameOf(Attribute attribute);

[[nodiscard]] constexpr IR::Attribute operator+(IR::Attribute attribute, size_t value) noexcept {
    return static_cast<IR::Attribute>(static_cast<size_t>(attribute) + value);
}

} // namespace Shader::IR

template <>
struct fmt::formatter<Shader::IR::Attribute> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Attribute& attribute, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(attribute));
    }
};
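The helpers compose over the packed X/Y/Z/W layout: `operator+` steps through components, the index is the offset from Generic0X divided by 4, and the element is the value modulo 4. A hypothetical usage sketch (the variable names are illustrative, not part of the patch):

// Hypothetical usage of the attribute helpers above.
using Shader::IR::Attribute;
const Attribute attr = Attribute::Generic0X + 5;          // 32 + 5 = 37, i.e. Generic1Y
// Shader::IR::IsGeneric(attr)               == true
// Shader::IR::GenericAttributeIndex(attr)   == 1   ((37 - 32) / 4)
// Shader::IR::GenericAttributeElement(attr) == 1   (37 % 4, the Y component)
const std::string name = fmt::format("{}", attr);         // "Generic[1].Y" via the formatter above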
diff --git a/src/shader_recompiler/frontend/ir/basic_block.cpp b/src/shader_recompiler/frontend/ir/basic_block.cpp
new file mode 100644
index 000000000..7c08b25ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.cpp
@@ -0,0 +1,149 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <initializer_list>
#include <map>
#include <memory>

#include "common/bit_cast.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::IR {

Block::Block(ObjectPool<Inst>& inst_pool_) : inst_pool{&inst_pool_} {}

Block::~Block() = default;

void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
    PrependNewInst(end(), op, args);
}

Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
                                      std::initializer_list<Value> args, u32 flags) {
    Inst* const inst{inst_pool->Create(op, flags)};
    const auto result_it{instructions.insert(insertion_point, *inst)};

    if (inst->NumArgs() != args.size()) {
        throw InvalidArgument("Invalid number of arguments {} in {}", args.size(), op);
    }
    std::ranges::for_each(args, [inst, index = size_t{0}](const Value& arg) mutable {
        inst->SetArg(index, arg);
        ++index;
    });
    return result_it;
}

void Block::AddBranch(Block* block) {
    if (std::ranges::find(imm_successors, block) != imm_successors.end()) {
        throw LogicError("Successor already inserted");
    }
    if (std::ranges::find(block->imm_predecessors, this) != block->imm_predecessors.end()) {
        throw LogicError("Predecessor already inserted");
    }
    imm_successors.push_back(block);
    block->imm_predecessors.push_back(this);
}

static std::string BlockToIndex(const std::map<const Block*, size_t>& block_to_index,
                                Block* block) {
    if (const auto it{block_to_index.find(block)}; it != block_to_index.end()) {
        return fmt::format("{{Block ${}}}", it->second);
    }
    return fmt::format("$<unknown block {:016x}>", reinterpret_cast<u64>(block));
}

static size_t InstIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                        const Inst* inst) {
    const auto [it, is_inserted]{inst_to_index.emplace(inst, inst_index + 1)};
    if (is_inserted) {
        ++inst_index;
    }
    return it->second;
}

static std::string ArgToIndex(std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index,
                              const Value& arg) {
    if (arg.IsEmpty()) {
        return "<null>";
    }
    if (!arg.IsImmediate() || arg.IsIdentity()) {
        return fmt::format("%{}", InstIndex(inst_to_index, inst_index, arg.Inst()));
    }
    switch (arg.Type()) {
    case Type::U1:
        return fmt::format("#{}", arg.U1() ? "true" : "false");
    case Type::U8:
        return fmt::format("#{}", arg.U8());
    case Type::U16:
        return fmt::format("#{}", arg.U16());
    case Type::U32:
        return fmt::format("#{}", arg.U32());
    case Type::U64:
        return fmt::format("#{}", arg.U64());
    case Type::F32:
        return fmt::format("#{}", arg.F32());
    case Type::Reg:
        return fmt::format("{}", arg.Reg());
    case Type::Pred:
        return fmt::format("{}", arg.Pred());
    case Type::Attribute:
        return fmt::format("{}", arg.Attribute());
    default:
        return "<unknown immediate type>";
    }
}

std::string DumpBlock(const Block& block) {
    size_t inst_index{0};
    std::map<const Inst*, size_t> inst_to_index;
    return DumpBlock(block, {}, inst_to_index, inst_index);
}

std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>& block_to_index,
                      std::map<const Inst*, size_t>& inst_to_index, size_t& inst_index) {
    std::string ret{"Block"};
    if (const auto it{block_to_index.find(&block)}; it != block_to_index.end()) {
        ret += fmt::format(" ${}", it->second);
    }
    ret += '\n';
    for (const Inst& inst : block) {
        const Opcode op{inst.GetOpcode()};
        ret += fmt::format("[{:016x}] ", reinterpret_cast<u64>(&inst));
        if (TypeOf(op) != Type::Void) {
            ret += fmt::format("%{:<5} = {}", InstIndex(inst_to_index, inst_index, &inst), op);
        } else {
            ret += fmt::format("         {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
        }
        const size_t arg_count{inst.NumArgs()};
        for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
            const Value arg{inst.Arg(arg_index)};
            const std::string arg_str{ArgToIndex(inst_to_index, inst_index, arg)};
            ret += arg_index != 0 ? ", " : " ";
            if (op == Opcode::Phi) {
                ret += fmt::format("[ {}, {} ]", arg_str,
                                   BlockToIndex(block_to_index, inst.PhiBlock(arg_index)));
            } else {
                ret += arg_str;
            }
            if (op != Opcode::Phi) {
                const Type actual_type{arg.Type()};
                const Type expected_type{ArgTypeOf(op, arg_index)};
                if (!AreTypesCompatible(actual_type, expected_type)) {
                    ret += fmt::format("<type error: {} != {}>", actual_type, expected_type);
                }
            }
        }
        if (TypeOf(op) != Type::Void) {
            ret += fmt::format(" (uses: {})\n", inst.UseCount());
        } else {
            ret += '\n';
        }
    }
    return ret;
}

} // namespace Shader::IR
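For reference, DumpBlock renders output of roughly this shape (illustrative only; the addresses, opcode names, and indices below are invented, not produced by the patch):

Block $0
[00007f2a4c001000] %1     = GetRegister R0 (uses: 1)
[00007f2a4c001040]          SetRegister R1, %1

Value-producing instructions get a left-aligned %index and a use count; void instructions are padded with nine spaces so the opcodes line up either way.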
diff --git a/src/shader_recompiler/frontend/ir/basic_block.h b/src/shader_recompiler/frontend/ir/basic_block.h
new file mode 100644
index 000000000..7e134b4c7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/basic_block.h
@@ -0,0 +1,185 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <initializer_list>
#include <map>
#include <span>
#include <vector>

#include <boost/intrusive/list.hpp>

#include "common/bit_cast.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/condition.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/object_pool.h"

namespace Shader::IR {

class Block {
public:
    using InstructionList = boost::intrusive::list<Inst>;
    using size_type = InstructionList::size_type;
    using iterator = InstructionList::iterator;
    using const_iterator = InstructionList::const_iterator;
    using reverse_iterator = InstructionList::reverse_iterator;
    using const_reverse_iterator = InstructionList::const_reverse_iterator;

    explicit Block(ObjectPool<Inst>& inst_pool_);
    ~Block();

    Block(const Block&) = delete;
    Block& operator=(const Block&) = delete;

    Block(Block&&) = default;
    Block& operator=(Block&&) = default;

    /// Appends a new instruction to the end of this basic block.
    void AppendNewInst(Opcode op, std::initializer_list<Value> args);

    /// Prepends a new instruction to this basic block before the insertion point.
    iterator PrependNewInst(iterator insertion_point, Opcode op,
                            std::initializer_list<Value> args = {}, u32 flags = 0);

    /// Adds a new branch to this basic block.
    void AddBranch(Block* block);

    /// Gets a mutable reference to the instruction list for this basic block.
    [[nodiscard]] InstructionList& Instructions() noexcept {
        return instructions;
    }
    /// Gets an immutable reference to the instruction list for this basic block.
    [[nodiscard]] const InstructionList& Instructions() const noexcept {
        return instructions;
    }

    /// Gets an immutable span to the immediate predecessors.
    [[nodiscard]] std::span<Block* const> ImmPredecessors() const noexcept {
        return imm_predecessors;
    }
    /// Gets an immutable span to the immediate successors.
    [[nodiscard]] std::span<Block* const> ImmSuccessors() const noexcept {
        return imm_successors;
    }

    /// Intrusively store the host definition of this block.
    template <typename DefinitionType>
    void SetDefinition(DefinitionType def) {
        definition = Common::BitCast<u32>(def);
    }

    /// Return the intrusively stored host definition of this block.
    template <typename DefinitionType>
    [[nodiscard]] DefinitionType Definition() const noexcept {
        return Common::BitCast<DefinitionType>(definition);
    }

    void SetSsaRegValue(IR::Reg reg, const Value& value) noexcept {
        ssa_reg_values[RegIndex(reg)] = value;
    }
    const Value& SsaRegValue(IR::Reg reg) const noexcept {
        return ssa_reg_values[RegIndex(reg)];
    }

    void SsaSeal() noexcept {
        is_ssa_sealed = true;
    }
    [[nodiscard]] bool IsSsaSealed() const noexcept {
        return is_ssa_sealed;
    }

    [[nodiscard]] bool empty() const {
        return instructions.empty();
    }
    [[nodiscard]] size_type size() const {
        return instructions.size();
    }

    [[nodiscard]] Inst& front() {
        return instructions.front();
    }
    [[nodiscard]] const Inst& front() const {
        return instructions.front();
    }

    [[nodiscard]] Inst& back() {
        return instructions.back();
    }
    [[nodiscard]] const Inst& back() const {
        return instructions.back();
    }

    [[nodiscard]] iterator begin() {
        return instructions.begin();
    }
    [[nodiscard]] const_iterator begin() const {
        return instructions.begin();
    }
    [[nodiscard]] iterator end() {
        return instructions.end();
    }
    [[nodiscard]] const_iterator end() const {
        return instructions.end();
    }

    [[nodiscard]] reverse_iterator rbegin() {
        return instructions.rbegin();
    }
    [[nodiscard]] const_reverse_iterator rbegin() const {
        return instructions.rbegin();
    }
    [[nodiscard]] reverse_iterator rend() {
        return instructions.rend();
    }
    [[nodiscard]] const_reverse_iterator rend() const {
        return instructions.rend();
    }

    [[nodiscard]] const_iterator cbegin() const {
        return instructions.cbegin();
    }
    [[nodiscard]] const_iterator cend() const {
        return instructions.cend();
    }

    [[nodiscard]] const_reverse_iterator crbegin() const {
        return instructions.crbegin();
    }
    [[nodiscard]] const_reverse_iterator crend() const {
        return instructions.crend();
    }

private:
    /// Memory pool for instruction list
    ObjectPool<Inst>* inst_pool;

    /// List of instructions in this block
    InstructionList instructions;

    /// Block immediate predecessors
    std::vector<Block*> imm_predecessors;
    /// Block immediate successors
    std::vector<Block*> imm_successors;

    /// Intrusively store the value of a register in the block.
    std::array<Value, NUM_REGS> ssa_reg_values;
    /// Intrusively store if the block is sealed in the SSA pass.
    bool is_ssa_sealed{false};

    /// Intrusively stored host definition of this block.
    u32 definition{};
};

using BlockList = std::vector<Block*>;

[[nodiscard]] std::string DumpBlock(const Block& block);

[[nodiscard]] std::string DumpBlock(const Block& block,
                                    const std::map<const Block*, size_t>& block_to_index,
                                    std::map<const Inst*, size_t>& inst_to_index,
                                    size_t& inst_index);

} // namespace Shader::IR
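A minimal construction sketch for this interface (hypothetical usage, not part of the patch; assumes ObjectPool from shader_recompiler/object_pool.h is default-constructible as declared there):

// Hypothetical usage of Block.
Shader::ObjectPool<Shader::IR::Inst> inst_pool;
Shader::IR::Block block{inst_pool};
Shader::IR::Block merge_block{inst_pool};
block.AppendNewInst(Shader::IR::Opcode::Prologue, {});
block.AddBranch(&merge_block); // records the successor/predecessor edge on both blocks
for (const Shader::IR::Inst& inst : block) {
    // Iterates the intrusive instruction list in program order.
}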
diff --git a/src/shader_recompiler/frontend/ir/breadth_first_search.h b/src/shader_recompiler/frontend/ir/breadth_first_search.h
new file mode 100644
index 000000000..a52ccbd58
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/breadth_first_search.h
@@ -0,0 +1,56 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <algorithm>
#include <optional>
#include <queue>
#include <type_traits>

#include <boost/container/small_vector.hpp>

#include "shader_recompiler/frontend/ir/value.h"

namespace Shader::IR {

template <typename Pred>
auto BreadthFirstSearch(const Value& value, Pred&& pred)
    -> std::invoke_result_t<Pred, const Inst*> {
    if (value.IsImmediate()) {
        // Nothing to do with immediates
        return std::nullopt;
    }
    // Breadth-first search visiting the rightmost arguments first
    // The small_vector size was determined from shaders in Super Smash Bros. Ultimate
    boost::container::small_vector<const Inst*, 2> visited;
    std::queue<const Inst*> queue;
    queue.push(value.InstRecursive());

    while (!queue.empty()) {
        // Pop one instruction from the queue
        const Inst* const inst{queue.front()};
        queue.pop();
        if (const std::optional result = pred(inst)) {
            // This is the instruction we were looking for
            return result;
        }
        // Visit the rightmost arguments first
        for (size_t arg = inst->NumArgs(); arg--;) {
            const Value arg_value{inst->Arg(arg)};
            if (arg_value.IsImmediate()) {
                continue;
            }
            // Queue instruction if it hasn't been visited
            const Inst* const arg_inst{arg_value.InstRecursive()};
            if (std::ranges::find(visited, arg_inst) == visited.end()) {
                visited.push_back(arg_inst);
                queue.push(arg_inst);
            }
        }
    }
    // SSA tree has been traversed and the result hasn't been found
    return std::nullopt;
}

} // namespace Shader::IR
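The predicate both filters and extracts: it returns a std::optional, and the search stops at the first engaged result. A minimal sketch, assuming `value` is an IR::Value in scope (illustrative, not part of the patch):

// Hypothetical: find a GetCbufU32 instruction feeding `value`, if any.
const std::optional<const Shader::IR::Inst*> cbuf{Shader::IR::BreadthFirstSearch(
    value, [](const Shader::IR::Inst* inst) -> std::optional<const Shader::IR::Inst*> {
        if (inst->GetOpcode() == Shader::IR::Opcode::GetCbufU32) {
            return inst;
        }
        return std::nullopt;
    })};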
diff --git a/src/shader_recompiler/frontend/ir/condition.cpp b/src/shader_recompiler/frontend/ir/condition.cpp
new file mode 100644
index 000000000..fc18ea2a2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.cpp
@@ -0,0 +1,29 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <string>

#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/condition.h"

namespace Shader::IR {

std::string NameOf(Condition condition) {
    std::string ret;
    if (condition.GetFlowTest() != FlowTest::T) {
        ret = fmt::to_string(condition.GetFlowTest());
    }
    const auto [pred, negated]{condition.GetPred()};
    if (!ret.empty()) {
        ret += '&';
    }
    if (negated) {
        ret += '!';
    }
    ret += fmt::to_string(pred);
    return ret;
}

} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/condition.h b/src/shader_recompiler/frontend/ir/condition.h
new file mode 100644
index 000000000..aa8597c60
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/condition.h
@@ -0,0 +1,60 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <compare>
#include <string>

#include <fmt/format.h>

#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/flow_test.h"
#include "shader_recompiler/frontend/ir/pred.h"

namespace Shader::IR {

class Condition {
public:
    Condition() noexcept = default;

    explicit Condition(FlowTest flow_test_, Pred pred_, bool pred_negated_ = false) noexcept
        : flow_test{static_cast<u16>(flow_test_)}, pred{static_cast<u8>(pred_)},
          pred_negated{pred_negated_ ? u8{1} : u8{0}} {}

    explicit Condition(Pred pred_, bool pred_negated_ = false) noexcept
        : Condition(FlowTest::T, pred_, pred_negated_) {}

    explicit Condition(bool value) : Condition(Pred::PT, !value) {}

    auto operator<=>(const Condition&) const noexcept = default;

    [[nodiscard]] IR::FlowTest GetFlowTest() const noexcept {
        return static_cast<IR::FlowTest>(flow_test);
    }

    [[nodiscard]] std::pair<IR::Pred, bool> GetPred() const noexcept {
        return {static_cast<IR::Pred>(pred), pred_negated != 0};
    }

private:
    u16 flow_test;
    u8 pred;
    u8 pred_negated;
};

std::string NameOf(Condition condition);

} // namespace Shader::IR

template <>
struct fmt::formatter<Shader::IR::Condition> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::Condition& cond, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(cond));
    }
};
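Because Condition packs the flow test and predicate into trivially comparable fields, instances compare by value and format through NameOf. A hypothetical example (not part of the patch):

// Hypothetical usage of Condition.
using namespace Shader::IR;
const Condition always{true};                        // FlowTest::T with non-negated PT
const Condition cond{FlowTest::NE, Pred::P0, true};
// fmt::format("{}", cond)   == "NE&!P0"
// fmt::format("{}", always) == "PT"
// always == Condition{FlowTest::T, Pred::PT}        // defaulted operator<=>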
diff --git a/src/shader_recompiler/frontend/ir/flow_test.cpp b/src/shader_recompiler/frontend/ir/flow_test.cpp
new file mode 100644
index 000000000..6ebb4ad89
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.cpp
@@ -0,0 +1,83 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <string>

#include <fmt/format.h>

#include "shader_recompiler/frontend/ir/flow_test.h"

namespace Shader::IR {

std::string NameOf(FlowTest flow_test) {
    switch (flow_test) {
    case FlowTest::F:
        return "F";
    case FlowTest::LT:
        return "LT";
    case FlowTest::EQ:
        return "EQ";
    case FlowTest::LE:
        return "LE";
    case FlowTest::GT:
        return "GT";
    case FlowTest::NE:
        return "NE";
    case FlowTest::GE:
        return "GE";
    case FlowTest::NUM:
        return "NUM";
    case FlowTest::NaN:
        return "NAN";
    case FlowTest::LTU:
        return "LTU";
    case FlowTest::EQU:
        return "EQU";
    case FlowTest::LEU:
        return "LEU";
    case FlowTest::GTU:
        return "GTU";
    case FlowTest::NEU:
        return "NEU";
    case FlowTest::GEU:
        return "GEU";
    case FlowTest::T:
        return "T";
    case FlowTest::OFF:
        return "OFF";
    case FlowTest::LO:
        return "LO";
    case FlowTest::SFF:
        return "SFF";
    case FlowTest::LS:
        return "LS";
    case FlowTest::HI:
        return "HI";
    case FlowTest::SFT:
        return "SFT";
    case FlowTest::HS:
        return "HS";
    case FlowTest::OFT:
        return "OFT";
    case FlowTest::CSM_TA:
        return "CSM_TA";
    case FlowTest::CSM_TR:
        return "CSM_TR";
    case FlowTest::CSM_MX:
        return "CSM_MX";
    case FlowTest::FCSM_TA:
        return "FCSM_TA";
    case FlowTest::FCSM_TR:
        return "FCSM_TR";
    case FlowTest::FCSM_MX:
        return "FCSM_MX";
    case FlowTest::RLE:
        return "RLE";
    case FlowTest::RGT:
        return "RGT";
    }
    return fmt::format("<invalid flow test {}>", static_cast<int>(flow_test));
}

} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/flow_test.h b/src/shader_recompiler/frontend/ir/flow_test.h
new file mode 100644
index 000000000..09e113773
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/flow_test.h
@@ -0,0 +1,62 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <string>
#include <fmt/format.h>

#include "common/common_types.h"

namespace Shader::IR {

enum class FlowTest : u64 {
    F,
    LT,
    EQ,
    LE,
    GT,
    NE,
    GE,
    NUM,
    NaN,
    LTU,
    EQU,
    LEU,
    GTU,
    NEU,
    GEU,
    T,
    OFF,
    LO,
    SFF,
    LS,
    HI,
    SFT,
    HS,
    OFT,
    CSM_TA,
    CSM_TR,
    CSM_MX,
    FCSM_TA,
    FCSM_TR,
    FCSM_MX,
    RLE,
    RGT,
};

[[nodiscard]] std::string NameOf(FlowTest flow_test);

} // namespace Shader::IR

template <>
struct fmt::formatter<Shader::IR::FlowTest> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::IR::FlowTest& flow_test, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{}", Shader::IR::NameOf(flow_test));
    }
};
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
new file mode 100644
index 000000000..13159a68d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -0,0 +1,2017 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_cast.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/ir/value.h"
8
9namespace Shader::IR {
10namespace {
11[[noreturn]] void ThrowInvalidType(Type type) {
12 throw InvalidArgument("Invalid type {}", type);
13}
14
15Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
16 if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
17 return ir.CompositeConstruct(bias_lod, lod_clamp);
18 } else if (!bias_lod.IsEmpty()) {
19 return bias_lod;
20 } else if (!lod_clamp.IsEmpty()) {
21 return lod_clamp;
22 } else {
23 return Value{};
24 }
25}
26} // Anonymous namespace
27
28U1 IREmitter::Imm1(bool value) const {
29 return U1{Value{value}};
30}
31
32U8 IREmitter::Imm8(u8 value) const {
33 return U8{Value{value}};
34}
35
36U16 IREmitter::Imm16(u16 value) const {
37 return U16{Value{value}};
38}
39
40U32 IREmitter::Imm32(u32 value) const {
41 return U32{Value{value}};
42}
43
44U32 IREmitter::Imm32(s32 value) const {
45 return U32{Value{static_cast<u32>(value)}};
46}
47
48F32 IREmitter::Imm32(f32 value) const {
49 return F32{Value{value}};
50}
51
52U64 IREmitter::Imm64(u64 value) const {
53 return U64{Value{value}};
54}
55
56U64 IREmitter::Imm64(s64 value) const {
57 return U64{Value{static_cast<u64>(value)}};
58}
59
60F64 IREmitter::Imm64(f64 value) const {
61 return F64{Value{value}};
62}
63
64U1 IREmitter::ConditionRef(const U1& value) {
65 return Inst<U1>(Opcode::ConditionRef, value);
66}
67
68void IREmitter::Reference(const Value& value) {
69 Inst(Opcode::Reference, value);
70}
71
72void IREmitter::PhiMove(IR::Inst& phi, const Value& value) {
73 Inst(Opcode::PhiMove, Value{&phi}, value);
74}
75
76void IREmitter::Prologue() {
77 Inst(Opcode::Prologue);
78}
79
80void IREmitter::Epilogue() {
81 Inst(Opcode::Epilogue);
82}
83
84void IREmitter::DemoteToHelperInvocation() {
85 Inst(Opcode::DemoteToHelperInvocation);
86}
87
88void IREmitter::EmitVertex(const U32& stream) {
89 Inst(Opcode::EmitVertex, stream);
90}
91
92void IREmitter::EndPrimitive(const U32& stream) {
93 Inst(Opcode::EndPrimitive, stream);
94}
95
96void IREmitter::Barrier() {
97 Inst(Opcode::Barrier);
98}
99
100void IREmitter::WorkgroupMemoryBarrier() {
101 Inst(Opcode::WorkgroupMemoryBarrier);
102}
103
104void IREmitter::DeviceMemoryBarrier() {
105 Inst(Opcode::DeviceMemoryBarrier);
106}
107
108U32 IREmitter::GetReg(IR::Reg reg) {
109 return Inst<U32>(Opcode::GetRegister, reg);
110}
111
112void IREmitter::SetReg(IR::Reg reg, const U32& value) {
113 Inst(Opcode::SetRegister, reg, value);
114}
115
116U1 IREmitter::GetPred(IR::Pred pred, bool is_negated) {
117 if (pred == Pred::PT) {
118 return Imm1(!is_negated);
119 }
120 const U1 value{Inst<U1>(Opcode::GetPred, pred)};
121 if (is_negated) {
122 return Inst<U1>(Opcode::LogicalNot, value);
123 } else {
124 return value;
125 }
126}
127
128void IREmitter::SetPred(IR::Pred pred, const U1& value) {
129 if (pred != IR::Pred::PT) {
130 Inst(Opcode::SetPred, pred, value);
131 }
132}
133
134U1 IREmitter::GetGotoVariable(u32 id) {
135 return Inst<U1>(Opcode::GetGotoVariable, id);
136}
137
138void IREmitter::SetGotoVariable(u32 id, const U1& value) {
139 Inst(Opcode::SetGotoVariable, id, value);
140}
141
142U32 IREmitter::GetIndirectBranchVariable() {
143 return Inst<U32>(Opcode::GetIndirectBranchVariable);
144}
145
146void IREmitter::SetIndirectBranchVariable(const U32& value) {
147 Inst(Opcode::SetIndirectBranchVariable, value);
148}
149
150U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
151 return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
152}
153
154Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
155 bool is_signed) {
156 switch (bitsize) {
157 case 8:
158 return Inst<U32>(is_signed ? Opcode::GetCbufS8 : Opcode::GetCbufU8, binding, byte_offset);
159 case 16:
160 return Inst<U32>(is_signed ? Opcode::GetCbufS16 : Opcode::GetCbufU16, binding, byte_offset);
161 case 32:
162 return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
163 case 64:
164 return Inst(Opcode::GetCbufU32x2, binding, byte_offset);
165 default:
166 throw InvalidArgument("Invalid bit size {}", bitsize);
167 }
168}
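// Sub-word constant-buffer reads stay 32 bits wide in the IR: the S8/S16
// opcodes sign-extend and U8/U16 zero-extend into a U32, while the 64-bit
// case produces a U32x2 vector rather than a scalar. Hedged sketch with
// illustrative names:
//     const IR::U32 byte{IR::U32{ir.GetCbuf(binding, offset, 8, true)}};
//     const IR::Value pair{ir.GetCbuf(binding, offset, 64, false)};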
169
170F32 IREmitter::GetFloatCbuf(const U32& binding, const U32& byte_offset) {
171 return Inst<F32>(Opcode::GetCbufF32, binding, byte_offset);
172}
173
174U1 IREmitter::GetZFlag() {
175 return Inst<U1>(Opcode::GetZFlag);
176}
177
178U1 IREmitter::GetSFlag() {
179 return Inst<U1>(Opcode::GetSFlag);
180}
181
182U1 IREmitter::GetCFlag() {
183 return Inst<U1>(Opcode::GetCFlag);
184}
185
186U1 IREmitter::GetOFlag() {
187 return Inst<U1>(Opcode::GetOFlag);
188}
189
190void IREmitter::SetZFlag(const U1& value) {
191 Inst(Opcode::SetZFlag, value);
192}
193
194void IREmitter::SetSFlag(const U1& value) {
195 Inst(Opcode::SetSFlag, value);
196}
197
198void IREmitter::SetCFlag(const U1& value) {
199 Inst(Opcode::SetCFlag, value);
200}
201
202void IREmitter::SetOFlag(const U1& value) {
203 Inst(Opcode::SetOFlag, value);
204}
205
206static U1 GetFlowTest(IREmitter& ir, FlowTest flow_test) {
207 switch (flow_test) {
208 case FlowTest::F:
209 return ir.Imm1(false);
210 case FlowTest::LT:
211 return ir.LogicalXor(ir.LogicalAnd(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag())),
212 ir.GetOFlag());
213 case FlowTest::EQ:
214 return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag());
215 case FlowTest::LE:
216 return ir.LogicalXor(ir.GetSFlag(), ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
217 case FlowTest::GT:
218 return ir.LogicalAnd(ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()), ir.GetOFlag()),
219 ir.LogicalNot(ir.GetZFlag()));
220 case FlowTest::NE:
221 return ir.LogicalNot(ir.GetZFlag());
222 case FlowTest::GE:
223 return ir.LogicalNot(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()));
224 case FlowTest::NUM:
225 return ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
226 case FlowTest::NaN:
227 return ir.LogicalAnd(ir.GetSFlag(), ir.GetZFlag());
228 case FlowTest::LTU:
229 return ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag());
230 case FlowTest::EQU:
231 return ir.GetZFlag();
232 case FlowTest::LEU:
233 return ir.LogicalOr(ir.LogicalXor(ir.GetSFlag(), ir.GetOFlag()), ir.GetZFlag());
234 case FlowTest::GTU:
235 return ir.LogicalXor(ir.LogicalNot(ir.GetSFlag()),
236 ir.LogicalOr(ir.GetZFlag(), ir.GetOFlag()));
237 case FlowTest::NEU:
238 return ir.LogicalOr(ir.GetSFlag(), ir.LogicalNot(ir.GetZFlag()));
239 case FlowTest::GEU:
240 return ir.LogicalXor(ir.LogicalOr(ir.LogicalNot(ir.GetSFlag()), ir.GetZFlag()),
241 ir.GetOFlag());
242 case FlowTest::T:
243 return ir.Imm1(true);
244 case FlowTest::OFF:
245 return ir.LogicalNot(ir.GetOFlag());
246 case FlowTest::LO:
247 return ir.LogicalNot(ir.GetCFlag());
248 case FlowTest::SFF:
249 return ir.LogicalNot(ir.GetSFlag());
250 case FlowTest::LS:
251 return ir.LogicalOr(ir.GetZFlag(), ir.LogicalNot(ir.GetCFlag()));
252 case FlowTest::HI:
253 return ir.LogicalAnd(ir.GetCFlag(), ir.LogicalNot(ir.GetZFlag()));
254 case FlowTest::SFT:
255 return ir.GetSFlag();
256 case FlowTest::HS:
257 return ir.GetCFlag();
258 case FlowTest::OFT:
259 return ir.GetOFlag();
260 case FlowTest::RLE:
261 return ir.LogicalOr(ir.GetSFlag(), ir.GetZFlag());
262 case FlowTest::RGT:
263 return ir.LogicalAnd(ir.LogicalNot(ir.GetSFlag()), ir.LogicalNot(ir.GetZFlag()));
264 case FlowTest::FCSM_TR:
265 LOG_WARNING(Shader, "(STUBBED) FCSM_TR");
266 return ir.Imm1(false);
267 case FlowTest::CSM_TA:
268 case FlowTest::CSM_TR:
269 case FlowTest::CSM_MX:
270 case FlowTest::FCSM_TA:
271 case FlowTest::FCSM_MX:
272 default:
273 throw NotImplementedException("Flow test {}", flow_test);
274 }
275}
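// Each FlowTest case rebuilds a Maxwell condition-code test from the four CC
// flags. LT, for instance, is signed less-than with overflow correction,
// (S && !Z) XOR O, while the unordered variants (LTU, LEU, ...) drop the !Z
// term and therefore also pass on the NaN pattern (S && Z, see FlowTest::NaN).
// FCSM_TR is stubbed to false; the remaining CSM/FCSM tests are rejected.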
276
277U1 IREmitter::Condition(IR::Condition cond) {
278 const FlowTest flow_test{cond.GetFlowTest()};
279 const auto [pred, is_negated]{cond.GetPred()};
280 if (flow_test == FlowTest::T) {
281 return GetPred(pred, is_negated);
282 }
283 return LogicalAnd(GetPred(pred, is_negated), GetFlowTest(*this, flow_test));
284}
285
286U1 IREmitter::GetFlowTestResult(FlowTest test) {
287 return GetFlowTest(*this, test);
288}
289
290F32 IREmitter::GetAttribute(IR::Attribute attribute) {
291 return GetAttribute(attribute, Imm32(0));
292}
293
294F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) {
295 return Inst<F32>(Opcode::GetAttribute, attribute, vertex);
296}
297
298void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) {
299 Inst(Opcode::SetAttribute, attribute, value, vertex);
300}
301
302F32 IREmitter::GetAttributeIndexed(const U32& phys_address) {
303 return GetAttributeIndexed(phys_address, Imm32(0));
304}
305
306F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) {
307 return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex);
308}
309
310void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) {
311 Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex);
312}
313
314F32 IREmitter::GetPatch(Patch patch) {
315 return Inst<F32>(Opcode::GetPatch, patch);
316}
317
318void IREmitter::SetPatch(Patch patch, const F32& value) {
319 Inst(Opcode::SetPatch, patch, value);
320}
321
322void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) {
323 Inst(Opcode::SetFragColor, Imm32(index), Imm32(component), value);
324}
325
326void IREmitter::SetSampleMask(const U32& value) {
327 Inst(Opcode::SetSampleMask, value);
328}
329
330void IREmitter::SetFragDepth(const F32& value) {
331 Inst(Opcode::SetFragDepth, value);
332}
333
334U32 IREmitter::WorkgroupIdX() {
335 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 0)};
336}
337
338U32 IREmitter::WorkgroupIdY() {
339 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 1)};
340}
341
342U32 IREmitter::WorkgroupIdZ() {
343 return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
344}
345
346Value IREmitter::LocalInvocationId() {
347 return Inst(Opcode::LocalInvocationId);
348}
349
350U32 IREmitter::LocalInvocationIdX() {
351 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
352}
353
354U32 IREmitter::LocalInvocationIdY() {
355 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 1)};
356}
357
358U32 IREmitter::LocalInvocationIdZ() {
359 return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 2)};
360}
361
362U32 IREmitter::InvocationId() {
363 return Inst<U32>(Opcode::InvocationId);
364}
365
366U32 IREmitter::SampleId() {
367 return Inst<U32>(Opcode::SampleId);
368}
369
370U1 IREmitter::IsHelperInvocation() {
371 return Inst<U1>(Opcode::IsHelperInvocation);
372}
373
374F32 IREmitter::YDirection() {
375 return Inst<F32>(Opcode::YDirection);
376}
377
378U32 IREmitter::LaneId() {
379 return Inst<U32>(Opcode::LaneId);
380}
381
382U32 IREmitter::LoadGlobalU8(const U64& address) {
383 return Inst<U32>(Opcode::LoadGlobalU8, address);
384}
385
386U32 IREmitter::LoadGlobalS8(const U64& address) {
387 return Inst<U32>(Opcode::LoadGlobalS8, address);
388}
389
390U32 IREmitter::LoadGlobalU16(const U64& address) {
391 return Inst<U32>(Opcode::LoadGlobalU16, address);
392}
393
394U32 IREmitter::LoadGlobalS16(const U64& address) {
395 return Inst<U32>(Opcode::LoadGlobalS16, address);
396}
397
398U32 IREmitter::LoadGlobal32(const U64& address) {
399 return Inst<U32>(Opcode::LoadGlobal32, address);
400}
401
402Value IREmitter::LoadGlobal64(const U64& address) {
403 return Inst<Value>(Opcode::LoadGlobal64, address);
404}
405
406Value IREmitter::LoadGlobal128(const U64& address) {
407 return Inst<Value>(Opcode::LoadGlobal128, address);
408}
409
410void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
411 Inst(Opcode::WriteGlobalU8, address, value);
412}
413
414void IREmitter::WriteGlobalS8(const U64& address, const U32& value) {
415 Inst(Opcode::WriteGlobalS8, address, value);
416}
417
418void IREmitter::WriteGlobalU16(const U64& address, const U32& value) {
419 Inst(Opcode::WriteGlobalU16, address, value);
420}
421
422void IREmitter::WriteGlobalS16(const U64& address, const U32& value) {
423 Inst(Opcode::WriteGlobalS16, address, value);
424}
425
426void IREmitter::WriteGlobal32(const U64& address, const U32& value) {
427 Inst(Opcode::WriteGlobal32, address, value);
428}
429
430void IREmitter::WriteGlobal64(const U64& address, const IR::Value& vector) {
431 Inst(Opcode::WriteGlobal64, address, vector);
432}
433
434void IREmitter::WriteGlobal128(const U64& address, const IR::Value& vector) {
435 Inst(Opcode::WriteGlobal128, address, vector);
436}
437
438U32 IREmitter::LoadLocal(const IR::U32& word_offset) {
439 return Inst<U32>(Opcode::LoadLocal, word_offset);
440}
441
442void IREmitter::WriteLocal(const IR::U32& word_offset, const IR::U32& value) {
443 Inst(Opcode::WriteLocal, word_offset, value);
444}
445
446Value IREmitter::LoadShared(int bit_size, bool is_signed, const IR::U32& offset) {
447 switch (bit_size) {
448 case 8:
449 return Inst(is_signed ? Opcode::LoadSharedS8 : Opcode::LoadSharedU8, offset);
450 case 16:
451 return Inst(is_signed ? Opcode::LoadSharedS16 : Opcode::LoadSharedU16, offset);
452 case 32:
453 return Inst(Opcode::LoadSharedU32, offset);
454 case 64:
455 return Inst(Opcode::LoadSharedU64, offset);
456 case 128:
457 return Inst(Opcode::LoadSharedU128, offset);
458 }
459 throw InvalidArgument("Invalid bit size {}", bit_size);
460}
461
462void IREmitter::WriteShared(int bit_size, const IR::U32& offset, const IR::Value& value) {
463 switch (bit_size) {
464 case 8:
465 Inst(Opcode::WriteSharedU8, offset, value);
466 break;
467 case 16:
468 Inst(Opcode::WriteSharedU16, offset, value);
469 break;
470 case 32:
471 Inst(Opcode::WriteSharedU32, offset, value);
472 break;
473 case 64:
474 Inst(Opcode::WriteSharedU64, offset, value);
475 break;
476 case 128:
477 Inst(Opcode::WriteSharedU128, offset, value);
478 break;
479 default:
480 throw InvalidArgument("Invalid bit size {}", bit_size);
481 }
482}
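// LoadShared/WriteShared mirror the cbuf dispatch: sub-word loads extend into
// a 32-bit value (signed or unsigned per is_signed) and the 64/128-bit forms
// move U32 vectors. A hypothetical round trip through shared memory:
//     ir.WriteShared(32, offset, value);
//     const IR::U32 reload{IR::U32{ir.LoadShared(32, false, offset)}};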
483
484U1 IREmitter::GetZeroFromOp(const Value& op) {
485 return Inst<U1>(Opcode::GetZeroFromOp, op);
486}
487
488U1 IREmitter::GetSignFromOp(const Value& op) {
489 return Inst<U1>(Opcode::GetSignFromOp, op);
490}
491
492U1 IREmitter::GetCarryFromOp(const Value& op) {
493 return Inst<U1>(Opcode::GetCarryFromOp, op);
494}
495
496U1 IREmitter::GetOverflowFromOp(const Value& op) {
497 return Inst<U1>(Opcode::GetOverflowFromOp, op);
498}
499
500U1 IREmitter::GetSparseFromOp(const Value& op) {
501 return Inst<U1>(Opcode::GetSparseFromOp, op);
502}
503
504U1 IREmitter::GetInBoundsFromOp(const Value& op) {
505 return Inst<U1>(Opcode::GetInBoundsFromOp, op);
506}
507
508F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
509 if (a.Type() != b.Type()) {
510 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
511 }
512 switch (a.Type()) {
513 case Type::F16:
514 return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
515 case Type::F32:
516 return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
517 case Type::F64:
518 return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
519 default:
520 ThrowInvalidType(a.Type());
521 }
522}
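// FPAdd is the pattern most float helpers in this file follow: reject
// mismatched operand types, switch on the common type to select the
// 16/32/64-bit opcode, and carry the FpControl modifiers in the instruction's
// flag storage via Flags{control}. Sketch with assumed F32 operands:
//     const IR::F16F32F64 sum{ir.FPAdd(a, b, FpControl{})};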
523
524Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
525 if (e1.Type() != e2.Type()) {
526 throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
527 }
528 switch (e1.Type()) {
529 case Type::U32:
530 return Inst(Opcode::CompositeConstructU32x2, e1, e2);
531 case Type::F16:
532 return Inst(Opcode::CompositeConstructF16x2, e1, e2);
533 case Type::F32:
534 return Inst(Opcode::CompositeConstructF32x2, e1, e2);
535 case Type::F64:
536 return Inst(Opcode::CompositeConstructF64x2, e1, e2);
537 default:
538 ThrowInvalidType(e1.Type());
539 }
540}
541
542Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
543 if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
544 throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
545 }
546 switch (e1.Type()) {
547 case Type::U32:
548 return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
549 case Type::F16:
550 return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
551 case Type::F32:
552 return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
553 case Type::F64:
554 return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
555 default:
556 ThrowInvalidType(e1.Type());
557 }
558}
559
560Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
561 const Value& e4) {
562 if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
563 throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
564 e3.Type(), e4.Type());
565 }
566 switch (e1.Type()) {
567 case Type::U32:
568 return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
569 case Type::F16:
570 return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
571 case Type::F32:
572 return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
573 case Type::F64:
574 return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
575 default:
576 ThrowInvalidType(e1.Type());
577 }
578}
579
580Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
581 const auto read{[&](Opcode opcode, size_t limit) -> Value {
582 if (element >= limit) {
583 throw InvalidArgument("Out of bounds element {}", element);
584 }
585 return Inst(opcode, vector, Value{static_cast<u32>(element)});
586 }};
587 switch (vector.Type()) {
588 case Type::U32x2:
589 return read(Opcode::CompositeExtractU32x2, 2);
590 case Type::U32x3:
591 return read(Opcode::CompositeExtractU32x3, 3);
592 case Type::U32x4:
593 return read(Opcode::CompositeExtractU32x4, 4);
594 case Type::F16x2:
595 return read(Opcode::CompositeExtractF16x2, 2);
596 case Type::F16x3:
597 return read(Opcode::CompositeExtractF16x3, 3);
598 case Type::F16x4:
599 return read(Opcode::CompositeExtractF16x4, 4);
600 case Type::F32x2:
601 return read(Opcode::CompositeExtractF32x2, 2);
602 case Type::F32x3:
603 return read(Opcode::CompositeExtractF32x3, 3);
604 case Type::F32x4:
605 return read(Opcode::CompositeExtractF32x4, 4);
606 case Type::F64x2:
607 return read(Opcode::CompositeExtractF64x2, 2);
608 case Type::F64x3:
609 return read(Opcode::CompositeExtractF64x3, 3);
610 case Type::F64x4:
611 return read(Opcode::CompositeExtractF64x4, 4);
612 default:
613 ThrowInvalidType(vector.Type());
614 }
615}
616
617Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
618 const auto insert{[&](Opcode opcode, size_t limit) {
619 if (element >= limit) {
620 throw InvalidArgument("Out of bounds element {}", element);
621 }
622 return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
623 }};
624 switch (vector.Type()) {
625 case Type::U32x2:
626 return insert(Opcode::CompositeInsertU32x2, 2);
627 case Type::U32x3:
628 return insert(Opcode::CompositeInsertU32x3, 3);
629 case Type::U32x4:
630 return insert(Opcode::CompositeInsertU32x4, 4);
631 case Type::F16x2:
632 return insert(Opcode::CompositeInsertF16x2, 2);
633 case Type::F16x3:
634 return insert(Opcode::CompositeInsertF16x3, 3);
635 case Type::F16x4:
636 return insert(Opcode::CompositeInsertF16x4, 4);
637 case Type::F32x2:
638 return insert(Opcode::CompositeInsertF32x2, 2);
639 case Type::F32x3:
640 return insert(Opcode::CompositeInsertF32x3, 3);
641 case Type::F32x4:
642 return insert(Opcode::CompositeInsertF32x4, 4);
643 case Type::F64x2:
644 return insert(Opcode::CompositeInsertF64x2, 2);
645 case Type::F64x3:
646 return insert(Opcode::CompositeInsertF64x3, 3);
647 case Type::F64x4:
648 return insert(Opcode::CompositeInsertF64x4, 4);
649 default:
650 ThrowInvalidType(vector.Type());
651 }
652}
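// Construct/Extract/Insert cover every composite type in the IR, and element
// indices are range-checked at emission time rather than left to the backend.
// Illustrative sketch (operands assumed to share a type, e.g. F32):
//     const IR::Value vec{ir.CompositeConstruct(x, y, z)};     // F32x3
//     const IR::F32 mid{IR::F32{ir.CompositeExtract(vec, 1)}};
//     const IR::Value upd{ir.CompositeInsert(vec, w, 2)};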
653
654Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
655 if (true_value.Type() != false_value.Type()) {
656 throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
657 }
658 switch (true_value.Type()) {
659 case Type::U1:
660 return Inst(Opcode::SelectU1, condition, true_value, false_value);
661 case Type::U8:
662 return Inst(Opcode::SelectU8, condition, true_value, false_value);
663 case Type::U16:
664 return Inst(Opcode::SelectU16, condition, true_value, false_value);
665 case Type::U32:
666 return Inst(Opcode::SelectU32, condition, true_value, false_value);
667 case Type::U64:
668 return Inst(Opcode::SelectU64, condition, true_value, false_value);
669 case Type::F32:
670 return Inst(Opcode::SelectF32, condition, true_value, false_value);
671 case Type::F64:
672 return Inst(Opcode::SelectF64, condition, true_value, false_value);
673 default:
674 throw InvalidArgument("Invalid type {}", true_value.Type());
675 }
676}
677
678template <>
679IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
680 return Inst<IR::U32>(Opcode::BitCastU32F32, value);
681}
682
683template <>
684IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
685 return Inst<IR::F32>(Opcode::BitCastF32U32, value);
686}
687
688template <>
689IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
690 return Inst<IR::U16>(Opcode::BitCastU16F16, value);
691}
692
693template <>
694IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
695 return Inst<IR::F16>(Opcode::BitCastF16U16, value);
696}
697
698template <>
699IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
700 return Inst<IR::U64>(Opcode::BitCastU64F64, value);
701}
702
703template <>
704IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
705 return Inst<IR::F64>(Opcode::BitCastF64U64, value);
706}
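// BitCast is only instantiated for these six same-width reinterpretations
// (U32<->F32, U16<->F16, U64<->F64); each explicit specialization emits its
// dedicated opcode. Usage spells out both template arguments:
//     const IR::U32 bits{ir.BitCast<IR::U32, IR::F32>(f32_value)};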
707
708U64 IREmitter::PackUint2x32(const Value& vector) {
709 return Inst<U64>(Opcode::PackUint2x32, vector);
710}
711
712Value IREmitter::UnpackUint2x32(const U64& value) {
713 return Inst<Value>(Opcode::UnpackUint2x32, value);
714}
715
716U32 IREmitter::PackFloat2x16(const Value& vector) {
717 return Inst<U32>(Opcode::PackFloat2x16, vector);
718}
719
720Value IREmitter::UnpackFloat2x16(const U32& value) {
721 return Inst(Opcode::UnpackFloat2x16, value);
722}
723
724U32 IREmitter::PackHalf2x16(const Value& vector) {
725 return Inst<U32>(Opcode::PackHalf2x16, vector);
726}
727
728Value IREmitter::UnpackHalf2x16(const U32& value) {
729 return Inst(Opcode::UnpackHalf2x16, value);
730}
731
732F64 IREmitter::PackDouble2x32(const Value& vector) {
733 return Inst<F64>(Opcode::PackDouble2x32, vector);
734}
735
736Value IREmitter::UnpackDouble2x32(const F64& value) {
737 return Inst<Value>(Opcode::UnpackDouble2x32, value);
738}
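// The Pack/Unpack pairs convert between a wide scalar and a two-component
// vector (Uint2x32, Float2x16/Half2x16, Double2x32), e.g. splitting a 64-bit
// address for 32-bit arithmetic and repacking it afterwards:
//     const IR::Value halves{ir.UnpackUint2x32(addr)};
//     const IR::U64 repacked{ir.PackUint2x32(halves)};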
739
740F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
741 if (a.Type() != b.Type()) {
742 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
743 }
744 switch (a.Type()) {
745 case Type::F16:
746 return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
747 case Type::F32:
748 return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
749 case Type::F64:
750 return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
751 default:
752 ThrowInvalidType(a.Type());
753 }
754}
755
756F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
757 FpControl control) {
758 if (a.Type() != b.Type() || a.Type() != c.Type()) {
759 throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
760 }
761 switch (a.Type()) {
762 case Type::F16:
763 return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
764 case Type::F32:
765 return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
766 case Type::F64:
767 return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
768 default:
769 ThrowInvalidType(a.Type());
770 }
771}
772
773F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
774 switch (value.Type()) {
775 case Type::F16:
776 return Inst<F16>(Opcode::FPAbs16, value);
777 case Type::F32:
778 return Inst<F32>(Opcode::FPAbs32, value);
779 case Type::F64:
780 return Inst<F64>(Opcode::FPAbs64, value);
781 default:
782 ThrowInvalidType(value.Type());
783 }
784}
785
786F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
787 switch (value.Type()) {
788 case Type::F16:
789 return Inst<F16>(Opcode::FPNeg16, value);
790 case Type::F32:
791 return Inst<F32>(Opcode::FPNeg32, value);
792 case Type::F64:
793 return Inst<F64>(Opcode::FPNeg64, value);
794 default:
795 ThrowInvalidType(value.Type());
796 }
797}
798
799F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
800 F16F32F64 result{value};
801 if (abs) {
802 result = FPAbs(result);
803 }
804 if (neg) {
805 result = FPNeg(result);
806 }
807 return result;
808}
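// FPAbsNeg applies abs before neg, so setting both modifiers yields -|x|
// rather than |-x|.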
809
810F32 IREmitter::FPCos(const F32& value) {
811 return Inst<F32>(Opcode::FPCos, value);
812}
813
814F32 IREmitter::FPSin(const F32& value) {
815 return Inst<F32>(Opcode::FPSin, value);
816}
817
818F32 IREmitter::FPExp2(const F32& value) {
819 return Inst<F32>(Opcode::FPExp2, value);
820}
821
822F32 IREmitter::FPLog2(const F32& value) {
823 return Inst<F32>(Opcode::FPLog2, value);
824}
825
826F32F64 IREmitter::FPRecip(const F32F64& value) {
827 switch (value.Type()) {
828 case Type::F32:
829 return Inst<F32>(Opcode::FPRecip32, value);
830 case Type::F64:
831 return Inst<F64>(Opcode::FPRecip64, value);
832 default:
833 ThrowInvalidType(value.Type());
834 }
835}
836
837F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
838 switch (value.Type()) {
839 case Type::F32:
840 return Inst<F32>(Opcode::FPRecipSqrt32, value);
841 case Type::F64:
842 return Inst<F64>(Opcode::FPRecipSqrt64, value);
843 default:
844 ThrowInvalidType(value.Type());
845 }
846}
847
848F32 IREmitter::FPSqrt(const F32& value) {
849 return Inst<F32>(Opcode::FPSqrt, value);
850}
851
852F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
853 switch (value.Type()) {
854 case Type::F16:
855 return Inst<F16>(Opcode::FPSaturate16, value);
856 case Type::F32:
857 return Inst<F32>(Opcode::FPSaturate32, value);
858 case Type::F64:
859 return Inst<F64>(Opcode::FPSaturate64, value);
860 default:
861 ThrowInvalidType(value.Type());
862 }
863}
864
865F16F32F64 IREmitter::FPClamp(const F16F32F64& value, const F16F32F64& min_value,
866 const F16F32F64& max_value) {
867 if (value.Type() != min_value.Type() || value.Type() != max_value.Type()) {
868 throw InvalidArgument("Mismatching types {}, {}, and {}", value.Type(), min_value.Type(),
869 max_value.Type());
870 }
871 switch (value.Type()) {
872 case Type::F16:
873 return Inst<F16>(Opcode::FPClamp16, value, min_value, max_value);
874 case Type::F32:
875 return Inst<F32>(Opcode::FPClamp32, value, min_value, max_value);
876 case Type::F64:
877 return Inst<F64>(Opcode::FPClamp64, value, min_value, max_value);
878 default:
879 ThrowInvalidType(value.Type());
880 }
881}
882
883F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) {
884 switch (value.Type()) {
885 case Type::F16:
886 return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value);
887 case Type::F32:
888 return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value);
889 case Type::F64:
890 return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value);
891 default:
892 ThrowInvalidType(value.Type());
893 }
894}
895
896F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) {
897 switch (value.Type()) {
898 case Type::F16:
899 return Inst<F16>(Opcode::FPFloor16, Flags{control}, value);
900 case Type::F32:
901 return Inst<F32>(Opcode::FPFloor32, Flags{control}, value);
902 case Type::F64:
903 return Inst<F64>(Opcode::FPFloor64, Flags{control}, value);
904 default:
905 ThrowInvalidType(value.Type());
906 }
907}
908
909F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) {
910 switch (value.Type()) {
911 case Type::F16:
912 return Inst<F16>(Opcode::FPCeil16, Flags{control}, value);
913 case Type::F32:
914 return Inst<F32>(Opcode::FPCeil32, Flags{control}, value);
915 case Type::F64:
916 return Inst<F64>(Opcode::FPCeil64, Flags{control}, value);
917 default:
918 ThrowInvalidType(value.Type());
919 }
920}
921
922F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
923 switch (value.Type()) {
924 case Type::F16:
925 return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value);
926 case Type::F32:
927 return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value);
928 case Type::F64:
929 return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value);
930 default:
931 ThrowInvalidType(value.Type());
932 }
933}
934
935U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) {
936 if (lhs.Type() != rhs.Type()) {
937 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
938 }
939 switch (lhs.Type()) {
940 case Type::F16:
941 return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control},
942 lhs, rhs);
943 case Type::F32:
944 return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control},
945 lhs, rhs);
946 case Type::F64:
947 return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control},
948 lhs, rhs);
949 default:
950 ThrowInvalidType(lhs.Type());
951 }
952}
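// The FPOrd*/FPUnord* split here and in the comparison helpers below follows
// the usual ordered/unordered float-comparison semantics: ordered compares
// are false when either operand is NaN, unordered compares are true.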
953
954U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
955 bool ordered) {
956 if (lhs.Type() != rhs.Type()) {
957 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
958 }
959 switch (lhs.Type()) {
960 case Type::F16:
961 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16,
962 Flags{control}, lhs, rhs);
963 case Type::F32:
964 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32,
965 Flags{control}, lhs, rhs);
966 case Type::F64:
967 return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64,
968 Flags{control}, lhs, rhs);
969 default:
970 ThrowInvalidType(lhs.Type());
971 }
972}
973
974U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
975 bool ordered) {
976 if (lhs.Type() != rhs.Type()) {
977 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
978 }
979 switch (lhs.Type()) {
980 case Type::F16:
981 return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16,
982 Flags{control}, lhs, rhs);
983 case Type::F32:
984 return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32,
985 Flags{control}, lhs, rhs);
986 case Type::F64:
987 return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64,
988 Flags{control}, lhs, rhs);
989 default:
990 ThrowInvalidType(lhs.Type());
991 }
992}
993
994U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
995 bool ordered) {
996 if (lhs.Type() != rhs.Type()) {
997 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
998 }
999 switch (lhs.Type()) {
1000 case Type::F16:
1001 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16,
1002 Flags{control}, lhs, rhs);
1003 case Type::F32:
1004 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32,
1005 Flags{control}, lhs, rhs);
1006 case Type::F64:
1007 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64,
1008 Flags{control}, lhs, rhs);
1009 default:
1010 ThrowInvalidType(lhs.Type());
1011 }
1012}
1013
1014U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
1015 bool ordered) {
1016 if (lhs.Type() != rhs.Type()) {
1017 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1018 }
1019 switch (lhs.Type()) {
1020 case Type::F16:
1021 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
1022 Flags{control}, lhs, rhs);
1023 case Type::F32:
1024 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
1025 Flags{control}, lhs, rhs);
1026 case Type::F64:
1027 return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
1028 Flags{control}, lhs, rhs);
1029 default:
1030 ThrowInvalidType(lhs.Type());
1031 }
1032}
1033
1034U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
1035 bool ordered) {
1036 if (lhs.Type() != rhs.Type()) {
1037 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1038 }
1039 switch (lhs.Type()) {
1040 case Type::F16:
1041 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
1042 : Opcode::FPUnordGreaterThanEqual16,
1043 Flags{control}, lhs, rhs);
1044 case Type::F32:
1045 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
1046 : Opcode::FPUnordGreaterThanEqual32,
1047 Flags{control}, lhs, rhs);
1048 case Type::F64:
1049 return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
1050 : Opcode::FPUnordGreaterThanEqual64,
1051 Flags{control}, lhs, rhs);
1052 default:
1053 ThrowInvalidType(lhs.Type());
1054 }
1055}
1056
1057U1 IREmitter::FPIsNan(const F16F32F64& value) {
1058 switch (value.Type()) {
1059 case Type::F16:
1060 return Inst<U1>(Opcode::FPIsNan16, value);
1061 case Type::F32:
1062 return Inst<U1>(Opcode::FPIsNan32, value);
1063 case Type::F64:
1064 return Inst<U1>(Opcode::FPIsNan64, value);
1065 default:
1066 ThrowInvalidType(value.Type());
1067 }
1068}
1069
1070U1 IREmitter::FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs) {
1071 if (lhs.Type() != rhs.Type()) {
1072 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1073 }
1074 return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
1075}
1076
1077U1 IREmitter::FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs) {
1078 if (lhs.Type() != rhs.Type()) {
1079 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1080 }
1081 return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
1082}
1083
1084F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control) {
1085 if (lhs.Type() != rhs.Type()) {
1086 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1087 }
1088 switch (lhs.Type()) {
1089 case Type::F32:
1090 return Inst<F32>(Opcode::FPMax32, Flags{control}, lhs, rhs);
1091 case Type::F64:
1092 return Inst<F64>(Opcode::FPMax64, Flags{control}, lhs, rhs);
1093 default:
1094 ThrowInvalidType(lhs.Type());
1095 }
1096}
1097
1098F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control) {
1099 if (lhs.Type() != rhs.Type()) {
1100 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1101 }
1102 switch (lhs.Type()) {
1103 case Type::F32:
1104 return Inst<F32>(Opcode::FPMin32, Flags{control}, lhs, rhs);
1105 case Type::F64:
1106 return Inst<F64>(Opcode::FPMin64, Flags{control}, lhs, rhs);
1107 default:
1108 ThrowInvalidType(lhs.Type());
1109 }
1110}
1111
1112U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
1113 if (a.Type() != b.Type()) {
1114 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
1115 }
1116 switch (a.Type()) {
1117 case Type::U32:
1118 return Inst<U32>(Opcode::IAdd32, a, b);
1119 case Type::U64:
1120 return Inst<U64>(Opcode::IAdd64, a, b);
1121 default:
1122 ThrowInvalidType(a.Type());
1123 }
1124}
1125
1126U32U64 IREmitter::ISub(const U32U64& a, const U32U64& b) {
1127 if (a.Type() != b.Type()) {
1128 throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
1129 }
1130 switch (a.Type()) {
1131 case Type::U32:
1132 return Inst<U32>(Opcode::ISub32, a, b);
1133 case Type::U64:
1134 return Inst<U64>(Opcode::ISub64, a, b);
1135 default:
1136 ThrowInvalidType(a.Type());
1137 }
1138}
1139
1140U32 IREmitter::IMul(const U32& a, const U32& b) {
1141 return Inst<U32>(Opcode::IMul32, a, b);
1142}
1143
1144U32U64 IREmitter::INeg(const U32U64& value) {
1145 switch (value.Type()) {
1146 case Type::U32:
1147 return Inst<U32>(Opcode::INeg32, value);
1148 case Type::U64:
1149 return Inst<U64>(Opcode::INeg64, value);
1150 default:
1151 ThrowInvalidType(value.Type());
1152 }
1153}
1154
1155U32 IREmitter::IAbs(const U32& value) {
1156 return Inst<U32>(Opcode::IAbs32, value);
1157}
1158
1159U32U64 IREmitter::ShiftLeftLogical(const U32U64& base, const U32& shift) {
1160 switch (base.Type()) {
1161 case Type::U32:
1162 return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift);
1163 case Type::U64:
1164 return Inst<U64>(Opcode::ShiftLeftLogical64, base, shift);
1165 default:
1166 ThrowInvalidType(base.Type());
1167 }
1168}
1169
1170U32U64 IREmitter::ShiftRightLogical(const U32U64& base, const U32& shift) {
1171 switch (base.Type()) {
1172 case Type::U32:
1173 return Inst<U32>(Opcode::ShiftRightLogical32, base, shift);
1174 case Type::U64:
1175 return Inst<U64>(Opcode::ShiftRightLogical64, base, shift);
1176 default:
1177 ThrowInvalidType(base.Type());
1178 }
1179}
1180
1181U32U64 IREmitter::ShiftRightArithmetic(const U32U64& base, const U32& shift) {
1182 switch (base.Type()) {
1183 case Type::U32:
1184 return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift);
1185 case Type::U64:
1186 return Inst<U64>(Opcode::ShiftRightArithmetic64, base, shift);
1187 default:
1188 ThrowInvalidType(base.Type());
1189 }
1190}
1191
1192U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
1193 return Inst<U32>(Opcode::BitwiseAnd32, a, b);
1194}
1195
1196U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
1197 return Inst<U32>(Opcode::BitwiseOr32, a, b);
1198}
1199
1200U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
1201 return Inst<U32>(Opcode::BitwiseXor32, a, b);
1202}
1203
1204U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
1205 const U32& count) {
1206 return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count);
1207}
1208
1209U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
1210 bool is_signed) {
1211 return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset,
1212 count);
1213}
1214
1215U32 IREmitter::BitReverse(const U32& value) {
1216 return Inst<U32>(Opcode::BitReverse32, value);
1217}
1218
1219U32 IREmitter::BitCount(const U32& value) {
1220 return Inst<U32>(Opcode::BitCount32, value);
1221}
1222
1223U32 IREmitter::BitwiseNot(const U32& value) {
1224 return Inst<U32>(Opcode::BitwiseNot32, value);
1225}
1226
1227U32 IREmitter::FindSMsb(const U32& value) {
1228 return Inst<U32>(Opcode::FindSMsb32, value);
1229}
1230
1231U32 IREmitter::FindUMsb(const U32& value) {
1232 return Inst<U32>(Opcode::FindUMsb32, value);
1233}
1234
1235U32 IREmitter::SMin(const U32& a, const U32& b) {
1236 return Inst<U32>(Opcode::SMin32, a, b);
1237}
1238
1239U32 IREmitter::UMin(const U32& a, const U32& b) {
1240 return Inst<U32>(Opcode::UMin32, a, b);
1241}
1242
1243U32 IREmitter::IMin(const U32& a, const U32& b, bool is_signed) {
1244 return is_signed ? SMin(a, b) : UMin(a, b);
1245}
1246
1247U32 IREmitter::SMax(const U32& a, const U32& b) {
1248 return Inst<U32>(Opcode::SMax32, a, b);
1249}
1250
1251U32 IREmitter::UMax(const U32& a, const U32& b) {
1252 return Inst<U32>(Opcode::UMax32, a, b);
1253}
1254
1255U32 IREmitter::IMax(const U32& a, const U32& b, bool is_signed) {
1256 return is_signed ? SMax(a, b) : UMax(a, b);
1257}
1258
1259U32 IREmitter::SClamp(const U32& value, const U32& min, const U32& max) {
1260 return Inst<U32>(Opcode::SClamp32, value, min, max);
1261}
1262
1263U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) {
1264 return Inst<U32>(Opcode::UClamp32, value, min, max);
1265}
1266
1267U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
1268 return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs);
1269}
1270
1271U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) {
1272 if (lhs.Type() != rhs.Type()) {
1273 throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
1274 }
1275 switch (lhs.Type()) {
1276 case Type::U32:
1277 return Inst<U1>(Opcode::IEqual, lhs, rhs);
1278 case Type::U64: {
1279 // Manually compare the unpacked values
1280 const Value lhs_vector{UnpackUint2x32(lhs)};
1281 const Value rhs_vector{UnpackUint2x32(rhs)};
1282 return LogicalAnd(IEqual(IR::U32{CompositeExtract(lhs_vector, 0)},
1283 IR::U32{CompositeExtract(rhs_vector, 0)}),
1284 IEqual(IR::U32{CompositeExtract(lhs_vector, 1)},
1285 IR::U32{CompositeExtract(rhs_vector, 1)}));
1286 }
1287 default:
1288 ThrowInvalidType(lhs.Type());
1289 }
1290}
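// The U64 arm of IEqual lowers the comparison manually: both operands are
// unpacked into U32x2 and the per-half equalities are ANDed, so a backend
// only needs a 32-bit integer compare to support it.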
1291
1292U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
1293 return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs);
1294}
1295
1296U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
1297 return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs);
1298}
1299
1300U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
1301 return Inst<U1>(Opcode::INotEqual, lhs, rhs);
1302}
1303
1304U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
1305 return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs);
1306}
1307
1308U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) {
1309 return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value);
1310}
1311
1312U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) {
1313 return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value);
1314}
1315
1316U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) {
1317 return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value);
1318}
1319
1320U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) {
1321 return is_signed ? SharedAtomicSMin(pointer_offset, value)
1322 : SharedAtomicUMin(pointer_offset, value);
1323}
1324
1325U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) {
1326 return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value);
1327}
1328
1329U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) {
1330 return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value);
1331}
1332
1333U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) {
1334 return is_signed ? SharedAtomicSMax(pointer_offset, value)
1335 : SharedAtomicUMax(pointer_offset, value);
1336}
1337
1338U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) {
1339 return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value);
1340}
1341
1342U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) {
1343 return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value);
1344}
1345
1346U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) {
1347 return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value);
1348}
1349
1350U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) {
1351 return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value);
1352}
1353
1354U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) {
1355 return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value);
1356}
1357
1358U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) {
1359 switch (value.Type()) {
1360 case Type::U32:
1361 return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value);
1362 case Type::U64:
1363 return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value);
1364 default:
 1365 ThrowInvalidType(value.Type());
1366 }
1367}
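// Exchange is the only shared atomic in this file with a 64-bit variant;
// every other SharedAtomic* helper above is U32-only.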
1368
1369U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) {
1370 switch (value.Type()) {
1371 case Type::U32:
1372 return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value);
1373 case Type::U64:
1374 return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value);
1375 default:
1376 ThrowInvalidType(value.Type());
1377 }
1378}
1379
1380U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) {
1381 switch (value.Type()) {
1382 case Type::U32:
1383 return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value);
1384 case Type::U64:
1385 return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value);
1386 default:
1387 ThrowInvalidType(value.Type());
1388 }
1389}
1390
1391U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) {
1392 switch (value.Type()) {
1393 case Type::U32:
1394 return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value);
1395 case Type::U64:
1396 return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value);
1397 default:
1398 ThrowInvalidType(value.Type());
1399 }
1400}
1401
1402U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1403 return is_signed ? GlobalAtomicSMin(pointer_offset, value)
1404 : GlobalAtomicUMin(pointer_offset, value);
1405}
1406
1407U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) {
1408 switch (value.Type()) {
1409 case Type::U32:
1410 return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value);
1411 case Type::U64:
1412 return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value);
1413 default:
1414 ThrowInvalidType(value.Type());
1415 }
1416}
1417
1418U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) {
1419 switch (value.Type()) {
1420 case Type::U32:
1421 return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value);
1422 case Type::U64:
1423 return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value);
1424 default:
1425 ThrowInvalidType(value.Type());
1426 }
1427}
1428
1429U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) {
1430 return is_signed ? GlobalAtomicSMax(pointer_offset, value)
1431 : GlobalAtomicUMax(pointer_offset, value);
1432}
1433
1434U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) {
1435 return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value);
1436}
1437
1438U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) {
1439 return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value);
1440}
1441
1442U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) {
1443 switch (value.Type()) {
1444 case Type::U32:
1445 return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value);
1446 case Type::U64:
1447 return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value);
1448 default:
1449 ThrowInvalidType(value.Type());
1450 }
1451}
1452
1453U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) {
1454 switch (value.Type()) {
1455 case Type::U32:
1456 return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value);
1457 case Type::U64:
1458 return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value);
1459 default:
1460 ThrowInvalidType(value.Type());
1461 }
1462}
1463
1464U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) {
1465 switch (value.Type()) {
1466 case Type::U32:
1467 return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value);
1468 case Type::U64:
1469 return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value);
1470 default:
1471 ThrowInvalidType(value.Type());
1472 }
1473}
1474
1475U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) {
1476 switch (value.Type()) {
1477 case Type::U32:
1478 return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value);
1479 case Type::U64:
1480 return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value);
1481 default:
 1482 ThrowInvalidType(value.Type());
1483 }
1484}
1485
1486F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
1487 const FpControl control) {
1488 return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value);
1489}
1490
1491Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
1492 const FpControl control) {
1493 return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value);
1494}
1495
1496Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
1497 const FpControl control) {
1498 return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value);
1499}
1500
1501Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
1502 const FpControl control) {
1503 return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value);
1504}
1505
1506U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
1507 return Inst<U1>(Opcode::LogicalOr, a, b);
1508}
1509
1510U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
1511 return Inst<U1>(Opcode::LogicalAnd, a, b);
1512}
1513
1514U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
1515 return Inst<U1>(Opcode::LogicalXor, a, b);
1516}
1517
1518U1 IREmitter::LogicalNot(const U1& value) {
1519 return Inst<U1>(Opcode::LogicalNot, value);
1520}
1521
1522U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
1523 switch (bitsize) {
1524 case 16:
1525 switch (value.Type()) {
1526 case Type::F16:
1527 return Inst<U32>(Opcode::ConvertS16F16, value);
1528 case Type::F32:
1529 return Inst<U32>(Opcode::ConvertS16F32, value);
1530 case Type::F64:
1531 return Inst<U32>(Opcode::ConvertS16F64, value);
1532 default:
1533 ThrowInvalidType(value.Type());
1534 }
1535 case 32:
1536 switch (value.Type()) {
1537 case Type::F16:
1538 return Inst<U32>(Opcode::ConvertS32F16, value);
1539 case Type::F32:
1540 return Inst<U32>(Opcode::ConvertS32F32, value);
1541 case Type::F64:
1542 return Inst<U32>(Opcode::ConvertS32F64, value);
1543 default:
1544 ThrowInvalidType(value.Type());
1545 }
1546 case 64:
1547 switch (value.Type()) {
1548 case Type::F16:
1549 return Inst<U64>(Opcode::ConvertS64F16, value);
1550 case Type::F32:
1551 return Inst<U64>(Opcode::ConvertS64F32, value);
1552 case Type::F64:
1553 return Inst<U64>(Opcode::ConvertS64F64, value);
1554 default:
1555 ThrowInvalidType(value.Type());
1556 }
1557 default:
1558 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1559 }
1560}
1561
1562U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
1563 switch (bitsize) {
1564 case 16:
1565 switch (value.Type()) {
1566 case Type::F16:
1567 return Inst<U32>(Opcode::ConvertU16F16, value);
1568 case Type::F32:
1569 return Inst<U32>(Opcode::ConvertU16F32, value);
1570 case Type::F64:
1571 return Inst<U32>(Opcode::ConvertU16F64, value);
1572 default:
1573 ThrowInvalidType(value.Type());
1574 }
1575 case 32:
1576 switch (value.Type()) {
1577 case Type::F16:
1578 return Inst<U32>(Opcode::ConvertU32F16, value);
1579 case Type::F32:
1580 return Inst<U32>(Opcode::ConvertU32F32, value);
1581 case Type::F64:
1582 return Inst<U32>(Opcode::ConvertU32F64, value);
1583 default:
1584 ThrowInvalidType(value.Type());
1585 }
1586 case 64:
1587 switch (value.Type()) {
1588 case Type::F16:
1589 return Inst<U64>(Opcode::ConvertU64F16, value);
1590 case Type::F32:
1591 return Inst<U64>(Opcode::ConvertU64F32, value);
1592 case Type::F64:
1593 return Inst<U64>(Opcode::ConvertU64F64, value);
1594 default:
1595 ThrowInvalidType(value.Type());
1596 }
1597 default:
1598 throw InvalidArgument("Invalid destination bitsize {}", bitsize);
1599 }
1600}
1601
1602U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
1603 return is_signed ? ConvertFToS(bitsize, value) : ConvertFToU(bitsize, value);
1604}
1605
1606F16F32F64 IREmitter::ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
1607 FpControl control) {
1608 switch (dest_bitsize) {
1609 case 16:
1610 switch (src_bitsize) {
1611 case 8:
1612 return Inst<F16>(Opcode::ConvertF16S8, Flags{control}, value);
1613 case 16:
1614 return Inst<F16>(Opcode::ConvertF16S16, Flags{control}, value);
1615 case 32:
1616 return Inst<F16>(Opcode::ConvertF16S32, Flags{control}, value);
1617 case 64:
1618 return Inst<F16>(Opcode::ConvertF16S64, Flags{control}, value);
1619 }
1620 break;
1621 case 32:
1622 switch (src_bitsize) {
1623 case 8:
1624 return Inst<F32>(Opcode::ConvertF32S8, Flags{control}, value);
1625 case 16:
1626 return Inst<F32>(Opcode::ConvertF32S16, Flags{control}, value);
1627 case 32:
1628 return Inst<F32>(Opcode::ConvertF32S32, Flags{control}, value);
1629 case 64:
1630 return Inst<F32>(Opcode::ConvertF32S64, Flags{control}, value);
1631 }
1632 break;
1633 case 64:
1634 switch (src_bitsize) {
1635 case 8:
1636 return Inst<F64>(Opcode::ConvertF64S8, Flags{control}, value);
1637 case 16:
1638 return Inst<F64>(Opcode::ConvertF64S16, Flags{control}, value);
1639 case 32:
1640 return Inst<F64>(Opcode::ConvertF64S32, Flags{control}, value);
1641 case 64:
1642 return Inst<F64>(Opcode::ConvertF64S64, Flags{control}, value);
1643 }
1644 break;
1645 }
1646 throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
1647}
1648
1649F16F32F64 IREmitter::ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
1650 FpControl control) {
1651 switch (dest_bitsize) {
1652 case 16:
1653 switch (src_bitsize) {
1654 case 8:
1655 return Inst<F16>(Opcode::ConvertF16U8, Flags{control}, value);
1656 case 16:
1657 return Inst<F16>(Opcode::ConvertF16U16, Flags{control}, value);
1658 case 32:
1659 return Inst<F16>(Opcode::ConvertF16U32, Flags{control}, value);
1660 case 64:
1661 return Inst<F16>(Opcode::ConvertF16U64, Flags{control}, value);
1662 }
1663 break;
1664 case 32:
1665 switch (src_bitsize) {
1666 case 8:
1667 return Inst<F32>(Opcode::ConvertF32U8, Flags{control}, value);
1668 case 16:
1669 return Inst<F32>(Opcode::ConvertF32U16, Flags{control}, value);
1670 case 32:
1671 return Inst<F32>(Opcode::ConvertF32U32, Flags{control}, value);
1672 case 64:
1673 return Inst<F32>(Opcode::ConvertF32U64, Flags{control}, value);
1674 }
1675 break;
1676 case 64:
1677 switch (src_bitsize) {
1678 case 8:
1679 return Inst<F64>(Opcode::ConvertF64U8, Flags{control}, value);
1680 case 16:
1681 return Inst<F64>(Opcode::ConvertF64U16, Flags{control}, value);
1682 case 32:
1683 return Inst<F64>(Opcode::ConvertF64U32, Flags{control}, value);
1684 case 64:
1685 return Inst<F64>(Opcode::ConvertF64U64, Flags{control}, value);
1686 }
1687 break;
1688 }
1689 throw InvalidArgument("Invalid bit size combination dst={} src={}", dest_bitsize, src_bitsize);
1690}
1691
1692F16F32F64 IREmitter::ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
1693 const Value& value, FpControl control) {
1694 return is_signed ? ConvertSToF(dest_bitsize, src_bitsize, value, control)
1695 : ConvertUToF(dest_bitsize, src_bitsize, value, control);
1696}
1697
1698U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
1699 switch (result_bitsize) {
1700 case 32:
1701 switch (value.Type()) {
1702 case Type::U32:
1703 // Nothing to do
1704 return value;
1705 case Type::U64:
1706 return Inst<U32>(Opcode::ConvertU32U64, value);
1707 default:
1708 break;
1709 }
1710 break;
1711 case 64:
1712 switch (value.Type()) {
1713 case Type::U32:
1714 return Inst<U64>(Opcode::ConvertU64U32, value);
1715 case Type::U64:
1716 // Nothing to do
1717 return value;
1718 default:
1719 break;
1720 }
1721 }
1722 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1723}
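// UConvert only moves between U32 and U64; same-width requests are returned
// untouched rather than re-emitted. Hypothetical narrowing of an address:
//     const IR::U32U64 low{ir.UConvert(32, addr64)};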
1724
1725F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value, FpControl control) {
1726 switch (result_bitsize) {
1727 case 16:
1728 switch (value.Type()) {
1729 case Type::F16:
1730 // Nothing to do
1731 return value;
1732 case Type::F32:
1733 return Inst<F16>(Opcode::ConvertF16F32, Flags{control}, value);
1734 case Type::F64:
1735 throw LogicError("Illegal conversion from F64 to F16");
1736 default:
1737 break;
1738 }
1739 break;
1740 case 32:
1741 switch (value.Type()) {
1742 case Type::F16:
1743 return Inst<F32>(Opcode::ConvertF32F16, Flags{control}, value);
1744 case Type::F32:
1745 // Nothing to do
1746 return value;
1747 case Type::F64:
1748 return Inst<F32>(Opcode::ConvertF32F64, Flags{control}, value);
1749 default:
1750 break;
1751 }
1752 break;
1753 case 64:
1754 switch (value.Type()) {
1755 case Type::F16:
1756 throw LogicError("Illegal conversion from F16 to F64");
1757 case Type::F32:
1758 return Inst<F64>(Opcode::ConvertF64F32, Flags{control}, value);
1759 case Type::F64:
1760 // Nothing to do
1761 return value;
1762 default:
1763 break;
1764 }
1765 break;
1766 }
1767 throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
1768}
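// FPConvert rejects direct F16<->F64 conversions as LogicError, so callers
// presumably have to route half/double conversions through F32 in two steps.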
1769
1770Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
1771 const Value& offset, const F32& lod_clamp,
1772 TextureInstInfo info) {
1773 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1774 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
1775 : Opcode::BindlessImageSampleImplicitLod};
1776 return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
1777}
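// All image helpers below share this dispatch: an immediate handle selects
// the Bound* opcode and a runtime handle the Bindless* form, while the
// anonymous-namespace helper above fuses the optional bias and LOD-clamp
// operands into a single bias_lc value.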
1778
1779Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
1780 const Value& offset, TextureInstInfo info) {
1781 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
1782 : Opcode::BindlessImageSampleExplicitLod};
1783 return Inst(op, Flags{info}, handle, coords, lod, offset);
1784}
1785
1786F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
1787 const F32& bias, const Value& offset,
1788 const F32& lod_clamp, TextureInstInfo info) {
1789 const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
1790 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
1791 : Opcode::BindlessImageSampleDrefImplicitLod};
1792 return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
1793}
1794
1795F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
1796 const F32& lod, const Value& offset,
1797 TextureInstInfo info) {
1798 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
1799 : Opcode::BindlessImageSampleDrefExplicitLod};
1800 return Inst<F32>(op, Flags{info}, handle, coords, dref, lod, offset);
1801}
1802
1803Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
1804 const Value& offset2, TextureInstInfo info) {
1805 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGather : Opcode::BindlessImageGather};
1806 return Inst(op, Flags{info}, handle, coords, offset, offset2);
1807}
1808
1809Value IREmitter::ImageGatherDref(const Value& handle, const Value& coords, const Value& offset,
1810 const Value& offset2, const F32& dref, TextureInstInfo info) {
1811 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGatherDref
1812 : Opcode::BindlessImageGatherDref};
1813 return Inst(op, Flags{info}, handle, coords, offset, offset2, dref);
1814}
1815
1816Value IREmitter::ImageFetch(const Value& handle, const Value& coords, const Value& offset,
1817 const U32& lod, const U32& multisampling, TextureInstInfo info) {
1818 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageFetch : Opcode::BindlessImageFetch};
1819 return Inst(op, Flags{info}, handle, coords, offset, lod, multisampling);
1820}
1821
1822Value IREmitter::ImageQueryDimension(const Value& handle, const IR::U32& lod) {
1823 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryDimensions
1824 : Opcode::BindlessImageQueryDimensions};
1825 return Inst(op, handle, lod);
1826}
1827
1828Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, TextureInstInfo info) {
1829 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageQueryLod
1830 : Opcode::BindlessImageQueryLod};
1831 return Inst(op, Flags{info}, handle, coords);
1832}
1833
1834Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivates,
1835 const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
1836 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageGradient
1837 : Opcode::BindlessImageGradient};
1838 return Inst(op, Flags{info}, handle, coords, derivates, offset, lod_clamp);
1839}
1840
1841Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
1842 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageRead : Opcode::BindlessImageRead};
1843 return Inst(op, Flags{info}, handle, coords);
1844}
1845
1846void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color,
1847 TextureInstInfo info) {
1848 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite};
1849 Inst(op, Flags{info}, handle, coords, color);
1850}
1851
1852Value IREmitter::ImageAtomicIAdd(const Value& handle, const Value& coords, const Value& value,
1853 TextureInstInfo info) {
1854 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicIAdd32
1855 : Opcode::BindlessImageAtomicIAdd32};
1856 return Inst(op, Flags{info}, handle, coords, value);
1857}
1858
1859Value IREmitter::ImageAtomicSMin(const Value& handle, const Value& coords, const Value& value,
1860 TextureInstInfo info) {
1861 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMin32
1862 : Opcode::BindlessImageAtomicSMin32};
1863 return Inst(op, Flags{info}, handle, coords, value);
1864}
1865
1866Value IREmitter::ImageAtomicUMin(const Value& handle, const Value& coords, const Value& value,
1867 TextureInstInfo info) {
1868 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMin32
1869 : Opcode::BindlessImageAtomicUMin32};
1870 return Inst(op, Flags{info}, handle, coords, value);
1871}
1872
1873Value IREmitter::ImageAtomicIMin(const Value& handle, const Value& coords, const Value& value,
1874 bool is_signed, TextureInstInfo info) {
1875 return is_signed ? ImageAtomicSMin(handle, coords, value, info)
1876 : ImageAtomicUMin(handle, coords, value, info);
1877}
1878
1879Value IREmitter::ImageAtomicSMax(const Value& handle, const Value& coords, const Value& value,
1880 TextureInstInfo info) {
1881 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicSMax32
1882 : Opcode::BindlessImageAtomicSMax32};
1883 return Inst(op, Flags{info}, handle, coords, value);
1884}
1885
1886Value IREmitter::ImageAtomicUMax(const Value& handle, const Value& coords, const Value& value,
1887 TextureInstInfo info) {
1888 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicUMax32
1889 : Opcode::BindlessImageAtomicUMax32};
1890 return Inst(op, Flags{info}, handle, coords, value);
1891}
1892
1893Value IREmitter::ImageAtomicIMax(const Value& handle, const Value& coords, const Value& value,
1894 bool is_signed, TextureInstInfo info) {
1895 return is_signed ? ImageAtomicSMax(handle, coords, value, info)
1896 : ImageAtomicUMax(handle, coords, value, info);
1897}
1898
1899Value IREmitter::ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
1900 TextureInstInfo info) {
1901 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicInc32
1902 : Opcode::BindlessImageAtomicInc32};
1903 return Inst(op, Flags{info}, handle, coords, value);
1904}
1905
1906Value IREmitter::ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
1907 TextureInstInfo info) {
1908 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicDec32
1909 : Opcode::BindlessImageAtomicDec32};
1910 return Inst(op, Flags{info}, handle, coords, value);
1911}
1912
1913Value IREmitter::ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
1914 TextureInstInfo info) {
1915 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicAnd32
1916 : Opcode::BindlessImageAtomicAnd32};
1917 return Inst(op, Flags{info}, handle, coords, value);
1918}
1919
1920Value IREmitter::ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
1921 TextureInstInfo info) {
1922 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicOr32
1923 : Opcode::BindlessImageAtomicOr32};
1924 return Inst(op, Flags{info}, handle, coords, value);
1925}
1926
1927Value IREmitter::ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
1928 TextureInstInfo info) {
1929 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicXor32
1930 : Opcode::BindlessImageAtomicXor32};
1931 return Inst(op, Flags{info}, handle, coords, value);
1932}
1933
1934Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, const Value& value,
1935 TextureInstInfo info) {
1936 const Opcode op{handle.IsImmediate() ? Opcode::BoundImageAtomicExchange32
1937 : Opcode::BindlessImageAtomicExchange32};
1938 return Inst(op, Flags{info}, handle, coords, value);
1939}
1940
1941U1 IREmitter::VoteAll(const U1& value) {
1942 return Inst<U1>(Opcode::VoteAll, value);
1943}
1944
1945U1 IREmitter::VoteAny(const U1& value) {
1946 return Inst<U1>(Opcode::VoteAny, value);
1947}
1948
1949U1 IREmitter::VoteEqual(const U1& value) {
1950 return Inst<U1>(Opcode::VoteEqual, value);
1951}
1952
1953U32 IREmitter::SubgroupBallot(const U1& value) {
1954 return Inst<U32>(Opcode::SubgroupBallot, value);
1955}
1956
1957U32 IREmitter::SubgroupEqMask() {
1958 return Inst<U32>(Opcode::SubgroupEqMask);
1959}
1960
1961U32 IREmitter::SubgroupLtMask() {
1962 return Inst<U32>(Opcode::SubgroupLtMask);
1963}
1964
1965U32 IREmitter::SubgroupLeMask() {
1966 return Inst<U32>(Opcode::SubgroupLeMask);
1967}
1968
1969U32 IREmitter::SubgroupGtMask() {
1970 return Inst<U32>(Opcode::SubgroupGtMask);
1971}
1972
1973U32 IREmitter::SubgroupGeMask() {
1974 return Inst<U32>(Opcode::SubgroupGeMask);
1975}
1976
1977U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1978 const IR::U32& seg_mask) {
1979 return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask);
1980}
1981
1982U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1983 const IR::U32& seg_mask) {
1984 return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask);
1985}
1986
1987U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1988 const IR::U32& seg_mask) {
1989 return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask);
1990}
1991
1992U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
1993 const IR::U32& seg_mask) {
1994 return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask);
1995}
1996
1997F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpControl control) {
1998 return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle);
1999}
2000
2001F32 IREmitter::DPdxFine(const F32& a) {
2002 return Inst<F32>(Opcode::DPdxFine, a);
2003}
2004
2005F32 IREmitter::DPdyFine(const F32& a) {
2006 return Inst<F32>(Opcode::DPdyFine, a);
2007}
2008
2009F32 IREmitter::DPdxCoarse(const F32& a) {
2010 return Inst<F32>(Opcode::DPdxCoarse, a);
2011}
2012
2013F32 IREmitter::DPdyCoarse(const F32& a) {
2014 return Inst<F32>(Opcode::DPdyCoarse, a);
2015}
2016
2017} // namespace Shader::IR
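The image helpers above all follow one dispatch rule: an immediate handle selects the Bound* opcode, a runtime handle the Bindless* one. A minimal usage sketch, assuming an existing IR::Block& block plus coords, value, and info produced by the surrounding translator (all hypothetical names, not part of this diff):

    // Sketch only: emit an image atomic add through the emitter API above.
    IR::IREmitter ir{block};
    const IR::Value handle{ir.Imm32(0)}; // immediate handle -> BoundImageAtomicIAdd32
    const IR::Value result{ir.ImageAtomicIAdd(handle, coords, value, info)};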
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
new file mode 100644
index 000000000..53f7b3b06
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -0,0 +1,413 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <cstring>
8#include <type_traits>
9
10#include "shader_recompiler/frontend/ir/attribute.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/modifiers.h"
13#include "shader_recompiler/frontend/ir/value.h"
14
15namespace Shader::IR {
16
17class IREmitter {
18public:
19 explicit IREmitter(Block& block_) : block{&block_}, insertion_point{block->end()} {}
20 explicit IREmitter(Block& block_, Block::iterator insertion_point_)
21 : block{&block_}, insertion_point{insertion_point_} {}
22
23 Block* block;
24
25 [[nodiscard]] U1 Imm1(bool value) const;
26 [[nodiscard]] U8 Imm8(u8 value) const;
27 [[nodiscard]] U16 Imm16(u16 value) const;
28 [[nodiscard]] U32 Imm32(u32 value) const;
29 [[nodiscard]] U32 Imm32(s32 value) const;
30 [[nodiscard]] F32 Imm32(f32 value) const;
31 [[nodiscard]] U64 Imm64(u64 value) const;
32 [[nodiscard]] U64 Imm64(s64 value) const;
33 [[nodiscard]] F64 Imm64(f64 value) const;
34
35 U1 ConditionRef(const U1& value);
36 void Reference(const Value& value);
37
38 void PhiMove(IR::Inst& phi, const Value& value);
39
40 void Prologue();
41 void Epilogue();
42 void DemoteToHelperInvocation();
43 void EmitVertex(const U32& stream);
44 void EndPrimitive(const U32& stream);
45
46 [[nodiscard]] U32 GetReg(IR::Reg reg);
47 void SetReg(IR::Reg reg, const U32& value);
48
49 [[nodiscard]] U1 GetPred(IR::Pred pred, bool is_negated = false);
50 void SetPred(IR::Pred pred, const U1& value);
51
52 [[nodiscard]] U1 GetGotoVariable(u32 id);
53 void SetGotoVariable(u32 id, const U1& value);
54
55 [[nodiscard]] U32 GetIndirectBranchVariable();
56 void SetIndirectBranchVariable(const U32& value);
57
58 [[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
59 [[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
60 bool is_signed);
61 [[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);
62
63 [[nodiscard]] U1 GetZFlag();
64 [[nodiscard]] U1 GetSFlag();
65 [[nodiscard]] U1 GetCFlag();
66 [[nodiscard]] U1 GetOFlag();
67
68 void SetZFlag(const U1& value);
69 void SetSFlag(const U1& value);
70 void SetCFlag(const U1& value);
71 void SetOFlag(const U1& value);
72
73 [[nodiscard]] U1 Condition(IR::Condition cond);
74 [[nodiscard]] U1 GetFlowTestResult(FlowTest test);
75
76 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
77 [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex);
78 void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex);
79
80 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address);
81 [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex);
82 void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex);
83
84 [[nodiscard]] F32 GetPatch(Patch patch);
85 void SetPatch(Patch patch, const F32& value);
86
87 void SetFragColor(u32 index, u32 component, const F32& value);
88 void SetSampleMask(const U32& value);
89 void SetFragDepth(const F32& value);
90
91 [[nodiscard]] U32 WorkgroupIdX();
92 [[nodiscard]] U32 WorkgroupIdY();
93 [[nodiscard]] U32 WorkgroupIdZ();
94
95 [[nodiscard]] Value LocalInvocationId();
96 [[nodiscard]] U32 LocalInvocationIdX();
97 [[nodiscard]] U32 LocalInvocationIdY();
98 [[nodiscard]] U32 LocalInvocationIdZ();
99
100 [[nodiscard]] U32 InvocationId();
101 [[nodiscard]] U32 SampleId();
102 [[nodiscard]] U1 IsHelperInvocation();
103 [[nodiscard]] F32 YDirection();
104
105 [[nodiscard]] U32 LaneId();
106
107 [[nodiscard]] U32 LoadGlobalU8(const U64& address);
108 [[nodiscard]] U32 LoadGlobalS8(const U64& address);
109 [[nodiscard]] U32 LoadGlobalU16(const U64& address);
110 [[nodiscard]] U32 LoadGlobalS16(const U64& address);
111 [[nodiscard]] U32 LoadGlobal32(const U64& address);
112 [[nodiscard]] Value LoadGlobal64(const U64& address);
113 [[nodiscard]] Value LoadGlobal128(const U64& address);
114
115 void WriteGlobalU8(const U64& address, const U32& value);
116 void WriteGlobalS8(const U64& address, const U32& value);
117 void WriteGlobalU16(const U64& address, const U32& value);
118 void WriteGlobalS16(const U64& address, const U32& value);
119 void WriteGlobal32(const U64& address, const U32& value);
120 void WriteGlobal64(const U64& address, const IR::Value& vector);
121 void WriteGlobal128(const U64& address, const IR::Value& vector);
122
123 [[nodiscard]] U32 LoadLocal(const U32& word_offset);
124 void WriteLocal(const U32& word_offset, const U32& value);
125
126 [[nodiscard]] Value LoadShared(int bit_size, bool is_signed, const U32& offset);
127 void WriteShared(int bit_size, const U32& offset, const Value& value);
128
129 [[nodiscard]] U1 GetZeroFromOp(const Value& op);
130 [[nodiscard]] U1 GetSignFromOp(const Value& op);
131 [[nodiscard]] U1 GetCarryFromOp(const Value& op);
132 [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
133 [[nodiscard]] U1 GetSparseFromOp(const Value& op);
134 [[nodiscard]] U1 GetInBoundsFromOp(const Value& op);
135
136 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
137 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
138 [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
139 const Value& e4);
140 [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
141 [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
142
143 [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
144 const Value& false_value);
145
146 void Barrier();
147 void WorkgroupMemoryBarrier();
148 void DeviceMemoryBarrier();
149
150 template <typename Dest, typename Source>
151 [[nodiscard]] Dest BitCast(const Source& value);
152
153 [[nodiscard]] U64 PackUint2x32(const Value& vector);
154 [[nodiscard]] Value UnpackUint2x32(const U64& value);
155
156 [[nodiscard]] U32 PackFloat2x16(const Value& vector);
157 [[nodiscard]] Value UnpackFloat2x16(const U32& value);
158
159 [[nodiscard]] U32 PackHalf2x16(const Value& vector);
160 [[nodiscard]] Value UnpackHalf2x16(const U32& value);
161
162 [[nodiscard]] F64 PackDouble2x32(const Value& vector);
163 [[nodiscard]] Value UnpackDouble2x32(const F64& value);
164
165 [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
166 [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
167 [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
168 FpControl control = {});
169
170 [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
171 [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
172 [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
173
174 [[nodiscard]] F32 FPCos(const F32& value);
175 [[nodiscard]] F32 FPSin(const F32& value);
176 [[nodiscard]] F32 FPExp2(const F32& value);
177 [[nodiscard]] F32 FPLog2(const F32& value);
178 [[nodiscard]] F32F64 FPRecip(const F32F64& value);
179 [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
180 [[nodiscard]] F32 FPSqrt(const F32& value);
181 [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
182 [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value,
183 const F16F32F64& max_value);
184 [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {});
185 [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {});
186 [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
187 [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
188
189 [[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
190 bool ordered = true);
191 [[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
192 bool ordered = true);
193 [[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
194 bool ordered = true);
195 [[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
196 FpControl control = {}, bool ordered = true);
197 [[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
198 FpControl control = {}, bool ordered = true);
199 [[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
200 FpControl control = {}, bool ordered = true);
201 [[nodiscard]] U1 FPIsNan(const F16F32F64& value);
202 [[nodiscard]] U1 FPOrdered(const F16F32F64& lhs, const F16F32F64& rhs);
203 [[nodiscard]] U1 FPUnordered(const F16F32F64& lhs, const F16F32F64& rhs);
204 [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
205 [[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, FpControl control = {});
206
207 [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
208 [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
209 [[nodiscard]] U32 IMul(const U32& a, const U32& b);
210 [[nodiscard]] U32U64 INeg(const U32U64& value);
211 [[nodiscard]] U32 IAbs(const U32& value);
212 [[nodiscard]] U32U64 ShiftLeftLogical(const U32U64& base, const U32& shift);
213 [[nodiscard]] U32U64 ShiftRightLogical(const U32U64& base, const U32& shift);
214 [[nodiscard]] U32U64 ShiftRightArithmetic(const U32U64& base, const U32& shift);
215 [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
216 [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
217 [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
218 [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
219 const U32& count);
220 [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
221 bool is_signed = false);
222 [[nodiscard]] U32 BitReverse(const U32& value);
223 [[nodiscard]] U32 BitCount(const U32& value);
224 [[nodiscard]] U32 BitwiseNot(const U32& value);
225
226 [[nodiscard]] U32 FindSMsb(const U32& value);
227 [[nodiscard]] U32 FindUMsb(const U32& value);
228 [[nodiscard]] U32 SMin(const U32& a, const U32& b);
229 [[nodiscard]] U32 UMin(const U32& a, const U32& b);
230 [[nodiscard]] U32 IMin(const U32& a, const U32& b, bool is_signed);
231 [[nodiscard]] U32 SMax(const U32& a, const U32& b);
232 [[nodiscard]] U32 UMax(const U32& a, const U32& b);
233 [[nodiscard]] U32 IMax(const U32& a, const U32& b, bool is_signed);
234 [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max);
235 [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max);
236
237 [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
238 [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs);
239 [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
240 [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
241 [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
242 [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
243
244 [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value);
245 [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value);
246 [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value);
247 [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed);
248 [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value);
249 [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value);
250 [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed);
251 [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value);
252 [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value);
253 [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value);
254 [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value);
255 [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value);
256 [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value);
257
258 [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value);
259 [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value);
260 [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value);
261 [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value,
262 bool is_signed);
263 [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value);
264 [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value);
265 [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value,
266 bool is_signed);
267 [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value);
268 [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value);
269 [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value);
270 [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value);
271 [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value);
272 [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value);
273
274 [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value,
275 const FpControl control = {});
276 [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value,
277 const FpControl control = {});
278 [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value,
279 const FpControl control = {});
280 [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value,
281 const FpControl control = {});
282
283 [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
284 [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
285 [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
286 [[nodiscard]] U1 LogicalNot(const U1& value);
287
288 [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
289 [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
290 [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
291 [[nodiscard]] F16F32F64 ConvertSToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
292 FpControl control = {});
293 [[nodiscard]] F16F32F64 ConvertUToF(size_t dest_bitsize, size_t src_bitsize, const Value& value,
294 FpControl control = {});
295 [[nodiscard]] F16F32F64 ConvertIToF(size_t dest_bitsize, size_t src_bitsize, bool is_signed,
296 const Value& value, FpControl control = {});
297
298 [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
299 [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value,
300 FpControl control = {});
301
302 [[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
303 const F32& bias, const Value& offset,
304 const F32& lod_clamp, TextureInstInfo info);
305 [[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
306 const F32& lod, const Value& offset,
307 TextureInstInfo info);
308 [[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
309 const F32& dref, const F32& bias,
310 const Value& offset, const F32& lod_clamp,
311 TextureInstInfo info);
312 [[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
313 const F32& dref, const F32& lod,
314 const Value& offset, TextureInstInfo info);
315 [[nodiscard]] Value ImageQueryDimension(const Value& handle, const IR::U32& lod);
316
317 [[nodiscard]] Value ImageQueryLod(const Value& handle, const Value& coords,
318 TextureInstInfo info);
319 [[nodiscard]] Value ImageGather(const Value& handle, const Value& coords, const Value& offset,
320 const Value& offset2, TextureInstInfo info);
321 [[nodiscard]] Value ImageGatherDref(const Value& handle, const Value& coords,
322 const Value& offset, const Value& offset2, const F32& dref,
323 TextureInstInfo info);
324 [[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
325 const U32& lod, const U32& multisampling, TextureInstInfo info);
326 [[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
327 const Value& derivates, const Value& offset,
328 const F32& lod_clamp, TextureInstInfo info);
329 [[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
330 void ImageWrite(const Value& handle, const Value& coords, const Value& color,
331 TextureInstInfo info);
332
333 [[nodiscard]] Value ImageAtomicIAdd(const Value& handle, const Value& coords,
334 const Value& value, TextureInstInfo info);
335 [[nodiscard]] Value ImageAtomicSMin(const Value& handle, const Value& coords,
336 const Value& value, TextureInstInfo info);
337 [[nodiscard]] Value ImageAtomicUMin(const Value& handle, const Value& coords,
338 const Value& value, TextureInstInfo info);
339 [[nodiscard]] Value ImageAtomicIMin(const Value& handle, const Value& coords,
340 const Value& value, bool is_signed, TextureInstInfo info);
341 [[nodiscard]] Value ImageAtomicSMax(const Value& handle, const Value& coords,
342 const Value& value, TextureInstInfo info);
343 [[nodiscard]] Value ImageAtomicUMax(const Value& handle, const Value& coords,
344 const Value& value, TextureInstInfo info);
345 [[nodiscard]] Value ImageAtomicIMax(const Value& handle, const Value& coords,
346 const Value& value, bool is_signed, TextureInstInfo info);
347 [[nodiscard]] Value ImageAtomicInc(const Value& handle, const Value& coords, const Value& value,
348 TextureInstInfo info);
349 [[nodiscard]] Value ImageAtomicDec(const Value& handle, const Value& coords, const Value& value,
350 TextureInstInfo info);
351 [[nodiscard]] Value ImageAtomicAnd(const Value& handle, const Value& coords, const Value& value,
352 TextureInstInfo info);
353 [[nodiscard]] Value ImageAtomicOr(const Value& handle, const Value& coords, const Value& value,
354 TextureInstInfo info);
355 [[nodiscard]] Value ImageAtomicXor(const Value& handle, const Value& coords, const Value& value,
356 TextureInstInfo info);
357 [[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
358 const Value& value, TextureInstInfo info);
359 [[nodiscard]] U1 VoteAll(const U1& value);
360 [[nodiscard]] U1 VoteAny(const U1& value);
361 [[nodiscard]] U1 VoteEqual(const U1& value);
362 [[nodiscard]] U32 SubgroupBallot(const U1& value);
363 [[nodiscard]] U32 SubgroupEqMask();
364 [[nodiscard]] U32 SubgroupLtMask();
365 [[nodiscard]] U32 SubgroupLeMask();
366 [[nodiscard]] U32 SubgroupGtMask();
367 [[nodiscard]] U32 SubgroupGeMask();
368 [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
369 const IR::U32& seg_mask);
370 [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
371 const IR::U32& seg_mask);
372 [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp,
373 const IR::U32& seg_mask);
374 [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index,
375 const IR::U32& clamp, const IR::U32& seg_mask);
376 [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle,
377 FpControl control = {});
378
379 [[nodiscard]] F32 DPdxFine(const F32& a);
380
381 [[nodiscard]] F32 DPdyFine(const F32& a);
382
383 [[nodiscard]] F32 DPdxCoarse(const F32& a);
384
385 [[nodiscard]] F32 DPdyCoarse(const F32& a);
386
387private:
388 IR::Block::iterator insertion_point;
389
390 template <typename T = Value, typename... Args>
391 T Inst(Opcode op, Args... args) {
392 auto it{block->PrependNewInst(insertion_point, op, {Value{args}...})};
393 return T{Value{&*it}};
394 }
395
396 template <typename T>
397 requires(sizeof(T) <= sizeof(u32) && std::is_trivially_copyable_v<T>) struct Flags {
398 Flags() = default;
399 Flags(T proxy_) : proxy{proxy_} {}
400
401 T proxy;
402 };
403
404 template <typename T = Value, typename FlagType, typename... Args>
405 T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
406 u32 raw_flags{};
407 std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy));
408 auto it{block->PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)};
409 return T{Value{&*it}};
410 }
411};
412
413} // namespace Shader::IR
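The private Flags<T> wrapper above is the transport for per-instruction modifiers such as FpControl and TextureInstInfo: the requires-clause restricts T to trivially copyable types of at most 32 bits, and the flagged Inst() overload memcpys the payload into a raw u32. A self-contained sketch of that round trip, using a stand-in struct rather than the real IR types:

    #include <cstdint>
    #include <cstring>

    struct FpControlLike { // stand-in for IR::FpControl, fits in 4 bytes
        bool no_contraction{true};
        std::uint8_t rounding{1};
        std::uint8_t fmz_mode{2};
    };

    std::uint32_t PackFlags(FpControlLike control) {
        std::uint32_t raw{};                          // zero-initialized destination
        std::memcpy(&raw, &control, sizeof(control)); // bit-copy avoids aliasing UB
        return raw;
    }

    FpControlLike UnpackFlags(std::uint32_t raw) {
        FpControlLike control{};
        std::memcpy(&control, &raw, sizeof(control));
        return control;
    }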
diff --git a/src/shader_recompiler/frontend/ir/microinstruction.cpp b/src/shader_recompiler/frontend/ir/microinstruction.cpp
new file mode 100644
index 000000000..3dfa5a880
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/microinstruction.cpp
@@ -0,0 +1,411 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/type.h"
10#include "shader_recompiler/frontend/ir/value.h"
11
12namespace Shader::IR {
13namespace {
14void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
15 if (inst && inst->GetOpcode() != opcode) {
16 throw LogicError("Invalid pseudo-instruction");
17 }
18}
19
20void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
21 if (dest_inst) {
22 throw LogicError("Only one of each type of pseudo-op allowed");
23 }
24 dest_inst = pseudo_inst;
25}
26
27void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
28 if (inst->GetOpcode() != expected_opcode) {
29 throw LogicError("Undoing use of invalid pseudo-op");
30 }
31 inst = nullptr;
32}
33
34void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
35 if (!associated_insts) {
36 associated_insts = std::make_unique<AssociatedInsts>();
37 }
38}
39} // Anonymous namespace
40
41Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
42 if (op == Opcode::Phi) {
43 std::construct_at(&phi_args);
44 } else {
45 std::construct_at(&args);
46 }
47}
48
49Inst::~Inst() {
50 if (op == Opcode::Phi) {
51 std::destroy_at(&phi_args);
52 } else {
53 std::destroy_at(&args);
54 }
55}
56
57bool Inst::MayHaveSideEffects() const noexcept {
58 switch (op) {
59 case Opcode::ConditionRef:
60 case Opcode::Reference:
61 case Opcode::PhiMove:
62 case Opcode::Prologue:
63 case Opcode::Epilogue:
64 case Opcode::Join:
65 case Opcode::DemoteToHelperInvocation:
66 case Opcode::Barrier:
67 case Opcode::WorkgroupMemoryBarrier:
68 case Opcode::DeviceMemoryBarrier:
69 case Opcode::EmitVertex:
70 case Opcode::EndPrimitive:
71 case Opcode::SetAttribute:
72 case Opcode::SetAttributeIndexed:
73 case Opcode::SetPatch:
74 case Opcode::SetFragColor:
75 case Opcode::SetSampleMask:
76 case Opcode::SetFragDepth:
77 case Opcode::WriteGlobalU8:
78 case Opcode::WriteGlobalS8:
79 case Opcode::WriteGlobalU16:
80 case Opcode::WriteGlobalS16:
81 case Opcode::WriteGlobal32:
82 case Opcode::WriteGlobal64:
83 case Opcode::WriteGlobal128:
84 case Opcode::WriteStorageU8:
85 case Opcode::WriteStorageS8:
86 case Opcode::WriteStorageU16:
87 case Opcode::WriteStorageS16:
88 case Opcode::WriteStorage32:
89 case Opcode::WriteStorage64:
90 case Opcode::WriteStorage128:
91 case Opcode::WriteLocal:
92 case Opcode::WriteSharedU8:
93 case Opcode::WriteSharedU16:
94 case Opcode::WriteSharedU32:
95 case Opcode::WriteSharedU64:
96 case Opcode::WriteSharedU128:
97 case Opcode::SharedAtomicIAdd32:
98 case Opcode::SharedAtomicSMin32:
99 case Opcode::SharedAtomicUMin32:
100 case Opcode::SharedAtomicSMax32:
101 case Opcode::SharedAtomicUMax32:
102 case Opcode::SharedAtomicInc32:
103 case Opcode::SharedAtomicDec32:
104 case Opcode::SharedAtomicAnd32:
105 case Opcode::SharedAtomicOr32:
106 case Opcode::SharedAtomicXor32:
107 case Opcode::SharedAtomicExchange32:
108 case Opcode::SharedAtomicExchange64:
109 case Opcode::GlobalAtomicIAdd32:
110 case Opcode::GlobalAtomicSMin32:
111 case Opcode::GlobalAtomicUMin32:
112 case Opcode::GlobalAtomicSMax32:
113 case Opcode::GlobalAtomicUMax32:
114 case Opcode::GlobalAtomicInc32:
115 case Opcode::GlobalAtomicDec32:
116 case Opcode::GlobalAtomicAnd32:
117 case Opcode::GlobalAtomicOr32:
118 case Opcode::GlobalAtomicXor32:
119 case Opcode::GlobalAtomicExchange32:
120 case Opcode::GlobalAtomicIAdd64:
121 case Opcode::GlobalAtomicSMin64:
122 case Opcode::GlobalAtomicUMin64:
123 case Opcode::GlobalAtomicSMax64:
124 case Opcode::GlobalAtomicUMax64:
125 case Opcode::GlobalAtomicAnd64:
126 case Opcode::GlobalAtomicOr64:
127 case Opcode::GlobalAtomicXor64:
128 case Opcode::GlobalAtomicExchange64:
129 case Opcode::GlobalAtomicAddF32:
130 case Opcode::GlobalAtomicAddF16x2:
131 case Opcode::GlobalAtomicAddF32x2:
132 case Opcode::GlobalAtomicMinF16x2:
133 case Opcode::GlobalAtomicMinF32x2:
134 case Opcode::GlobalAtomicMaxF16x2:
135 case Opcode::GlobalAtomicMaxF32x2:
136 case Opcode::StorageAtomicIAdd32:
137 case Opcode::StorageAtomicSMin32:
138 case Opcode::StorageAtomicUMin32:
139 case Opcode::StorageAtomicSMax32:
140 case Opcode::StorageAtomicUMax32:
141 case Opcode::StorageAtomicInc32:
142 case Opcode::StorageAtomicDec32:
143 case Opcode::StorageAtomicAnd32:
144 case Opcode::StorageAtomicOr32:
145 case Opcode::StorageAtomicXor32:
146 case Opcode::StorageAtomicExchange32:
147 case Opcode::StorageAtomicIAdd64:
148 case Opcode::StorageAtomicSMin64:
149 case Opcode::StorageAtomicUMin64:
150 case Opcode::StorageAtomicSMax64:
151 case Opcode::StorageAtomicUMax64:
152 case Opcode::StorageAtomicAnd64:
153 case Opcode::StorageAtomicOr64:
154 case Opcode::StorageAtomicXor64:
155 case Opcode::StorageAtomicExchange64:
156 case Opcode::StorageAtomicAddF32:
157 case Opcode::StorageAtomicAddF16x2:
158 case Opcode::StorageAtomicAddF32x2:
159 case Opcode::StorageAtomicMinF16x2:
160 case Opcode::StorageAtomicMinF32x2:
161 case Opcode::StorageAtomicMaxF16x2:
162 case Opcode::StorageAtomicMaxF32x2:
163 case Opcode::BindlessImageWrite:
164 case Opcode::BoundImageWrite:
165 case Opcode::ImageWrite:
166 case Opcode::BindlessImageAtomicIAdd32:
167 case Opcode::BindlessImageAtomicSMin32:
168 case Opcode::BindlessImageAtomicUMin32:
169 case Opcode::BindlessImageAtomicSMax32:
170 case Opcode::BindlessImageAtomicUMax32:
171 case Opcode::BindlessImageAtomicInc32:
172 case Opcode::BindlessImageAtomicDec32:
173 case Opcode::BindlessImageAtomicAnd32:
174 case Opcode::BindlessImageAtomicOr32:
175 case Opcode::BindlessImageAtomicXor32:
176 case Opcode::BindlessImageAtomicExchange32:
177 case Opcode::BoundImageAtomicIAdd32:
178 case Opcode::BoundImageAtomicSMin32:
179 case Opcode::BoundImageAtomicUMin32:
180 case Opcode::BoundImageAtomicSMax32:
181 case Opcode::BoundImageAtomicUMax32:
182 case Opcode::BoundImageAtomicInc32:
183 case Opcode::BoundImageAtomicDec32:
184 case Opcode::BoundImageAtomicAnd32:
185 case Opcode::BoundImageAtomicOr32:
186 case Opcode::BoundImageAtomicXor32:
187 case Opcode::BoundImageAtomicExchange32:
188 case Opcode::ImageAtomicIAdd32:
189 case Opcode::ImageAtomicSMin32:
190 case Opcode::ImageAtomicUMin32:
191 case Opcode::ImageAtomicSMax32:
192 case Opcode::ImageAtomicUMax32:
193 case Opcode::ImageAtomicInc32:
194 case Opcode::ImageAtomicDec32:
195 case Opcode::ImageAtomicAnd32:
196 case Opcode::ImageAtomicOr32:
197 case Opcode::ImageAtomicXor32:
198 case Opcode::ImageAtomicExchange32:
199 return true;
200 default:
201 return false;
202 }
203}
204
205bool Inst::IsPseudoInstruction() const noexcept {
206 switch (op) {
207 case Opcode::GetZeroFromOp:
208 case Opcode::GetSignFromOp:
209 case Opcode::GetCarryFromOp:
210 case Opcode::GetOverflowFromOp:
211 case Opcode::GetSparseFromOp:
212 case Opcode::GetInBoundsFromOp:
213 return true;
214 default:
215 return false;
216 }
217}
218
219bool Inst::AreAllArgsImmediates() const {
220 if (op == Opcode::Phi) {
221 throw LogicError("Testing for all arguments are immediates on phi instruction");
222 }
223 return std::all_of(args.begin(), args.begin() + NumArgs(),
224 [](const IR::Value& value) { return value.IsImmediate(); });
225}
226
227Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
228 if (!associated_insts) {
229 return nullptr;
230 }
231 switch (opcode) {
232 case Opcode::GetZeroFromOp:
233 CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
234 return associated_insts->zero_inst;
235 case Opcode::GetSignFromOp:
236 CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
237 return associated_insts->sign_inst;
238 case Opcode::GetCarryFromOp:
239 CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
240 return associated_insts->carry_inst;
241 case Opcode::GetOverflowFromOp:
242 CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
243 return associated_insts->overflow_inst;
244 case Opcode::GetSparseFromOp:
245 CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
246 return associated_insts->sparse_inst;
247 case Opcode::GetInBoundsFromOp:
248 CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp);
249 return associated_insts->in_bounds_inst;
250 default:
251 throw InvalidArgument("{} is not a pseudo-instruction", opcode);
252 }
253}
254
255IR::Type Inst::Type() const {
256 return TypeOf(op);
257}
258
259void Inst::SetArg(size_t index, Value value) {
260 if (index >= NumArgs()) {
261 throw InvalidArgument("Out of bounds argument index {} in opcode {}", index, op);
262 }
263 const IR::Value arg{Arg(index)};
264 if (!arg.IsImmediate()) {
265 UndoUse(arg);
266 }
267 if (!value.IsImmediate()) {
268 Use(value);
269 }
270 if (op == Opcode::Phi) {
271 phi_args[index].second = value;
272 } else {
273 args[index] = value;
274 }
275}
276
277Block* Inst::PhiBlock(size_t index) const {
278 if (op != Opcode::Phi) {
279 throw LogicError("{} is not a Phi instruction", op);
280 }
281 if (index >= phi_args.size()) {
282 throw InvalidArgument("Out of bounds argument index {} in phi instruction", index);
283 }
284 return phi_args[index].first;
285}
286
287void Inst::AddPhiOperand(Block* predecessor, const Value& value) {
288 if (!value.IsImmediate()) {
289 Use(value);
290 }
291 phi_args.emplace_back(predecessor, value);
292}
293
294void Inst::Invalidate() {
295 ClearArgs();
296 ReplaceOpcode(Opcode::Void);
297}
298
299void Inst::ClearArgs() {
300 if (op == Opcode::Phi) {
301 for (auto& pair : phi_args) {
302 IR::Value& value{pair.second};
303 if (!value.IsImmediate()) {
304 UndoUse(value);
305 }
306 }
307 phi_args.clear();
308 } else {
309 for (auto& value : args) {
310 if (!value.IsImmediate()) {
311 UndoUse(value);
312 }
313 }
314 // Reset arguments to null
315 // std::memset was measured to be faster on MSVC than std::ranges::fill
316 std::memset(reinterpret_cast<char*>(&args), 0, sizeof(args));
317 }
318}
319
320void Inst::ReplaceUsesWith(Value replacement) {
321 Invalidate();
322 ReplaceOpcode(Opcode::Identity);
323 if (!replacement.IsImmediate()) {
324 Use(replacement);
325 }
326 args[0] = replacement;
327}
328
329void Inst::ReplaceOpcode(IR::Opcode opcode) {
330 if (opcode == IR::Opcode::Phi) {
331 throw LogicError("Cannot transition into Phi");
332 }
333 if (op == Opcode::Phi) {
334 // Transition out of phi arguments into non-phi
335 std::destroy_at(&phi_args);
336 std::construct_at(&args);
337 }
338 op = opcode;
339}
340
341void Inst::Use(const Value& value) {
342 Inst* const inst{value.Inst()};
343 ++inst->use_count;
344
345 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
346 switch (op) {
347 case Opcode::GetZeroFromOp:
348 AllocAssociatedInsts(assoc_inst);
349 SetPseudoInstruction(assoc_inst->zero_inst, this);
350 break;
351 case Opcode::GetSignFromOp:
352 AllocAssociatedInsts(assoc_inst);
353 SetPseudoInstruction(assoc_inst->sign_inst, this);
354 break;
355 case Opcode::GetCarryFromOp:
356 AllocAssociatedInsts(assoc_inst);
357 SetPseudoInstruction(assoc_inst->carry_inst, this);
358 break;
359 case Opcode::GetOverflowFromOp:
360 AllocAssociatedInsts(assoc_inst);
361 SetPseudoInstruction(assoc_inst->overflow_inst, this);
362 break;
363 case Opcode::GetSparseFromOp:
364 AllocAssociatedInsts(assoc_inst);
365 SetPseudoInstruction(assoc_inst->sparse_inst, this);
366 break;
367 case Opcode::GetInBoundsFromOp:
368 AllocAssociatedInsts(assoc_inst);
369 SetPseudoInstruction(assoc_inst->in_bounds_inst, this);
370 break;
371 default:
372 break;
373 }
374}
375
376void Inst::UndoUse(const Value& value) {
377 Inst* const inst{value.Inst()};
378 --inst->use_count;
379
380 std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
381 switch (op) {
382 case Opcode::GetZeroFromOp:
383 AllocAssociatedInsts(assoc_inst);
384 RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
385 break;
386 case Opcode::GetSignFromOp:
387 AllocAssociatedInsts(assoc_inst);
388 RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
389 break;
390 case Opcode::GetCarryFromOp:
391 AllocAssociatedInsts(assoc_inst);
392 RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
393 break;
394 case Opcode::GetOverflowFromOp:
395 AllocAssociatedInsts(assoc_inst);
396 RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
397 break;
398 case Opcode::GetSparseFromOp:
399 AllocAssociatedInsts(assoc_inst);
400 RemovePseudoInstruction(assoc_inst->sparse_inst, Opcode::GetSparseFromOp);
401 break;
402 case Opcode::GetInBoundsFromOp:
403 AllocAssociatedInsts(assoc_inst);
404 RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp);
405 break;
406 default:
407 break;
408 }
409}
410
411} // namespace Shader::IR
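Use() and UndoUse() above maintain two invariants: every non-immediate argument bumps the producing instruction's use_count, and each producer can be observed by at most one pseudo-instruction of each kind. A reduced model of that bookkeeping (simplified types, not the real IR::Inst):

    #include <memory>
    #include <stdexcept>

    struct MiniInst;

    struct MiniAssociated {
        MiniInst* zero_inst{}; // one slot per pseudo-op kind, as in AssociatedInsts
    };

    struct MiniInst {
        int use_count{};
        std::unique_ptr<MiniAssociated> associated;
    };

    // Mirrors Use() + SetPseudoInstruction(): a second GetZeroFromOp-style
    // observer on the same producer is a logic error.
    void AttachZeroPseudo(MiniInst& producer, MiniInst& pseudo) {
        ++producer.use_count;
        if (!producer.associated) {
            producer.associated = std::make_unique<MiniAssociated>();
        }
        if (producer.associated->zero_inst) {
            throw std::logic_error("Only one of each type of pseudo-op allowed");
        }
        producer.associated->zero_inst = &pseudo;
    }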
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
new file mode 100644
index 000000000..77cda1f8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -0,0 +1,49 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/shader_info.h"
10
11namespace Shader::IR {
12
13enum class FmzMode : u8 {
14 DontCare, // Not specified for this instruction
15 FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
16 FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
17 None, // Denorms are not flushed, NAN is propagated (nouveau)
18};
19
20enum class FpRounding : u8 {
21 DontCare, // Not specified for this instruction
22 RN, // Round to nearest even
23 RM, // Round towards negative infinity
24 RP, // Round towards positive infinity
25 RZ, // Round towards zero
26};
27
28struct FpControl {
29 bool no_contraction{false};
30 FpRounding rounding{FpRounding::DontCare};
31 FmzMode fmz_mode{FmzMode::DontCare};
32};
33static_assert(sizeof(FpControl) <= sizeof(u32));
34
35union TextureInstInfo {
36 u32 raw;
37 BitField<0, 16, u32> descriptor_index;
38 BitField<16, 3, TextureType> type;
39 BitField<19, 1, u32> is_depth;
40 BitField<20, 1, u32> has_bias;
41 BitField<21, 1, u32> has_lod_clamp;
42 BitField<22, 1, u32> relaxed_precision;
43 BitField<23, 2, u32> gather_component;
44 BitField<25, 2, u32> num_derivates;
45 BitField<27, 3, ImageFormat> image_format;
46};
47static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
48
49} // namespace Shader::IR
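TextureInstInfo packs every sampling modifier into a single u32 so it can ride in the instruction's flags word (see the static_assert above). A dependency-free sketch of the same bit layout using plain shifts and masks, which is what the BitField members from common/bit_field.h encode generically:

    #include <cstdint>

    // Illustrative re-encoding of the first few TextureInstInfo fields above.
    std::uint32_t PackTextureInfo(std::uint32_t descriptor_index, std::uint32_t type,
                                  bool is_depth, bool has_bias, bool has_lod_clamp) {
        std::uint32_t raw{};
        raw |= descriptor_index & 0xffffu;  // bits [0,16): descriptor_index
        raw |= (type & 0x7u) << 16;         // bits [16,19): TextureType
        raw |= static_cast<std::uint32_t>(is_depth) << 19;
        raw |= static_cast<std::uint32_t>(has_bias) << 20;
        raw |= static_cast<std::uint32_t>(has_lod_clamp) << 21;
        return raw;
    }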
diff --git a/src/shader_recompiler/frontend/ir/opcodes.cpp b/src/shader_recompiler/frontend/ir/opcodes.cpp
new file mode 100644
index 000000000..24d024ad7
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.cpp
@@ -0,0 +1,15 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <string_view>
6
7#include "shader_recompiler/frontend/ir/opcodes.h"
8
9namespace Shader::IR {
10
11std::string_view NameOf(Opcode op) {
12 return Detail::META_TABLE[static_cast<size_t>(op)].name;
13}
14
15} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/opcodes.h b/src/shader_recompiler/frontend/ir/opcodes.h
new file mode 100644
index 000000000..9ab108292
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.h
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <string_view>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/frontend/ir/type.h"
14
15namespace Shader::IR {
16
17enum class Opcode {
18#define OPCODE(name, ...) name,
19#include "opcodes.inc"
20#undef OPCODE
21};
22
23namespace Detail {
24struct OpcodeMeta {
25 std::string_view name;
26 Type type;
27 std::array<Type, 5> arg_types;
28};
29
30// using enum Type;
31constexpr Type Void{Type::Void};
32constexpr Type Opaque{Type::Opaque};
33constexpr Type Reg{Type::Reg};
34constexpr Type Pred{Type::Pred};
35constexpr Type Attribute{Type::Attribute};
36constexpr Type Patch{Type::Patch};
37constexpr Type U1{Type::U1};
38constexpr Type U8{Type::U8};
39constexpr Type U16{Type::U16};
40constexpr Type U32{Type::U32};
41constexpr Type U64{Type::U64};
42constexpr Type F16{Type::F16};
43constexpr Type F32{Type::F32};
44constexpr Type F64{Type::F64};
45constexpr Type U32x2{Type::U32x2};
46constexpr Type U32x3{Type::U32x3};
47constexpr Type U32x4{Type::U32x4};
48constexpr Type F16x2{Type::F16x2};
49constexpr Type F16x3{Type::F16x3};
50constexpr Type F16x4{Type::F16x4};
51constexpr Type F32x2{Type::F32x2};
52constexpr Type F32x3{Type::F32x3};
53constexpr Type F32x4{Type::F32x4};
54constexpr Type F64x2{Type::F64x2};
55constexpr Type F64x3{Type::F64x3};
56constexpr Type F64x4{Type::F64x4};
57
58constexpr OpcodeMeta META_TABLE[]{
59#define OPCODE(name_token, type_token, ...) \
60 { \
61 .name{#name_token}, \
62 .type = type_token, \
63 .arg_types{__VA_ARGS__}, \
64 },
65#include "opcodes.inc"
66#undef OPCODE
67};
68constexpr size_t CalculateNumArgsOf(Opcode op) {
69 const auto& arg_types{META_TABLE[static_cast<size_t>(op)].arg_types};
70 return static_cast<size_t>(
71 std::distance(arg_types.begin(), std::ranges::find(arg_types, Type::Void)));
72}
73
74constexpr u8 NUM_ARGS[]{
75#define OPCODE(name_token, type_token, ...) static_cast<u8>(CalculateNumArgsOf(Opcode::name_token)),
76#include "opcodes.inc"
77#undef OPCODE
78};
79} // namespace Detail
80
81/// Get return type of an opcode
82[[nodiscard]] inline Type TypeOf(Opcode op) noexcept {
83 return Detail::META_TABLE[static_cast<size_t>(op)].type;
84}
85
86/// Get the number of arguments an opcode accepts
87[[nodiscard]] inline size_t NumArgsOf(Opcode op) noexcept {
88 return static_cast<size_t>(Detail::NUM_ARGS[static_cast<size_t>(op)]);
89}
90
91/// Get the required type of an argument of an opcode
92[[nodiscard]] inline Type ArgTypeOf(Opcode op, size_t arg_index) noexcept {
93 return Detail::META_TABLE[static_cast<size_t>(op)].arg_types[arg_index];
94}
95
96/// Get the name of an opcode
97[[nodiscard]] std::string_view NameOf(Opcode op);
98
99} // namespace Shader::IR
100
101template <>
102struct fmt::formatter<Shader::IR::Opcode> {
103 constexpr auto parse(format_parse_context& ctx) {
104 return ctx.begin();
105 }
106 template <typename FormatContext>
107 auto format(const Shader::IR::Opcode& op, FormatContext& ctx) {
108 return format_to(ctx.out(), "{}", Shader::IR::NameOf(op));
109 }
110};
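Every query above is a constant-time lookup into tables generated from opcodes.inc. A small sketch of what they yield for one concrete row, assuming the IAdd32 entry shown further down in this diff (returns U32, takes two U32 arguments; the asserts are illustrative):

    #include <cassert>

    #include "shader_recompiler/frontend/ir/opcodes.h"

    void OpcodeTableExample() {
        using namespace Shader::IR;
        assert(TypeOf(Opcode::IAdd32) == Type::U32);       // return type column
        assert(NumArgsOf(Opcode::IAdd32) == 2);            // args up to the first Void
        assert(ArgTypeOf(Opcode::IAdd32, 0) == Type::U32); // per-argument type
        assert(NameOf(Opcode::IAdd32) == "IAdd32");        // string_view from the table
    }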
diff --git a/src/shader_recompiler/frontend/ir/opcodes.inc b/src/shader_recompiler/frontend/ir/opcodes.inc
new file mode 100644
index 000000000..d91098c80
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/opcodes.inc
@@ -0,0 +1,550 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg5 type, ...
6OPCODE(Phi, Opaque, )
7OPCODE(Identity, Opaque, Opaque, )
8OPCODE(Void, Void, )
9OPCODE(ConditionRef, U1, U1, )
10OPCODE(Reference, Void, Opaque, )
11OPCODE(PhiMove, Void, Opaque, Opaque, )
12
13// Special operations
14OPCODE(Prologue, Void, )
15OPCODE(Epilogue, Void, )
16OPCODE(Join, Void, )
17OPCODE(DemoteToHelperInvocation, Void, )
18OPCODE(EmitVertex, Void, U32, )
19OPCODE(EndPrimitive, Void, U32, )
20
21// Barriers
22OPCODE(Barrier, Void, )
23OPCODE(WorkgroupMemoryBarrier, Void, )
24OPCODE(DeviceMemoryBarrier, Void, )
25
26// Context getters/setters
27OPCODE(GetRegister, U32, Reg, )
28OPCODE(SetRegister, Void, Reg, U32, )
29OPCODE(GetPred, U1, Pred, )
30OPCODE(SetPred, Void, Pred, U1, )
31OPCODE(GetGotoVariable, U1, U32, )
32OPCODE(SetGotoVariable, Void, U32, U1, )
33OPCODE(GetIndirectBranchVariable, U32, )
34OPCODE(SetIndirectBranchVariable, Void, U32, )
35OPCODE(GetCbufU8, U32, U32, U32, )
36OPCODE(GetCbufS8, U32, U32, U32, )
37OPCODE(GetCbufU16, U32, U32, U32, )
38OPCODE(GetCbufS16, U32, U32, U32, )
39OPCODE(GetCbufU32, U32, U32, U32, )
40OPCODE(GetCbufF32, F32, U32, U32, )
41OPCODE(GetCbufU32x2, U32x2, U32, U32, )
42OPCODE(GetAttribute, F32, Attribute, U32, )
43OPCODE(SetAttribute, Void, Attribute, F32, U32, )
44OPCODE(GetAttributeIndexed, F32, U32, U32, )
45OPCODE(SetAttributeIndexed, Void, U32, F32, U32, )
46OPCODE(GetPatch, F32, Patch, )
47OPCODE(SetPatch, Void, Patch, F32, )
48OPCODE(SetFragColor, Void, U32, U32, F32, )
49OPCODE(SetSampleMask, Void, U32, )
50OPCODE(SetFragDepth, Void, F32, )
51OPCODE(GetZFlag, U1, Void, )
52OPCODE(GetSFlag, U1, Void, )
53OPCODE(GetCFlag, U1, Void, )
54OPCODE(GetOFlag, U1, Void, )
55OPCODE(SetZFlag, Void, U1, )
56OPCODE(SetSFlag, Void, U1, )
57OPCODE(SetCFlag, Void, U1, )
58OPCODE(SetOFlag, Void, U1, )
59OPCODE(WorkgroupId, U32x3, )
60OPCODE(LocalInvocationId, U32x3, )
61OPCODE(InvocationId, U32, )
62OPCODE(SampleId, U32, )
63OPCODE(IsHelperInvocation, U1, )
64OPCODE(YDirection, F32, )
65
66// Undefined
67OPCODE(UndefU1, U1, )
68OPCODE(UndefU8, U8, )
69OPCODE(UndefU16, U16, )
70OPCODE(UndefU32, U32, )
71OPCODE(UndefU64, U64, )
72
73// Memory operations
74OPCODE(LoadGlobalU8, U32, Opaque, )
75OPCODE(LoadGlobalS8, U32, Opaque, )
76OPCODE(LoadGlobalU16, U32, Opaque, )
77OPCODE(LoadGlobalS16, U32, Opaque, )
78OPCODE(LoadGlobal32, U32, Opaque, )
79OPCODE(LoadGlobal64, U32x2, Opaque, )
80OPCODE(LoadGlobal128, U32x4, Opaque, )
81OPCODE(WriteGlobalU8, Void, Opaque, U32, )
82OPCODE(WriteGlobalS8, Void, Opaque, U32, )
83OPCODE(WriteGlobalU16, Void, Opaque, U32, )
84OPCODE(WriteGlobalS16, Void, Opaque, U32, )
85OPCODE(WriteGlobal32, Void, Opaque, U32, )
86OPCODE(WriteGlobal64, Void, Opaque, U32x2, )
87OPCODE(WriteGlobal128, Void, Opaque, U32x4, )
88
89// Storage buffer operations
90OPCODE(LoadStorageU8, U32, U32, U32, )
91OPCODE(LoadStorageS8, U32, U32, U32, )
92OPCODE(LoadStorageU16, U32, U32, U32, )
93OPCODE(LoadStorageS16, U32, U32, U32, )
94OPCODE(LoadStorage32, U32, U32, U32, )
95OPCODE(LoadStorage64, U32x2, U32, U32, )
96OPCODE(LoadStorage128, U32x4, U32, U32, )
97OPCODE(WriteStorageU8, Void, U32, U32, U32, )
98OPCODE(WriteStorageS8, Void, U32, U32, U32, )
99OPCODE(WriteStorageU16, Void, U32, U32, U32, )
100OPCODE(WriteStorageS16, Void, U32, U32, U32, )
101OPCODE(WriteStorage32, Void, U32, U32, U32, )
102OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
103OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
104
105// Local memory operations
106OPCODE(LoadLocal, U32, U32, )
107OPCODE(WriteLocal, Void, U32, U32, )
108
109// Shared memory operations
110OPCODE(LoadSharedU8, U32, U32, )
111OPCODE(LoadSharedS8, U32, U32, )
112OPCODE(LoadSharedU16, U32, U32, )
113OPCODE(LoadSharedS16, U32, U32, )
114OPCODE(LoadSharedU32, U32, U32, )
115OPCODE(LoadSharedU64, U32x2, U32, )
116OPCODE(LoadSharedU128, U32x4, U32, )
117OPCODE(WriteSharedU8, Void, U32, U32, )
118OPCODE(WriteSharedU16, Void, U32, U32, )
119OPCODE(WriteSharedU32, Void, U32, U32, )
120OPCODE(WriteSharedU64, Void, U32, U32x2, )
121OPCODE(WriteSharedU128, Void, U32, U32x4, )
122
123// Vector utility
124OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
125OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
126OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
127OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
128OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
129OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
130OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
131OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
132OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
133OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
134OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
135OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
136OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
137OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
138OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
139OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
140OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
141OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
142OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
143OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
144OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
145OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
146OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
147OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
148OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
149OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
150OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
151OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
152OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
153OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
154OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
155OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
156OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
157OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
158OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
159OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
160
161// Select operations
162OPCODE(SelectU1, U1, U1, U1, U1, )
163OPCODE(SelectU8, U8, U1, U8, U8, )
164OPCODE(SelectU16, U16, U1, U16, U16, )
165OPCODE(SelectU32, U32, U1, U32, U32, )
166OPCODE(SelectU64, U64, U1, U64, U64, )
167OPCODE(SelectF16, F16, U1, F16, F16, )
168OPCODE(SelectF32, F32, U1, F32, F32, )
169OPCODE(SelectF64, F64, U1, F64, F64, )
170
171// Bitwise conversions
172OPCODE(BitCastU16F16, U16, F16, )
173OPCODE(BitCastU32F32, U32, F32, )
174OPCODE(BitCastU64F64, U64, F64, )
175OPCODE(BitCastF16U16, F16, U16, )
176OPCODE(BitCastF32U32, F32, U32, )
177OPCODE(BitCastF64U64, F64, U64, )
178OPCODE(PackUint2x32, U64, U32x2, )
179OPCODE(UnpackUint2x32, U32x2, U64, )
180OPCODE(PackFloat2x16, U32, F16x2, )
181OPCODE(UnpackFloat2x16, F16x2, U32, )
182OPCODE(PackHalf2x16, U32, F32x2, )
183OPCODE(UnpackHalf2x16, F32x2, U32, )
184OPCODE(PackDouble2x32, F64, U32x2, )
185OPCODE(UnpackDouble2x32, U32x2, F64, )
186
187// Pseudo-operation, handled specially at final emit
188OPCODE(GetZeroFromOp, U1, Opaque, )
189OPCODE(GetSignFromOp, U1, Opaque, )
190OPCODE(GetCarryFromOp, U1, Opaque, )
191OPCODE(GetOverflowFromOp, U1, Opaque, )
192OPCODE(GetSparseFromOp, U1, Opaque, )
193OPCODE(GetInBoundsFromOp, U1, Opaque, )
194
195// Floating-point operations
196OPCODE(FPAbs16, F16, F16, )
197OPCODE(FPAbs32, F32, F32, )
198OPCODE(FPAbs64, F64, F64, )
199OPCODE(FPAdd16, F16, F16, F16, )
200OPCODE(FPAdd32, F32, F32, F32, )
201OPCODE(FPAdd64, F64, F64, F64, )
202OPCODE(FPFma16, F16, F16, F16, F16, )
203OPCODE(FPFma32, F32, F32, F32, F32, )
204OPCODE(FPFma64, F64, F64, F64, F64, )
205OPCODE(FPMax32, F32, F32, F32, )
206OPCODE(FPMax64, F64, F64, F64, )
207OPCODE(FPMin32, F32, F32, F32, )
208OPCODE(FPMin64, F64, F64, F64, )
209OPCODE(FPMul16, F16, F16, F16, )
210OPCODE(FPMul32, F32, F32, F32, )
211OPCODE(FPMul64, F64, F64, F64, )
212OPCODE(FPNeg16, F16, F16, )
213OPCODE(FPNeg32, F32, F32, )
214OPCODE(FPNeg64, F64, F64, )
215OPCODE(FPRecip32, F32, F32, )
216OPCODE(FPRecip64, F64, F64, )
217OPCODE(FPRecipSqrt32, F32, F32, )
218OPCODE(FPRecipSqrt64, F64, F64, )
219OPCODE(FPSqrt, F32, F32, )
220OPCODE(FPSin, F32, F32, )
221OPCODE(FPExp2, F32, F32, )
222OPCODE(FPCos, F32, F32, )
223OPCODE(FPLog2, F32, F32, )
224OPCODE(FPSaturate16, F16, F16, )
225OPCODE(FPSaturate32, F32, F32, )
226OPCODE(FPSaturate64, F64, F64, )
227OPCODE(FPClamp16, F16, F16, F16, F16, )
228OPCODE(FPClamp32, F32, F32, F32, F32, )
229OPCODE(FPClamp64, F64, F64, F64, F64, )
230OPCODE(FPRoundEven16, F16, F16, )
231OPCODE(FPRoundEven32, F32, F32, )
232OPCODE(FPRoundEven64, F64, F64, )
233OPCODE(FPFloor16, F16, F16, )
234OPCODE(FPFloor32, F32, F32, )
235OPCODE(FPFloor64, F64, F64, )
236OPCODE(FPCeil16, F16, F16, )
237OPCODE(FPCeil32, F32, F32, )
238OPCODE(FPCeil64, F64, F64, )
239OPCODE(FPTrunc16, F16, F16, )
240OPCODE(FPTrunc32, F32, F32, )
241OPCODE(FPTrunc64, F64, F64, )
242
243OPCODE(FPOrdEqual16, U1, F16, F16, )
244OPCODE(FPOrdEqual32, U1, F32, F32, )
245OPCODE(FPOrdEqual64, U1, F64, F64, )
246OPCODE(FPUnordEqual16, U1, F16, F16, )
247OPCODE(FPUnordEqual32, U1, F32, F32, )
248OPCODE(FPUnordEqual64, U1, F64, F64, )
249OPCODE(FPOrdNotEqual16, U1, F16, F16, )
250OPCODE(FPOrdNotEqual32, U1, F32, F32, )
251OPCODE(FPOrdNotEqual64, U1, F64, F64, )
252OPCODE(FPUnordNotEqual16, U1, F16, F16, )
253OPCODE(FPUnordNotEqual32, U1, F32, F32, )
254OPCODE(FPUnordNotEqual64, U1, F64, F64, )
255OPCODE(FPOrdLessThan16, U1, F16, F16, )
256OPCODE(FPOrdLessThan32, U1, F32, F32, )
257OPCODE(FPOrdLessThan64, U1, F64, F64, )
258OPCODE(FPUnordLessThan16, U1, F16, F16, )
259OPCODE(FPUnordLessThan32, U1, F32, F32, )
260OPCODE(FPUnordLessThan64, U1, F64, F64, )
261OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
262OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
263OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
264OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
265OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
266OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
267OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
268OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
269OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
270OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
271OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
272OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
273OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
274OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
275OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
276OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
277OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
278OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
279OPCODE(FPIsNan16, U1, F16, )
280OPCODE(FPIsNan32, U1, F32, )
281OPCODE(FPIsNan64, U1, F64, )
282
283// Integer operations
284OPCODE(IAdd32, U32, U32, U32, )
285OPCODE(IAdd64, U64, U64, U64, )
286OPCODE(ISub32, U32, U32, U32, )
287OPCODE(ISub64, U64, U64, U64, )
288OPCODE(IMul32, U32, U32, U32, )
289OPCODE(INeg32, U32, U32, )
290OPCODE(INeg64, U64, U64, )
291OPCODE(IAbs32, U32, U32, )
292OPCODE(ShiftLeftLogical32, U32, U32, U32, )
293OPCODE(ShiftLeftLogical64, U64, U64, U32, )
294OPCODE(ShiftRightLogical32, U32, U32, U32, )
295OPCODE(ShiftRightLogical64, U64, U64, U32, )
296OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
297OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
298OPCODE(BitwiseAnd32, U32, U32, U32, )
299OPCODE(BitwiseOr32, U32, U32, U32, )
300OPCODE(BitwiseXor32, U32, U32, U32, )
301OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
302OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
303OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
304OPCODE(BitReverse32, U32, U32, )
305OPCODE(BitCount32, U32, U32, )
306OPCODE(BitwiseNot32, U32, U32, )
307
308OPCODE(FindSMsb32, U32, U32, )
309OPCODE(FindUMsb32, U32, U32, )
310OPCODE(SMin32, U32, U32, U32, )
311OPCODE(UMin32, U32, U32, U32, )
312OPCODE(SMax32, U32, U32, U32, )
313OPCODE(UMax32, U32, U32, U32, )
314OPCODE(SClamp32, U32, U32, U32, U32, )
315OPCODE(UClamp32, U32, U32, U32, U32, )
316OPCODE(SLessThan, U1, U32, U32, )
317OPCODE(ULessThan, U1, U32, U32, )
318OPCODE(IEqual, U1, U32, U32, )
319OPCODE(SLessThanEqual, U1, U32, U32, )
320OPCODE(ULessThanEqual, U1, U32, U32, )
321OPCODE(SGreaterThan, U1, U32, U32, )
322OPCODE(UGreaterThan, U1, U32, U32, )
323OPCODE(INotEqual, U1, U32, U32, )
324OPCODE(SGreaterThanEqual, U1, U32, U32, )
325OPCODE(UGreaterThanEqual, U1, U32, U32, )
326
327// Atomic operations
328OPCODE(SharedAtomicIAdd32, U32, U32, U32, )
329OPCODE(SharedAtomicSMin32, U32, U32, U32, )
330OPCODE(SharedAtomicUMin32, U32, U32, U32, )
331OPCODE(SharedAtomicSMax32, U32, U32, U32, )
332OPCODE(SharedAtomicUMax32, U32, U32, U32, )
333OPCODE(SharedAtomicInc32, U32, U32, U32, )
334OPCODE(SharedAtomicDec32, U32, U32, U32, )
335OPCODE(SharedAtomicAnd32, U32, U32, U32, )
336OPCODE(SharedAtomicOr32, U32, U32, U32, )
337OPCODE(SharedAtomicXor32, U32, U32, U32, )
338OPCODE(SharedAtomicExchange32, U32, U32, U32, )
339OPCODE(SharedAtomicExchange64, U64, U32, U64, )
340
341OPCODE(GlobalAtomicIAdd32, U32, U64, U32, )
342OPCODE(GlobalAtomicSMin32, U32, U64, U32, )
343OPCODE(GlobalAtomicUMin32, U32, U64, U32, )
344OPCODE(GlobalAtomicSMax32, U32, U64, U32, )
345OPCODE(GlobalAtomicUMax32, U32, U64, U32, )
346OPCODE(GlobalAtomicInc32, U32, U64, U32, )
347OPCODE(GlobalAtomicDec32, U32, U64, U32, )
348OPCODE(GlobalAtomicAnd32, U32, U64, U32, )
349OPCODE(GlobalAtomicOr32, U32, U64, U32, )
350OPCODE(GlobalAtomicXor32, U32, U64, U32, )
351OPCODE(GlobalAtomicExchange32, U32, U64, U32, )
352OPCODE(GlobalAtomicIAdd64, U64, U64, U64, )
353OPCODE(GlobalAtomicSMin64, U64, U64, U64, )
354OPCODE(GlobalAtomicUMin64, U64, U64, U64, )
355OPCODE(GlobalAtomicSMax64, U64, U64, U64, )
356OPCODE(GlobalAtomicUMax64, U64, U64, U64, )
357OPCODE(GlobalAtomicAnd64, U64, U64, U64, )
358OPCODE(GlobalAtomicOr64, U64, U64, U64, )
359OPCODE(GlobalAtomicXor64, U64, U64, U64, )
360OPCODE(GlobalAtomicExchange64, U64, U64, U64, )
361OPCODE(GlobalAtomicAddF32, F32, U64, F32, )
362OPCODE(GlobalAtomicAddF16x2, U32, U64, F16x2, )
363OPCODE(GlobalAtomicAddF32x2, U32, U64, F32x2, )
364OPCODE(GlobalAtomicMinF16x2, U32, U64, F16x2, )
365OPCODE(GlobalAtomicMinF32x2, U32, U64, F32x2, )
366OPCODE(GlobalAtomicMaxF16x2, U32, U64, F16x2, )
367OPCODE(GlobalAtomicMaxF32x2, U32, U64, F32x2, )
368
369OPCODE(StorageAtomicIAdd32, U32, U32, U32, U32, )
370OPCODE(StorageAtomicSMin32, U32, U32, U32, U32, )
371OPCODE(StorageAtomicUMin32, U32, U32, U32, U32, )
372OPCODE(StorageAtomicSMax32, U32, U32, U32, U32, )
373OPCODE(StorageAtomicUMax32, U32, U32, U32, U32, )
374OPCODE(StorageAtomicInc32, U32, U32, U32, U32, )
375OPCODE(StorageAtomicDec32, U32, U32, U32, U32, )
376OPCODE(StorageAtomicAnd32, U32, U32, U32, U32, )
377OPCODE(StorageAtomicOr32, U32, U32, U32, U32, )
378OPCODE(StorageAtomicXor32, U32, U32, U32, U32, )
379OPCODE(StorageAtomicExchange32, U32, U32, U32, U32, )
380OPCODE(StorageAtomicIAdd64, U64, U32, U32, U64, )
381OPCODE(StorageAtomicSMin64, U64, U32, U32, U64, )
382OPCODE(StorageAtomicUMin64, U64, U32, U32, U64, )
383OPCODE(StorageAtomicSMax64, U64, U32, U32, U64, )
384OPCODE(StorageAtomicUMax64, U64, U32, U32, U64, )
385OPCODE(StorageAtomicAnd64, U64, U32, U32, U64, )
386OPCODE(StorageAtomicOr64, U64, U32, U32, U64, )
387OPCODE(StorageAtomicXor64, U64, U32, U32, U64, )
388OPCODE(StorageAtomicExchange64, U64, U32, U32, U64, )
389OPCODE(StorageAtomicAddF32, F32, U32, U32, F32, )
390OPCODE(StorageAtomicAddF16x2, U32, U32, U32, F16x2, )
391OPCODE(StorageAtomicAddF32x2, U32, U32, U32, F32x2, )
392OPCODE(StorageAtomicMinF16x2, U32, U32, U32, F16x2, )
393OPCODE(StorageAtomicMinF32x2, U32, U32, U32, F32x2, )
394OPCODE(StorageAtomicMaxF16x2, U32, U32, U32, F16x2, )
395OPCODE(StorageAtomicMaxF32x2, U32, U32, U32, F32x2, )
396
397// Logical operations
398OPCODE(LogicalOr, U1, U1, U1, )
399OPCODE(LogicalAnd, U1, U1, U1, )
400OPCODE(LogicalXor, U1, U1, U1, )
401OPCODE(LogicalNot, U1, U1, )
402
403// Conversion operations
404OPCODE(ConvertS16F16, U32, F16, )
405OPCODE(ConvertS16F32, U32, F32, )
406OPCODE(ConvertS16F64, U32, F64, )
407OPCODE(ConvertS32F16, U32, F16, )
408OPCODE(ConvertS32F32, U32, F32, )
409OPCODE(ConvertS32F64, U32, F64, )
410OPCODE(ConvertS64F16, U64, F16, )
411OPCODE(ConvertS64F32, U64, F32, )
412OPCODE(ConvertS64F64, U64, F64, )
413OPCODE(ConvertU16F16, U32, F16, )
414OPCODE(ConvertU16F32, U32, F32, )
415OPCODE(ConvertU16F64, U32, F64, )
416OPCODE(ConvertU32F16, U32, F16, )
417OPCODE(ConvertU32F32, U32, F32, )
418OPCODE(ConvertU32F64, U32, F64, )
419OPCODE(ConvertU64F16, U64, F16, )
420OPCODE(ConvertU64F32, U64, F32, )
421OPCODE(ConvertU64F64, U64, F64, )
422OPCODE(ConvertU64U32, U64, U32, )
423OPCODE(ConvertU32U64, U32, U64, )
424OPCODE(ConvertF16F32, F16, F32, )
425OPCODE(ConvertF32F16, F32, F16, )
426OPCODE(ConvertF32F64, F32, F64, )
427OPCODE(ConvertF64F32, F64, F32, )
428OPCODE(ConvertF16S8, F16, U32, )
429OPCODE(ConvertF16S16, F16, U32, )
430OPCODE(ConvertF16S32, F16, U32, )
431OPCODE(ConvertF16S64, F16, U64, )
432OPCODE(ConvertF16U8, F16, U32, )
433OPCODE(ConvertF16U16, F16, U32, )
434OPCODE(ConvertF16U32, F16, U32, )
435OPCODE(ConvertF16U64, F16, U64, )
436OPCODE(ConvertF32S8, F32, U32, )
437OPCODE(ConvertF32S16, F32, U32, )
438OPCODE(ConvertF32S32, F32, U32, )
439OPCODE(ConvertF32S64, F32, U64, )
440OPCODE(ConvertF32U8, F32, U32, )
441OPCODE(ConvertF32U16, F32, U32, )
442OPCODE(ConvertF32U32, F32, U32, )
443OPCODE(ConvertF32U64, F32, U64, )
444OPCODE(ConvertF64S8, F64, U32, )
445OPCODE(ConvertF64S16, F64, U32, )
446OPCODE(ConvertF64S32, F64, U32, )
447OPCODE(ConvertF64S64, F64, U64, )
448OPCODE(ConvertF64U8, F64, U32, )
449OPCODE(ConvertF64U16, F64, U32, )
450OPCODE(ConvertF64U32, F64, U32, )
451OPCODE(ConvertF64U64, F64, U64, )
452
453// Image operations
454OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
455OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
456OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
457OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
458OPCODE(BindlessImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
459OPCODE(BindlessImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
460OPCODE(BindlessImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
461OPCODE(BindlessImageQueryDimensions, U32x4, U32, U32, )
462OPCODE(BindlessImageQueryLod, F32x4, U32, Opaque, )
463OPCODE(BindlessImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
464OPCODE(BindlessImageRead, U32x4, U32, Opaque, )
465OPCODE(BindlessImageWrite, Void, U32, Opaque, U32x4, )
466
467OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
468OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
469OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
470OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
471OPCODE(BoundImageGather, F32x4, U32, Opaque, Opaque, Opaque, )
472OPCODE(BoundImageGatherDref, F32x4, U32, Opaque, Opaque, Opaque, F32, )
473OPCODE(BoundImageFetch, F32x4, U32, Opaque, Opaque, U32, Opaque, )
474OPCODE(BoundImageQueryDimensions, U32x4, U32, U32, )
475OPCODE(BoundImageQueryLod, F32x4, U32, Opaque, )
476OPCODE(BoundImageGradient, F32x4, U32, Opaque, Opaque, Opaque, Opaque, )
477OPCODE(BoundImageRead, U32x4, U32, Opaque, )
478OPCODE(BoundImageWrite, Void, U32, Opaque, U32x4, )
479
480OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
481OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, Opaque, Opaque, )
482OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
483OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, F32, Opaque, Opaque, )
484OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, Opaque, )
485OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, Opaque, F32, )
486OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
487OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, )
488OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
489OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
490OPCODE(ImageRead, U32x4, Opaque, Opaque, )
491OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
492
493// Atomic Image operations
494
495OPCODE(BindlessImageAtomicIAdd32, U32, U32, Opaque, U32, )
496OPCODE(BindlessImageAtomicSMin32, U32, U32, Opaque, U32, )
497OPCODE(BindlessImageAtomicUMin32, U32, U32, Opaque, U32, )
498OPCODE(BindlessImageAtomicSMax32, U32, U32, Opaque, U32, )
499OPCODE(BindlessImageAtomicUMax32, U32, U32, Opaque, U32, )
500OPCODE(BindlessImageAtomicInc32, U32, U32, Opaque, U32, )
501OPCODE(BindlessImageAtomicDec32, U32, U32, Opaque, U32, )
502OPCODE(BindlessImageAtomicAnd32, U32, U32, Opaque, U32, )
503OPCODE(BindlessImageAtomicOr32, U32, U32, Opaque, U32, )
504OPCODE(BindlessImageAtomicXor32, U32, U32, Opaque, U32, )
505OPCODE(BindlessImageAtomicExchange32, U32, U32, Opaque, U32, )
506
507OPCODE(BoundImageAtomicIAdd32, U32, U32, Opaque, U32, )
508OPCODE(BoundImageAtomicSMin32, U32, U32, Opaque, U32, )
509OPCODE(BoundImageAtomicUMin32, U32, U32, Opaque, U32, )
510OPCODE(BoundImageAtomicSMax32, U32, U32, Opaque, U32, )
511OPCODE(BoundImageAtomicUMax32, U32, U32, Opaque, U32, )
512OPCODE(BoundImageAtomicInc32, U32, U32, Opaque, U32, )
513OPCODE(BoundImageAtomicDec32, U32, U32, Opaque, U32, )
514OPCODE(BoundImageAtomicAnd32, U32, U32, Opaque, U32, )
515OPCODE(BoundImageAtomicOr32, U32, U32, Opaque, U32, )
516OPCODE(BoundImageAtomicXor32, U32, U32, Opaque, U32, )
517OPCODE(BoundImageAtomicExchange32, U32, U32, Opaque, U32, )
518
519OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
520OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
521OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
522OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
523OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
524OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
525OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
526OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
527OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
528OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
529OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
530
531// Warp operations
532OPCODE(LaneId, U32, )
533OPCODE(VoteAll, U1, U1, )
534OPCODE(VoteAny, U1, U1, )
535OPCODE(VoteEqual, U1, U1, )
536OPCODE(SubgroupBallot, U32, U1, )
537OPCODE(SubgroupEqMask, U32, )
538OPCODE(SubgroupLtMask, U32, )
539OPCODE(SubgroupLeMask, U32, )
540OPCODE(SubgroupGtMask, U32, )
541OPCODE(SubgroupGeMask, U32, )
542OPCODE(ShuffleIndex, U32, U32, U32, U32, U32, )
543OPCODE(ShuffleUp, U32, U32, U32, U32, U32, )
544OPCODE(ShuffleDown, U32, U32, U32, U32, U32, )
545OPCODE(ShuffleButterfly, U32, U32, U32, U32, U32, )
546OPCODE(FSwizzleAdd, F32, F32, F32, U32, )
547OPCODE(DPdxFine, F32, F32, )
548OPCODE(DPdyFine, F32, F32, )
549OPCODE(DPdxCoarse, F32, F32, )
550OPCODE(DPdyCoarse, F32, F32, )
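The OPCODE(...) rows above form an X-macro table: each row carries the opcode name, its result type, and up to five argument types, with Opaque standing in for values the type system does not constrain. As a minimal sketch of how such a table is usually consumed (the matching opcodes.h is not part of this hunk, so the exact shape shown here is an assumption):

    // Hypothetical consumer of opcodes.inc; the real opcodes.h may differ.
    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "shader_recompiler/frontend/ir/opcodes.inc"
    #undef OPCODE
    };

Re-expanding the same include with a different OPCODE definition yields per-opcode metadata (names, result types, argument counts) without duplicating the table.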
diff --git a/src/shader_recompiler/frontend/ir/patch.cpp b/src/shader_recompiler/frontend/ir/patch.cpp
new file mode 100644
index 000000000..4c956a970
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.cpp
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/ir/patch.h"
7
8namespace Shader::IR {
9
10bool IsGeneric(Patch patch) noexcept {
11 return patch >= Patch::Component0 && patch <= Patch::Component119;
12}
13
14u32 GenericPatchIndex(Patch patch) {
15 if (!IsGeneric(patch)) {
16 throw InvalidArgument("Patch {} is not generic", patch);
17 }
18 return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) / 4;
19}
20
21u32 GenericPatchElement(Patch patch) {
22 if (!IsGeneric(patch)) {
23 throw InvalidArgument("Patch {} is not generic", patch);
24 }
25 return (static_cast<u32>(patch) - static_cast<u32>(Patch::Component0)) % 4;
26}
27
28} // namespace Shader::IR
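A worked example of the arithmetic above, using the Patch layout declared in patch.h below: Component0 has raw value 8 (six tessellation LOD entries plus two padding entries precede it), and every four consecutive components form one generic patch attribute.

    // Editorial sketch; the values follow from the enum in patch.h.
    // Patch::Component5 has raw value 8 + 5 = 13:
    //   GenericPatchIndex(Patch::Component5)   == (13 - 8) / 4 == 1
    //   GenericPatchElement(Patch::Component5) == (13 - 8) % 4 == 1
    // i.e. Component5 is element .y of generic patch attribute 1.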
diff --git a/src/shader_recompiler/frontend/ir/patch.h b/src/shader_recompiler/frontend/ir/patch.h
new file mode 100644
index 000000000..6d66ff0d6
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/patch.h
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8
9namespace Shader::IR {
10
11enum class Patch : u64 {
12 TessellationLodLeft,
13 TessellationLodTop,
14 TessellationLodRight,
15 TessellationLodBottom,
16 TessellationLodInteriorU,
17 TessellationLodInteriorV,
18 ComponentPadding0,
19 ComponentPadding1,
20 Component0,
21 Component1,
22 Component2,
23 Component3,
24 Component4,
25 Component5,
26 Component6,
27 Component7,
28 Component8,
29 Component9,
30 Component10,
31 Component11,
32 Component12,
33 Component13,
34 Component14,
35 Component15,
36 Component16,
37 Component17,
38 Component18,
39 Component19,
40 Component20,
41 Component21,
42 Component22,
43 Component23,
44 Component24,
45 Component25,
46 Component26,
47 Component27,
48 Component28,
49 Component29,
50 Component30,
51 Component31,
52 Component32,
53 Component33,
54 Component34,
55 Component35,
56 Component36,
57 Component37,
58 Component38,
59 Component39,
60 Component40,
61 Component41,
62 Component42,
63 Component43,
64 Component44,
65 Component45,
66 Component46,
67 Component47,
68 Component48,
69 Component49,
70 Component50,
71 Component51,
72 Component52,
73 Component53,
74 Component54,
75 Component55,
76 Component56,
77 Component57,
78 Component58,
79 Component59,
80 Component60,
81 Component61,
82 Component62,
83 Component63,
84 Component64,
85 Component65,
86 Component66,
87 Component67,
88 Component68,
89 Component69,
90 Component70,
91 Component71,
92 Component72,
93 Component73,
94 Component74,
95 Component75,
96 Component76,
97 Component77,
98 Component78,
99 Component79,
100 Component80,
101 Component81,
102 Component82,
103 Component83,
104 Component84,
105 Component85,
106 Component86,
107 Component87,
108 Component88,
109 Component89,
110 Component90,
111 Component91,
112 Component92,
113 Component93,
114 Component94,
115 Component95,
116 Component96,
117 Component97,
118 Component98,
119 Component99,
120 Component100,
121 Component101,
122 Component102,
123 Component103,
124 Component104,
125 Component105,
126 Component106,
127 Component107,
128 Component108,
129 Component109,
130 Component110,
131 Component111,
132 Component112,
133 Component113,
134 Component114,
135 Component115,
136 Component116,
137 Component117,
138 Component118,
139 Component119,
140};
141static_assert(static_cast<u64>(Patch::Component119) == 127);
142
143[[nodiscard]] bool IsGeneric(Patch patch) noexcept;
144
145[[nodiscard]] u32 GenericPatchIndex(Patch patch);
146
147[[nodiscard]] u32 GenericPatchElement(Patch patch);
148
149} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/post_order.cpp b/src/shader_recompiler/frontend/ir/post_order.cpp
new file mode 100644
index 000000000..16bc44101
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6
7#include <boost/container/flat_set.hpp>
8#include <boost/container/small_vector.hpp>
9
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/post_order.h"
12
13namespace Shader::IR {
14
15BlockList PostOrder(const AbstractSyntaxNode& root) {
16 boost::container::small_vector<Block*, 16> block_stack;
17 boost::container::flat_set<Block*> visited;
18 BlockList post_order_blocks;
19
20 if (root.type != AbstractSyntaxNode::Type::Block) {
21 throw LogicError("First node in abstract syntax list root is not a block");
22 }
23 Block* const first_block{root.data.block};
24 visited.insert(first_block);
25 block_stack.push_back(first_block);
26
27 while (!block_stack.empty()) {
28 Block* const block{block_stack.back()};
29 const auto visit{[&](Block* branch) {
30 if (!visited.insert(branch).second) {
31 return false;
32 }
33 // Calling push_back twice is faster than insert on MSVC
34 block_stack.push_back(block);
35 block_stack.push_back(branch);
36 return true;
37 }};
38 block_stack.pop_back();
39 if (std::ranges::none_of(block->ImmSuccessors(), visit)) {
40 post_order_blocks.push_back(block);
41 }
42 }
43 return post_order_blocks;
44}
45
46} // namespace Shader::IR
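PostOrder runs an iterative depth-first traversal with an explicit stack: a block is re-pushed together with each not-yet-visited successor, and emitted only once none of its successors remain unvisited. A sketch of the usual way to consume the result, under the assumption that a forward dataflow pass wants reverse post order:

    // Hypothetical caller; `root` is the first node of an AbstractSyntaxList.
    BlockList post_order{PostOrder(root)};
    // Reverse post order visits each block before its non-back-edge successors.
    for (auto it = post_order.rbegin(); it != post_order.rend(); ++it) {
        Block* const block{*it};
        // ... forward dataflow visit ...
    }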
diff --git a/src/shader_recompiler/frontend/ir/post_order.h b/src/shader_recompiler/frontend/ir/post_order.h
new file mode 100644
index 000000000..07bfbadc3
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/post_order.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
10namespace Shader::IR {
11
12BlockList PostOrder(const AbstractSyntaxNode& root);
13
14} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/pred.h b/src/shader_recompiler/frontend/ir/pred.h
new file mode 100644
index 000000000..4e7f32423
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/pred.h
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8#include "common/common_types.h"
8
9namespace Shader::IR {
10
11enum class Pred : u64 {
12 P0,
13 P1,
14 P2,
15 P3,
16 P4,
17 P5,
18 P6,
19 PT,
20};
21
22constexpr size_t NUM_USER_PREDS = 7;
23constexpr size_t NUM_PREDS = 8;
24
25[[nodiscard]] constexpr size_t PredIndex(Pred pred) noexcept {
26 return static_cast<size_t>(pred);
27}
28
29} // namespace Shader::IR
30
31template <>
32struct fmt::formatter<Shader::IR::Pred> {
33 constexpr auto parse(format_parse_context& ctx) {
34 return ctx.begin();
35 }
36 template <typename FormatContext>
37 auto format(const Shader::IR::Pred& pred, FormatContext& ctx) {
38 if (pred == Shader::IR::Pred::PT) {
39 return fmt::format_to(ctx.out(), "PT");
40 } else {
41 return fmt::format_to(ctx.out(), "P{}", static_cast<int>(pred));
42 }
43 }
44};
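With this specialization, predicates format directly through fmt; for example:

    // fmt::format("{}", Shader::IR::Pred::P3) == "P3"
    // fmt::format("{}", Shader::IR::Pred::PT) == "PT"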
diff --git a/src/shader_recompiler/frontend/ir/program.cpp b/src/shader_recompiler/frontend/ir/program.cpp
new file mode 100644
index 000000000..3fc06f855
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.cpp
@@ -0,0 +1,32 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <map>
6#include <string>
7
8#include <fmt/format.h>
9
10#include "shader_recompiler/frontend/ir/basic_block.h"
11#include "shader_recompiler/frontend/ir/program.h"
12#include "shader_recompiler/frontend/ir/value.h"
13
14namespace Shader::IR {
15
16std::string DumpProgram(const Program& program) {
17 size_t index{0};
18 std::map<const IR::Inst*, size_t> inst_to_index;
19 std::map<const IR::Block*, size_t> block_to_index;
20
21 for (const IR::Block* const block : program.blocks) {
22 block_to_index.emplace(block, index);
23 ++index;
24 }
25 std::string ret;
26 for (const auto& block : program.blocks) {
27 ret += IR::DumpBlock(*block, block_to_index, inst_to_index, index) + '\n';
28 }
29 return ret;
30}
31
32} // namespace Shader::IR
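DumpProgram numbers blocks up front so branch targets print as stable indices rather than pointers, and the shared inst_to_index map makes instruction numbering global across blocks. A minimal sketch of using it while debugging (the output channel is an assumption):

    // Hypothetical debugging aid; route the text into the project's logger.
    const std::string text{Shader::IR::DumpProgram(program)};
    std::fputs(text.c_str(), stderr); // needs <cstdio>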
diff --git a/src/shader_recompiler/frontend/ir/program.h b/src/shader_recompiler/frontend/ir/program.h
new file mode 100644
index 000000000..ebcaa8bc2
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/program.h
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <string>
9
10#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/program_header.h"
13#include "shader_recompiler/shader_info.h"
14#include "shader_recompiler/stage.h"
15
16namespace Shader::IR {
17
18struct Program {
19 AbstractSyntaxList syntax_list;
20 BlockList blocks;
21 BlockList post_order_blocks;
22 Info info;
23 Stage stage{};
24 std::array<u32, 3> workgroup_size{};
25 OutputTopology output_topology{};
26 u32 output_vertices{};
27 u32 invocations{};
28 u32 local_memory_size{};
29 u32 shared_memory_size{};
30 bool is_geometry_passthrough{};
31};
32
33[[nodiscard]] std::string DumpProgram(const Program& program);
34
35} // namespace Shader::IR
diff --git a/src/shader_recompiler/frontend/ir/reg.h b/src/shader_recompiler/frontend/ir/reg.h
new file mode 100644
index 000000000..a4b635792
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/reg.h
@@ -0,0 +1,332 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9#include "common/common_types.h"
10#include "shader_recompiler/exception.h"
11
12namespace Shader::IR {
13
14enum class Reg : u64 {
15 R0,
16 R1,
17 R2,
18 R3,
19 R4,
20 R5,
21 R6,
22 R7,
23 R8,
24 R9,
25 R10,
26 R11,
27 R12,
28 R13,
29 R14,
30 R15,
31 R16,
32 R17,
33 R18,
34 R19,
35 R20,
36 R21,
37 R22,
38 R23,
39 R24,
40 R25,
41 R26,
42 R27,
43 R28,
44 R29,
45 R30,
46 R31,
47 R32,
48 R33,
49 R34,
50 R35,
51 R36,
52 R37,
53 R38,
54 R39,
55 R40,
56 R41,
57 R42,
58 R43,
59 R44,
60 R45,
61 R46,
62 R47,
63 R48,
64 R49,
65 R50,
66 R51,
67 R52,
68 R53,
69 R54,
70 R55,
71 R56,
72 R57,
73 R58,
74 R59,
75 R60,
76 R61,
77 R62,
78 R63,
79 R64,
80 R65,
81 R66,
82 R67,
83 R68,
84 R69,
85 R70,
86 R71,
87 R72,
88 R73,
89 R74,
90 R75,
91 R76,
92 R77,
93 R78,
94 R79,
95 R80,
96 R81,
97 R82,
98 R83,
99 R84,
100 R85,
101 R86,
102 R87,
103 R88,
104 R89,
105 R90,
106 R91,
107 R92,
108 R93,
109 R94,
110 R95,
111 R96,
112 R97,
113 R98,
114 R99,
115 R100,
116 R101,
117 R102,
118 R103,
119 R104,
120 R105,
121 R106,
122 R107,
123 R108,
124 R109,
125 R110,
126 R111,
127 R112,
128 R113,
129 R114,
130 R115,
131 R116,
132 R117,
133 R118,
134 R119,
135 R120,
136 R121,
137 R122,
138 R123,
139 R124,
140 R125,
141 R126,
142 R127,
143 R128,
144 R129,
145 R130,
146 R131,
147 R132,
148 R133,
149 R134,
150 R135,
151 R136,
152 R137,
153 R138,
154 R139,
155 R140,
156 R141,
157 R142,
158 R143,
159 R144,
160 R145,
161 R146,
162 R147,
163 R148,
164 R149,
165 R150,
166 R151,
167 R152,
168 R153,
169 R154,
170 R155,
171 R156,
172 R157,
173 R158,
174 R159,
175 R160,
176 R161,
177 R162,
178 R163,
179 R164,
180 R165,
181 R166,
182 R167,
183 R168,
184 R169,
185 R170,
186 R171,
187 R172,
188 R173,
189 R174,
190 R175,
191 R176,
192 R177,
193 R178,
194 R179,
195 R180,
196 R181,
197 R182,
198 R183,
199 R184,
200 R185,
201 R186,
202 R187,
203 R188,
204 R189,
205 R190,
206 R191,
207 R192,
208 R193,
209 R194,
210 R195,
211 R196,
212 R197,
213 R198,
214 R199,
215 R200,
216 R201,
217 R202,
218 R203,
219 R204,
220 R205,
221 R206,
222 R207,
223 R208,
224 R209,
225 R210,
226 R211,
227 R212,
228 R213,
229 R214,
230 R215,
231 R216,
232 R217,
233 R218,
234 R219,
235 R220,
236 R221,
237 R222,
238 R223,
239 R224,
240 R225,
241 R226,
242 R227,
243 R228,
244 R229,
245 R230,
246 R231,
247 R232,
248 R233,
249 R234,
250 R235,
251 R236,
252 R237,
253 R238,
254 R239,
255 R240,
256 R241,
257 R242,
258 R243,
259 R244,
260 R245,
261 R246,
262 R247,
263 R248,
264 R249,
265 R250,
266 R251,
267 R252,
268 R253,
269 R254,
270 RZ,
271};
272static_assert(static_cast<int>(Reg::RZ) == 255);
273
274constexpr size_t NUM_USER_REGS = 255;
275constexpr size_t NUM_REGS = 256;
276
277[[nodiscard]] constexpr Reg operator+(Reg reg, int num) {
278 if (reg == Reg::RZ) {
279 // Adding or subtracting registers from RZ yields RZ
280 return Reg::RZ;
281 }
282 const int result{static_cast<int>(reg) + num};
283 if (result >= static_cast<int>(Reg::RZ)) {
284 throw LogicError("Overflow on register arithmetic");
285 }
286 if (result < 0) {
287 throw LogicError("Underflow on register arithmetic");
288 }
289 return static_cast<Reg>(result);
290}
291
292[[nodiscard]] constexpr Reg operator-(Reg reg, int num) {
293 return reg + (-num);
294}
295
296constexpr Reg operator++(Reg& reg) {
297 reg = reg + 1;
298 return reg;
299}
300
301constexpr Reg operator++(Reg& reg, int) {
302 const Reg copy{reg};
303 reg = reg + 1;
304 return copy;
305}
306
307[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
308 return static_cast<size_t>(reg);
309}
310
311[[nodiscard]] constexpr bool IsAligned(Reg reg, size_t align) {
312 return RegIndex(reg) % align == 0 || reg == Reg::RZ;
313}
314
315} // namespace Shader::IR
316
317template <>
318struct fmt::formatter<Shader::IR::Reg> {
319 constexpr auto parse(format_parse_context& ctx) {
320 return ctx.begin();
321 }
322 template <typename FormatContext>
323 auto format(const Shader::IR::Reg& reg, FormatContext& ctx) {
324 if (reg == Shader::IR::Reg::RZ) {
325 return fmt::format_to(ctx.out(), "RZ");
326 } else if (static_cast<int>(reg) >= 0 && static_cast<int>(reg) < 255) {
327 return fmt::format_to(ctx.out(), "R{}", static_cast<int>(reg));
328 } else {
329 throw Shader::LogicError("Invalid register with raw value {}", static_cast<int>(reg));
330 }
331 }
332};
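The register arithmetic above encodes two conventions: RZ is the zero register and absorbs any offset, while forming a register at or past RZ from a real register is a hard error rather than a wrap-around. A few worked cases (editorial, not part of the diff):

    using Shader::IR::Reg;
    static_assert(Reg::R4 + 2 == Reg::R6);
    static_assert(Reg::RZ + 10 == Reg::RZ);           // RZ absorbs arithmetic
    static_assert(Shader::IR::IsAligned(Reg::R8, 2)); // R8 is 2-aligned
    // Reg::R254 + 1 throws LogicError at runtime, so it cannot appear in a
    // constant expression.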
diff --git a/src/shader_recompiler/frontend/ir/type.cpp b/src/shader_recompiler/frontend/ir/type.cpp
new file mode 100644
index 000000000..f28341bfe
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <string>
7
8#include "shader_recompiler/frontend/ir/type.h"
9
10namespace Shader::IR {
11
12std::string NameOf(Type type) {
13 static constexpr std::array names{
14        "Opaque", "Reg", "Pred", "Attribute", "Patch", "U1", "U8", "U16", "U32",
15        "U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
16        "F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4",
17 };
18 const size_t bits{static_cast<size_t>(type)};
19 if (bits == 0) {
20 return "Void";
21 }
22 std::string result;
23 for (size_t i = 0; i < names.size(); i++) {
24 if ((bits & (size_t{1} << i)) != 0) {
25 if (!result.empty()) {
26 result += '|';
27 }
28 result += names[i];
29 }
30 }
31 return result;
32}
33
34bool AreTypesCompatible(Type lhs, Type rhs) noexcept {
35 return lhs == rhs || lhs == Type::Opaque || rhs == Type::Opaque;
36}
37
38} // namespace Shader::IR
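Because Type is a bit set, NameOf walks the bits in ascending order and joins the matching names with '|'; Void is the empty set and is special-cased. For example, given the bit assignments in type.h:

    // NameOf(Type::Void)            == "Void"
    // NameOf(Type::U32)             == "U32"
    // NameOf(Type::U32 | Type::F32) == "U32|F32"
    // AreTypesCompatible(Type::U32, Type::Opaque) == true // Opaque matches anything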
diff --git a/src/shader_recompiler/frontend/ir/type.h b/src/shader_recompiler/frontend/ir/type.h
new file mode 100644
index 000000000..294b230c4
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/type.h
@@ -0,0 +1,61 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <string>
8
9#include <fmt/format.h>
10
11#include "common/common_funcs.h"
12#include "shader_recompiler/exception.h"
13
14namespace Shader::IR {
15
16enum class Type {
17 Void = 0,
18 Opaque = 1 << 0,
19 Reg = 1 << 1,
20 Pred = 1 << 2,
21 Attribute = 1 << 3,
22 Patch = 1 << 4,
23 U1 = 1 << 5,
24 U8 = 1 << 6,
25 U16 = 1 << 7,
26 U32 = 1 << 8,
27 U64 = 1 << 9,
28 F16 = 1 << 10,
29 F32 = 1 << 11,
30 F64 = 1 << 12,
31 U32x2 = 1 << 13,
32 U32x3 = 1 << 14,
33 U32x4 = 1 << 15,
34 F16x2 = 1 << 16,
35 F16x3 = 1 << 17,
36 F16x4 = 1 << 18,
37 F32x2 = 1 << 19,
38 F32x3 = 1 << 20,
39 F32x4 = 1 << 21,
40 F64x2 = 1 << 22,
41 F64x3 = 1 << 23,
42 F64x4 = 1 << 24,
43};
44DECLARE_ENUM_FLAG_OPERATORS(Type)
45
46[[nodiscard]] std::string NameOf(Type type);
47
48[[nodiscard]] bool AreTypesCompatible(Type lhs, Type rhs) noexcept;
49
50} // namespace Shader::IR
51
52template <>
53struct fmt::formatter<Shader::IR::Type> {
54 constexpr auto parse(format_parse_context& ctx) {
55 return ctx.begin();
56 }
57 template <typename FormatContext>
58 auto format(const Shader::IR::Type& type, FormatContext& ctx) {
59 return fmt::format_to(ctx.out(), "{}", NameOf(type));
60 }
61};
diff --git a/src/shader_recompiler/frontend/ir/value.cpp b/src/shader_recompiler/frontend/ir/value.cpp
new file mode 100644
index 000000000..d365ea1bc
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.cpp
@@ -0,0 +1,99 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/ir/opcodes.h"
6#include "shader_recompiler/frontend/ir/value.h"
7
8namespace Shader::IR {
9
10Value::Value(IR::Inst* value) noexcept : type{Type::Opaque}, inst{value} {}
11
12Value::Value(IR::Reg value) noexcept : type{Type::Reg}, reg{value} {}
13
14Value::Value(IR::Pred value) noexcept : type{Type::Pred}, pred{value} {}
15
16Value::Value(IR::Attribute value) noexcept : type{Type::Attribute}, attribute{value} {}
17
18Value::Value(IR::Patch value) noexcept : type{Type::Patch}, patch{value} {}
19
20Value::Value(bool value) noexcept : type{Type::U1}, imm_u1{value} {}
21
22Value::Value(u8 value) noexcept : type{Type::U8}, imm_u8{value} {}
23
24Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
25
26Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
27
28Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
29
30Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
31
32Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
33
34IR::Type Value::Type() const noexcept {
35 if (IsPhi()) {
36 // The type of a phi node is stored in its flags
37 return inst->Flags<IR::Type>();
38 }
39 if (IsIdentity()) {
40 return inst->Arg(0).Type();
41 }
42 if (type == Type::Opaque) {
43 return inst->Type();
44 }
45 return type;
46}
47
48bool Value::operator==(const Value& other) const {
49 if (type != other.type) {
50 return false;
51 }
52 switch (type) {
53 case Type::Void:
54 return true;
55 case Type::Opaque:
56 return inst == other.inst;
57 case Type::Reg:
58 return reg == other.reg;
59 case Type::Pred:
60 return pred == other.pred;
61 case Type::Attribute:
62 return attribute == other.attribute;
63 case Type::Patch:
64 return patch == other.patch;
65 case Type::U1:
66 return imm_u1 == other.imm_u1;
67 case Type::U8:
68 return imm_u8 == other.imm_u8;
69 case Type::U16:
70 case Type::F16:
71 return imm_u16 == other.imm_u16;
72 case Type::U32:
73 case Type::F32:
74 return imm_u32 == other.imm_u32;
75 case Type::U64:
76 case Type::F64:
77 return imm_u64 == other.imm_u64;
78 case Type::U32x2:
79 case Type::U32x3:
80 case Type::U32x4:
81 case Type::F16x2:
82 case Type::F16x3:
83 case Type::F16x4:
84 case Type::F32x2:
85 case Type::F32x3:
86 case Type::F32x4:
87 case Type::F64x2:
88 case Type::F64x3:
89 case Type::F64x4:
90 break;
91 }
92 throw LogicError("Invalid type {}", type);
93}
94
95bool Value::operator!=(const Value& other) const {
96 return !operator==(other);
97}
98
99} // namespace Shader::IR
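Equality above is only defined between values with matching type tags; immediates compare by payload, with the floating-point cases reusing the integer payload of the same width, and Opaque values compare by Inst pointer identity. Vector types never occur as immediates, so reaching one indicates corrupted IR. For instance (editorial):

    // Value{u32{1}} == Value{u32{1}} -> true  (same tag, same payload)
    // Value{u32{1}} == Value{u16{1}} -> false (type tags differ)
    // Two Opaque values are equal only if they wrap the same Inst*.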
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
new file mode 100644
index 000000000..0c6bf684d
--- /dev/null
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -0,0 +1,398 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <array>
8#include <cstring>
9#include <memory>
10#include <type_traits>
11#include <utility>
12#include <vector>
13
14#include <boost/container/small_vector.hpp>
15#include <boost/intrusive/list.hpp>
16
17#include "common/assert.h"
18#include "common/bit_cast.h"
19#include "common/common_types.h"
20#include "shader_recompiler/exception.h"
21#include "shader_recompiler/frontend/ir/attribute.h"
22#include "shader_recompiler/frontend/ir/opcodes.h"
23#include "shader_recompiler/frontend/ir/patch.h"
24#include "shader_recompiler/frontend/ir/pred.h"
25#include "shader_recompiler/frontend/ir/reg.h"
26#include "shader_recompiler/frontend/ir/type.h"
27#include "shader_recompiler/frontend/ir/value.h"
28
29namespace Shader::IR {
30
31class Block;
32class Inst;
33
34struct AssociatedInsts;
35
36class Value {
37public:
38 Value() noexcept = default;
39 explicit Value(IR::Inst* value) noexcept;
40 explicit Value(IR::Reg value) noexcept;
41 explicit Value(IR::Pred value) noexcept;
42 explicit Value(IR::Attribute value) noexcept;
43 explicit Value(IR::Patch value) noexcept;
44 explicit Value(bool value) noexcept;
45 explicit Value(u8 value) noexcept;
46 explicit Value(u16 value) noexcept;
47 explicit Value(u32 value) noexcept;
48 explicit Value(f32 value) noexcept;
49 explicit Value(u64 value) noexcept;
50 explicit Value(f64 value) noexcept;
51
52 [[nodiscard]] bool IsIdentity() const noexcept;
53 [[nodiscard]] bool IsPhi() const noexcept;
54 [[nodiscard]] bool IsEmpty() const noexcept;
55 [[nodiscard]] bool IsImmediate() const noexcept;
56 [[nodiscard]] IR::Type Type() const noexcept;
57
58 [[nodiscard]] IR::Inst* Inst() const;
59 [[nodiscard]] IR::Inst* InstRecursive() const;
60 [[nodiscard]] IR::Value Resolve() const;
61 [[nodiscard]] IR::Reg Reg() const;
62 [[nodiscard]] IR::Pred Pred() const;
63 [[nodiscard]] IR::Attribute Attribute() const;
64 [[nodiscard]] IR::Patch Patch() const;
65 [[nodiscard]] bool U1() const;
66 [[nodiscard]] u8 U8() const;
67 [[nodiscard]] u16 U16() const;
68 [[nodiscard]] u32 U32() const;
69 [[nodiscard]] f32 F32() const;
70 [[nodiscard]] u64 U64() const;
71 [[nodiscard]] f64 F64() const;
72
73 [[nodiscard]] bool operator==(const Value& other) const;
74 [[nodiscard]] bool operator!=(const Value& other) const;
75
76private:
77 IR::Type type{};
78 union {
79 IR::Inst* inst{};
80 IR::Reg reg;
81 IR::Pred pred;
82 IR::Attribute attribute;
83 IR::Patch patch;
84 bool imm_u1;
85 u8 imm_u8;
86 u16 imm_u16;
87 u32 imm_u32;
88 f32 imm_f32;
89 u64 imm_u64;
90 f64 imm_f64;
91 };
92};
93static_assert(static_cast<u32>(IR::Type::Void) == 0, "memset relies on IR::Type being zero");
94static_assert(std::is_trivially_copyable_v<Value>);
95
96template <IR::Type type_>
97class TypedValue : public Value {
98public:
99 TypedValue() = default;
100
101 template <IR::Type other_type>
102 requires((other_type & type_) != IR::Type::Void) explicit(false)
103 TypedValue(const TypedValue<other_type>& value)
104 : Value(value) {}
105
106 explicit TypedValue(const Value& value) : Value(value) {
107 if ((value.Type() & type_) == IR::Type::Void) {
108 throw InvalidArgument("Incompatible types {} and {}", type_, value.Type());
109 }
110 }
111
112 explicit TypedValue(IR::Inst* inst_) : TypedValue(Value(inst_)) {}
113};
114
115class Inst : public boost::intrusive::list_base_hook<> {
116public:
117 explicit Inst(IR::Opcode op_, u32 flags_) noexcept;
118 ~Inst();
119
120 Inst& operator=(const Inst&) = delete;
121 Inst(const Inst&) = delete;
122
123 Inst& operator=(Inst&&) = delete;
124 Inst(Inst&&) = delete;
125
126 /// Get the number of uses this instruction has.
127 [[nodiscard]] int UseCount() const noexcept {
128 return use_count;
129 }
130
131 /// Determines whether this instruction has uses or not.
132 [[nodiscard]] bool HasUses() const noexcept {
133 return use_count > 0;
134 }
135
136 /// Get the opcode this microinstruction represents.
137 [[nodiscard]] IR::Opcode GetOpcode() const noexcept {
138 return op;
139 }
140
141 /// Determines if there is a pseudo-operation associated with this instruction.
142 [[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
143 return associated_insts != nullptr;
144 }
145
146 /// Determines whether or not this instruction may have side effects.
147 [[nodiscard]] bool MayHaveSideEffects() const noexcept;
148
149 /// Determines whether or not this instruction is a pseudo-instruction.
150 /// Pseudo-instructions depend on their parent instructions for their semantics.
151 [[nodiscard]] bool IsPseudoInstruction() const noexcept;
152
153 /// Determines if all arguments of this instruction are immediates.
154 [[nodiscard]] bool AreAllArgsImmediates() const;
155
156 /// Gets a pseudo-operation associated with this instruction
157 [[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
158
159 /// Get the type this instruction returns.
160 [[nodiscard]] IR::Type Type() const;
161
162 /// Get the number of arguments this instruction has.
163 [[nodiscard]] size_t NumArgs() const {
164 return op == IR::Opcode::Phi ? phi_args.size() : NumArgsOf(op);
165 }
166
167 /// Get the value of a given argument index.
168 [[nodiscard]] Value Arg(size_t index) const noexcept {
169 if (op == IR::Opcode::Phi) {
170 return phi_args[index].second;
171 } else {
172 return args[index];
173 }
174 }
175
176 /// Set the value of a given argument index.
177 void SetArg(size_t index, Value value);
178
179 /// Get a pointer to the block of a phi argument.
180 [[nodiscard]] Block* PhiBlock(size_t index) const;
181 /// Add phi operand to a phi instruction.
182 void AddPhiOperand(Block* predecessor, const Value& value);
183
184 void Invalidate();
185 void ClearArgs();
186
187 void ReplaceUsesWith(Value replacement);
188
189 void ReplaceOpcode(IR::Opcode opcode);
190
191 template <typename FlagsType>
192 requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
193 [[nodiscard]] FlagsType Flags() const noexcept {
194 FlagsType ret;
195 std::memcpy(reinterpret_cast<char*>(&ret), &flags, sizeof(ret));
196 return ret;
197 }
198
199 template <typename FlagsType>
200 requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
201    void SetFlags(FlagsType value) noexcept {
202 std::memcpy(&flags, &value, sizeof(value));
203 }
204
205 /// Intrusively store the host definition of this instruction.
206 template <typename DefinitionType>
207 void SetDefinition(DefinitionType def) {
208 definition = Common::BitCast<u32>(def);
209 }
210
211 /// Return the intrusively stored host definition of this instruction.
212 template <typename DefinitionType>
213 [[nodiscard]] DefinitionType Definition() const noexcept {
214 return Common::BitCast<DefinitionType>(definition);
215 }
216
217 /// Destructively remove one reference count from the instruction
218 /// Useful for register allocation
219 void DestructiveRemoveUsage() {
220 --use_count;
221 }
222
223 /// Destructively add usages to the instruction
224 /// Useful for register allocation
225 void DestructiveAddUsage(int count) {
226 use_count += count;
227 }
228
229private:
230 struct NonTriviallyDummy {
231 NonTriviallyDummy() noexcept {}
232 };
233
234 void Use(const Value& value);
235 void UndoUse(const Value& value);
236
237 IR::Opcode op{};
238 int use_count{};
239 u32 flags{};
240 u32 definition{};
241 union {
242 NonTriviallyDummy dummy{};
243 boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
244 std::array<Value, 5> args;
245 };
246 std::unique_ptr<AssociatedInsts> associated_insts;
247};
248static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
249
250struct AssociatedInsts {
251 union {
252 Inst* in_bounds_inst;
253 Inst* sparse_inst;
254 Inst* zero_inst{};
255 };
256 Inst* sign_inst{};
257 Inst* carry_inst{};
258 Inst* overflow_inst{};
259};
260
261using U1 = TypedValue<Type::U1>;
262using U8 = TypedValue<Type::U8>;
263using U16 = TypedValue<Type::U16>;
264using U32 = TypedValue<Type::U32>;
265using U64 = TypedValue<Type::U64>;
266using F16 = TypedValue<Type::F16>;
267using F32 = TypedValue<Type::F32>;
268using F64 = TypedValue<Type::F64>;
269using U32U64 = TypedValue<Type::U32 | Type::U64>;
270using F32F64 = TypedValue<Type::F32 | Type::F64>;
271using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
272using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
273using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
274
275inline bool Value::IsIdentity() const noexcept {
276 return type == Type::Opaque && inst->GetOpcode() == Opcode::Identity;
277}
278
279inline bool Value::IsPhi() const noexcept {
280 return type == Type::Opaque && inst->GetOpcode() == Opcode::Phi;
281}
282
283inline bool Value::IsEmpty() const noexcept {
284 return type == Type::Void;
285}
286
287inline bool Value::IsImmediate() const noexcept {
288 IR::Type current_type{type};
289 const IR::Inst* current_inst{inst};
290 while (current_type == Type::Opaque && current_inst->GetOpcode() == Opcode::Identity) {
291 const Value& arg{current_inst->Arg(0)};
292 current_type = arg.type;
293 current_inst = arg.inst;
294 }
295 return current_type != Type::Opaque;
296}
297
298inline IR::Inst* Value::Inst() const {
299 DEBUG_ASSERT(type == Type::Opaque);
300 return inst;
301}
302
303inline IR::Inst* Value::InstRecursive() const {
304 DEBUG_ASSERT(type == Type::Opaque);
305 if (IsIdentity()) {
306 return inst->Arg(0).InstRecursive();
307 }
308 return inst;
309}
310
311inline IR::Value Value::Resolve() const {
312 if (IsIdentity()) {
313 return inst->Arg(0).Resolve();
314 }
315 return *this;
316}
317
318inline IR::Reg Value::Reg() const {
319 DEBUG_ASSERT(type == Type::Reg);
320 return reg;
321}
322
323inline IR::Pred Value::Pred() const {
324 DEBUG_ASSERT(type == Type::Pred);
325 return pred;
326}
327
328inline IR::Attribute Value::Attribute() const {
329 DEBUG_ASSERT(type == Type::Attribute);
330 return attribute;
331}
332
333inline IR::Patch Value::Patch() const {
334 DEBUG_ASSERT(type == Type::Patch);
335 return patch;
336}
337
338inline bool Value::U1() const {
339 if (IsIdentity()) {
340 return inst->Arg(0).U1();
341 }
342 DEBUG_ASSERT(type == Type::U1);
343 return imm_u1;
344}
345
346inline u8 Value::U8() const {
347 if (IsIdentity()) {
348 return inst->Arg(0).U8();
349 }
350 DEBUG_ASSERT(type == Type::U8);
351 return imm_u8;
352}
353
354inline u16 Value::U16() const {
355 if (IsIdentity()) {
356 return inst->Arg(0).U16();
357 }
358 DEBUG_ASSERT(type == Type::U16);
359 return imm_u16;
360}
361
362inline u32 Value::U32() const {
363 if (IsIdentity()) {
364 return inst->Arg(0).U32();
365 }
366 DEBUG_ASSERT(type == Type::U32);
367 return imm_u32;
368}
369
370inline f32 Value::F32() const {
371 if (IsIdentity()) {
372 return inst->Arg(0).F32();
373 }
374 DEBUG_ASSERT(type == Type::F32);
375 return imm_f32;
376}
377
378inline u64 Value::U64() const {
379 if (IsIdentity()) {
380 return inst->Arg(0).U64();
381 }
382 DEBUG_ASSERT(type == Type::U64);
383 return imm_u64;
384}
385
386inline f64 Value::F64() const {
387 if (IsIdentity()) {
388 return inst->Arg(0).F64();
389 }
390 DEBUG_ASSERT(type == Type::F64);
391 return imm_f64;
392}
393
394[[nodiscard]] inline bool IsPhi(const Inst& inst) {
395 return inst.GetOpcode() == Opcode::Phi;
396}
397
398} // namespace Shader::IR
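Identity instructions act as transparent forwarding nodes: the immediate accessors and IsImmediate chase Arg(0) until a concrete value appears, and Resolve makes that chase explicit. A sketch of the behaviour on a hypothetical chain:

    // Given %1 = immediate u32 42, %2 = Identity(%1), %3 = Identity(%2),
    // and Value v wrapping %3:
    //   v.IsImmediate() -> true (walks %3 -> %2 -> %1)
    //   v.U32()         -> 42   (each accessor forwards through Identity)
    //   v.Resolve()     -> the plain immediate Value 42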
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..1a954a509
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <optional>
8#include <string>
9#include <utility>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/exception.h"
14#include "shader_recompiler/frontend/maxwell/control_flow.h"
15#include "shader_recompiler/frontend/maxwell/decode.h"
16#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
17#include "shader_recompiler/frontend/maxwell/location.h"
18
19namespace Shader::Maxwell::Flow {
20namespace {
21struct Compare {
22 bool operator()(const Block& lhs, Location rhs) const noexcept {
23 return lhs.begin < rhs;
24 }
25
26 bool operator()(Location lhs, const Block& rhs) const noexcept {
27 return lhs < rhs.begin;
28 }
29
30 bool operator()(const Block& lhs, const Block& rhs) const noexcept {
31 return lhs.begin < rhs.begin;
32 }
33};
34
35u32 BranchOffset(Location pc, Instruction inst) {
36 return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
37}
38
39void Split(Block* old_block, Block* new_block, Location pc) {
40 if (pc <= old_block->begin || pc >= old_block->end) {
41 throw InvalidArgument("Invalid address to split={}", pc);
42 }
43 *new_block = Block{};
44 new_block->begin = pc;
45 new_block->end = old_block->end;
46 new_block->end_class = old_block->end_class;
47 new_block->cond = old_block->cond;
48 new_block->stack = old_block->stack;
49 new_block->branch_true = old_block->branch_true;
50 new_block->branch_false = old_block->branch_false;
51 new_block->function_call = old_block->function_call;
52 new_block->return_block = old_block->return_block;
53 new_block->branch_reg = old_block->branch_reg;
54 new_block->branch_offset = old_block->branch_offset;
55 new_block->indirect_branches = std::move(old_block->indirect_branches);
56
57 const Location old_begin{old_block->begin};
58 Stack old_stack{std::move(old_block->stack)};
59 *old_block = Block{};
60 old_block->begin = old_begin;
61 old_block->end = pc;
62 old_block->end_class = EndClass::Branch;
63 old_block->cond = IR::Condition(true);
64 old_block->stack = old_stack;
65 old_block->branch_true = new_block;
66 old_block->branch_false = nullptr;
67}
68
69Token OpcodeToken(Opcode opcode) {
70 switch (opcode) {
71 case Opcode::PBK:
72 case Opcode::BRK:
73 return Token::PBK;
74 case Opcode::PCNT:
75 case Opcode::CONT:
76        return Token::PCNT;
77 case Opcode::PEXIT:
78 case Opcode::EXIT:
79 return Token::PEXIT;
80 case Opcode::PLONGJMP:
81 case Opcode::LONGJMP:
82 return Token::PLONGJMP;
83 case Opcode::PRET:
84 case Opcode::RET:
85 case Opcode::CAL:
86 return Token::PRET;
87 case Opcode::SSY:
88 case Opcode::SYNC:
89 return Token::SSY;
90 default:
91 throw InvalidArgument("{}", opcode);
92 }
93}
94
95bool IsAbsoluteJump(Opcode opcode) {
96 switch (opcode) {
97 case Opcode::JCAL:
98 case Opcode::JMP:
99 case Opcode::JMX:
100 return true;
101 default:
102 return false;
103 }
104}
105
106bool HasFlowTest(Opcode opcode) {
107 switch (opcode) {
108 case Opcode::BRA:
109 case Opcode::BRX:
110 case Opcode::EXIT:
111 case Opcode::JMP:
112 case Opcode::JMX:
113 case Opcode::KIL:
114 case Opcode::BRK:
115 case Opcode::CONT:
116 case Opcode::LONGJMP:
117 case Opcode::RET:
118 case Opcode::SYNC:
119 return true;
120 case Opcode::CAL:
121 case Opcode::JCAL:
122 return false;
123 default:
124 throw InvalidArgument("Invalid branch {}", opcode);
125 }
126}
127
128std::string NameOf(const Block& block) {
129 if (block.begin.IsVirtual()) {
130 return fmt::format("\"Virtual {}\"", block.begin);
131 } else {
132 return fmt::format("\"{}\"", block.begin);
133 }
134}
135} // Anonymous namespace
136
137void Stack::Push(Token token, Location target) {
138 entries.push_back({
139 .token = token,
140 .target{target},
141 });
142}
143
144std::pair<Location, Stack> Stack::Pop(Token token) const {
145 const std::optional<Location> pc{Peek(token)};
146 if (!pc) {
147 throw LogicError("Token could not be found");
148 }
149 return {*pc, Remove(token)};
150}
151
152std::optional<Location> Stack::Peek(Token token) const {
153 const auto it{std::find_if(entries.rbegin(), entries.rend(),
154 [token](const auto& entry) { return entry.token == token; })};
155 if (it == entries.rend()) {
156 return std::nullopt;
157 }
158 return it->target;
159}
160
161Stack Stack::Remove(Token token) const {
162 const auto it{std::find_if(entries.rbegin(), entries.rend(),
163 [token](const auto& entry) { return entry.token == token; })};
164 const auto pos{std::distance(entries.rbegin(), it)};
165 Stack result;
166 result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
167 return result;
168}
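// Editorial note: Stack has value semantics and is persistent. Pop does not
// mutate the receiver; it returns the popped target plus a copy of the stack
// with the matching entry (and anything pushed above it) removed:
//   Stack s;
//   s.Push(Token::SSY, target);
//   const auto [pc, rest] = s.Pop(Token::SSY); // s still holds its SSY entry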
169
170bool Block::Contains(Location pc) const noexcept {
171 return pc >= begin && pc < end;
172}
173
174Function::Function(ObjectPool<Block>& block_pool, Location start_address)
175 : entrypoint{start_address} {
176 Label& label{labels.emplace_back()};
177 label.address = start_address;
178 label.block = block_pool.Create(Block{});
179 label.block->begin = start_address;
180 label.block->end = start_address;
181 label.block->end_class = EndClass::Branch;
182 label.block->cond = IR::Condition(true);
183 label.block->branch_true = nullptr;
184 label.block->branch_false = nullptr;
185}
186
187CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
188 bool exits_to_dispatcher_)
189 : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
190 exits_to_dispatcher_} {
191 if (exits_to_dispatcher) {
192 dispatch_block = block_pool.Create(Block{});
193 dispatch_block->begin = {};
194 dispatch_block->end = {};
195 dispatch_block->end_class = EndClass::Exit;
196 dispatch_block->cond = IR::Condition(true);
197 dispatch_block->stack = {};
198 dispatch_block->branch_true = nullptr;
199 dispatch_block->branch_false = nullptr;
200 }
201 functions.emplace_back(block_pool, start_address);
202 for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
203 while (!functions[function_id].labels.empty()) {
204 Function& function{functions[function_id]};
205 Label label{function.labels.back()};
206 function.labels.pop_back();
207 AnalyzeLabel(function_id, label);
208 }
209 }
210 if (exits_to_dispatcher) {
211 const auto last_block{functions[0].blocks.rbegin()};
212 dispatch_block->begin = last_block->end + 1;
213 dispatch_block->end = last_block->end + 1;
214 functions[0].blocks.insert(*dispatch_block);
215 }
216}
217
218void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
219 if (InspectVisitedBlocks(function_id, label)) {
220 // Label address has been visited
221 return;
222 }
223 // Try to find the next block
224 Function* const function{&functions[function_id]};
225 Location pc{label.address};
226 const auto next_it{function->blocks.upper_bound(pc, Compare{})};
227 const bool is_last{next_it == function->blocks.end()};
228 Block* const next{is_last ? nullptr : &*next_it};
229 // Insert before the next block
230 Block* const block{label.block};
231    // Analyze instructions until reaching an already-visited block or a branch
232 bool is_branch{false};
233 while (!next || pc < next->begin) {
234 is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
235 if (is_branch) {
236 break;
237 }
238 ++pc;
239 }
240 if (!is_branch) {
241        // If the block finished without a branch, the next instruction has
242        // already been visited; jump straight to it
243 block->end = pc;
244 block->cond = IR::Condition{true};
245 block->branch_true = next;
246 block->branch_false = nullptr;
247 }
248    // Function's pointer might be invalid; resolve it again
249 // Insert the new block
250 functions[function_id].blocks.insert(*block);
251}
252
253bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
254 const Location pc{label.address};
255 Function& function{functions[function_id]};
256 const auto it{
257 std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
258 if (it == function.blocks.end()) {
259 // Address has not been visited
260 return false;
261 }
262 Block* const visited_block{&*it};
263 if (visited_block->begin == pc) {
264 throw LogicError("Dangling block");
265 }
266 Block* const new_block{label.block};
267 Split(visited_block, new_block, pc);
268 function.blocks.insert(it, *new_block);
269 return true;
270}
271
272CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
273 const Instruction inst{env.ReadInstruction(pc.Offset())};
274 const Opcode opcode{Decode(inst.raw)};
275 switch (opcode) {
276 case Opcode::BRA:
277 case Opcode::JMP:
278 case Opcode::RET:
279 if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
280 return AnalysisState::Continue;
281 }
282 switch (opcode) {
283 case Opcode::BRA:
284 case Opcode::JMP:
285 AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
286 break;
287 case Opcode::RET:
288 block->end_class = EndClass::Return;
289 break;
290 default:
291 break;
292 }
293 block->end = pc;
294 return AnalysisState::Branch;
295 case Opcode::BRK:
296 case Opcode::CONT:
297 case Opcode::LONGJMP:
298 case Opcode::SYNC: {
299 if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
300 return AnalysisState::Continue;
301 }
302 const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
303 block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
304 block->end = pc;
305 return AnalysisState::Branch;
306 }
307 case Opcode::KIL: {
308 const Predicate pred{inst.Pred()};
309 const auto ir_pred{static_cast<IR::Pred>(pred.index)};
310 const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
311 AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
312 return AnalysisState::Branch;
313 }
314 case Opcode::PBK:
315 case Opcode::PCNT:
316 case Opcode::PEXIT:
317 case Opcode::PLONGJMP:
318 case Opcode::SSY:
319 block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
320 return AnalysisState::Continue;
321 case Opcode::BRX:
322 case Opcode::JMX:
323 return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
324 case Opcode::EXIT:
325 return AnalyzeEXIT(block, function_id, pc, inst);
326 case Opcode::PRET:
327 throw NotImplementedException("PRET flow analysis");
328 case Opcode::CAL:
329 case Opcode::JCAL: {
330 const bool is_absolute{IsAbsoluteJump(opcode)};
331 const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
332 // Technically CAL pushes into PRET, but that's implicit in the function call for us
333 // Insert the function into the list if it doesn't exist
334 const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
335 const bool exists{it != functions.end()};
336 const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
337 : functions.size()};
338 if (!exists) {
339 functions.emplace_back(block_pool, cal_pc);
340 }
341 block->end_class = EndClass::Call;
342 block->function_call = call_id;
343 block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
344 block->end = pc;
345 return AnalysisState::Branch;
346 }
347 default:
348 break;
349 }
350 const Predicate pred{inst.Pred()};
351 if (pred == Predicate{true} || pred == Predicate{false}) {
352 return AnalysisState::Continue;
353 }
354 const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
355 AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
356 return AnalysisState::Branch;
357}
358
359void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
360 EndClass insn_end_class, IR::Condition cond) {
361 if (block->begin != pc) {
362 // If the block doesn't start in the conditional instruction
363 // mark it as a label to visit it later
364 block->end = pc;
365 block->cond = IR::Condition{true};
366 block->branch_true = AddLabel(block, block->stack, pc, function_id);
367 block->branch_false = nullptr;
368 return;
369 }
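// The conditional instruction starts the block: split it into a zero-length virtual
// block holding the condition and a conditional block owning the instruction itself,
// so labels that point to this pc keep resolving to the right node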
370 // Create a virtual block and a conditional block
371 Block* const conditional_block{block_pool.Create()};
372 Block virtual_block{};
373 virtual_block.begin = block->begin.Virtual();
374 virtual_block.end = block->begin.Virtual();
375 virtual_block.end_class = EndClass::Branch;
376 virtual_block.stack = block->stack;
377 virtual_block.cond = cond;
378 virtual_block.branch_true = conditional_block;
379 virtual_block.branch_false = nullptr;
380 // Save the contents of the visited block in the conditional block
381 *conditional_block = std::move(*block);
382 // Impersonate the visited block with a virtual block
383 *block = std::move(virtual_block);
384 // Set the end properties of the conditional instruction
385 conditional_block->end = pc + 1;
386 conditional_block->end_class = insn_end_class;
387 // Add a label to the instruction after the conditional instruction
388 Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
389 // Branch to the next instruction from the virtual block
390 block->branch_false = endif_block;
391 // And branch to it from the conditional instruction if it is a branch or a kill instruction
392 // Kill instructions are considered a branch because they demote to a helper invocation and
393 // execution may continue.
394 if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
395 conditional_block->cond = IR::Condition{true};
396 conditional_block->branch_true = endif_block;
397 conditional_block->branch_false = nullptr;
398 }
399 // Finally insert the condition block into the list of blocks
400 functions[function_id].blocks.insert(*conditional_block);
401}
402
403bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
404 Opcode opcode) {
405 if (inst.branch.is_cbuf) {
406 throw NotImplementedException("Branch with constant buffer offset");
407 }
408 const Predicate pred{inst.Pred()};
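// A branch predicated on !PT is never taken; treat it as a regular instruction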
409 if (pred == Predicate{false}) {
410 return false;
411 }
412 const bool has_flow_test{HasFlowTest(opcode)};
413 const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
414 if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
415 block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
416 block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
417 } else {
418 block->cond = IR::Condition{true};
419 }
420 return true;
421}
422
423void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
424 bool is_absolute) {
425 const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
426 block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
427}
428
429CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
430 FunctionId function_id) {
431 const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
432 if (!brx_table) {
433 // The producer chain could not be tracked; bail out instead of guessing targets
434 throw NotImplementedException("Failed to track indirect branch");
435 }
436 const IR::FlowTest flow_test{inst.branch.flow_test};
437 const Predicate pred{inst.Pred()};
438 if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
439 throw NotImplementedException("Conditional indirect branch");
440 }
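// Materialize every possible target from the constant buffer table. Each entry is
// biased by the tracked branch offset plus 8, presumably because branch targets are
// encoded relative to the instruction that follows the branch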
441 std::vector<u32> targets;
442 targets.reserve(brx_table->num_entries);
443 for (u32 i = 0; i < brx_table->num_entries; ++i) {
444 u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
445 if (!is_absolute) {
446 target += pc.Offset();
447 }
448 target += static_cast<u32>(brx_table->branch_offset);
449 target += 8;
450 targets.push_back(target);
451 }
452 std::ranges::sort(targets);
453 targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
454
455 block->indirect_branches.reserve(targets.size());
456 for (const u32 target : targets) {
457 Block* const branch{AddLabel(block, block->stack, target, function_id)};
458 block->indirect_branches.push_back({
459 .block = branch,
460 .address = target,
461 });
462 }
463 block->cond = IR::Condition{true};
464 block->end = pc + 1;
465 block->end_class = EndClass::IndirectBranch;
466 block->branch_reg = brx_table->branch_reg;
467 block->branch_offset = brx_table->branch_offset + 8;
468 if (!is_absolute) {
469 block->branch_offset += pc.Offset();
470 }
471 return AnalysisState::Branch;
472}
473
474CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
475 Instruction inst) {
476 const IR::FlowTest flow_test{inst.branch.flow_test};
477 const Predicate pred{inst.Pred()};
478 if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
479 // EXIT will never be taken
480 return AnalysisState::Continue;
481 }
482 if (exits_to_dispatcher && function_id != 0) {
483 throw NotImplementedException("Dispatch EXIT on external function");
484 }
485 if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
486 if (block->stack.Peek(Token::PEXIT).has_value()) {
487 throw NotImplementedException("Conditional EXIT with PEXIT token");
488 }
489 const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
490 if (exits_to_dispatcher) {
491 block->end = pc;
492 block->end_class = EndClass::Branch;
493 block->cond = cond;
494 block->branch_true = dispatch_block;
495 block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
496 return AnalysisState::Branch;
497 }
498 AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
499 return AnalysisState::Branch;
500 }
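// An unconditional EXIT with a pending PEXIT token behaves like a jump to the
// address pushed by PEXIT rather than ending the invocation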
501 if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
502 const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
503 block->cond = IR::Condition{true};
504 block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
505 block->branch_false = nullptr;
506 return AnalysisState::Branch;
507 }
508 if (exits_to_dispatcher) {
509 block->cond = IR::Condition{true};
510 block->end = pc;
511 block->end_class = EndClass::Branch;
512 block->branch_true = dispatch_block;
513 block->branch_false = nullptr;
514 return AnalysisState::Branch;
515 }
516 block->end = pc + 1;
517 block->end_class = EndClass::Exit;
518 return AnalysisState::Branch;
519}
520
521Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
522 Function& function{functions[function_id]};
523 if (block->begin == pc) {
524 // Jumps to itself
525 return block;
526 }
527 if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
528 // Block already exists and it has been visited
529 if (function.blocks.begin() != it) {
530 // Check if the previous node is the virtual variant of the label
531 // This won't exist if a virtual node is not needed or it hasn't been visited
532 // If it hasn't been visited and a virtual node is needed, this will still behave as
533 // expected because the node was impersonated by its virtual node.
534 const auto prev{std::prev(it)};
535 if (it->begin.Virtual() == prev->begin) {
536 return &*prev;
537 }
538 }
539 return &*it;
540 }
541 // Make sure we don't insert the same label twice
542 const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
543 if (label_it != function.labels.end()) {
544 return label_it->block;
545 }
546 Block* const new_block{block_pool.Create()};
547 new_block->begin = pc;
548 new_block->end = pc;
549 new_block->end_class = EndClass::Branch;
550 new_block->cond = IR::Condition(true);
551 new_block->stack = stack;
552 new_block->branch_true = nullptr;
553 new_block->branch_false = nullptr;
554 function.labels.push_back(Label{
555 .address{pc},
556 .block = new_block,
557 .stack{std::move(stack)},
558 });
559 return new_block;
560}
561
562std::string CFG::Dot() const {
563 int node_uid{0};
564
565 std::string dot{"digraph shader {\n"};
566 for (const Function& function : functions) {
567 dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
568 dot += fmt::format("\t\tnode [style=filled];\n");
569 for (const Block& block : function.blocks) {
570 const std::string name{NameOf(block)};
571 const auto add_branch = [&](Block* branch, bool add_label) {
572 dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
573 if (add_label && block.cond != IR::Condition{true} &&
574 block.cond != IR::Condition{false}) {
575 dot += fmt::format(" [label=\"{}\"]", block.cond);
576 }
577 dot += '\n';
578 };
579 dot += fmt::format("\t\t{};\n", name);
580 switch (block.end_class) {
581 case EndClass::Branch:
582 if (block.cond != IR::Condition{false}) {
583 add_branch(block.branch_true, true);
584 }
585 if (block.cond != IR::Condition{true}) {
586 add_branch(block.branch_false, false);
587 }
588 break;
589 case EndClass::IndirectBranch:
590 for (const IndirectBranch& branch : block.indirect_branches) {
591 add_branch(branch.block, false);
592 }
593 break;
594 case EndClass::Call:
595 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
596 dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
597 dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=striped];\n",
598 node_uid, block.function_call);
599 dot += '\n';
600 ++node_uid;
601 break;
602 case EndClass::Exit:
603 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
604 dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=striped];\n",
605 node_uid);
606 ++node_uid;
607 break;
608 case EndClass::Return:
609 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
610 dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=striped];\n",
611 node_uid);
612 ++node_uid;
613 break;
614 case EndClass::Kill:
615 dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
616 dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=striped];\n",
617 node_uid);
618 ++node_uid;
619 break;
620 }
621 }
622 if (function.entrypoint == 8) {
623 dot += fmt::format("\t\tlabel = \"main\";\n");
624 } else {
625 dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
626 }
627 dot += "\t}\n";
628 }
629 if (!functions.empty()) {
630 auto& function{functions.front()};
631 if (function.blocks.empty()) {
632 dot += "\tStart;\n";
633 } else {
634 dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
635 }
636 dot += fmt::format("\tStart [shape=diamond];\n");
637 }
638 dot += "}\n";
639 return dot;
640}
641
642} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <optional>
9#include <span>
10#include <string>
11#include <vector>
12
13#include <boost/container/small_vector.hpp>
14#include <boost/intrusive/set.hpp>
15
16#include "shader_recompiler/environment.h"
17#include "shader_recompiler/frontend/ir/condition.h"
18#include "shader_recompiler/frontend/maxwell/instruction.h"
19#include "shader_recompiler/frontend/maxwell/location.h"
20#include "shader_recompiler/frontend/maxwell/opcodes.h"
21#include "shader_recompiler/object_pool.h"
22
23namespace Shader::Maxwell::Flow {
24
25struct Block;
26
27using FunctionId = size_t;
28
29enum class EndClass {
30 Branch,
31 IndirectBranch,
32 Call,
33 Exit,
34 Return,
35 Kill,
36};
37
38enum class Token {
39 SSY,
40 PBK,
41 PEXIT,
42 PRET,
43 PCNT,
44 PLONGJMP,
45};
46
47struct StackEntry {
48 auto operator<=>(const StackEntry&) const noexcept = default;
49
50 Token token;
51 Location target;
52};
53
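// Stacks are value types: Pop and Remove return a modified copy, letting each branch
// target carry its own version of the token stack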
54class Stack {
55public:
56 void Push(Token token, Location target);
57 [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
58 [[nodiscard]] std::optional<Location> Peek(Token token) const;
59 [[nodiscard]] Stack Remove(Token token) const;
60
61private:
62 boost::container::small_vector<StackEntry, 3> entries;
63};
64
65struct IndirectBranch {
66 Block* block;
67 u32 address;
68};
69
70struct Block : boost::intrusive::set_base_hook<
71 // Normal link is ~2.5% faster compared to safe link
72 boost::intrusive::link_mode<boost::intrusive::normal_link>> {
73 [[nodiscard]] bool Contains(Location pc) const noexcept;
74
75 bool operator<(const Block& rhs) const noexcept {
76 return begin < rhs.begin;
77 }
78
79 Location begin;
80 Location end;
81 EndClass end_class{};
82 IR::Condition cond{};
83 Stack stack;
84 Block* branch_true{};
85 Block* branch_false{};
86 FunctionId function_call{};
87 Block* return_block{};
88 IR::Reg branch_reg{};
89 s32 branch_offset{};
90 std::vector<IndirectBranch> indirect_branches;
91};
92
93struct Label {
94 Location address;
95 Block* block;
96 Stack stack;
97};
98
99struct Function {
100 explicit Function(ObjectPool<Block>& block_pool, Location start_address);
101
102 Location entrypoint;
103 boost::container::small_vector<Label, 16> labels;
104 boost::intrusive::set<Block> blocks;
105};
106
107class CFG {
108 enum class AnalysisState {
109 Branch,
110 Continue,
111 };
112
113public:
114 explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
115 bool exits_to_dispatcher = false);
116
117 CFG& operator=(const CFG&) = delete;
118 CFG(const CFG&) = delete;
119
120 CFG& operator=(CFG&&) = delete;
121 CFG(CFG&&) = delete;
122
123 [[nodiscard]] std::string Dot() const;
124
125 [[nodiscard]] std::span<const Function> Functions() const noexcept {
126 return std::span(functions.data(), functions.size());
127 }
128 [[nodiscard]] std::span<Function> Functions() noexcept {
129 return std::span(functions.data(), functions.size());
130 }
131
132 [[nodiscard]] bool ExitsToDispatcher() const {
133 return exits_to_dispatcher;
134 }
135
136private:
137 void AnalyzeLabel(FunctionId function_id, Label& label);
138
139 /// Inspect already visited blocks.
140 /// Return true when the block has already been visited
141 bool InspectVisitedBlocks(FunctionId function_id, const Label& label);
142
143 AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);
144
145 void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
146 IR::Condition cond);
147
148 /// Return true when the branch instruction is confirmed to be a branch
149 bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
150 Opcode opcode);
151
152 void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
153 bool is_absolute);
154 AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
155 FunctionId function_id);
156 AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);
157
158 /// Return the branch target block
159 Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);
160
161 Environment& env;
162 ObjectPool<Block>& block_pool;
163 boost::container::small_vector<Function, 1> functions;
164 Location program_start;
165 bool exits_to_dispatcher{};
166 Block* dispatch_block{};
167};
168
169} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <bit>
8#include <memory>
9#include <string_view>
10
11#include "common/common_types.h"
12#include "shader_recompiler/exception.h"
13#include "shader_recompiler/frontend/maxwell/decode.h"
14#include "shader_recompiler/frontend/maxwell/opcodes.h"
15
16namespace Shader::Maxwell {
17namespace {
18struct MaskValue {
19 u64 mask;
20 u64 value;
21};
22
23constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
24 u64 mask{};
25 u64 value{};
26 u64 bit{u64(1) << 63};
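// Walk the encoding string from the most significant bit; spaces are visual
// separators and do not advance the bit cursor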
27 while (*encoding) {
28 switch (*encoding) {
29 case '0':
30 mask |= bit;
31 break;
32 case '1':
33 mask |= bit;
34 value |= bit;
35 break;
36 case '-':
37 break;
38 case ' ':
39 break;
40 default:
41 throw LogicError("Invalid encoding character '{}'", *encoding);
42 }
43 ++encoding;
44 if (*encoding != ' ') {
45 bit >>= 1;
46 }
47 }
48 return MaskValue{.mask = mask, .value = value};
49}
50
51struct InstEncoding {
52 MaskValue mask_value;
53 Opcode opcode;
54};
55constexpr std::array UNORDERED_ENCODINGS{
56#define INST(name, cute, encode) \
57 InstEncoding{ \
58 .mask_value{MaskValueFromEncoding(encode)}, \
59 .opcode = Opcode::name, \
60 },
61#include "maxwell.inc"
62#undef INST
63};
64
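// Sort encodings from most to least constrained (by mask popcount) so that, when two
// patterns overlap, the more specific opcode is matched first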
65constexpr auto SortedEncodings() {
66 std::array encodings{UNORDERED_ENCODINGS};
67 std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
68 return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
69 });
70 return encodings;
71}
72constexpr auto ENCODINGS{SortedEncodings()};
73
74constexpr int WidestLeftBits() {
75 int bits{64};
76 for (const InstEncoding& encoding : ENCODINGS) {
77 bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
78 }
79 return 64 - bits;
80}
81constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
82constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};
83
84constexpr size_t ToFastLookupIndex(u64 value) {
85 return static_cast<size_t>(value >> MASK_SHIFT);
86}
87
88constexpr size_t FastLookupSize() {
89 size_t max_width{};
90 for (const InstEncoding& encoding : ENCODINGS) {
91 max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
92 }
93 return max_width + 1;
94}
95constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};
96
97struct InstInfo {
98 [[nodiscard]] u64 Mask() const noexcept {
99 return static_cast<u64>(high_mask) << MASK_SHIFT;
100 }
101
102 [[nodiscard]] u64 Value() const noexcept {
103 return static_cast<u64>(high_value) << MASK_SHIFT;
104 }
105
106 u16 high_mask;
107 u16 high_value;
108 Opcode opcode;
109};
110
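// Collect the candidate encodings whose high bits are compatible with a given table
// index. The table relies on no more than two encodings sharing a bucket; at() throws
// if a future instruction breaks that invariant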
111constexpr auto MakeFastLookupTableIndex(size_t index) {
112 std::array<InstInfo, 2> encodings{};
113 size_t element{};
114 for (const auto& encoding : ENCODINGS) {
115 const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
116 const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
117 if ((index & mask) == value) {
118 encodings.at(element) = InstInfo{
119 .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
120 .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
121 .opcode = encoding.opcode,
122 };
123 ++element;
124 }
125 }
126 return encodings;
127}
128
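// Not constexpr: the table lives behind make_unique, and constexpr allocations cannot
// escape to runtime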
129/*constexpr*/ auto MakeFastLookupTable() {
130 auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
131 for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
132 (*encodings)[index] = MakeFastLookupTableIndex(index);
133 }
134 return encodings;
135}
136const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
137} // Anonymous namespace
138
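// Two-level decoding: the instruction's high bits select a bucket of at most two
// candidates, then each candidate's full high mask is compared against the instruction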
139Opcode Decode(u64 insn) {
140 const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
141 const auto it{std::ranges::find_if(
142 table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
143 if (it == table.end()) {
144 throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
145 }
146 return it->opcode;
147}
148
149} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11
12[[nodiscard]] Opcode Decode(u64 insn);
13
14} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/decode.h"
10#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
11#include "shader_recompiler/frontend/maxwell/opcodes.h"
12#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
13
14namespace Shader::Maxwell {
15namespace {
16union Encoding {
17 u64 raw;
18 BitField<0, 8, IR::Reg> dest_reg;
19 BitField<8, 8, IR::Reg> src_reg;
20 BitField<20, 19, u64> immediate;
21 BitField<56, 1, u64> is_negative;
22 BitField<20, 24, s64> brx_offset;
23};
24
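// Scan instructions backwards from the indirect branch towards the beginning of the
// block until the callback recognizes the producer it is looking for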
25template <typename Callable>
26std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
27 while (pos >= block_begin) {
28 const u64 insn{env.ReadInstruction(pos.Offset())};
29 --pos;
30 if (func(insn, Decode(insn))) {
31 return insn;
32 }
33 }
34 return std::nullopt;
35}
36
37std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
38 IR::Reg brx_reg) {
39 return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
40 const LDC::Encoding ldc{insn};
41 return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
42 ldc.mode == LDC::Mode::Default;
43 });
44}
45
46std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
47 IR::Reg ldc_reg) {
48 return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
49 const Encoding shl{insn};
50 return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
51 });
52}
53
54std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
55 IR::Reg shl_reg) {
56 return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
57 const Encoding imnmx{insn};
58 return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
59 });
60}
61} // Anonymous namespace
62
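// An indirect branch table is recognized by walking the chain of producers backwards
// from the BRX/JMX: an LDC that loads the target from a constant buffer, the SHL that
// scales the table index, and the IMNMX that clamps it (the clamp immediate bounds the
// table, hence num_entries = immediate + 1)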
63std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
64 Location block_begin) {
65 const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
66 const Opcode brx_opcode{Decode(brx_insn)};
67 if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
68 throw LogicError("Tracked instruction is not BRX or JMX");
69 }
70 const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
71 const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};
72
73 Location pos{brx_pos};
74 const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
75 if (!ldc_insn) {
76 return std::nullopt;
77 }
78 const LDC::Encoding ldc{*ldc_insn};
79 const u32 cbuf_index{static_cast<u32>(ldc.index)};
80 const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
81 const IR::Reg ldc_reg{ldc.src_reg};
82
83 const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
84 if (!shl_insn) {
85 return std::nullopt;
86 }
87 const Encoding shl{*shl_insn};
88 const IR::Reg shl_reg{shl.src_reg};
89
90 const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
91 if (!imnmx_insn) {
92 return std::nullopt;
93 }
94 const Encoding imnmx{*imnmx_insn};
95 if (imnmx.is_negative != 0) {
96 return std::nullopt;
97 }
98 const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
99 return IndirectBranchTableInfo{
100 .cbuf_index = cbuf_index,
101 .cbuf_offset = cbuf_offset,
102 .num_entries = imnmx_immediate + 1,
103 .branch_offset = brx_offset,
104 .branch_reg = brx_reg,
105 };
106}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/reg.h"
13#include "shader_recompiler/frontend/maxwell/location.h"
14
15namespace Shader::Maxwell {
16
17struct IndirectBranchTableInfo {
18 u32 cbuf_index{};
19 u32 cbuf_offset{};
20 u32 num_entries{};
21 s32 branch_offset{};
22 IR::Reg branch_reg{};
23};
24
25std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
26 Location block_begin);
27
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/flow_test.h"
10#include "shader_recompiler/frontend/ir/reg.h"
11
12namespace Shader::Maxwell {
13
14struct Predicate {
15 Predicate() = default;
16 Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
17 Predicate(bool value) : index{7}, negated{!value} {}
18 Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}
19
20 unsigned index;
21 bool negated;
22};
23
24inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
25 return lhs.index == rhs.index && lhs.negated == rhs.negated;
26}
27
28inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
29 return !(lhs == rhs);
30}
31
32union Instruction {
33 Instruction(u64 raw_) : raw{raw_} {}
34
35 u64 raw;
36
37 union {
38 BitField<5, 1, u64> is_cbuf;
39 BitField<0, 5, IR::FlowTest> flow_test;
40
41 [[nodiscard]] u32 Absolute() const noexcept {
42 return static_cast<u32>(absolute);
43 }
44
45 [[nodiscard]] s32 Offset() const noexcept {
46 return static_cast<s32>(offset);
47 }
48
49 private:
50 BitField<20, 24, s64> offset;
51 BitField<20, 32, u64> absolute;
52 } branch;
53
54 [[nodiscard]] Predicate Pred() const noexcept {
55 return Predicate{pred};
56 }
57
58private:
59 BitField<16, 4, u64> pred;
60};
61static_assert(std::is_trivially_copyable_v<Instruction>);
62
63} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <iterator>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "shader_recompiler/exception.h"
14
15namespace Shader::Maxwell {
16
17class Location {
18 static constexpr u32 VIRTUAL_BIAS{4};
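// Virtual locations sit 4 bytes before their real counterpart; since real
// instructions are 8-byte aligned, a virtual address can never collide with a real one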
19
20public:
21 constexpr Location() = default;
22
23 constexpr Location(u32 initial_offset) : offset{initial_offset} {
24 if (initial_offset % 8 != 0) {
25 throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
26 }
27 Align();
28 }
29
30 constexpr Location Virtual() const noexcept {
31 Location virtual_location;
32 virtual_location.offset = offset - VIRTUAL_BIAS;
33 return virtual_location;
34 }
35
36 [[nodiscard]] constexpr u32 Offset() const noexcept {
37 return offset;
38 }
39
40 [[nodiscard]] constexpr bool IsVirtual() const {
41 return offset % 8 == VIRTUAL_BIAS;
42 }
43
44 constexpr auto operator<=>(const Location&) const noexcept = default;
45
46 constexpr Location operator++() noexcept {
47 Step();
48 return *this;
49 }
50
51 constexpr Location operator++(int) noexcept {
52 const Location copy{*this};
53 Step();
54 return copy;
55 }
56
57 constexpr Location operator--() noexcept {
58 Back();
59 return *this;
60 }
61
62 constexpr Location operator--(int) noexcept {
63 const Location copy{*this};
64 Back();
65 return copy;
66 }
67
68 constexpr Location operator+(int number) const {
69 Location new_pc{*this};
70 while (number > 0) {
71 --number;
72 ++new_pc;
73 }
74 while (number < 0) {
75 ++number;
76 --new_pc;
77 }
78 return new_pc;
79 }
80
81 constexpr Location operator-(int number) const {
82 return operator+(-number);
83 }
84
85private:
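// Maxwell packs instructions in 32-byte bundles: an 8-byte scheduling word followed
// by three 8-byte instructions. Align, Step and Back skip over the scheduling word,
// which lives at offsets that are multiples of 32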
86 constexpr void Align() {
87 offset += offset % 32 == 0 ? 8 : 0;
88 }
89
90 constexpr void Step() {
91 offset += 8 + (offset % 32 == 24 ? 8 : 0);
92 }
93
94 constexpr void Back() {
95 offset -= 8 + (offset % 32 == 8 ? 8 : 0);
96 }
97
98 u32 offset{0xcccccccc};
99};
100
101} // namespace Shader::Maxwell
102
103template <>
104struct fmt::formatter<Shader::Maxwell::Location> {
105 constexpr auto parse(format_parse_context& ctx) {
106 return ctx.begin();
107 }
108 template <typename FormatContext>
109 auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
110 return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
111 }
112};
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5INST(AL2P, "AL2P", "1110 1111 1010 0---")
6INST(ALD, "ALD", "1110 1111 1101 1---")
7INST(AST, "AST", "1110 1111 1111 0---")
8INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
9INST(ATOM, "ATOM", "1110 1101 ---- ----")
10INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
11INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
12INST(B2R, "B2R", "1111 0000 1011 1---")
13INST(BAR, "BAR", "1111 0000 1010 1---")
14INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
15INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
16INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
17INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
18INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
19INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
20INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
21INST(BPT, "BPT", "1110 0011 1010 ----")
22INST(BRA, "BRA", "1110 0010 0100 ----")
23INST(BRK, "BRK", "1110 0011 0100 ----")
24INST(BRX, "BRX", "1110 0010 0101 ----")
25INST(CAL, "CAL", "1110 0010 0110 ----")
26INST(CCTL, "CCTL", "1110 1111 011- ----")
27INST(CCTLL, "CCTLL", "1110 1111 100- ----")
28INST(CONT, "CONT", "1110 0011 0101 ----")
29INST(CS2R, "CS2R", "0101 0000 1100 1---")
30INST(CSET, "CSET", "0101 0000 1001 1---")
31INST(CSETP, "CSETP", "0101 0000 1010 0---")
32INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
33INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
34INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
35INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
36INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
37INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
38INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
39INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
40INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
41INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
42INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
43INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
44INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
45INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
46INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
47INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
48INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
49INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
50INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
51INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
52INST(EXIT, "EXIT", "1110 0011 0000 ----")
53INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
54INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
55INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
56INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
57INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
58INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
59INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
60INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
61INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
62INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
63INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
64INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
65INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
66INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
67INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
68INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
69INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
70INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
71INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
72INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
73INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
74INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
75INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
76INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
77INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
78INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
79INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
80INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
81INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
82INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
83INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
84INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
85INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
86INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
87INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
88INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
89INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
90INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
91INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
92INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
93INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
94INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
95INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
96INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
97INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
98INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
99INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
100INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
101INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
102INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
103INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
104INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
105INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
106INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
107INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
108INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
109INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
110INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
111INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
112INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
113INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
114INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
115INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
116INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
117INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
118INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
119INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
120INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
121INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
122INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
123INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
124INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
125INST(IADD32I, "IADD32I", "0001 110- ---- ----")
126INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
127INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
128INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
129INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
130INST(IDE, "IDE", "1110 0011 1001 ----")
131INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
132INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
133INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
134INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
135INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
136INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
137INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
138INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
139INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
140INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
141INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
142INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
143INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
144INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
145INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
146INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
147INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
148INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
149INST(IPA, "IPA", "1110 0000 ---- ----")
150INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
151INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
152INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
153INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
154INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
155INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
156INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
157INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
158INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
159INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
160INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
161INST(JCAL, "JCAL", "1110 0010 0010 ----")
162INST(JMP, "JMP", "1110 0010 0001 ----")
163INST(JMX, "JMX", "1110 0010 0000 ----")
164INST(KIL, "KIL", "1110 0011 0011 ----")
165INST(LD, "LD", "100- ---- ---- ----")
166INST(LDC, "LDC", "1110 1111 1001 0---")
167INST(LDG, "LDG", "1110 1110 1101 0---")
168INST(LDL, "LDL", "1110 1111 0100 0---")
169INST(LDS, "LDS", "1110 1111 0100 1---")
170INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
171INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
172INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
173INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
174INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
175INST(LEPC, "LEPC", "0101 0000 1101 0---")
176INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
177INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
178INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
179INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
180INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
181INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
182INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
183INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
184INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
185INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
186INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
187INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
188INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
189INST(MUFU, "MUFU", "0101 0000 1000 0---")
190INST(NOP, "NOP", "0101 0000 1011 0---")
191INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
192INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
193INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
194INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
195INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
196INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
197INST(PBK, "PBK", "1110 0010 1010 ----")
198INST(PCNT, "PCNT", "1110 0010 1011 ----")
199INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
200INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
201INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
202INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
203INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
204INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
205INST(PRET, "PRET", "1110 0010 0111 ----")
206INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
207INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
208INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
209INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
210INST(PSET, "PSET", "0101 0000 1000 1---")
211INST(PSETP, "PSETP", "0101 0000 1001 0---")
212INST(R2B, "R2B", "1111 0000 1100 0---")
213INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
214INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
215INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
216INST(RAM, "RAM", "1110 0011 1000 ----")
217INST(RED, "RED", "1110 1011 1111 1---")
218INST(RET, "RET", "1110 0011 0010 ----")
219INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
220INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
221INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
222INST(RTT, "RTT", "1110 0011 0110 ----")
223INST(S2R, "S2R", "1111 0000 1100 1---")
224INST(SAM, "SAM", "1110 0011 0111 ----")
225INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
226INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
227INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
228INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
229INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
230INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
231INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
232INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
233INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
234INST(SHFL, "SHFL", "1110 1111 0001 0---")
235INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
236INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
237INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
238INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
239INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
240INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
241INST(SSY, "SSY", "1110 0010 1001 ----")
242INST(ST, "ST", "101- ---- ---- ----")
243INST(STG, "STG", "1110 1110 1101 1---")
244INST(STL, "STL", "1110 1111 0101 0---")
245INST(STP, "STP", "1110 1110 1010 0---")
246INST(STS, "STS", "1110 1111 0101 1---")
247INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
248INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
249INST(SULD, "SULD", "1110 1011 000- ----")
250INST(SURED, "SURED", "1110 1011 010- ----")
251INST(SUST, "SUST", "1110 1011 001- ----")
252INST(SYNC, "SYNC", "1111 0000 1111 1---")
253INST(TEX, "TEX", "1100 0--- ---- ----")
254INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
255INST(TEXS, "TEXS", "1101 -00- ---- ----")
256INST(TLD, "TLD", "1101 1100 ---- ----")
257INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
258INST(TLD4, "TLD4", "1100 10-- ---- ----")
259INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
260INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
261INST(TLDS, "TLDS", "1101 -01- ---- ----")
262INST(TMML, "TMML", "1101 1111 0101 1---")
263INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
264INST(TXA, "TXA", "1101 1111 0100 0---")
265INST(TXD, "TXD", "1101 1110 00-- ----")
266INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
267INST(TXQ, "TXQ", "1101 1111 0100 1---")
268INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
269INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
270INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
271INST(VADD, "VADD", "0010 00-- ---- ----")
272INST(VMAD, "VMAD", "0101 1111 ---- ----")
273INST(VMNMX, "VMNMX", "0011 101- ---- ----")
274INST(VOTE, "VOTE", "0101 0000 1101 1---")
275INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
276INST(VSET, "VSET", "0100 000- ---- ----")
277INST(VSETP, "VSETP", "0101 0000 1111 0---")
278INST(VSHL, "VSHL", "0101 0111 ---- ----")
279INST(VSHR, "VSHR", "0101 0110 ---- ----")
280INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
281INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
282INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
283INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
284
285// Removed because it constrains the lowest encoding bit, multiplying the size of the fast lookup tables
286// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11namespace {
12constexpr std::array NAME_TABLE{
13#define INST(name, cute, encode) cute,
14#include "maxwell.inc"
15#undef INST
16};
17} // Anonymous namespace
18
19const char* NameOf(Opcode opcode) {
20 if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
21 throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
22 }
23 return NAME_TABLE[static_cast<size_t>(opcode)];
24}
25
26} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
9namespace Shader::Maxwell {
10
11enum class Opcode {
12#define INST(name, cute, encode) name,
13#include "maxwell.inc"
14#undef INST
15};
16
17const char* NameOf(Opcode opcode);
18
19} // namespace Shader::Maxwell
20
21template <>
22struct fmt::formatter<Shader::Maxwell::Opcode> {
23 constexpr auto parse(format_parse_context& ctx) {
24 return ctx.begin();
25 }
26 template <typename FormatContext>
27 auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
28 return format_to(ctx.out(), "{}", NameOf(opcode));
29 }
30};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <string>
8#include <unordered_map>
9#include <utility>
10#include <vector>
11#include <version>
12
13#include <fmt/format.h>
14
15#include <boost/intrusive/list.hpp>
16
17#include "shader_recompiler/environment.h"
18#include "shader_recompiler/frontend/ir/basic_block.h"
19#include "shader_recompiler/frontend/ir/ir_emitter.h"
20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/object_pool.h"
24
25namespace Shader::Maxwell {
26namespace {
27struct Statement;
28
29// Use normal_link because we are not guaranteed to destroy the tree in order
30using ListBaseHook =
31 boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;
32
33using Tree = boost::intrusive::list<Statement,
34 // Allow using Statement without a definition
35 boost::intrusive::base_hook<ListBaseHook>,
36 // Avoid linear complexity on splice, size is never called
37 boost::intrusive::constant_time_size<false>>;
38using Node = Tree::iterator;
39
40enum class StatementType {
41 Code,
42 Goto,
43 Label,
44 If,
45 Loop,
46 Break,
47 Return,
48 Kill,
49 Unreachable,
50 Function,
51 Identity,
52 Not,
53 Or,
54 SetVariable,
55 SetIndirectBranchVariable,
56 Variable,
57 IndirectBranchCond,
58};
59
60bool HasChildren(StatementType type) {
61 switch (type) {
62 case StatementType::If:
63 case StatementType::Loop:
64 case StatementType::Function:
65 return true;
66 default:
67 return false;
68 }
69}
70
71struct Goto {};
72struct Label {};
73struct If {};
74struct Loop {};
75struct Break {};
76struct Return {};
77struct Kill {};
78struct Unreachable {};
79struct FunctionTag {};
80struct Identity {};
81struct Not {};
82struct Or {};
83struct SetVariable {};
84struct SetIndirectBranchVariable {};
85struct Variable {};
86struct IndirectBranchCond {};
87
88#ifdef _MSC_VER
89#pragma warning(push)
90#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
91#endif
92struct Statement : ListBaseHook {
93 Statement(const Flow::Block* block_, Statement* up_)
94 : block{block_}, up{up_}, type{StatementType::Code} {}
95 Statement(Goto, Statement* cond_, Node label_, Statement* up_)
96 : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
97 Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
98 Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
99 : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
100 Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
101 : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
102 Statement(Break, Statement* cond_, Statement* up_)
103 : cond{cond_}, up{up_}, type{StatementType::Break} {}
104 Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
105 Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
106 Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
107 Statement(FunctionTag) : children{}, type{StatementType::Function} {}
108 Statement(Identity, IR::Condition cond_, Statement* up_)
109 : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
110 Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
111 Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
112 : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
113 Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
114 : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
115 Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
116 : branch_offset{branch_offset_},
117 branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
118 Statement(Variable, u32 id_, Statement* up_)
119 : id{id_}, up{up_}, type{StatementType::Variable} {}
120 Statement(IndirectBranchCond, u32 location_, Statement* up_)
121 : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}
122
123 ~Statement() {
124 if (HasChildren(type)) {
125 std::destroy_at(&children);
126 }
127 }
128
129 union {
130 const Flow::Block* block;
131 Node label;
132 Tree children;
133 IR::Condition guest_cond;
134 Statement* op;
135 Statement* op_a;
136 u32 location;
137 s32 branch_offset;
138 };
139 union {
140 Statement* cond;
141 Statement* op_b;
142 u32 id;
143 IR::Reg branch_reg;
144 };
145 Statement* up{};
146 StatementType type;
147};
148#ifdef _MSC_VER
149#pragma warning(pop)
150#endif
151
152std::string DumpExpr(const Statement* stmt) {
153 switch (stmt->type) {
154 case StatementType::Identity:
155 return fmt::format("{}", stmt->guest_cond);
156 case StatementType::Not:
157 return fmt::format("!{}", DumpExpr(stmt->op));
158 case StatementType::Or:
159 return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
160 case StatementType::Variable:
161 return fmt::format("goto_L{}", stmt->id);
162 case StatementType::IndirectBranchCond:
163 return fmt::format("(indirect_branch == {:x})", stmt->location);
164 default:
165 return "<invalid type>";
166 }
167}
168
169[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
170 std::string ret;
171 std::string indent(indentation, ' ');
172 for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
173 switch (stmt->type) {
174 case StatementType::Code:
175 ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
176 stmt->block->begin.Offset(), stmt->block->end.Offset(),
177 reinterpret_cast<uintptr_t>(stmt->block));
178 break;
179 case StatementType::Goto:
180 ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
181 stmt->label->id);
182 break;
183 case StatementType::Label:
184 ret += fmt::format("{}L{}:\n", indent, stmt->id);
185 break;
186 case StatementType::If:
187 ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
188 ret += DumpTree(stmt->children, indentation + 4);
189 ret += fmt::format("{} }}\n", indent);
190 break;
191 case StatementType::Loop:
192 ret += fmt::format("{} do {{\n", indent);
193 ret += DumpTree(stmt->children, indentation + 4);
194 ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
195 break;
196 case StatementType::Break:
197 ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
198 break;
199 case StatementType::Return:
200 ret += fmt::format("{} return;\n", indent);
201 break;
202 case StatementType::Kill:
203 ret += fmt::format("{} kill;\n", indent);
204 break;
205 case StatementType::Unreachable:
206 ret += fmt::format("{} unreachable;\n", indent);
207 break;
208 case StatementType::SetVariable:
209 ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
210 break;
211 case StatementType::SetIndirectBranchVariable:
212 ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
213 stmt->branch_offset);
214 break;
215 case StatementType::Function:
216 case StatementType::Identity:
217 case StatementType::Not:
218 case StatementType::Or:
219 case StatementType::Variable:
220 case StatementType::IndirectBranchCond:
221 throw LogicError("Statement can't be printed");
222 }
223 }
224 return ret;
225}
226
227void SanitizeNoBreaks(const Tree& tree) {
228 if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
229 throw NotImplementedException("Capturing statement with break nodes");
230 }
231}
232
233size_t Level(Node stmt) {
234 size_t level{0};
235 Statement* node{stmt->up};
236 while (node) {
237 ++level;
238 node = node->up;
239 }
240 return level;
241}
242
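// Two statements are directly related when, after walking the deeper one up to the
// shallower one's nesting level, both hang from the same parent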
243bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
244 const size_t goto_level{Level(goto_stmt)};
245 const size_t label_level{Level(label_stmt)};
246 size_t min_level;
247 size_t max_level;
248 Node min;
249 Node max;
250 if (label_level < goto_level) {
251 min_level = label_level;
252 max_level = goto_level;
253 min = label_stmt;
254 max = goto_stmt;
255 } else { // goto_level <= label_level
256 min_level = goto_level;
257 max_level = label_level;
258 min = goto_stmt;
259 max = label_stmt;
260 }
261 while (max_level > min_level) {
262 --max_level;
263 max = max->up;
264 }
265 return min->up == max->up;
266}
267
268bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
269 return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
270}
271
272[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
273 Node it{goto_stmt};
274 do {
275 if (it == label_stmt) {
276 return true;
277 }
278 --it;
279 } while (it != goto_stmt->up->children.begin());
280 while (it != goto_stmt->up->children.end()) {
281 if (it == label_stmt) {
282 return true;
283 }
284 ++it;
285 }
286 return false;
287}
288
289Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
290 Statement* const parent{uncle->up};
291 Statement* it{&*nephew};
292 while (it->up != parent) {
293 it = it->up;
294 }
295 return Tree::s_iterator_to(*it);
296}
297
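// True when left_sibling appears before right_sibling in their shared parent's list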
298bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
299 const Node end{right_sibling->up->children.end()};
300 for (auto it = right_sibling; it != end; ++it) {
301 if (it == left_sibling) {
302 return false;
303 }
304 }
305 return true;
306}
307
308bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
309 const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
310 return AreOrdered(sibling, goto_stmt);
311}
312
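// Editor's summary of the pass below (annotation, not an upstream comment):
// GotoPass builds a statement tree from the CFG, then removes each goto in
// reverse creation order. RemoveGoto first makes the goto directly related to
// its label (MoveOutward), then equalizes their nesting levels (MoveOutward /
// MoveInward, with Lift reordering the goto above the label's subtree when
// required), and finally eliminates the now-sibling goto with either a
// conditional or a loop. This follows the shape of the classic goto-elimination
// algorithm described by Erosa and Hendren.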
313class GotoPass {
314public:
315 explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
316 std::vector gotos{BuildTree(cfg)};
317 const auto end{gotos.rend()};
318 for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
319 RemoveGoto(*goto_stmt);
320 }
321 }
322
323 Statement& RootStatement() noexcept {
324 return root_stmt;
325 }
326
327private:
328 void RemoveGoto(Node goto_stmt) {
329 // Force goto_stmt and label_stmt to be directly related
330 const Node label_stmt{goto_stmt->label};
331 if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
332 // Move goto_stmt out using outward-movement transformation until it becomes
333 // directly related to label_stmt
334 while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
335 goto_stmt = MoveOutward(goto_stmt);
336 }
337 }
338 // Force goto_stmt and label_stmt to be siblings
339 if (IsDirectlyRelated(goto_stmt, label_stmt)) {
340 const size_t label_level{Level(label_stmt)};
341 size_t goto_level{Level(goto_stmt)};
342 if (goto_level > label_level) {
343 // Move goto_stmt out of its level using outward-movement transformations
344 while (goto_level > label_level) {
345 goto_stmt = MoveOutward(goto_stmt);
346 --goto_level;
347 }
348 } else { // Level(goto_stmt) < Level(label_stmt)
349 if (NeedsLift(goto_stmt, label_stmt)) {
350 // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
351 // transformations
352 goto_stmt = Lift(goto_stmt);
353 }
354 // Move goto_stmt into label_stmt's level using inward-movement transformation
355 while (goto_level < label_level) {
356 goto_stmt = MoveInward(goto_stmt);
357 ++goto_level;
358 }
359 }
360 }
361 // Expensive operation:
362 // if (!AreSiblings(goto_stmt, label_stmt)) {
363 // throw LogicError("Goto is not a sibling with the label");
364 // }
365 // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
366 if (std::next(goto_stmt) == label_stmt) {
367 // Simply eliminate the goto if the label is next to it
368 goto_stmt->up->children.erase(goto_stmt);
369 } else if (AreOrdered(goto_stmt, label_stmt)) {
370 // Eliminate goto_stmt with a conditional
371 EliminateAsConditional(goto_stmt, label_stmt);
372 } else {
373 // Eliminate goto_stmt with a loop
374 EliminateAsLoop(goto_stmt, label_stmt);
375 }
376 }
377
378 std::vector<Node> BuildTree(Flow::CFG& cfg) {
379 u32 label_id{0};
380 std::vector<Node> gotos;
381 Flow::Function& first_function{cfg.Functions().front()};
382 BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
383 return gotos;
384 }
385
386 void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
387 std::vector<Node>& gotos, Node function_insert_point,
388 std::optional<Node> return_label) {
389 Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
390 Tree& root{root_stmt.children};
391 std::unordered_map<Flow::Block*, Node> local_labels;
392 local_labels.reserve(function.blocks.size());
393
394 for (Flow::Block& block : function.blocks) {
395 Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
396 const Node label_it{root.insert(function_insert_point, *label)};
397 local_labels.emplace(&block, label_it);
398 ++label_id;
399 }
400 for (Flow::Block& block : function.blocks) {
401 const Node label{local_labels.at(&block)};
402 // Insertion point
403 const Node ip{std::next(label)};
404
405 // Reset goto variables before the first block and after its respective label
406 const auto make_reset_variable{[&]() -> Statement& {
407 return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
408 }};
409 root.push_front(make_reset_variable());
410 root.insert(ip, make_reset_variable());
411 root.insert(ip, *pool.Create(&block, &root_stmt));
412
413 switch (block.end_class) {
414 case Flow::EndClass::Branch: {
415 Statement* const always_cond{
416 pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
417 if (block.cond == IR::Condition{true}) {
418 const Node true_label{local_labels.at(block.branch_true)};
419 gotos.push_back(
420 root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
421 } else if (block.cond == IR::Condition{false}) {
422 const Node false_label{local_labels.at(block.branch_false)};
423 gotos.push_back(root.insert(
424 ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
425 } else {
426 const Node true_label{local_labels.at(block.branch_true)};
427 const Node false_label{local_labels.at(block.branch_false)};
428 Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
429 gotos.push_back(
430 root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
431 gotos.push_back(root.insert(
432 ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
433 }
434 break;
435 }
436 case Flow::EndClass::IndirectBranch:
437 root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
438 block.branch_offset, &root_stmt));
439 for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
440 const Node indirect_label{local_labels.at(indirect.block)};
441 Statement* cond{
442 pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
443 Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
444 gotos.push_back(root.insert(ip, *goto_stmt));
445 }
446 root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
447 break;
448 case Flow::EndClass::Call: {
449 Flow::Function& call{cfg.Functions()[block.function_call]};
450 const Node call_return_label{local_labels.at(block.return_block)};
451 BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
452 break;
453 }
454 case Flow::EndClass::Exit:
455 root.insert(ip, *pool.Create(Return{}, &root_stmt));
456 break;
457 case Flow::EndClass::Return: {
458            Statement* const always_cond{pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
459 auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
460 gotos.push_back(root.insert(ip, *goto_stmt));
461 break;
462 }
463 case Flow::EndClass::Kill:
464 root.insert(ip, *pool.Create(Kill{}, &root_stmt));
465 break;
466 }
467 }
468 }
469
470 void UpdateTreeUp(Statement* tree) {
471 for (Statement& stmt : tree->children) {
472 stmt.up = tree;
473 }
474 }
475
476 void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
477 Tree& body{goto_stmt->up->children};
478 Tree if_body;
479 if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
480 Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
481 Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
482 UpdateTreeUp(if_stmt);
483 body.insert(goto_stmt, *if_stmt);
484 body.erase(goto_stmt);
485 }
486
487 void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
488 Tree& body{goto_stmt->up->children};
489 Tree loop_body;
490 loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
491 Statement* const cond{goto_stmt->cond};
492 Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
493 UpdateTreeUp(loop);
494 body.insert(goto_stmt, *loop);
495 body.erase(goto_stmt);
496 }
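    // Editor's sketch of the two eliminations above (schematic pseudocode):
    //
    //   EliminateAsConditional (label after the goto):
    //     if (c) goto L; S0; S1; L:   =>   if (!c) { S0; S1; } L:
    //
    //   EliminateAsLoop (label before the goto):
    //     L: S0; S1; if (c) goto L;   =>   do { L: S0; S1; } while (c);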
497
498 [[nodiscard]] Node MoveOutward(Node goto_stmt) {
499 switch (goto_stmt->up->type) {
500 case StatementType::If:
501 return MoveOutwardIf(goto_stmt);
502 case StatementType::Loop:
503 return MoveOutwardLoop(goto_stmt);
504 default:
505 throw LogicError("Invalid outward movement");
506 }
507 }
508
509 [[nodiscard]] Node MoveInward(Node goto_stmt) {
510 Statement* const parent{goto_stmt->up};
511 Tree& body{parent->children};
512 const Node label{goto_stmt->label};
513 const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
514 const u32 label_id{label->id};
515
516 Statement* const goto_cond{goto_stmt->cond};
517 Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
518 body.insert(goto_stmt, *set_var);
519
520 Tree if_body;
521 if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
522 Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
523 Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
524 if (!if_body.empty()) {
525 Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
526 UpdateTreeUp(if_stmt);
527 body.insert(goto_stmt, *if_stmt);
528 }
529 body.erase(goto_stmt);
530
531 switch (label_nested_stmt->type) {
532 case StatementType::If:
533 // Update nested if condition
534 label_nested_stmt->cond =
535 pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
536 break;
537 case StatementType::Loop:
538 break;
539 default:
540 throw LogicError("Invalid inward movement");
541 }
542 Tree& nested_tree{label_nested_stmt->children};
543 Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
544 return nested_tree.insert(nested_tree.begin(), *new_goto);
545 }
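    // Editor's sketch of MoveInward (schematic): for a forward goto whose label
    // lives inside a following sibling S,
    //     if (c) goto L; A; S { B; L: C; }
    // becomes
    //     v = c; if (!v) { A; } S { if (v) goto L; B; L: C; }
    // where an if-statement S also has its condition widened to (v || cond).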
546
547 [[nodiscard]] Node Lift(Node goto_stmt) {
548 Statement* const parent{goto_stmt->up};
549 Tree& body{parent->children};
550 const Node label{goto_stmt->label};
551 const u32 label_id{label->id};
552 const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
553
554 Tree loop_body;
555 loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
556 SanitizeNoBreaks(loop_body);
557 Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
558 Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
559 UpdateTreeUp(loop_stmt);
560 body.insert(goto_stmt, *loop_stmt);
561
562 Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
563 loop_stmt->children.push_front(*new_goto);
564 const Node new_goto_node{loop_stmt->children.begin()};
565
566 Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
567 loop_stmt->children.push_back(*set_var);
568
569 body.erase(goto_stmt);
570 return new_goto_node;
571 }
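    // Editor's sketch of Lift (schematic): when the goto sits after the
    // statement containing its label,
    //     S { ... L: ... } A; if (c) goto L;
    // becomes
    //     do { if (v) goto L; S { ... L: ... } A; v = c; } while (v);
    // so the goto is reordered above S and can then be moved inward.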
572
573 Node MoveOutwardIf(Node goto_stmt) {
574 const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
575 Tree& body{parent->children};
576 const u32 label_id{goto_stmt->label->id};
577 Statement* const goto_cond{goto_stmt->cond};
578 Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
579 body.insert(goto_stmt, *set_goto_var);
580
581 Tree if_body;
582 if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
584 Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
585 Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
586 Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
587 UpdateTreeUp(if_stmt);
588 body.insert(goto_stmt, *if_stmt);
589
590 body.erase(goto_stmt);
591
592 Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
593 Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
594 Tree& parent_tree{parent->up->children};
595 return parent_tree.insert(std::next(parent), *new_goto);
596 }
597
598 Node MoveOutwardLoop(Node goto_stmt) {
599 Statement* const parent{goto_stmt->up};
600 Tree& body{parent->children};
601 const u32 label_id{goto_stmt->label->id};
602 Statement* const goto_cond{goto_stmt->cond};
603 Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
604 Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
605 Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
606 body.insert(goto_stmt, *set_goto_var);
607 body.insert(goto_stmt, *break_stmt);
608 body.erase(goto_stmt);
609
610 const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
611 Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
612 Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
613 Tree& parent_tree{loop->up->children};
614 return parent_tree.insert(std::next(loop), *new_goto);
615 }
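    // Editor's sketch of the two outward moves (schematic):
    //
    //   MoveOutwardIf:
    //     if (p) { A; if (c) goto L; B; }
    //       =>  if (p) { A; v = c; if (!v) { B; } }  if (v) goto L;
    //
    //   MoveOutwardLoop:
    //     loop { A; if (c) goto L; B; }
    //       =>  loop { A; v = c; if (v) break; B; }  if (v) goto L;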
616
617 ObjectPool<Statement>& pool;
618 Statement root_stmt{FunctionTag{}};
619};
620
621[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
622 Tree& tree{stmt.up->children};
623 const Node end{tree.end()};
624 Node forward_node{std::next(Tree::s_iterator_to(stmt))};
625 while (forward_node != end && !HasChildren(forward_node->type)) {
626 if (forward_node->type == StatementType::Code) {
627 return &*forward_node;
628 }
629 ++forward_node;
630 }
631 return nullptr;
632}
633
634[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
635 switch (stmt.type) {
636 case StatementType::Identity:
637 return ir.Condition(stmt.guest_cond);
638 case StatementType::Not:
639 return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
640 case StatementType::Or:
641 return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
642 case StatementType::Variable:
643 return ir.GetGotoVariable(stmt.id);
644 case StatementType::IndirectBranchCond:
645 return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
646 default:
647 throw NotImplementedException("Statement type {}", stmt.type);
648 }
649}
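// Editor's note: as an example, the condition tree built by GotoPass for
// "goto_L3 || !(P0)" lowers here to
//     ir.LogicalOr(ir.GetGotoVariable(3), ir.LogicalNot(ir.Condition(p0)))
// where p0 is the guest condition held by an Identity statement.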
650
651class TranslatePass {
652public:
653 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
654 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
655 IR::AbstractSyntaxList& syntax_list_)
656 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
657 syntax_list{syntax_list_} {
658 Visit(root_stmt, nullptr, nullptr);
659
660 IR::Block& first_block{*syntax_list.front().data.block};
661 IR::IREmitter ir(first_block, first_block.begin());
662 ir.Prologue();
663 }
664
665private:
666 void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
667 IR::Block* current_block{};
668 const auto ensure_block{[&] {
669 if (current_block) {
670 return;
671 }
672 current_block = block_pool.Create(inst_pool);
673 auto& node{syntax_list.emplace_back()};
674 node.type = IR::AbstractSyntaxNode::Type::Block;
675 node.data.block = current_block;
676 }};
677 Tree& tree{parent.children};
678 for (auto it = tree.begin(); it != tree.end(); ++it) {
679 Statement& stmt{*it};
680 switch (stmt.type) {
681 case StatementType::Label:
682 // Labels can be ignored
683 break;
684 case StatementType::Code: {
685 ensure_block();
686 Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
687 break;
688 }
689 case StatementType::SetVariable: {
690 ensure_block();
691 IR::IREmitter ir{*current_block};
692 ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
693 break;
694 }
695 case StatementType::SetIndirectBranchVariable: {
696 ensure_block();
697 IR::IREmitter ir{*current_block};
698 IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
699 ir.SetIndirectBranchVariable(address);
700 break;
701 }
702 case StatementType::If: {
703 ensure_block();
704 IR::Block* const merge_block{MergeBlock(parent, stmt)};
705
706 // Implement if header block
707 IR::IREmitter ir{*current_block};
708 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
709
710 const size_t if_node_index{syntax_list.size()};
711 syntax_list.emplace_back();
712
713 // Visit children
714 const size_t then_block_index{syntax_list.size()};
715 Visit(stmt, break_block, merge_block);
716
717 IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
718 current_block->AddBranch(then_block);
719 current_block->AddBranch(merge_block);
720 current_block = merge_block;
721
722 auto& if_node{syntax_list[if_node_index]};
723 if_node.type = IR::AbstractSyntaxNode::Type::If;
724 if_node.data.if_node.cond = cond;
725 if_node.data.if_node.body = then_block;
726 if_node.data.if_node.merge = merge_block;
727
728 auto& endif_node{syntax_list.emplace_back()};
729 endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
730 endif_node.data.end_if.merge = merge_block;
731
732 auto& merge{syntax_list.emplace_back()};
733 merge.type = IR::AbstractSyntaxNode::Type::Block;
734 merge.data.block = merge_block;
735 break;
736 }
737 case StatementType::Loop: {
738 IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
739 if (current_block) {
740 current_block->AddBranch(loop_header_block);
741 }
742 auto& header_node{syntax_list.emplace_back()};
743 header_node.type = IR::AbstractSyntaxNode::Type::Block;
744 header_node.data.block = loop_header_block;
745
746 IR::Block* const continue_block{block_pool.Create(inst_pool)};
747 IR::Block* const merge_block{MergeBlock(parent, stmt)};
748
749 const size_t loop_node_index{syntax_list.size()};
750 syntax_list.emplace_back();
751
752 // Visit children
753 const size_t body_block_index{syntax_list.size()};
754 Visit(stmt, merge_block, continue_block);
755
756 // The continue block is located at the end of the loop
757 IR::IREmitter ir{*continue_block};
758 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
759
760 IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
761 loop_header_block->AddBranch(body_block);
762
763 continue_block->AddBranch(loop_header_block);
764 continue_block->AddBranch(merge_block);
765
766 current_block = merge_block;
767
768 auto& loop{syntax_list[loop_node_index]};
769 loop.type = IR::AbstractSyntaxNode::Type::Loop;
770 loop.data.loop.body = body_block;
771 loop.data.loop.continue_block = continue_block;
772 loop.data.loop.merge = merge_block;
773
774 auto& continue_block_node{syntax_list.emplace_back()};
775 continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
776 continue_block_node.data.block = continue_block;
777
778 auto& repeat{syntax_list.emplace_back()};
779 repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
780 repeat.data.repeat.cond = cond;
781 repeat.data.repeat.loop_header = loop_header_block;
782 repeat.data.repeat.merge = merge_block;
783
784 auto& merge{syntax_list.emplace_back()};
785 merge.type = IR::AbstractSyntaxNode::Type::Block;
786 merge.data.block = merge_block;
787 break;
788 }
789 case StatementType::Break: {
790 ensure_block();
791 IR::Block* const skip_block{MergeBlock(parent, stmt)};
792
793 IR::IREmitter ir{*current_block};
794 const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
795 current_block->AddBranch(break_block);
796 current_block->AddBranch(skip_block);
797 current_block = skip_block;
798
799 auto& break_node{syntax_list.emplace_back()};
800 break_node.type = IR::AbstractSyntaxNode::Type::Break;
801 break_node.data.break_node.cond = cond;
802 break_node.data.break_node.merge = break_block;
803 break_node.data.break_node.skip = skip_block;
804
805 auto& merge{syntax_list.emplace_back()};
806 merge.type = IR::AbstractSyntaxNode::Type::Block;
807 merge.data.block = skip_block;
808 break;
809 }
810 case StatementType::Return: {
811 ensure_block();
812 IR::IREmitter{*current_block}.Epilogue();
813 current_block = nullptr;
814 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
815 break;
816 }
817 case StatementType::Kill: {
818 ensure_block();
819 IR::Block* demote_block{MergeBlock(parent, stmt)};
820 IR::IREmitter{*current_block}.DemoteToHelperInvocation();
821 current_block->AddBranch(demote_block);
822 current_block = demote_block;
823
824 auto& merge{syntax_list.emplace_back()};
825 merge.type = IR::AbstractSyntaxNode::Type::Block;
826 merge.data.block = demote_block;
827 break;
828 }
829 case StatementType::Unreachable: {
830 ensure_block();
831 current_block = nullptr;
832 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
833 break;
834 }
835 default:
836 throw NotImplementedException("Statement type {}", stmt.type);
837 }
838 }
839 if (current_block) {
840 if (fallthrough_block) {
841 current_block->AddBranch(fallthrough_block);
842 } else {
843 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
844 }
845 }
846 }
847
848 IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
849 Statement* merge_stmt{TryFindForwardBlock(stmt)};
850 if (!merge_stmt) {
851 // Create a merge block we can visit later
852 merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
853 parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
854 }
855 return block_pool.Create(inst_pool);
856 }
857
858 ObjectPool<Statement>& stmt_pool;
859 ObjectPool<IR::Inst>& inst_pool;
860 ObjectPool<IR::Block>& block_pool;
861 Environment& env;
862 IR::AbstractSyntaxList& syntax_list;
863
864// TODO: Remove this when all compilers support C++20 constexpr std::vector
865#if __cpp_lib_constexpr_vector >= 201907
866 static constexpr Flow::Block dummy_flow_block;
867#else
868 const Flow::Block dummy_flow_block;
869#endif
870};
871} // Anonymous namespace
872
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) {
875 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
880 return syntax_list;
881}
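// Editor's usage sketch (assumes an Environment `env` and a decoded Flow::CFG
// `cfg` from the surrounding pipeline; not upstream code):
//
//     ObjectPool<IR::Inst> inst_pool;
//     ObjectPool<IR::Block> block_pool;
//     IR::AbstractSyntaxList asl{BuildASL(inst_pool, block_pool, env, cfg)};
//     IR::Block& entry{*asl.front().data.block}; // Prologue() is already emitted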
882
883} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h"
13
14namespace Shader::Maxwell {
15
16[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg);
19
20} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21 SAFEADD,
22};
23
24enum class AtomSize : u64 {
25 U32,
26 S32,
27 U64,
28 F32,
29 F16x2,
30 S64,
31};
32
33IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::U32U64& op_b,
34 AtomOp op, bool is_signed) {
35 switch (op) {
36 case AtomOp::ADD:
37 return ir.GlobalAtomicIAdd(offset, op_b);
38 case AtomOp::MIN:
39 return ir.GlobalAtomicIMin(offset, op_b, is_signed);
40 case AtomOp::MAX:
41 return ir.GlobalAtomicIMax(offset, op_b, is_signed);
42 case AtomOp::INC:
43 return ir.GlobalAtomicInc(offset, op_b);
44 case AtomOp::DEC:
45 return ir.GlobalAtomicDec(offset, op_b);
46 case AtomOp::AND:
47 return ir.GlobalAtomicAnd(offset, op_b);
48 case AtomOp::OR:
49 return ir.GlobalAtomicOr(offset, op_b);
50 case AtomOp::XOR:
51 return ir.GlobalAtomicXor(offset, op_b);
52 case AtomOp::EXCH:
53 return ir.GlobalAtomicExchange(offset, op_b);
54 default:
55 throw NotImplementedException("Integer Atom Operation {}", op);
56 }
57}
58
59IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
60 AtomSize size) {
61 static constexpr IR::FpControl f16_control{
62 .no_contraction = false,
63 .rounding = IR::FpRounding::RN,
64 .fmz_mode = IR::FmzMode::DontCare,
65 };
66 static constexpr IR::FpControl f32_control{
67 .no_contraction = false,
68 .rounding = IR::FpRounding::RN,
69 .fmz_mode = IR::FmzMode::FTZ,
70 };
71 switch (op) {
72 case AtomOp::ADD:
73 return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
74 : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
75 case AtomOp::MIN:
76 return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
77 case AtomOp::MAX:
78 return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
79 default:
80 throw NotImplementedException("FP Atom Operation {}", op);
81 }
82}
83
84IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
85 union {
86 u64 raw;
87 BitField<8, 8, IR::Reg> addr_reg;
88 BitField<28, 20, s64> addr_offset;
89 BitField<28, 20, u64> rz_addr_offset;
90 BitField<48, 1, u64> e;
91 } const mem{insn};
92
93 const IR::U64 address{[&]() -> IR::U64 {
94 if (mem.e == 0) {
95 return v.ir.UConvert(64, v.X(mem.addr_reg));
96 }
97 return v.L(mem.addr_reg);
98 }()};
99 const u64 addr_offset{[&]() -> u64 {
100 if (mem.addr_reg == IR::Reg::RZ) {
101 // When RZ is used, the address is an absolute address
102 return static_cast<u64>(mem.rz_addr_offset.Value());
103 } else {
104 return static_cast<u64>(mem.addr_offset.Value());
105 }
106 }()};
107 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
108}
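// Editor's note (hypothetical encodings): with e == 1 the register pair holds a
// full 64-bit address, so ATOM.E [R2+0x10] computes IAdd(L(R2), Imm64(0x10));
// with e == 0 the 32-bit register is zero-extended to 64 bits first. When the
// address register is RZ, the 20-bit offset is reinterpreted as unsigned and
// used as an absolute address.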
109
110bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
111 // TODO: SAFEADD
112 switch (size) {
113 case AtomSize::S32:
114 case AtomSize::U64:
115 return (op == AtomOp::INC || op == AtomOp::DEC);
116 case AtomSize::S64:
117 return !(op == AtomOp::MIN || op == AtomOp::MAX);
118 case AtomSize::F32:
119 return op != AtomOp::ADD;
120 case AtomSize::F16x2:
121 return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
122 default:
123 return false;
124 }
125}
126
127IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
128 switch (size) {
129 case AtomSize::U32:
130 case AtomSize::S32:
131 case AtomSize::F32:
132 case AtomSize::F16x2:
133 return ir.LoadGlobal32(offset);
134 case AtomSize::U64:
135 case AtomSize::S64:
136 return ir.PackUint2x32(ir.LoadGlobal64(offset));
137 default:
138 throw NotImplementedException("Atom Size {}", size);
139 }
140}
141
142void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
143 switch (size) {
144 case AtomSize::U32:
145 case AtomSize::S32:
146 case AtomSize::F16x2:
147 return v.X(dest_reg, IR::U32{result});
148 case AtomSize::U64:
149 case AtomSize::S64:
150 return v.L(dest_reg, IR::U64{result});
151 case AtomSize::F32:
152 return v.F(dest_reg, IR::F32{result});
153 default:
154 break;
155 }
156}
157
158IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
159 AtomSize size, AtomOp op) {
160 switch (size) {
161 case AtomSize::U32:
162 case AtomSize::S32:
163 return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
164 case AtomSize::U64:
165 case AtomSize::S64:
166 return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
167 case AtomSize::F32:
168 return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
169 case AtomSize::F16x2: {
170 return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
171 }
172 default:
173 throw NotImplementedException("Atom Size {}", size);
174 }
175}
176
177void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
178 const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
179 IR::Value result;
180 if (AtomOpNotApplicable(size, op)) {
181 result = LoadGlobal(v.ir, offset, size);
182 } else {
183 result = ApplyAtomOp(v, operand_reg, offset, size, op);
184 }
185 if (write_dest) {
186 StoreResult(v, dest_reg, result, size);
187 }
188}
189} // Anonymous namespace
190
191void TranslatorVisitor::ATOM(u64 insn) {
192 union {
193 u64 raw;
194 BitField<0, 8, IR::Reg> dest_reg;
195 BitField<20, 8, IR::Reg> operand_reg;
196 BitField<49, 3, AtomSize> size;
197 BitField<52, 4, AtomOp> op;
198 } const atom{insn};
199 const IR::U64 offset{AtomOffset(*this, insn)};
200 GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
201}
202
203void TranslatorVisitor::RED(u64 insn) {
204 union {
205 u64 raw;
206 BitField<0, 8, IR::Reg> operand_reg;
207 BitField<20, 3, AtomSize> size;
208 BitField<23, 3, AtomOp> op;
209 } const red{insn};
210 const IR::U64 offset{AtomOffset(*this, insn)};
    // RED is a reduction: unlike ATOM it does not write back a destination register
211    GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, false);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class AtomOp : u64 {
12 ADD,
13 MIN,
14 MAX,
15 INC,
16 DEC,
17 AND,
18 OR,
19 XOR,
20 EXCH,
21};
22
23enum class AtomsSize : u64 {
24 U32,
25 S32,
26 U64,
27};
28
29IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
30 bool is_signed) {
31 switch (op) {
32 case AtomOp::ADD:
33 return ir.SharedAtomicIAdd(offset, op_b);
34 case AtomOp::MIN:
35 return ir.SharedAtomicIMin(offset, op_b, is_signed);
36 case AtomOp::MAX:
37 return ir.SharedAtomicIMax(offset, op_b, is_signed);
38 case AtomOp::INC:
39 return ir.SharedAtomicInc(offset, op_b);
40 case AtomOp::DEC:
41 return ir.SharedAtomicDec(offset, op_b);
42 case AtomOp::AND:
43 return ir.SharedAtomicAnd(offset, op_b);
44 case AtomOp::OR:
45 return ir.SharedAtomicOr(offset, op_b);
46 case AtomOp::XOR:
47 return ir.SharedAtomicXor(offset, op_b);
48 case AtomOp::EXCH:
49 return ir.SharedAtomicExchange(offset, op_b);
50 default:
51 throw NotImplementedException("Integer Atoms Operation {}", op);
52 }
53}
54
55IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
56 union {
57 u64 raw;
58 BitField<8, 8, IR::Reg> offset_reg;
59 BitField<30, 22, u64> absolute_offset;
60 BitField<30, 22, s64> relative_offset;
61 } const encoding{insn};
62
63 if (encoding.offset_reg == IR::Reg::RZ) {
64 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
65 } else {
66 const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
67 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
68 }
69}
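// Editor's note: the << 2 scales the encoded word offset to bytes, e.g. an
// encoded offset of 3 addresses shared-memory byte 12. With RZ the field is an
// unsigned absolute offset; otherwise it is a signed displacement added to the
// offset register.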
70
71void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
72 switch (size) {
73 case AtomsSize::U32:
74 case AtomsSize::S32:
75 return v.X(dest_reg, IR::U32{result});
76 case AtomsSize::U64:
77 return v.L(dest_reg, IR::U64{result});
78 default:
79 break;
80 }
81}
82} // Anonymous namespace
83
84void TranslatorVisitor::ATOMS(u64 insn) {
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<8, 8, IR::Reg> addr_reg;
89 BitField<20, 8, IR::Reg> src_reg_b;
90 BitField<28, 2, AtomsSize> size;
91 BitField<52, 4, AtomOp> op;
92 } const atoms{insn};
93
94 const bool size_64{atoms.size == AtomsSize::U64};
95 if (size_64 && atoms.op != AtomOp::EXCH) {
96 throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
97 }
98 const bool is_signed{atoms.size == AtomsSize::S32};
99 const IR::U32 offset{AtomsOffset(*this, insn)};
100
101 IR::Value result;
102 if (size_64) {
103 result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
104 } else {
105 result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
106 }
107 StoreResult(*this, atoms.dest_reg, result, atoms.size);
108}
109
110} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12enum class BitSize : u64 {
13 B32,
14 B64,
15 B96,
16 B128,
17};
18
19void TranslatorVisitor::AL2P(u64 inst) {
20 union {
21 u64 raw;
22 BitField<0, 8, IR::Reg> result_register;
23 BitField<8, 8, IR::Reg> indexing_register;
24 BitField<20, 11, s64> offset;
25 BitField<47, 2, BitSize> bitsize;
26 } al2p{inst};
27 if (al2p.bitsize != BitSize::B32) {
28 throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
29 }
30 const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
31 const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
32 X(al2p.result_register, result);
33}
34
35} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13// Seems to be in CUDA terminology.
14enum class LocalScope : u64 {
15 CTA,
16 GL,
17 SYS,
18 VC,
19};
20} // Anonymous namespace
21
22void TranslatorVisitor::MEMBAR(u64 inst) {
23 union {
24 u64 raw;
25 BitField<8, 2, LocalScope> scope;
26 } const membar{inst};
27
28 if (membar.scope == LocalScope::CTA) {
29 ir.WorkgroupMemoryBarrier();
30 } else {
31 ir.DeviceMemoryBarrier();
32 }
33}
34
35void TranslatorVisitor::DEPBAR() {
36 // DEPBAR is a no-op
37}
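// Editor's note: DEPBAR waits on hardware scoreboard dependencies between
// long-latency instructions; the IR expresses those dependencies through data
// flow, so treating it as a no-op is presumed safe here.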
38
39void TranslatorVisitor::BAR(u64 insn) {
40 enum class Mode {
41 RedPopc,
42 Scan,
43 RedAnd,
44 RedOr,
45 Sync,
46 Arrive,
47 };
48 union {
49 u64 raw;
50 BitField<43, 1, u64> is_a_imm;
51 BitField<44, 1, u64> is_b_imm;
52 BitField<8, 8, u64> imm_a;
53 BitField<20, 12, u64> imm_b;
54 BitField<42, 1, u64> neg_pred;
55 BitField<39, 3, IR::Pred> pred;
56 } const bar{insn};
57
58 const Mode mode{[insn] {
59 switch (insn & 0x0000009B00000000ULL) {
60 case 0x0000000200000000ULL:
61 return Mode::RedPopc;
62 case 0x0000000300000000ULL:
63 return Mode::Scan;
64 case 0x0000000A00000000ULL:
65 return Mode::RedAnd;
66 case 0x0000001200000000ULL:
67 return Mode::RedOr;
68 case 0x0000008000000000ULL:
69 return Mode::Sync;
70 case 0x0000008100000000ULL:
71 return Mode::Arrive;
72 }
73 throw NotImplementedException("Invalid encoding");
74 }()};
75 if (mode != Mode::Sync) {
76 throw NotImplementedException("BAR mode {}", mode);
77 }
78 if (bar.is_a_imm == 0) {
79 throw NotImplementedException("Non-immediate input A");
80 }
81 if (bar.imm_a != 0) {
82 throw NotImplementedException("Non-zero input A");
83 }
84 if (bar.is_b_imm == 0) {
85 throw NotImplementedException("Non-immediate input B");
86 }
87 if (bar.imm_b != 0) {
88 throw NotImplementedException("Non-zero input B");
89 }
    // The input predicate is true only when it is PT and not negated
90    if (bar.pred != IR::Pred::PT || bar.neg_pred != 0) {
91 throw NotImplementedException("Non-true input predicate");
92 }
93 ir.Barrier();
94}
95
96} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> offset_reg;
16 BitField<40, 1, u64> brev;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const bfe{insn};
20
21 const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
22 const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};
23
24 // Common constants
25 const IR::U32 zero{v.ir.Imm32(0)};
26 const IR::U32 one{v.ir.Imm32(1)};
27 const IR::U32 max_size{v.ir.Imm32(32)};
28 // Edge case conditions
29 const IR::U1 zero_count{v.ir.IEqual(count, zero)};
30 const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
31 const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};
32
33 IR::U32 base{v.X(bfe.offset_reg)};
34 if (bfe.brev != 0) {
35 base = v.ir.BitReverse(base);
36 }
37 IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
38 if (bfe.is_signed != 0) {
39 const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
40 const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
41 const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
42 // Replicate condition
43 result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
44 // Exceeding condition
45 const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
46 result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
47 }
48 // Zero count condition
49 result = IR::U32{v.ir.Select(zero_count, zero, result)};
50
51 v.X(bfe.dest_reg, result);
52
53 if (bfe.cc != 0) {
54 v.SetZFlag(v.ir.IEqual(result, zero));
55 v.SetSFlag(v.ir.ILessThan(result, zero, true));
56 v.ResetCFlag();
57 v.ResetOFlag();
58 }
59}
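// Editor's worked example (hypothetical values): src = 0x00000408 encodes
// offset = 8 and count = 4, extracting bits [11:8] of the base register. The
// Selects above resolve the edge cases: count == 0 forces a zero result, and
// in signed mode offset >= 32 replicates the sign bit while
// offset + count >= 32 pins bit 31 to the base's sign bit.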
60} // Anonymous namespace
61
62void TranslatorVisitor::BFE_reg(u64 insn) {
63 BFE(*this, insn, GetReg20(insn));
64}
65
66void TranslatorVisitor::BFE_cbuf(u64 insn) {
67 BFE(*this, insn, GetCbuf(insn));
68}
69
70void TranslatorVisitor::BFE_imm(u64 insn) {
71 BFE(*this, insn, GetImm20(insn));
72}
73
74} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> insert_reg;
16 BitField<47, 1, u64> cc;
17 } const bfi{insn};
18
19 const IR::U32 zero{v.ir.Imm32(0)};
20 const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
21 const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
22 const IR::U32 max_size{v.ir.Imm32(32)};
23
24 // Edge case conditions
25 const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
26 const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};
27
28 const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
29 const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};
30
31 const IR::U32 insert{v.X(bfi.insert_reg)};
32 IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};
33
34 result = IR::U32{v.ir.Select(exceed_offset, base, result)};
35
36 v.X(bfi.dest_reg, result);
37 if (bfi.cc != 0) {
38 v.SetZFlag(v.ir.IEqual(result, zero));
39 v.SetSFlag(v.ir.ILessThan(result, zero, true));
40 v.ResetCFlag();
41 v.ResetOFlag();
42 }
43}
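// Editor's worked example (hypothetical values): src_a = 0x00000810 encodes
// offset = 16 and count = 8, inserting the low 8 bits of the insert register
// into bits [23:16] of the base. A count above 32 is clamped to the bits
// remaining past the offset, and an offset >= 32 leaves the base unchanged.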
44} // Anonymous namespace
45
46void TranslatorVisitor::BFI_reg(u64 insn) {
47 BFI(*this, insn, GetReg20(insn), GetReg39(insn));
48}
49
50void TranslatorVisitor::BFI_rc(u64 insn) {
51 BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
52}
53
54void TranslatorVisitor::BFI_cr(u64 insn) {
55 BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
56}
57
58void TranslatorVisitor::BFI_imm(u64 insn) {
59 BFI(*this, insn, GetImm20(insn), GetReg39(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void Check(u64 insn) {
13 union {
14 u64 raw;
15 BitField<5, 1, u64> cbuf_mode;
16 BitField<6, 1, u64> lmt;
17 } const encoding{insn};
18
19 if (encoding.cbuf_mode != 0) {
20 throw NotImplementedException("Constant buffer mode");
21 }
22 if (encoding.lmt != 0) {
23 throw NotImplementedException("LMT");
24 }
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::BRX(u64 insn) {
29 Check(insn);
30}
31
32void TranslatorVisitor::JMX(u64 insn) {
33 Check(insn);
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
11
12namespace Shader::Maxwell {
13
14enum class FpRounding : u64 {
15 RN,
16 RM,
17 RP,
18 RZ,
19};
20
21enum class FmzMode : u64 {
22 None,
23 FTZ,
24 FMZ,
25 INVALIDFMZ3,
26};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 // FMZ is manually handled in the instruction
50 return IR::FmzMode::FTZ;
51 case FmzMode::INVALIDFMZ3:
52 break;
53 }
54 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
55}
56
57} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
6
7namespace Shader::Maxwell {
8IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
9 CompareOp compare_op, bool is_signed) {
10 switch (compare_op) {
11 case CompareOp::False:
12 return ir.Imm1(false);
13 case CompareOp::LessThan:
14 return ir.ILessThan(operand_1, operand_2, is_signed);
15 case CompareOp::Equal:
16 return ir.IEqual(operand_1, operand_2);
17 case CompareOp::LessThanEqual:
18 return ir.ILessThanEqual(operand_1, operand_2, is_signed);
19 case CompareOp::GreaterThan:
20 return ir.IGreaterThan(operand_1, operand_2, is_signed);
21 case CompareOp::NotEqual:
22 return ir.INotEqual(operand_1, operand_2);
23 case CompareOp::GreaterThanEqual:
24 return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
25 case CompareOp::True:
26 return ir.Imm1(true);
27 default:
28 throw NotImplementedException("Invalid compare op {}", compare_op);
29 }
30}
31
32IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
33 CompareOp compare_op, bool is_signed) {
34 const IR::U32 zero{ir.Imm32(0)};
35 const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
36 const IR::U1 z_flag{ir.GetZFlag()};
37 const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
38 const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
39 : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
40 ir.ILessThan(operand_2, zero, true))};
41 switch (compare_op) {
42 case CompareOp::False:
43 return ir.Imm1(false);
44 case CompareOp::LessThan:
45 return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
46 ir.ILessThan(intermediate, zero, true))};
47 case CompareOp::Equal:
48 return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
49 case CompareOp::LessThanEqual: {
50 const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
51 ir.ILessThan(intermediate, zero, true))};
52 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
53 }
54 case CompareOp::GreaterThan: {
55 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
56 ir.IGreaterThan(intermediate, zero, true))};
57 const IR::U1 not_z{ir.LogicalNot(z_flag)};
58 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
59 }
60 case CompareOp::NotEqual:
61 return ir.LogicalOr(ir.INotEqual(intermediate, zero),
62 ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
63 case CompareOp::GreaterThanEqual: {
64 const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
65 ir.IGreaterThanEqual(intermediate, zero, true))};
66 return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
67 }
68 case CompareOp::True:
69 return ir.Imm1(true);
70 default:
71 throw NotImplementedException("Invalid compare op {}", compare_op);
72 }
73}
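// Editor's note: this models the high half of a carried (64-bit) comparison:
// intermediate recomputes operand_1 - operand_2 - !C as a + ~b + carry, and the
// Z flag produced by the low-word comparison is folded in so that, for example,
// 64-bit equality requires the high difference to be zero AND the low words to
// have compared equal.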
74
75IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
76 BooleanOp bop) {
77 switch (bop) {
78 case BooleanOp::AND:
79 return ir.LogicalAnd(predicate_1, predicate_2);
80 case BooleanOp::OR:
81 return ir.LogicalOr(predicate_1, predicate_2);
82 case BooleanOp::XOR:
83 return ir.LogicalXor(predicate_1, predicate_2);
84 default:
85 throw NotImplementedException("Invalid bop {}", bop);
86 }
87}
88
89IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
90 switch (op) {
91 case PredicateOp::False:
92 return ir.Imm1(false);
93 case PredicateOp::True:
94 return ir.Imm1(true);
95 case PredicateOp::Zero:
96 return ir.IEqual(result, ir.Imm32(0));
97 case PredicateOp::NonZero:
98 return ir.INotEqual(result, ir.Imm32(0));
99 default:
100 throw NotImplementedException("Invalid Predicate operation {}", op);
101 }
102}
103
104bool IsCompareOpOrdered(FPCompareOp op) {
105 switch (op) {
106 case FPCompareOp::LTU:
107 case FPCompareOp::EQU:
108 case FPCompareOp::LEU:
109 case FPCompareOp::GTU:
110 case FPCompareOp::NEU:
111 case FPCompareOp::GEU:
112 return false;
113 default:
114 return true;
115 }
116}
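// Editor's note: the U-suffixed comparisons are unordered; they evaluate true
// when either operand is NaN, whereas the ordered forms evaluate false. For
// example, LT with a NaN operand is false but LTU is true.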
117
118IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
119 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
120 IR::FpControl control) {
121 const bool ordered{IsCompareOpOrdered(compare_op)};
122 switch (compare_op) {
123 case FPCompareOp::F:
124 return ir.Imm1(false);
125 case FPCompareOp::LT:
126 case FPCompareOp::LTU:
127 return ir.FPLessThan(operand_1, operand_2, control, ordered);
128 case FPCompareOp::EQ:
129 case FPCompareOp::EQU:
130 return ir.FPEqual(operand_1, operand_2, control, ordered);
131 case FPCompareOp::LE:
132 case FPCompareOp::LEU:
133 return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
134 case FPCompareOp::GT:
135 case FPCompareOp::GTU:
136 return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
137 case FPCompareOp::NE:
138 case FPCompareOp::NEU:
139 return ir.FPNotEqual(operand_1, operand_2, control, ordered);
140 case FPCompareOp::GE:
141 case FPCompareOp::GEU:
142 return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
143 case FPCompareOp::NUM:
144 return ir.FPOrdered(operand_1, operand_2);
145 case FPCompareOp::Nan:
146 return ir.FPUnordered(operand_1, operand_2);
147 case FPCompareOp::T:
148 return ir.Imm1(true);
149 default:
150 throw NotImplementedException("Invalid FP compare op {}", compare_op);
151 }
152}
153} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
12 const IR::U32& operand_2, CompareOp compare_op, bool is_signed);
13
14[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
15 const IR::U32& operand_2, CompareOp compare_op,
16 bool is_signed);
17
18[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
19 const IR::U1& predicate_2, BooleanOp bop);
20
21[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
22
23[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
24
25[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
26 const IR::F16F32F64& operand_2, FPCompareOp compare_op,
27 IR::FpControl control = {});
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12void TranslatorVisitor::CSET(u64 insn) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 5, IR::FlowTest> cc_test;
17 BitField<39, 3, IR::Pred> bop_pred;
18 BitField<42, 1, u64> neg_bop_pred;
19 BitField<44, 1, u64> bf;
20 BitField<45, 2, BooleanOp> bop;
21 BitField<47, 1, u64> cc;
22 } const cset{insn};
23
24 const IR::U32 one_mask{ir.Imm32(-1)};
25 const IR::U32 fp_one{ir.Imm32(0x3f800000)};
26 const IR::U32 zero{ir.Imm32(0)};
27 const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
28 const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
29 const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
30 const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
31 const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
32 X(cset.dest_reg, result);
33 if (cset.cc != 0) {
34 const IR::U1 is_zero{ir.IEqual(result, zero)};
35 SetZFlag(is_zero);
36 if (cset.bf != 0) {
37 ResetSFlag();
38 } else {
39 SetSFlag(ir.LogicalNot(is_zero));
40 }
41 ResetOFlag();
42 ResetCFlag();
43 }
44}
45
46void TranslatorVisitor::CSETP(u64 insn) {
47 union {
48 u64 raw;
49 BitField<0, 3, IR::Pred> dest_pred_b;
50 BitField<3, 3, IR::Pred> dest_pred_a;
51 BitField<8, 5, IR::FlowTest> cc_test;
52 BitField<39, 3, IR::Pred> bop_pred;
53 BitField<42, 1, u64> neg_bop_pred;
54 BitField<45, 2, BooleanOp> bop;
55 } const csetp{insn};
56
57 const BooleanOp bop{csetp.bop};
58 const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
59 const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
60 const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
61 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
62 ir.SetPred(csetp.dest_pred_a, result_a);
63 ir.SetPred(csetp.dest_pred_b, result_b);
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<45, 1, u64> neg_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> neg_a;
23 BitField<49, 1, u64> abs_b;
24 } const dadd{insn};
25 if (dadd.cc != 0) {
26 throw NotImplementedException("DADD CC");
27 }
28
29 const IR::F64 src_a{v.D(dadd.src_a_reg)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
32
33 const IR::FpControl control{
34 .no_contraction = true,
35 .rounding = CastFpRounding(dadd.fp_rounding),
36 .fmz_mode = IR::FmzMode::None,
37 };
38
39 v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DADD_reg(u64 insn) {
44 DADD(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DADD_cbuf(u64 insn) {
48 DADD(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DADD_imm(u64 insn) {
52 DADD(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 } const dset{insn};
28
29 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
30 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
31
32 IR::U1 pred{v.ir.GetPred(dset.pred)};
33 if (dset.neg_pred != 0) {
34 pred = v.ir.LogicalNot(pred);
35 }
36 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
37 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
38
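    // The BF bit picks the "true" encoding: an all-ones integer mask when clear, the f32
    // constant 1.0f when set.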
39 const IR::U32 one_mask{v.ir.Imm32(-1)};
40 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
41 const IR::U32 zero{v.ir.Imm32(0)};
42 const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
43 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
44
45 v.X(dset.dest_reg, result);
46 if (dset.cc != 0) {
47 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
48 v.SetZFlag(is_zero);
49 if (dset.bf != 0) {
50 v.ResetSFlag();
51 } else {
52 v.SetSFlag(v.ir.LogicalNot(is_zero));
53 }
54 v.ResetCFlag();
55 v.ResetOFlag();
56 }
57}
58} // Anonymous namespace
59
60void TranslatorVisitor::DSET_reg(u64 insn) {
61 DSET(*this, insn, GetDoubleReg20(insn));
62}
63
64void TranslatorVisitor::DSET_cbuf(u64 insn) {
65 DSET(*this, insn, GetDoubleCbuf(insn));
66}
67
68void TranslatorVisitor::DSET_imm(u64 insn) {
69 DSET(*this, insn, GetDoubleImm20(insn));
70}
71
72} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18        BitField<47, 1, u64> cc;
19        BitField<48, 1, u64> neg_b;
20        BitField<49, 1, u64> neg_c;
21        BitField<50, 2, FpRounding> fp_rounding;
22 } const dfma{insn};
23
24 if (dfma.cc != 0) {
25 throw NotImplementedException("DFMA CC");
26 }
27
28 const IR::F64 src_a{v.D(dfma.src_a_reg)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
30 const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
31
32 const IR::FpControl control{
33 .no_contraction = true,
34 .rounding = CastFpRounding(dfma.fp_rounding),
35 .fmz_mode = IR::FmzMode::None,
36 };
37
38 v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DFMA_reg(u64 insn) {
43 DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
44}
45
46void TranslatorVisitor::DFMA_cr(u64 insn) {
47 DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
48}
49
50void TranslatorVisitor::DFMA_rc(u64 insn) {
51 DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
52}
53
54void TranslatorVisitor::DFMA_imm(u64 insn) {
55 DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<45, 1, u64> negate_b;
19 BitField<46, 1, u64> abs_a;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> negate_a;
22 BitField<49, 1, u64> abs_b;
23 } const dmnmx{insn};
24
25 if (dmnmx.cc != 0) {
26 throw NotImplementedException("DMNMX CC");
27 }
28
29 const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
32
33 IR::F64 max{v.ir.FPMax(op_a, op_b)};
34 IR::F64 min{v.ir.FPMin(op_a, op_b)};
35
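    // Both candidates are computed; the predicate selects min or max, and a negated
    // predicate simply swaps the selection.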
36 if (dmnmx.neg_pred != 0) {
37 std::swap(min, max);
38 }
39 v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DMNMX_reg(u64 insn) {
44 DMNMX(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
48 DMNMX(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DMNMX_imm(u64 insn) {
52 DMNMX(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg;
21 } const dmul{insn};
22
23 if (dmul.cc != 0) {
24 throw NotImplementedException("DMUL CC");
25 }
26
27 const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
28 const IR::FpControl control{
29 .no_contraction = true,
30 .rounding = CastFpRounding(dmul.fp_rounding),
31 .fmz_mode = IR::FmzMode::None,
32 };
33
34 v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
35}
36} // Anonymous namespace
37
38void TranslatorVisitor::DMUL_reg(u64 insn) {
39 DMUL(*this, insn, GetDoubleReg20(insn));
40}
41
42void TranslatorVisitor::DMUL_cbuf(u64 insn) {
43 DMUL(*this, insn, GetDoubleCbuf(insn));
44}
45
46void TranslatorVisitor::DMUL_imm(u64 insn) {
47 DMUL(*this, insn, GetDoubleImm20(insn));
48}
49
50} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<48, 4, FPCompareOp> compare_op;
26 } const dsetp{insn};
27
28 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
30
31 const BooleanOp bop{dsetp.bop};
32 const FPCompareOp compare_op{dsetp.compare_op};
33 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
34 const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
35 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
36 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
37 v.ir.SetPred(dsetp.dest_pred_a, result_a);
38 v.ir.SetPred(dsetp.dest_pred_b, result_b);
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DSETP_reg(u64 insn) {
43 DSETP(*this, insn, GetDoubleReg20(insn));
44}
45
46void TranslatorVisitor::DSETP_cbuf(u64 insn) {
47 DSETP(*this, insn, GetDoubleCbuf(insn));
48}
49
50void TranslatorVisitor::DSETP_imm(u64 insn) {
51 DSETP(*this, insn, GetDoubleImm20(insn));
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ExitFragment(TranslatorVisitor& v) {
12 const ProgramHeader sph{v.env.SPH()};
13 IR::Reg src_reg{IR::Reg::R0};
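    // Fragment color outputs are read from consecutive registers starting at R0, one per
    // enabled component.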
14 for (u32 render_target = 0; render_target < 8; ++render_target) {
15 const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
16 for (u32 component = 0; component < 4; ++component) {
17 if (!mask[component]) {
18 continue;
19 }
20 v.ir.SetFragColor(render_target, component, v.F(src_reg));
21 ++src_reg;
22 }
23 }
24 if (sph.ps.omap.sample_mask != 0) {
25 v.ir.SetSampleMask(v.X(src_reg));
26 }
27 if (sph.ps.omap.depth != 0) {
28 v.ir.SetFragDepth(v.F(src_reg + 1));
29 }
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::EXIT() {
34 switch (env.ShaderStage()) {
35 case Stage::Fragment:
36 ExitFragment(*this);
37 break;
38 default:
39 break;
40 }
41}
42
43} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 BitField<41, 1, u64> shift;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const flo{insn};
20
21 if (flo.cc != 0) {
22 throw NotImplementedException("CC");
23 }
24 if (flo.tilde != 0) {
25 src = v.ir.BitwiseNot(src);
26 }
27 IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
28 if (flo.shift != 0) {
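        // XOR with 31 maps a found bit index i to 31 - i (the count from the top bit),
        // while the -1 "not found" code is passed through unchanged.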
29 const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
30 result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
31 }
32 v.X(flo.dest_reg, result);
33}
34} // Anonymous namespace
35
36void TranslatorVisitor::FLO_reg(u64 insn) {
37 FLO(*this, insn, GetReg20(insn));
38}
39
40void TranslatorVisitor::FLO_cbuf(u64 insn) {
41 FLO(*this, insn, GetCbuf(insn));
42}
43
44void TranslatorVisitor::FLO_imm(u64 insn) {
45 FLO(*this, insn, GetImm20(insn));
46}
47} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
13 const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const fadd{insn};
19
20 if (cc) {
21 throw NotImplementedException("FADD CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
25 IR::FpControl control{
26 .no_contraction = true,
27 .rounding = CastFpRounding(fp_rounding),
28 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
29 };
30 IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
31 if (sat) {
32 value = v.ir.FPSaturate(value);
33 }
34 v.F(fadd.dest_reg, value);
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetFloatReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64 insn) {
60 FADD(*this, insn, GetFloatCbuf(insn));
61}
62
63void TranslatorVisitor::FADD_imm(u64 insn) {
64 FADD(*this, insn, GetFloatImm20(insn));
65}
66
67void TranslatorVisitor::FADD32I(u64 insn) {
68 union {
69 u64 raw;
70        BitField<52, 1, u64> cc;
71        BitField<53, 1, u64> neg_b;
72        BitField<54, 1, u64> abs_a;
73        BitField<55, 1, u64> ftz;
74        BitField<56, 1, u64> neg_a;
75        BitField<57, 1, u64> abs_b;
76 } const fadd32i{insn};
77
78 FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
79 fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<47, 1, u64> ftz;
18 BitField<48, 4, FPCompareOp> compare_op;
19 } const fcmp{insn};
20
21 const IR::F32 zero{v.ir.Imm32(0.0f)};
22 const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
23 const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
24 const IR::U32 src_reg{v.X(fcmp.src_reg)};
25 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
26
27 v.X(fcmp.dest_reg, result);
28}
29} // Anonymous namespace
30
31void TranslatorVisitor::FCMP_reg(u64 insn) {
32 FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
33}
34
35void TranslatorVisitor::FCMP_rc(u64 insn) {
36 FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
37}
38
39void TranslatorVisitor::FCMP_cr(u64 insn) {
40 FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
41}
42
43void TranslatorVisitor::FCMP_imm(u64 insn) {
44 union {
45 u64 raw;
46 BitField<20, 19, u64> value;
47 BitField<56, 1, u64> is_negative;
48 } const fcmp{insn};
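    // The 19-bit immediate supplies the upper bits of an f32; the low 12 bits are
    // implicitly zero.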
49 const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
50 const u32 value{static_cast<u32>(fcmp.value) << 12};
51
52 FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 BitField<55, 1, u64> ftz;
28 } const fset{insn};
29
30 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
31    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)};
32 const IR::FpControl control{
33 .no_contraction = false,
34 .rounding = IR::FpRounding::DontCare,
35 .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
36 };
37
38 IR::U1 pred{v.ir.GetPred(fset.pred)};
39 if (fset.neg_pred != 0) {
40 pred = v.ir.LogicalNot(pred);
41 }
42 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 zero{v.ir.Imm32(0)};
48 const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
49 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
50
51 v.X(fset.dest_reg, result);
52 if (fset.cc != 0) {
53 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
54 v.SetZFlag(is_zero);
55 if (fset.bf != 0) {
56 v.ResetSFlag();
57 } else {
58 v.SetSFlag(v.ir.LogicalNot(is_zero));
59 }
60 v.ResetCFlag();
61 v.ResetOFlag();
62 }
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::FSET_reg(u64 insn) {
67 FSET(*this, insn, GetFloatReg20(insn));
68}
69
70void TranslatorVisitor::FSET_cbuf(u64 insn) {
71 FSET(*this, insn, GetFloatCbuf(insn));
72}
73
74void TranslatorVisitor::FSET_imm(u64 insn) {
75 FSET(*this, insn, GetFloatImm20(insn));
76}
77
78} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
7
8namespace Shader::Maxwell {
9namespace {
10enum class FloatFormat : u64 {
11 F16 = 1,
12 F32 = 2,
13 F64 = 3,
14};
15
16enum class RoundingOp : u64 {
17 None = 0,
18 Pass = 3,
19 Round = 8,
20 Floor = 9,
21 Ceil = 10,
22 Trunc = 11,
23};
24
25[[nodiscard]] u32 WidthSize(FloatFormat width) {
26 switch (width) {
27 case FloatFormat::F16:
28 return 16;
29 case FloatFormat::F32:
30 return 32;
31 case FloatFormat::F64:
32 return 64;
33 default:
34 throw NotImplementedException("Invalid width {}", width);
35 }
36}
37
38void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
39 union {
40 u64 insn;
41 BitField<0, 8, IR::Reg> dest_reg;
42 BitField<44, 1, u64> ftz;
43 BitField<45, 1, u64> neg;
44 BitField<47, 1, u64> cc;
45 BitField<50, 1, u64> sat;
46 BitField<39, 4, u64> rounding_op;
47 BitField<39, 2, FpRounding> rounding;
48 BitField<10, 2, FloatFormat> src_size;
49 BitField<8, 2, FloatFormat> dst_size;
50
51 [[nodiscard]] RoundingOp RoundingOperation() const {
52 constexpr u64 rounding_mask = 0x0B;
53 return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
54 }
55 } const f2f{insn};
56
57 if (f2f.cc != 0) {
58 throw NotImplementedException("F2F CC");
59 }
60
61 IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
62
63 const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
64 IR::FpControl fp_control{
65 .no_contraction = false,
66 .rounding = IR::FpRounding::DontCare,
67 .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
68 };
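    // Cross-size conversions go through FPConvert with the encoded rounding mode; same-size
    // conversions only apply the requested rounding operation in place.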
69 if (f2f.src_size != f2f.dst_size) {
70 fp_control.rounding = CastFpRounding(f2f.rounding);
71 input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
72 } else {
73 switch (f2f.RoundingOperation()) {
74 case RoundingOp::None:
75 case RoundingOp::Pass:
76 // Make sure NANs are handled properly
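            // (adding +0 under fp_control quiets signaling NaNs and applies FTZ when requested)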
77 switch (f2f.src_size) {
78 case FloatFormat::F16:
79 input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
80 break;
81 case FloatFormat::F32:
82 input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
83 break;
84 case FloatFormat::F64:
85 input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
86 break;
87 }
88 break;
89 case RoundingOp::Round:
90 input = v.ir.FPRoundEven(input, fp_control);
91 break;
92 case RoundingOp::Floor:
93 input = v.ir.FPFloor(input, fp_control);
94 break;
95 case RoundingOp::Ceil:
96 input = v.ir.FPCeil(input, fp_control);
97 break;
98 case RoundingOp::Trunc:
99 input = v.ir.FPTrunc(input, fp_control);
100 break;
101 default:
102            throw NotImplementedException("Unimplemented rounding operation {}", f2f.rounding_op.Value());
103 }
104 }
105 if (f2f.sat != 0 && !any_fp64) {
106 input = v.ir.FPSaturate(input);
107 }
108
109 switch (f2f.dst_size) {
110 case FloatFormat::F16: {
111 const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
112 v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
113 break;
114 }
115 case FloatFormat::F32:
116 v.F(f2f.dest_reg, input);
117 break;
118 case FloatFormat::F64:
119 v.D(f2f.dest_reg, input);
120 break;
121 default:
122 throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
123 }
124}
125} // Anonymous namespace
126
127void TranslatorVisitor::F2F_reg(u64 insn) {
128 union {
129 u64 insn;
130 BitField<49, 1, u64> abs;
131 BitField<10, 2, FloatFormat> src_size;
132 BitField<41, 1, u64> selector;
133 } const f2f{insn};
134
135 IR::F16F32F64 src_a;
136 switch (f2f.src_size) {
137 case FloatFormat::F16: {
138 auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
139 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
140 break;
141 }
142 case FloatFormat::F32:
143 src_a = GetFloatReg20(insn);
144 break;
145 case FloatFormat::F64:
146 src_a = GetDoubleReg20(insn);
147 break;
148 default:
149        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
150 }
151 F2F(*this, insn, src_a, f2f.abs != 0);
152}
153
154void TranslatorVisitor::F2F_cbuf(u64 insn) {
155 union {
156 u64 insn;
157 BitField<49, 1, u64> abs;
158 BitField<10, 2, FloatFormat> src_size;
159 BitField<41, 1, u64> selector;
160 } const f2f{insn};
161
162 IR::F16F32F64 src_a;
163 switch (f2f.src_size) {
164 case FloatFormat::F16: {
165 auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
166 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
167 break;
168 }
169 case FloatFormat::F32:
170 src_a = GetFloatCbuf(insn);
171 break;
172 case FloatFormat::F64:
173 src_a = GetDoubleCbuf(insn);
174 break;
175 default:
176        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
177 }
178 F2F(*this, insn, src_a, f2f.abs != 0);
179}
180
181void TranslatorVisitor::F2F_imm(u64 insn) {
182 union {
183 u64 insn;
184 BitField<49, 1, u64> abs;
185 BitField<10, 2, FloatFormat> src_size;
186 BitField<41, 1, u64> selector;
187 BitField<20, 19, u64> imm;
188 BitField<56, 1, u64> imm_neg;
189 } const f2f{insn};
190
191 IR::F16F32F64 src_a;
192 switch (f2f.src_size) {
193 case FloatFormat::F16: {
194 const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
195 const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
196 src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
197 if (f2f.imm_neg != 0) {
198 throw NotImplementedException("Neg bit on F16");
199 }
200 break;
201 }
202 case FloatFormat::F32:
203 src_a = GetFloatImm20(insn);
204 break;
205 case FloatFormat::F64:
206 src_a = GetDoubleImm20(insn);
207 break;
208 default:
209        throw NotImplementedException("Invalid source format {}", f2f.src_size.Value());
210 }
211 F2F(*this, insn, src_a, f2f.abs != 0);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class DestFormat : u64 {
15 Invalid,
16 I16,
17 I32,
18 I64,
19};
20enum class SrcFormat : u64 {
21 Invalid,
22 F16,
23 F32,
24 F64,
25};
26enum class Rounding : u64 {
27 Round,
28 Floor,
29 Ceil,
30 Trunc,
31};
32
33union F2I {
34 u64 raw;
35 BitField<0, 8, IR::Reg> dest_reg;
36 BitField<8, 2, DestFormat> dest_format;
37 BitField<10, 2, SrcFormat> src_format;
38 BitField<12, 1, u64> is_signed;
39 BitField<39, 2, Rounding> rounding;
40 BitField<41, 1, u64> half;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> abs;
43 BitField<47, 1, u64> cc;
44 BitField<49, 1, u64> neg;
45};
46
47size_t BitSize(DestFormat dest_format) {
48 switch (dest_format) {
49 case DestFormat::I16:
50 return 16;
51 case DestFormat::I32:
52 return 32;
53 case DestFormat::I64:
54 return 64;
55 default:
56 throw NotImplementedException("Invalid destination format {}", dest_format);
57 }
58}
59
60std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
61 if (is_signed) {
62 switch (format) {
63 case DestFormat::I16:
64 return {static_cast<f64>(std::numeric_limits<s16>::max()),
65 static_cast<f64>(std::numeric_limits<s16>::min())};
66 case DestFormat::I32:
67 return {static_cast<f64>(std::numeric_limits<s32>::max()),
68 static_cast<f64>(std::numeric_limits<s32>::min())};
69 case DestFormat::I64:
70 return {static_cast<f64>(std::numeric_limits<s64>::max()),
71 static_cast<f64>(std::numeric_limits<s64>::min())};
72 default:
73 break;
74 }
75 } else {
76 switch (format) {
77 case DestFormat::I16:
78 return {static_cast<f64>(std::numeric_limits<u16>::max()),
79 static_cast<f64>(std::numeric_limits<u16>::min())};
80 case DestFormat::I32:
81 return {static_cast<f64>(std::numeric_limits<u32>::max()),
82 static_cast<f64>(std::numeric_limits<u32>::min())};
83 case DestFormat::I64:
84 return {static_cast<f64>(std::numeric_limits<u64>::max()),
85 static_cast<f64>(std::numeric_limits<u64>::min())};
86 default:
87 break;
88 }
89 }
90 throw NotImplementedException("Invalid destination format {}", format);
91}
92
93IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
94 union {
95 u64 raw;
96 BitField<20, 14, s64> offset;
97 BitField<34, 5, u64> binding;
98 } const cbuf{insn};
99 if (cbuf.binding >= 18) {
100 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
101 }
102 if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
103 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
104 }
105 if (cbuf.offset % 2 != 0) {
106 throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
107 }
108 const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
109 const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
110 const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
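    // Only the high word of the F64 pair (offset + 4) is read; the low word is taken as zero.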
111 const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
112 return v.ir.PackDouble2x32(vector);
113}
114
115void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
116 // F2I is used to convert from a floating point value to an integer
117 const F2I f2i{insn};
118
119 const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
120 f2i.dest_format != DestFormat::I64};
121 IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
122 if (denorm_cares) {
123 fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
124 }
125 const IR::FpControl fp_control{
126 .no_contraction = true,
127 .rounding = IR::FpRounding::DontCare,
128 .fmz_mode = fmz_mode,
129 };
130 const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
131 const IR::F16F32F64 rounded_value{[&] {
132 switch (f2i.rounding) {
133 case Rounding::Round:
134 return v.ir.FPRoundEven(op_a, fp_control);
135 case Rounding::Floor:
136 return v.ir.FPFloor(op_a, fp_control);
137 case Rounding::Ceil:
138 return v.ir.FPCeil(op_a, fp_control);
139 case Rounding::Trunc:
140 return v.ir.FPTrunc(op_a, fp_control);
141 default:
142 throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
143 }
144 }()};
145 const bool is_signed{f2i.is_signed != 0};
146 const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
147
148 IR::F16F32F64 intermediate;
149 switch (f2i.src_format) {
150 case SrcFormat::F16: {
151 const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
152 const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
153 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
154 break;
155 }
156 case SrcFormat::F32: {
157 const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
158 const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
159 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
160 break;
161 }
162 case SrcFormat::F64: {
163 const IR::F64 max_val{v.ir.Imm64(max_bound)};
164 const IR::F64 min_val{v.ir.Imm64(min_bound)};
165 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
166 break;
167 }
168 default:
169        throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
170 }
171
172 const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
173 IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
174
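    // NaN inputs are patched explicitly below: mixed F64/I64-width conversions produce a
    // sign-bit-only pattern, and the remaining signed conversions produce zero.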
175    bool handled_special_case{false};
176    const bool special_nan_cases{(f2i.src_format == SrcFormat::F64) !=
177                                 (f2i.dest_format == DestFormat::I64)};
178 if (special_nan_cases) {
179 if (f2i.dest_format == DestFormat::I32) {
180 handled_special_case = true;
181 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
182 } else if (f2i.dest_format == DestFormat::I64) {
183 handled_special_case = true;
184 result = IR::U64{
185 v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
186 }
187 }
188 if (!handled_special_case && is_signed) {
189 if (bitsize != 64) {
190 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
191 } else {
192 result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
193 }
194 }
195
196 if (bitsize == 64) {
197 v.L(f2i.dest_reg, result);
198 } else {
199 v.X(f2i.dest_reg, result);
200 }
201
202 if (f2i.cc != 0) {
203 throw NotImplementedException("F2I CC");
204 }
205}
206} // Anonymous namespace
207
208void TranslatorVisitor::F2I_reg(u64 insn) {
209 union {
210 u64 raw;
211 F2I base;
212 BitField<20, 8, IR::Reg> src_reg;
213 } const f2i{insn};
214
215 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
216 switch (f2i.base.src_format) {
217 case SrcFormat::F16:
218 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
219 case SrcFormat::F32:
220 return F(f2i.src_reg);
221 case SrcFormat::F64:
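        // F64 sources occupy a register pair: Rn holds the low word, Rn+1 the high word.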
222 return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
223 default:
224 throw NotImplementedException("Invalid F2I source format {}",
225 f2i.base.src_format.Value());
226 }
227 }()};
228 TranslateF2I(*this, insn, op_a);
229}
230
231void TranslatorVisitor::F2I_cbuf(u64 insn) {
232 const F2I f2i{insn};
233 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
234 switch (f2i.src_format) {
235 case SrcFormat::F16:
236 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
237 case SrcFormat::F32:
238 return GetFloatCbuf(insn);
239 case SrcFormat::F64: {
240 return UnpackCbuf(*this, insn);
241 }
242 default:
243 throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
244 }
245 }()};
246 TranslateF2I(*this, insn, op_a);
247}
248
249void TranslatorVisitor::F2I_imm(u64) {
250 throw NotImplementedException("{}", Opcode::F2I_imm);
251}
252
253} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
13 bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const ffma{insn};
19
20 if (cc) {
21 throw NotImplementedException("FFMA CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
25 const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
26 const IR::FpControl fp_control{
27 .no_contraction = true,
28 .rounding = CastFpRounding(fp_rounding),
29 .fmz_mode = CastFmzMode(fmz_mode),
30 };
31 IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
32 if (fmz_mode == FmzMode::FMZ && !sat) {
33 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
34 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
35 const IR::F32 zero{v.ir.Imm32(0.0f)};
36 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
37 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
38 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
39 value = IR::F32{v.ir.Select(any_zero, op_c, value)};
40 }
41 if (sat) {
42 value = v.ir.FPSaturate(value);
43 }
44 v.F(ffma.dest_reg, value);
45}
46
47void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
48 union {
49 u64 raw;
50 BitField<47, 1, u64> cc;
51 BitField<48, 1, u64> neg_b;
52 BitField<49, 1, u64> neg_c;
53 BitField<50, 1, u64> sat;
54 BitField<51, 2, FpRounding> fp_rounding;
55 BitField<53, 2, FmzMode> fmz_mode;
56 } const ffma{insn};
57
58 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
59 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::FFMA_reg(u64 insn) {
64 FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
65}
66
67void TranslatorVisitor::FFMA_rc(u64 insn) {
68 FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
69}
70
71void TranslatorVisitor::FFMA_cr(u64 insn) {
72 FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
73}
74
75void TranslatorVisitor::FFMA_imm(u64 insn) {
76 FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
77}
78
79void TranslatorVisitor::FFMA32I(u64 insn) {
80 union {
81 u64 raw;
82        BitField<0, 8, IR::Reg> src_c; // FFMA32I's addend shares its encoding with the destination register
83 BitField<52, 1, u64> cc;
84 BitField<53, 2, FmzMode> fmz_mode;
85 BitField<55, 1, u64> sat;
86 BitField<56, 1, u64> neg_a;
87 BitField<57, 1, u64> neg_c;
88 } const ffma32i{insn};
89
90 FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
91 ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
92}
93
94} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<44, 1, u64> ftz;
19 BitField<45, 1, u64> negate_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> negate_a;
23 BitField<49, 1, u64> abs_b;
24 } const fmnmx{insn};
25
26    if (fmnmx.cc != 0) {
27 throw NotImplementedException("FMNMX CC");
28 }
29
30 const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
31 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
32 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};
33
34 const IR::FpControl control{
35 .no_contraction = false,
36 .rounding = IR::FpRounding::DontCare,
37 .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
38 };
39 IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
40 IR::F32 min{v.ir.FPMin(op_a, op_b, control)};
41
42 if (fmnmx.neg_pred != 0) {
43 std::swap(min, max);
44 }
45
46 v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
47}
48} // Anonymous namespace
49
50void TranslatorVisitor::FMNMX_reg(u64 insn) {
51 FMNMX(*this, insn, GetFloatReg20(insn));
52}
53
54void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
55 FMNMX(*this, insn, GetFloatCbuf(insn));
56}
57
58void TranslatorVisitor::FMNMX_imm(u64 insn) {
59 FMNMX(*this, insn, GetFloatImm20(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class Operation : u64 {
14 Cos = 0,
15 Sin = 1,
16 Ex2 = 2, // Base 2 exponent
17 Lg2 = 3, // Base 2 logarithm
18 Rcp = 4, // Reciprocal
19 Rsq = 5, // Reciprocal square root
20    Rcp64H = 6, // Reciprocal approximation using an f64's upper half
21    Rsq64H = 7, // Reciprocal square root approximation using an f64's upper half
22 Sqrt = 8,
23};
24} // Anonymous namespace
25
26void TranslatorVisitor::MUFU(u64 insn) {
27 // MUFU is used to implement a bunch of special functions. See Operation.
28 union {
29 u64 raw;
30 BitField<0, 8, IR::Reg> dest_reg;
31 BitField<8, 8, IR::Reg> src_reg;
32 BitField<20, 4, Operation> operation;
33 BitField<46, 1, u64> abs;
34 BitField<48, 1, u64> neg;
35 BitField<50, 1, u64> sat;
36 } const mufu{insn};
37
38 const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
39 IR::F32 value{[&]() -> IR::F32 {
40 switch (mufu.operation) {
41 case Operation::Cos:
42 return ir.FPCos(op_a);
43 case Operation::Sin:
44 return ir.FPSin(op_a);
45 case Operation::Ex2:
46 return ir.FPExp2(op_a);
47 case Operation::Lg2:
48 return ir.FPLog2(op_a);
49 case Operation::Rcp:
50 return ir.FPRecip(op_a);
51 case Operation::Rsq:
52 return ir.FPRecipSqrt(op_a);
53 case Operation::Rcp64H:
54 throw NotImplementedException("MUFU.RCP64H");
55 case Operation::Rsq64H:
56 throw NotImplementedException("MUFU.RSQ64H");
57 case Operation::Sqrt:
58 return ir.FPSqrt(op_a);
59 default:
60 throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
61 }
62 }()};
63
64    if (mufu.sat != 0) {
65 value = ir.FPSaturate(value);
66 }
67
68 F(mufu.dest_reg, value);
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/modifiers.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Scale : u64 {
15 None,
16 D2,
17 D4,
18 D8,
19 M8,
20 M4,
21 M2,
22 INVALIDSCALE37,
23};
24
25float ScaleFactor(Scale scale) {
26 switch (scale) {
27 case Scale::None:
28 return 1.0f;
29 case Scale::D2:
30 return 1.0f / 2.0f;
31 case Scale::D4:
32 return 1.0f / 4.0f;
33 case Scale::D8:
34 return 1.0f / 8.0f;
35 case Scale::M8:
36 return 8.0f;
37 case Scale::M4:
38 return 4.0f;
39 case Scale::M2:
40 return 2.0f;
41 case Scale::INVALIDSCALE37:
42 break;
43 }
44 throw NotImplementedException("Invalid FMUL scale {}", scale);
45}
46
47void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
48 FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
49 union {
50 u64 raw;
51 BitField<0, 8, IR::Reg> dest_reg;
52 BitField<8, 8, IR::Reg> src_a;
53 } const fmul{insn};
54
55 if (cc) {
56 throw NotImplementedException("FMUL CC");
57 }
58 IR::F32 op_a{v.F(fmul.src_a)};
59 if (scale != Scale::None) {
60 if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
61 throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
62 }
63 op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
64 }
65 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
66 const IR::FpControl fp_control{
67 .no_contraction = true,
68 .rounding = CastFpRounding(fp_rounding),
69 .fmz_mode = CastFmzMode(fmz_mode),
70 };
71 IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
72 if (fmz_mode == FmzMode::FMZ && !sat) {
73 // Do not implement FMZ if SAT is enabled, as it does the logic for us.
74 // On D3D9 mode, anything * 0 is zero, even NAN and infinity
75 const IR::F32 zero{v.ir.Imm32(0.0f)};
76 const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
77 const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
78 const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
79 value = IR::F32{v.ir.Select(any_zero, zero, value)};
80 }
81 if (sat) {
82 value = v.ir.FPSaturate(value);
83 }
84 v.F(fmul.dest_reg, value);
85}
86
87void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
88 union {
89 u64 raw;
90 BitField<39, 2, FpRounding> fp_rounding;
91 BitField<41, 3, Scale> scale;
92 BitField<44, 2, FmzMode> fmz;
93 BitField<47, 1, u64> cc;
94 BitField<48, 1, u64> neg_b;
95 BitField<50, 1, u64> sat;
96 } const fmul{insn};
97
98 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
99 fmul.neg_b != 0);
100}
101} // Anonymous namespace
102
103void TranslatorVisitor::FMUL_reg(u64 insn) {
104 return FMUL(*this, insn, GetFloatReg20(insn));
105}
106
107void TranslatorVisitor::FMUL_cbuf(u64 insn) {
108 return FMUL(*this, insn, GetFloatCbuf(insn));
109}
110
111void TranslatorVisitor::FMUL_imm(u64 insn) {
112 return FMUL(*this, insn, GetFloatImm20(insn));
113}
114
115void TranslatorVisitor::FMUL32I(u64 insn) {
116 union {
117 u64 raw;
118 BitField<52, 1, u64> cc;
119 BitField<53, 2, FmzMode> fmz;
120 BitField<55, 1, u64> sat;
121 } const fmul32i{insn};
122
123 FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
124 fmul32i.sat != 0, fmul32i.cc != 0, false);
125}
126
127} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 SINCOS,
13 EX2,
14};
15
16void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
17 union {
18 u64 raw;
19 BitField<0, 8, IR::Reg> dest_reg;
20 BitField<39, 1, Mode> mode;
21 BitField<45, 1, u64> neg;
22 BitField<49, 1, u64> abs;
23 } const rro{insn};
24
25 v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
26}
27} // Anonymous namespace
28
29void TranslatorVisitor::RRO_reg(u64 insn) {
30 RRO(*this, insn, GetFloatReg20(insn));
31}
32
33void TranslatorVisitor::RRO_cbuf(u64 insn) {
34 RRO(*this, insn, GetFloatCbuf(insn));
35}
36
37void TranslatorVisitor::RRO_imm(u64) {
38 throw NotImplementedException("RRO (imm)");
39}
40
41} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<47, 1, u64> ftz;
26 BitField<48, 4, FPCompareOp> compare_op;
27 } const fsetp{insn};
28
29 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
30    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0)};
31 const IR::FpControl control{
32 .no_contraction = false,
33 .rounding = IR::FpRounding::DontCare,
34 .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
35 };
36
37 const BooleanOp bop{fsetp.bop};
38 const FPCompareOp compare_op{fsetp.compare_op};
39 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
40 const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
41 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
42 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
43 v.ir.SetPred(fsetp.dest_pred_a, result_a);
44 v.ir.SetPred(fsetp.dest_pred_b, result_b);
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::FSETP_reg(u64 insn) {
49 FSETP(*this, insn, GetFloatReg20(insn));
50}
51
52void TranslatorVisitor::FSETP_cbuf(u64 insn) {
53 FSETP(*this, insn, GetFloatCbuf(insn));
54}
55
56void TranslatorVisitor::FSETP_imm(u64 insn) {
57 FSETP(*this, insn, GetFloatImm20(insn));
58}
59
60} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::FSWZADD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<28, 8, u64> swizzle;
16 BitField<38, 1, u64> ndv;
17 BitField<39, 2, FpRounding> round;
18 BitField<44, 1, u64> ftz;
19 BitField<47, 1, u64> cc;
20 } const fswzadd{insn};
21
22 if (fswzadd.ndv != 0) {
23 throw NotImplementedException("FSWZADD NDV");
24 }
25
26 const IR::F32 src_a{GetFloatReg8(insn)};
27 const IR::F32 src_b{GetFloatReg20(insn)};
28 const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
29
30 const IR::FpControl fp_control{
31 .no_contraction = false,
32 .rounding = CastFpRounding(fswzadd.round),
33 .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
34 };
35
36 const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
37 F(fswzadd.dest_reg, result);
38
39 if (fswzadd.cc != 0) {
40 throw NotImplementedException("FSWZADD CC");
41 }
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
11 union {
12 u64 raw;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a;
15 } const hadd2{insn};
16
17 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
18 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
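    // If one operand pair is packed F16 and the other is F32 (Swizzle::F32), promote the
    // F16 pair so the addition happens in F32; results are converted back after the add.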
19 const bool promotion{lhs_a.Type() != lhs_b.Type()};
20 if (promotion) {
21 if (lhs_a.Type() == IR::Type::F16) {
22 lhs_a = v.ir.FPConvert(32, lhs_a);
23 rhs_a = v.ir.FPConvert(32, rhs_a);
24 }
25 if (lhs_b.Type() == IR::Type::F16) {
26 lhs_b = v.ir.FPConvert(32, lhs_b);
27 rhs_b = v.ir.FPConvert(32, rhs_b);
28 }
29 }
30 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
31 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
32
33 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
34 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
35
36 const IR::FpControl fp_control{
37 .no_contraction = true,
38 .rounding = IR::FpRounding::DontCare,
39 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
40 };
41 IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
42 IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
43 if (sat) {
44 lhs = v.ir.FPSaturate(lhs);
45 rhs = v.ir.FPSaturate(rhs);
46 }
47 if (promotion) {
48 lhs = v.ir.FPConvert(16, lhs);
49 rhs = v.ir.FPConvert(16, rhs);
50 }
51 v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
52}
53
54void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
55 const IR::U32& src_b) {
56 union {
57 u64 raw;
58 BitField<49, 2, Merge> merge;
59 BitField<39, 1, u64> ftz;
60 BitField<43, 1, u64> neg_a;
61 BitField<44, 1, u64> abs_a;
62 BitField<47, 2, Swizzle> swizzle_a;
63 } const hadd2{insn};
64
65 HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
66 hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
67}
68} // Anonymous namespace
69
70void TranslatorVisitor::HADD2_reg(u64 insn) {
71 union {
72 u64 raw;
73 BitField<32, 1, u64> sat;
74 BitField<31, 1, u64> neg_b;
75 BitField<30, 1, u64> abs_b;
76 BitField<28, 2, Swizzle> swizzle_b;
77 } const hadd2{insn};
78
79 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
80 GetReg20(insn));
81}
82
83void TranslatorVisitor::HADD2_cbuf(u64 insn) {
84 union {
85 u64 raw;
86 BitField<52, 1, u64> sat;
87 BitField<56, 1, u64> neg_b;
88 BitField<54, 1, u64> abs_b;
89 } const hadd2{insn};
90
91 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
92 GetCbuf(insn));
93}
94
95void TranslatorVisitor::HADD2_imm(u64 insn) {
96 union {
97 u64 raw;
98 BitField<52, 1, u64> sat;
99 BitField<56, 1, u64> neg_high;
100 BitField<30, 9, u64> high;
101 BitField<29, 1, u64> neg_low;
102 BitField<20, 9, u64> low;
103 } const hadd2{insn};
104
105 const u32 imm{
106 static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
107 static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
108 HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
109}
110
111void TranslatorVisitor::HADD2_32I(u64 insn) {
112 union {
113 u64 raw;
114 BitField<55, 1, u64> ftz;
115 BitField<52, 1, u64> sat;
116 BitField<56, 1, u64> neg_a;
117 BitField<53, 2, Swizzle> swizzle_a;
118 BitField<20, 32, u64> imm32;
119 } const hadd2{insn};
120
121 const u32 imm{static_cast<u32>(hadd2.imm32)};
122 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
123 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
124}
125} // namespace Shader::Maxwell
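A note on the immediate packing used by HADD2_imm above: each 9-bit payload is placed at fp16 bits 14:6 (the exponent plus the top four mantissa bits) with its sign at bit 15, so the low six mantissa bits of each half are always zero. A minimal sketch with illustrative names:

    // Rebuild one packed half from the encoded fields (sketch, not diff code).
    constexpr u32 DecodeHalfImm(u32 payload9, bool neg) {
        return (neg ? 1u << 15 : 0u) | (payload9 << 6);
    }
    // imm == DecodeHalfImm(low, neg_low) | (DecodeHalfImm(high, neg_high) << 16)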
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
10 Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
11 bool sat, HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hfma2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
21 const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
22 if (promotion) {
23 if (lhs_a.Type() == IR::Type::F16) {
24 lhs_a = v.ir.FPConvert(32, lhs_a);
25 rhs_a = v.ir.FPConvert(32, rhs_a);
26 }
27 if (lhs_b.Type() == IR::Type::F16) {
28 lhs_b = v.ir.FPConvert(32, lhs_b);
29 rhs_b = v.ir.FPConvert(32, rhs_b);
30 }
31 if (lhs_c.Type() == IR::Type::F16) {
32 lhs_c = v.ir.FPConvert(32, lhs_c);
33 rhs_c = v.ir.FPConvert(32, rhs_c);
34 }
35 }
36
37 lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
38 rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);
39
40 lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
41 rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);
42
43 const IR::FpControl fp_control{
44 .no_contraction = true,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = HalfPrecision2FmzMode(precision),
47 };
48 IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
49 IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
50 if (precision == HalfPrecision::FMZ && !sat) {
51 // Do not apply the FMZ special case when SAT is enabled, as saturation already yields the expected result.
52 // In D3D9 (FMZ) mode, anything multiplied by zero is zero, even NaN and infinity
53 const IR::F32 zero{v.ir.Imm32(0.0f)};
54 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
55 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
56 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
57 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};
58
59 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
60 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
61 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
62 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
63 }
64 if (sat) {
65 lhs = v.ir.FPSaturate(lhs);
66 rhs = v.ir.FPSaturate(rhs);
67 }
68 if (promotion) {
69 lhs = v.ir.FPConvert(16, lhs);
70 rhs = v.ir.FPConvert(16, rhs);
71 }
72 v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
73}
74
75void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
76 Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
77 HalfPrecision precision) {
78 union {
79 u64 raw;
80 BitField<47, 2, Swizzle> swizzle_a;
81 BitField<49, 2, Merge> merge;
82 } const hfma2{insn};
83
84 HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
85 sat, precision);
86}
87} // Anonymous namespace
88
89void TranslatorVisitor::HFMA2_reg(u64 insn) {
90 union {
91 u64 raw;
92 BitField<28, 2, Swizzle> swizzle_b;
93 BitField<32, 1, u64> saturate;
94 BitField<31, 1, u64> neg_b;
95 BitField<30, 1, u64> neg_c;
96 BitField<35, 2, Swizzle> swizzle_c;
97 BitField<37, 2, HalfPrecision> precision;
98 } const hfma2{insn};
99
100 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
101 GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
102}
103
104void TranslatorVisitor::HFMA2_rc(u64 insn) {
105 union {
106 u64 raw;
107 BitField<51, 1, u64> neg_c;
108 BitField<52, 1, u64> saturate;
109 BitField<53, 2, Swizzle> swizzle_b;
110 BitField<56, 1, u64> neg_b;
111 BitField<57, 2, HalfPrecision> precision;
112 } const hfma2{insn};
113
114 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
115 GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
116}
117
118void TranslatorVisitor::HFMA2_cr(u64 insn) {
119 union {
120 u64 raw;
121 BitField<51, 1, u64> neg_c;
122 BitField<52, 1, u64> saturate;
123 BitField<53, 2, Swizzle> swizzle_c;
124 BitField<56, 1, u64> neg_b;
125 BitField<57, 2, HalfPrecision> precision;
126 } const hfma2{insn};
127
128 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
129 GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
130}
131
132void TranslatorVisitor::HFMA2_imm(u64 insn) {
133 union {
134 u64 raw;
135 BitField<51, 1, u64> neg_c;
136 BitField<52, 1, u64> saturate;
137 BitField<53, 2, Swizzle> swizzle_c;
138
139 BitField<56, 1, u64> neg_high;
140 BitField<30, 9, u64> high;
141 BitField<29, 1, u64> neg_low;
142 BitField<20, 9, u64> low;
143 BitField<57, 2, HalfPrecision> precision;
144 } const hfma2{insn};
145
146 const u32 imm{
147 static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
148 static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
149
150 HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
151 GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
152}
153
154void TranslatorVisitor::HFMA2_32I(u64 insn) {
155 union {
156 u64 raw;
157 BitField<0, 8, IR::Reg> src_c;
158 BitField<20, 32, u64> imm32;
159 BitField<52, 1, u64> neg_c;
160 BitField<53, 2, Swizzle> swizzle_a;
161 BitField<55, 2, HalfPrecision> precision;
162 } const hfma2{insn};
163
164 const u32 imm{static_cast<u32>(hfma2.imm32)};
165 HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
166 Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
167}
168
169} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8
9IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
10 switch (precision) {
11 case HalfPrecision::None:
12 return IR::FmzMode::None;
13 case HalfPrecision::FTZ:
14 return IR::FmzMode::FTZ;
15 case HalfPrecision::FMZ:
16 return IR::FmzMode::FMZ;
17 default:
18 return IR::FmzMode::DontCare;
19 }
20}
21
22std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
23 switch (swizzle) {
24 case Swizzle::H1_H0: {
25 const IR::Value vector{ir.UnpackFloat2x16(value)};
26 return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
27 }
28 case Swizzle::H0_H0: {
29 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
30 return {scalar, scalar};
31 }
32 case Swizzle::H1_H1: {
33 const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
34 return {scalar, scalar};
35 }
36 case Swizzle::F32: {
37 const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
38 return {scalar, scalar};
39 }
40 }
41 throw InvalidArgument("Invalid swizzle {}", swizzle);
42}
43
44IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
45 Merge merge) {
46 switch (merge) {
47 case Merge::H1_H0:
48 return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
49 case Merge::F32:
50 return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
51 case Merge::MRG_H0:
52 case Merge::MRG_H1: {
53 const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
54 const bool is_h0{merge == Merge::MRG_H0};
55 const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
56 return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
57 }
58 }
59 throw InvalidArgument("Invalid merge {}", merge);
60}
61
62} // namespace Shader::Maxwell
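Usage note for MergeResult above: the MRG_H0/MRG_H1 cases read the current destination register so the untouched half survives the write. On the emitter API shown here, a Merge::MRG_H1 store (with rhs the upper-half result) is equivalent to this sketch:

    // Keep dest's lower half; replace only the upper half.
    const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
    const IR::U32 merged{ir.PackFloat2x16(ir.CompositeInsert(vector, ir.FPConvert(16, rhs), 1))};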
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14
15enum class Merge : u64 {
16 H1_H0,
17 F32,
18 MRG_H0,
19 MRG_H1,
20};
21
22enum class Swizzle : u64 {
23 H1_H0,
24 F32,
25 H0_H0,
26 H1_H1,
27};
28
29enum class HalfPrecision : u64 {
30 None = 0,
31 FTZ = 1,
32 FMZ = 2,
33};
34
35IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);
36
37std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);
38
39IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
40 Merge merge);
41
42} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
10 Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
11 HalfPrecision precision) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a;
16 } const hmul2{insn};
17
18 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
19 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
20 const bool promotion{lhs_a.Type() != lhs_b.Type()};
21 if (promotion) {
22 if (lhs_a.Type() == IR::Type::F16) {
23 lhs_a = v.ir.FPConvert(32, lhs_a);
24 rhs_a = v.ir.FPConvert(32, rhs_a);
25 }
26 if (lhs_b.Type() == IR::Type::F16) {
27 lhs_b = v.ir.FPConvert(32, lhs_b);
28 rhs_b = v.ir.FPConvert(32, rhs_b);
29 }
30 }
31 lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
32 rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
33
34 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
35 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
36
37 const IR::FpControl fp_control{
38 .no_contraction = true,
39 .rounding = IR::FpRounding::DontCare,
40 .fmz_mode = HalfPrecision2FmzMode(precision),
41 };
42 IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
43 IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
44 if (precision == HalfPrecision::FMZ && !sat) {
45 // Do not apply the FMZ special case when SAT is enabled, as saturation already yields the expected result.
46 // In D3D9 (FMZ) mode, anything multiplied by zero is zero, even NaN and infinity
47 const IR::F32 zero{v.ir.Imm32(0.0f)};
48 const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
49 const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
50 const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
51 lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};
52
53 const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
54 const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
55 const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
56 rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
57 }
58 if (sat) {
59 lhs = v.ir.FPSaturate(lhs);
60 rhs = v.ir.FPSaturate(rhs);
61 }
62 if (promotion) {
63 lhs = v.ir.FPConvert(16, lhs);
64 rhs = v.ir.FPConvert(16, rhs);
65 }
66 v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
67}
68
69void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
70 Swizzle swizzle_b, const IR::U32& src_b) {
71 union {
72 u64 raw;
73 BitField<49, 2, Merge> merge;
74 BitField<47, 2, Swizzle> swizzle_a;
75 BitField<39, 2, HalfPrecision> precision;
76 } const hmul2{insn};
77
78 HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
79 hmul2.precision);
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::HMUL2_reg(u64 insn) {
84 union {
85 u64 raw;
86 BitField<32, 1, u64> sat;
87 BitField<31, 1, u64> neg_b;
88 BitField<30, 1, u64> abs_b;
89 BitField<44, 1, u64> abs_a;
90 BitField<28, 2, Swizzle> swizzle_b;
91 } const hmul2{insn};
92
93 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
94 hmul2.swizzle_b, GetReg20(insn));
95}
96
97void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
98 union {
99 u64 raw;
100 BitField<52, 1, u64> sat;
101 BitField<54, 1, u64> abs_b;
102 BitField<43, 1, u64> neg_a;
103 BitField<44, 1, u64> abs_a;
104 } const hmul2{insn};
105
106 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
107 Swizzle::F32, GetCbuf(insn));
108}
109
110void TranslatorVisitor::HMUL2_imm(u64 insn) {
111 union {
112 u64 raw;
113 BitField<52, 1, u64> sat;
114 BitField<56, 1, u64> neg_high;
115 BitField<30, 9, u64> high;
116 BitField<29, 1, u64> neg_low;
117 BitField<20, 9, u64> low;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 } const hmul2{insn};
121
122 const u32 imm{
123 static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
124 static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
125 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
126 Swizzle::H1_H0, ir.Imm32(imm));
127}
128
129void TranslatorVisitor::HMUL2_32I(u64 insn) {
130 union {
131 u64 raw;
132 BitField<55, 2, HalfPrecision> precision;
133 BitField<52, 1, u64> sat;
134 BitField<53, 2, Swizzle> swizzle_a;
135 BitField<20, 32, u64> imm32;
136 } const hmul2{insn};
137
138 const u32 imm{static_cast<u32>(hmul2.imm32)};
139 HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
140 Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
141}
142
143} // namespace Shader::Maxwell
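The FMZ special case above has a simple scalar model: under D3D9 rules a zero factor forces a zero product even when the other factor is NaN or infinity, where IEEE multiplication would give NaN. A host-side sketch (not emitter code):

    float FmzMul(float a, float b) {
        // A factor comparing equal to 0.0f (hence not NaN) zeroes the product.
        return (a == 0.0f || b == 0.0f) ? 0.0f : a * b;
    }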
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
10 bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
11 union {
12 u64 insn;
13 BitField<0, 8, IR::Reg> dest_reg;
14 BitField<8, 8, IR::Reg> src_a_reg;
15 BitField<39, 3, IR::Pred> pred;
16 BitField<42, 1, u64> neg_pred;
17 BitField<43, 1, u64> neg_a;
18 BitField<45, 2, BooleanOp> bop;
19 BitField<44, 1, u64> abs_a;
20 BitField<47, 2, Swizzle> swizzle_a;
21 } const hset2{insn};
22
23 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
24 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
25
26 if (lhs_a.Type() != lhs_b.Type()) {
27 if (lhs_a.Type() == IR::Type::F16) {
28 lhs_a = v.ir.FPConvert(32, lhs_a);
29 rhs_a = v.ir.FPConvert(32, rhs_a);
30 }
31 if (lhs_b.Type() == IR::Type::F16) {
32 lhs_b = v.ir.FPConvert(32, lhs_b);
33 rhs_b = v.ir.FPConvert(32, rhs_b);
34 }
35 }
36
37 lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
38 rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
39
40 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
41 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
42
43 const IR::FpControl control{
44 .no_contraction = false,
45 .rounding = IR::FpRounding::DontCare,
46 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
47 };
48
49 IR::U1 pred{v.ir.GetPred(hset2.pred)};
50 if (hset2.neg_pred != 0) {
51 pred = v.ir.LogicalNot(pred);
52 }
53 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
54 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
55 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
56 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};
57
58 const u32 true_value = bf ? 0x3c00 : 0xffff;
59 const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
60 const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
61 const IR::U32 fail_result{v.ir.Imm32(0)};
62 const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
63 const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};
64
65 v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
66}
67} // Anonymous namespace
68
69void TranslatorVisitor::HSET2_reg(u64 insn) {
70 union {
71 u64 insn;
72 BitField<30, 1, u64> abs_b;
73 BitField<49, 1, u64> bf;
74 BitField<31, 1, u64> neg_b;
75 BitField<50, 1, u64> ftz;
76 BitField<35, 4, FPCompareOp> compare_op;
77 BitField<28, 2, Swizzle> swizzle_b;
78 } const hset2{insn};
79
80 HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
81 hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
82}
83
84void TranslatorVisitor::HSET2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> bf;
88 BitField<56, 1, u64> neg_b;
89 BitField<54, 1, u64> ftz;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hset2{insn};
92
93 HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
94 hset2.compare_op, Swizzle::F32);
95}
96
97void TranslatorVisitor::HSET2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> bf;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hset2{insn};
108
109 const u32 imm{
110 static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
112
113 HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
114 Swizzle::H1_H0);
115}
116
117} // namespace Shader::Maxwell
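Result packing in HSET2, restated: per half, a passing compare produces either an all-ones 16-bit mask (BF clear) or fp16 1.0 encoded as 0x3c00 (BF set); the upper half's constant is pre-shifted so a single BitwiseOr assembles the destination:

    // r = (lhs_pass ? true_value : 0) | ((rhs_pass ? true_value : 0) << 16)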
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
9void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
10 Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
11 union {
12 u64 insn;
13 BitField<8, 8, IR::Reg> src_a_reg;
14 BitField<3, 3, IR::Pred> dest_pred_a;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 1, u64> neg_a;
19 BitField<45, 2, BooleanOp> bop;
20 BitField<44, 1, u64> abs_a;
21 BitField<6, 1, u64> ftz;
22 BitField<47, 2, Swizzle> swizzle_a;
23 } const hsetp2{insn};
24
25 auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
26 auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
27
28 if (lhs_a.Type() != lhs_b.Type()) {
29 if (lhs_a.Type() == IR::Type::F16) {
30 lhs_a = v.ir.FPConvert(32, lhs_a);
31 rhs_a = v.ir.FPConvert(32, rhs_a);
32 }
33 if (lhs_b.Type() == IR::Type::F16) {
34 lhs_b = v.ir.FPConvert(32, lhs_b);
35 rhs_b = v.ir.FPConvert(32, rhs_b);
36 }
37 }
38
39 lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
40 rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
41
42 lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
43 rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);
44
45 const IR::FpControl control{
46 .no_contraction = false,
47 .rounding = IR::FpRounding::DontCare,
48 .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
49 };
50
51 IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
52 if (hsetp2.neg_pred != 0) {
53 pred = v.ir.LogicalNot(pred);
54 }
55 const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
56 const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
57 const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
58 const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};
59
60 if (h_and) {
61 const IR::U1 result{v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs)};
62 v.ir.SetPred(hsetp2.dest_pred_a, result);
63 v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
64 } else {
65 v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
66 v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
67 }
68}
69} // Anonymous namespace
70
71void TranslatorVisitor::HSETP2_reg(u64 insn) {
72 union {
73 u64 insn;
74 BitField<30, 1, u64> abs_b;
75 BitField<49, 1, u64> h_and;
76 BitField<31, 1, u64> neg_b;
77 BitField<35, 4, FPCompareOp> compare_op;
78 BitField<28, 2, Swizzle> swizzle_b;
79 } const hsetp2{insn};
80 HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
81 hsetp2.compare_op, hsetp2.h_and != 0);
82}
83
84void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> h_and;
88 BitField<54, 1, u64> abs_b;
89 BitField<56, 1, u64> neg_b;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hsetp2{insn};
92
93 HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
94 hsetp2.compare_op, hsetp2.h_and != 0);
95}
96
97void TranslatorVisitor::HSETP2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> h_and;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hsetp2{insn};
108
109 const u32 imm{static_cast<u32>(hsetp2.low << 6) |
110 static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hsetp2.high << 22) |
112 static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
113
114 HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
115 hsetp2.h_and != 0);
116}
117
118} // namespace Shader::Maxwell
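The .H_AND path above collapses the per-half results into one predicate pair; in scalar terms:

    // Pa =  (lhs_result && rhs_result)
    // Pb = !(lhs_result && rhs_result)
    // Without .H_AND, Pa and Pb receive the two half results independently.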
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
12 u32 offset) {
13 if (unaligned) {
14 return ir.Imm32(0);
15 }
16 return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
17}
18} // Anonymous namespace
19
20IR::U32 TranslatorVisitor::X(IR::Reg reg) {
21 return ir.GetReg(reg);
22}
23
24IR::U64 TranslatorVisitor::L(IR::Reg reg) {
25 if (!IR::IsAligned(reg, 2)) {
26 throw NotImplementedException("Unaligned source register {}", reg);
27 }
28 return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
29}
30
31IR::F32 TranslatorVisitor::F(IR::Reg reg) {
32 return ir.BitCast<IR::F32>(X(reg));
33}
34
35IR::F64 TranslatorVisitor::D(IR::Reg reg) {
36 if (!IR::IsAligned(reg, 2)) {
37 throw NotImplementedException("Unaligned source register {}", reg);
38 }
39 return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
40}
41
42void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
43 ir.SetReg(dest_reg, value);
44}
45
46void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
47 if (!IR::IsAligned(dest_reg, 2)) {
48 throw NotImplementedException("Unaligned destination register {}", dest_reg);
49 }
50 const IR::Value result{ir.UnpackUint2x32(value)};
51 for (int i = 0; i < 2; i++) {
52 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
53 }
54}
55
56void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
57 X(dest_reg, ir.BitCast<IR::U32>(value));
58}
59
60void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
61 if (!IR::IsAligned(dest_reg, 2)) {
62 throw NotImplementedException("Unaligned destination register {}", dest_reg);
63 }
64 const IR::Value result{ir.UnpackDouble2x32(value)};
65 for (int i = 0; i < 2; i++) {
66 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
67 }
68}
69
70IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
71 union {
72 u64 raw;
73 BitField<8, 8, IR::Reg> index;
74 } const reg{insn};
75 return X(reg.index);
76}
77
78IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
79 union {
80 u64 raw;
81 BitField<20, 8, IR::Reg> index;
82 } const reg{insn};
83 return X(reg.index);
84}
85
86IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
87 union {
88 u64 raw;
89 BitField<39, 8, IR::Reg> index;
90 } const reg{insn};
91 return X(reg.index);
92}
93
94IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
95 return ir.BitCast<IR::F32>(GetReg8(insn));
96}
97
98IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
99 return ir.BitCast<IR::F32>(GetReg20(insn));
100}
101
102IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
103 return ir.BitCast<IR::F32>(GetReg39(insn));
104}
105
106IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
107 union {
108 u64 raw;
109 BitField<20, 8, IR::Reg> index;
110 } const reg{insn};
111 return D(reg.index);
112}
113
114IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
115 union {
116 u64 raw;
117 BitField<39, 8, IR::Reg> index;
118 } const reg{insn};
119 return D(reg.index);
120}
121
122static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
123 union {
124 u64 raw;
125 BitField<20, 14, u64> offset;
126 BitField<34, 5, u64> binding;
127 } const cbuf{insn};
128
129 if (cbuf.binding >= 18) {
130 throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
131 }
132 if (cbuf.offset >= 0x10'000) {
133 throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
134 }
135 const IR::Value binding{static_cast<u32>(cbuf.binding)};
136 const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
137 return {IR::U32{binding}, IR::U32{byte_offset}};
138}
139
140IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
141 const auto [binding, byte_offset]{CbufAddr(insn)};
142 return ir.GetCbuf(binding, byte_offset);
143}
144
145IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
146 const auto [binding, byte_offset]{CbufAddr(insn)};
147 return ir.GetFloatCbuf(binding, byte_offset);
148}
149
150IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
151 union {
152 u64 raw;
153 BitField<20, 1, u64> unaligned;
154 } const cbuf{insn};
155
156 const auto [binding, offset_value]{CbufAddr(insn)};
157 const bool unaligned{cbuf.unaligned != 0};
158 const u32 offset{offset_value.U32()};
159 const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};
160
161 const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
162 const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
163 return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
164}
165
166IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
167 union {
168 u64 raw;
169 BitField<20, 1, u64> unaligned;
170 } const cbuf{insn};
171
172 if (cbuf.unaligned != 0) {
173 throw NotImplementedException("Unaligned packed constant buffer read");
174 }
175 const auto [binding, lower_offset]{CbufAddr(insn)};
176 const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
177 const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
178 const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
179 return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
180}
181
182IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
183 union {
184 u64 raw;
185 BitField<20, 19, u64> value;
186 BitField<56, 1, u64> is_negative;
187 } const imm{insn};
188
189 if (imm.is_negative != 0) {
190 const s64 raw{static_cast<s64>(imm.value)};
191 return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
192 } else {
193 return ir.Imm32(static_cast<u32>(imm.value));
194 }
195}
196
197IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
198 union {
199 u64 raw;
200 BitField<20, 19, u64> value;
201 BitField<56, 1, u64> is_negative;
202 } const imm{insn};
203 const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
204 const u32 value{static_cast<u32>(imm.value) << 12};
205 return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
206}
207
208IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
209 union {
210 u64 raw;
211 BitField<20, 19, u64> value;
212 BitField<56, 1, u64> is_negative;
213 } const imm{insn};
214 const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
215 const u64 value{imm.value << 44};
216 return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
217}
218
219IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
220 const u64 value{GetImm20(insn).U32()};
221 return ir.Imm64(value << 32);
222}
223
224IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
225 union {
226 u64 raw;
227 BitField<20, 32, u64> value;
228 } const imm{insn};
229 return ir.Imm32(static_cast<u32>(imm.value));
230}
231
232IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
233 union {
234 u64 raw;
235 BitField<20, 32, u64> value;
236 } const imm{insn};
237 return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
238}
239
240void TranslatorVisitor::SetZFlag(const IR::U1& value) {
241 ir.SetZFlag(value);
242}
243
244void TranslatorVisitor::SetSFlag(const IR::U1& value) {
245 ir.SetSFlag(value);
246}
247
248void TranslatorVisitor::SetCFlag(const IR::U1& value) {
249 ir.SetCFlag(value);
250}
251
252void TranslatorVisitor::SetOFlag(const IR::U1& value) {
253 ir.SetOFlag(value);
254}
255
256void TranslatorVisitor::ResetZero() {
257 SetZFlag(ir.Imm1(false));
258}
259
260void TranslatorVisitor::ResetSFlag() {
261 SetSFlag(ir.Imm1(false));
262}
263
264void TranslatorVisitor::ResetCFlag() {
265 SetCFlag(ir.Imm1(false));
266}
267
268void TranslatorVisitor::ResetOFlag() {
269 SetOFlag(ir.Imm1(false));
270}
271
272} // namespace Shader::Maxwell
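Two decoding details in impl.cpp deserve a worked sketch. GetImm20 treats bits 38:20 as a 19-bit payload and bit 56 as the sign, and the negative branch's -(1 << 19) + raw is plain two's-complement sign extension of a 20-bit field (helper is illustrative, not diff code):

    s32 DecodeImm20(u32 value19, bool negative) {
        const u32 packed{(negative ? 1u << 19 : 0u) | value19};
        return static_cast<s32>(packed << 12) >> 12; // sign-extend 20 -> 32 bits
    }

GetFloatImm20 reuses the same fields to build an f32 directly: the payload becomes bits 30:12 of the float (value << 12), the sign goes to bit 31, and the low 12 mantissa bits are always zero.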
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/maxwell/instruction.h"
11
12namespace Shader::Maxwell {
13
14enum class CompareOp : u64 {
15 False,
16 LessThan,
17 Equal,
18 LessThanEqual,
19 GreaterThan,
20 NotEqual,
21 GreaterThanEqual,
22 True,
23};
24
25enum class BooleanOp : u64 {
26 AND,
27 OR,
28 XOR,
29};
30
31enum class PredicateOp : u64 {
32 False,
33 True,
34 Zero,
35 NonZero,
36};
37
38enum class FPCompareOp : u64 {
39 F,
40 LT,
41 EQ,
42 LE,
43 GT,
44 NE,
45 GE,
46 NUM,
47 Nan,
48 LTU,
49 EQU,
50 LEU,
51 GTU,
52 NEU,
53 GEU,
54 T,
55};
56
57class TranslatorVisitor {
58public:
59 explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
60
61 Environment& env;
62 IR::IREmitter ir;
63
64 void AL2P(u64 insn);
65 void ALD(u64 insn);
66 void AST(u64 insn);
67 void ATOM_cas(u64 insn);
68 void ATOM(u64 insn);
69 void ATOMS_cas(u64 insn);
70 void ATOMS(u64 insn);
71 void B2R(u64 insn);
72 void BAR(u64 insn);
73 void BFE_reg(u64 insn);
74 void BFE_cbuf(u64 insn);
75 void BFE_imm(u64 insn);
76 void BFI_reg(u64 insn);
77 void BFI_rc(u64 insn);
78 void BFI_cr(u64 insn);
79 void BFI_imm(u64 insn);
80 void BPT(u64 insn);
81 void BRA(u64 insn);
82 void BRK(u64 insn);
83 void BRX(u64 insn);
84 void CAL();
85 void CCTL(u64 insn);
86 void CCTLL(u64 insn);
87 void CONT(u64 insn);
88 void CS2R(u64 insn);
89 void CSET(u64 insn);
90 void CSETP(u64 insn);
91 void DADD_reg(u64 insn);
92 void DADD_cbuf(u64 insn);
93 void DADD_imm(u64 insn);
94 void DEPBAR();
95 void DFMA_reg(u64 insn);
96 void DFMA_rc(u64 insn);
97 void DFMA_cr(u64 insn);
98 void DFMA_imm(u64 insn);
99 void DMNMX_reg(u64 insn);
100 void DMNMX_cbuf(u64 insn);
101 void DMNMX_imm(u64 insn);
102 void DMUL_reg(u64 insn);
103 void DMUL_cbuf(u64 insn);
104 void DMUL_imm(u64 insn);
105 void DSET_reg(u64 insn);
106 void DSET_cbuf(u64 insn);
107 void DSET_imm(u64 insn);
108 void DSETP_reg(u64 insn);
109 void DSETP_cbuf(u64 insn);
110 void DSETP_imm(u64 insn);
111 void EXIT();
112 void F2F_reg(u64 insn);
113 void F2F_cbuf(u64 insn);
114 void F2F_imm(u64 insn);
115 void F2I_reg(u64 insn);
116 void F2I_cbuf(u64 insn);
117 void F2I_imm(u64 insn);
118 void FADD_reg(u64 insn);
119 void FADD_cbuf(u64 insn);
120 void FADD_imm(u64 insn);
121 void FADD32I(u64 insn);
122 void FCHK_reg(u64 insn);
123 void FCHK_cbuf(u64 insn);
124 void FCHK_imm(u64 insn);
125 void FCMP_reg(u64 insn);
126 void FCMP_rc(u64 insn);
127 void FCMP_cr(u64 insn);
128 void FCMP_imm(u64 insn);
129 void FFMA_reg(u64 insn);
130 void FFMA_rc(u64 insn);
131 void FFMA_cr(u64 insn);
132 void FFMA_imm(u64 insn);
133 void FFMA32I(u64 insn);
134 void FLO_reg(u64 insn);
135 void FLO_cbuf(u64 insn);
136 void FLO_imm(u64 insn);
137 void FMNMX_reg(u64 insn);
138 void FMNMX_cbuf(u64 insn);
139 void FMNMX_imm(u64 insn);
140 void FMUL_reg(u64 insn);
141 void FMUL_cbuf(u64 insn);
142 void FMUL_imm(u64 insn);
143 void FMUL32I(u64 insn);
144 void FSET_reg(u64 insn);
145 void FSET_cbuf(u64 insn);
146 void FSET_imm(u64 insn);
147 void FSETP_reg(u64 insn);
148 void FSETP_cbuf(u64 insn);
149 void FSETP_imm(u64 insn);
150 void FSWZADD(u64 insn);
151 void GETCRSPTR(u64 insn);
152 void GETLMEMBASE(u64 insn);
153 void HADD2_reg(u64 insn);
154 void HADD2_cbuf(u64 insn);
155 void HADD2_imm(u64 insn);
156 void HADD2_32I(u64 insn);
157 void HFMA2_reg(u64 insn);
158 void HFMA2_rc(u64 insn);
159 void HFMA2_cr(u64 insn);
160 void HFMA2_imm(u64 insn);
161 void HFMA2_32I(u64 insn);
162 void HMUL2_reg(u64 insn);
163 void HMUL2_cbuf(u64 insn);
164 void HMUL2_imm(u64 insn);
165 void HMUL2_32I(u64 insn);
166 void HSET2_reg(u64 insn);
167 void HSET2_cbuf(u64 insn);
168 void HSET2_imm(u64 insn);
169 void HSETP2_reg(u64 insn);
170 void HSETP2_cbuf(u64 insn);
171 void HSETP2_imm(u64 insn);
172 void I2F_reg(u64 insn);
173 void I2F_cbuf(u64 insn);
174 void I2F_imm(u64 insn);
175 void I2I_reg(u64 insn);
176 void I2I_cbuf(u64 insn);
177 void I2I_imm(u64 insn);
178 void IADD_reg(u64 insn);
179 void IADD_cbuf(u64 insn);
180 void IADD_imm(u64 insn);
181 void IADD3_reg(u64 insn);
182 void IADD3_cbuf(u64 insn);
183 void IADD3_imm(u64 insn);
184 void IADD32I(u64 insn);
185 void ICMP_reg(u64 insn);
186 void ICMP_rc(u64 insn);
187 void ICMP_cr(u64 insn);
188 void ICMP_imm(u64 insn);
189 void IDE(u64 insn);
190 void IDP_reg(u64 insn);
191 void IDP_imm(u64 insn);
192 void IMAD_reg(u64 insn);
193 void IMAD_rc(u64 insn);
194 void IMAD_cr(u64 insn);
195 void IMAD_imm(u64 insn);
196 void IMAD32I(u64 insn);
197 void IMADSP_reg(u64 insn);
198 void IMADSP_rc(u64 insn);
199 void IMADSP_cr(u64 insn);
200 void IMADSP_imm(u64 insn);
201 void IMNMX_reg(u64 insn);
202 void IMNMX_cbuf(u64 insn);
203 void IMNMX_imm(u64 insn);
204 void IMUL_reg(u64 insn);
205 void IMUL_cbuf(u64 insn);
206 void IMUL_imm(u64 insn);
207 void IMUL32I(u64 insn);
208 void IPA(u64 insn);
209 void ISBERD(u64 insn);
210 void ISCADD_reg(u64 insn);
211 void ISCADD_cbuf(u64 insn);
212 void ISCADD_imm(u64 insn);
213 void ISCADD32I(u64 insn);
214 void ISET_reg(u64 insn);
215 void ISET_cbuf(u64 insn);
216 void ISET_imm(u64 insn);
217 void ISETP_reg(u64 insn);
218 void ISETP_cbuf(u64 insn);
219 void ISETP_imm(u64 insn);
220 void JCAL(u64 insn);
221 void JMP(u64 insn);
222 void JMX(u64 insn);
223 void KIL();
224 void LD(u64 insn);
225 void LDC(u64 insn);
226 void LDG(u64 insn);
227 void LDL(u64 insn);
228 void LDS(u64 insn);
229 void LEA_hi_reg(u64 insn);
230 void LEA_hi_cbuf(u64 insn);
231 void LEA_lo_reg(u64 insn);
232 void LEA_lo_cbuf(u64 insn);
233 void LEA_lo_imm(u64 insn);
234 void LEPC(u64 insn);
235 void LONGJMP(u64 insn);
236 void LOP_reg(u64 insn);
237 void LOP_cbuf(u64 insn);
238 void LOP_imm(u64 insn);
239 void LOP3_reg(u64 insn);
240 void LOP3_cbuf(u64 insn);
241 void LOP3_imm(u64 insn);
242 void LOP32I(u64 insn);
243 void MEMBAR(u64 insn);
244 void MOV_reg(u64 insn);
245 void MOV_cbuf(u64 insn);
246 void MOV_imm(u64 insn);
247 void MOV32I(u64 insn);
248 void MUFU(u64 insn);
249 void NOP(u64 insn);
250 void OUT_reg(u64 insn);
251 void OUT_cbuf(u64 insn);
252 void OUT_imm(u64 insn);
253 void P2R_reg(u64 insn);
254 void P2R_cbuf(u64 insn);
255 void P2R_imm(u64 insn);
256 void PBK();
257 void PCNT();
258 void PEXIT(u64 insn);
259 void PIXLD(u64 insn);
260 void PLONGJMP(u64 insn);
261 void POPC_reg(u64 insn);
262 void POPC_cbuf(u64 insn);
263 void POPC_imm(u64 insn);
264 void PRET(u64 insn);
265 void PRMT_reg(u64 insn);
266 void PRMT_rc(u64 insn);
267 void PRMT_cr(u64 insn);
268 void PRMT_imm(u64 insn);
269 void PSET(u64 insn);
270 void PSETP(u64 insn);
271 void R2B(u64 insn);
272 void R2P_reg(u64 insn);
273 void R2P_cbuf(u64 insn);
274 void R2P_imm(u64 insn);
275 void RAM(u64 insn);
276 void RED(u64 insn);
277 void RET(u64 insn);
278 void RRO_reg(u64 insn);
279 void RRO_cbuf(u64 insn);
280 void RRO_imm(u64 insn);
281 void RTT(u64 insn);
282 void S2R(u64 insn);
283 void SAM(u64 insn);
284 void SEL_reg(u64 insn);
285 void SEL_cbuf(u64 insn);
286 void SEL_imm(u64 insn);
287 void SETCRSPTR(u64 insn);
288 void SETLMEMBASE(u64 insn);
289 void SHF_l_reg(u64 insn);
290 void SHF_l_imm(u64 insn);
291 void SHF_r_reg(u64 insn);
292 void SHF_r_imm(u64 insn);
293 void SHFL(u64 insn);
294 void SHL_reg(u64 insn);
295 void SHL_cbuf(u64 insn);
296 void SHL_imm(u64 insn);
297 void SHR_reg(u64 insn);
298 void SHR_cbuf(u64 insn);
299 void SHR_imm(u64 insn);
300 void SSY();
301 void ST(u64 insn);
302 void STG(u64 insn);
303 void STL(u64 insn);
304 void STP(u64 insn);
305 void STS(u64 insn);
306 void SUATOM(u64 insn);
307 void SUATOM_cas(u64 insn);
308 void SULD(u64 insn);
309 void SURED(u64 insn);
310 void SUST(u64 insn);
311 void SYNC(u64 insn);
312 void TEX(u64 insn);
313 void TEX_b(u64 insn);
314 void TEXS(u64 insn);
315 void TLD(u64 insn);
316 void TLD_b(u64 insn);
317 void TLD4(u64 insn);
318 void TLD4_b(u64 insn);
319 void TLD4S(u64 insn);
320 void TLDS(u64 insn);
321 void TMML(u64 insn);
322 void TMML_b(u64 insn);
323 void TXA(u64 insn);
324 void TXD(u64 insn);
325 void TXD_b(u64 insn);
326 void TXQ(u64 insn);
327 void TXQ_b(u64 insn);
328 void VABSDIFF(u64 insn);
329 void VABSDIFF4(u64 insn);
330 void VADD(u64 insn);
331 void VMAD(u64 insn);
332 void VMNMX(u64 insn);
333 void VOTE(u64 insn);
334 void VOTE_vtg(u64 insn);
335 void VSET(u64 insn);
336 void VSETP(u64 insn);
337 void VSHL(u64 insn);
338 void VSHR(u64 insn);
339 void XMAD_reg(u64 insn);
340 void XMAD_rc(u64 insn);
341 void XMAD_cr(u64 insn);
342 void XMAD_imm(u64 insn);
343
344 [[nodiscard]] IR::U32 X(IR::Reg reg);
345 [[nodiscard]] IR::U64 L(IR::Reg reg);
346 [[nodiscard]] IR::F32 F(IR::Reg reg);
347 [[nodiscard]] IR::F64 D(IR::Reg reg);
348
349 void X(IR::Reg dest_reg, const IR::U32& value);
350 void L(IR::Reg dest_reg, const IR::U64& value);
351 void F(IR::Reg dest_reg, const IR::F32& value);
352 void D(IR::Reg dest_reg, const IR::F64& value);
353
354 [[nodiscard]] IR::U32 GetReg8(u64 insn);
355 [[nodiscard]] IR::U32 GetReg20(u64 insn);
356 [[nodiscard]] IR::U32 GetReg39(u64 insn);
357 [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
358 [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
359 [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
360 [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
361 [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
362
363 [[nodiscard]] IR::U32 GetCbuf(u64 insn);
364 [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
365 [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
366 [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
367
368 [[nodiscard]] IR::U32 GetImm20(u64 insn);
369 [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
370 [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
371 [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
372
373 [[nodiscard]] IR::U32 GetImm32(u64 insn);
374 [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
375
376 void SetZFlag(const IR::U1& value);
377 void SetSFlag(const IR::U1& value);
378 void SetCFlag(const IR::U1& value);
379 void SetOFlag(const IR::U1& value);
380
381 void ResetZero();
382 void ResetSFlag();
383 void ResetCFlag();
384 void ResetOFlag();
385};
386
387} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12 bool cc) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a;
17 } const iadd{insn};
18
19 if (sat) {
20 throw NotImplementedException("IADD SAT");
21 }
22 if (x && po) {
23 throw NotImplementedException("IADD X+PO");
24 }
25 // Operand A is always read from a register, negated if needed
26 IR::U32 op_a{v.X(iadd.src_a)};
27 if (neg_a) {
28 op_a = v.ir.INeg(op_a);
29 }
30 // Add both operands
31 IR::U32 result{v.ir.IAdd(op_a, op_b)};
32 if (x) {
33 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34 result = v.ir.IAdd(result, carry);
35 }
36 if (po) {
37 // .PO adds one to the result
38 result = v.ir.IAdd(result, v.ir.Imm32(1));
39 }
40 if (cc) {
41 // Store flags
42 // TODO: Does this grab the result pre-PO or after?
43 if (po) {
44 throw NotImplementedException("IADD CC+PO");
45 }
46 // TODO: How does CC behave when X is set?
47 if (x) {
48 throw NotImplementedException("IADD X+CC");
49 }
50 v.SetZFlag(v.ir.GetZeroFromOp(result));
51 v.SetSFlag(v.ir.GetSignFromOp(result));
52 v.SetCFlag(v.ir.GetCarryFromOp(result));
53 v.SetOFlag(v.ir.GetOverflowFromOp(result));
54 }
55 // Store result
56 v.X(iadd.dest_reg, result);
57}
58
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60 union {
61 u64 insn;
62 BitField<43, 1, u64> x;
63 BitField<47, 1, u64> cc;
64 BitField<48, 2, u64> three_for_po;
65 BitField<48, 1, u64> neg_b;
66 BitField<49, 1, u64> neg_a;
67 BitField<50, 1, u64> sat;
68 } const iadd{insn};
69
70 const bool po{iadd.three_for_po == 3};
71 if (!po && iadd.neg_b != 0) {
72 op_b = v.ir.INeg(op_b);
73 }
74 IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
75}
76} // Anonymous namespace
77
78void TranslatorVisitor::IADD_reg(u64 insn) {
79 IADD(*this, insn, GetReg20(insn));
80}
81
82void TranslatorVisitor::IADD_cbuf(u64 insn) {
83 IADD(*this, insn, GetCbuf(insn));
84}
85
86void TranslatorVisitor::IADD_imm(u64 insn) {
87 IADD(*this, insn, GetImm20(insn));
88}
89
90void TranslatorVisitor::IADD32I(u64 insn) {
91 union {
92 u64 raw;
93 BitField<52, 1, u64> cc;
94 BitField<53, 1, u64> x;
95 BitField<54, 1, u64> sat;
96 BitField<55, 2, u64> three_for_po;
97 BitField<56, 1, u64> neg_a;
98 } const iadd32i{insn};
99
100 const bool po{iadd32i.three_for_po == 3};
101 const bool neg_a{!po && iadd32i.neg_a != 0};
102 IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
103}
104
105} // namespace Shader::Maxwell
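The X and CC plumbing above is what lets 32-bit adds chain into wider ones. A host-side sketch of a 64-bit add built from two 32-bit adds with an explicit carry (illustrative, not diff code):

    u64 Add64Via32(u32 a_lo, u32 a_hi, u32 b_lo, u32 b_hi) {
        const u32 lo{a_lo + b_lo};
        const u32 carry{lo < a_lo ? 1u : 0u}; // what IADD.CC stores in the C flag
        const u32 hi{a_hi + b_hi + carry};    // what IADD.X adds back in
        return (static_cast<u64>(hi) << 32) | lo;
    }

The three_for_po field deliberately overlaps neg_a and neg_b: both negate bits set at once (value 3) encode .PO rather than a double negation, which is why neg_b is ignored when po is true.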
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Shift : u64 {
12 None,
13 Right,
14 Left,
15};
16enum class Half : u64 {
17 All,
18 Lower,
19 Upper,
20};
21
22[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
23 constexpr bool is_signed{false};
24 switch (half) {
25 case Half::All:
26 return value;
27 case Half::Lower:
28 return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
29 case Half::Upper:
30 return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
31 }
32 throw NotImplementedException("Invalid half");
33}
34
35[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
36 switch (shift) {
37 case Shift::None:
38 return value;
39 case Shift::Right: {
40 // 33-bit RS IADD3 edge case
41 const IR::U1 edge_case{ir.GetCarryFromOp(value)};
42 const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
43 return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
44 }
45 case Shift::Left:
46 return ir.ShiftLeftLogical(value, ir.Imm32(16));
47 }
48 throw NotImplementedException("Invalid shift");
49}
50
51void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
52 Shift shift = Shift::None) {
53 union {
54 u64 insn;
55 BitField<0, 8, IR::Reg> dest_reg;
56 BitField<47, 1, u64> cc;
57 BitField<48, 1, u64> x;
58 BitField<49, 1, u64> neg_c;
59 BitField<50, 1, u64> neg_b;
60 BitField<51, 1, u64> neg_a;
61 } const iadd3{insn};
62
63 if (iadd3.neg_a != 0) {
64 op_a = v.ir.INeg(op_a);
65 }
66 if (iadd3.neg_b != 0) {
67 op_b = v.ir.INeg(op_b);
68 }
69 if (iadd3.neg_c != 0) {
70 op_c = v.ir.INeg(op_c);
71 }
72 IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
73 if (iadd3.x != 0) {
74 // TODO: How does RS behave when X is set?
75 if (shift == Shift::Right) {
76 throw NotImplementedException("IADD3 X+RS");
77 }
78 const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
79 lhs_1 = v.ir.IAdd(lhs_1, carry);
80 }
81 const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
82 const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
83
84 v.X(iadd3.dest_reg, result);
85 if (iadd3.cc != 0) {
86 // TODO: How does CC behave when X is set?
87 if (iadd3.x != 0) {
88 throw NotImplementedException("IADD3 X+CC");
89 }
90 v.SetZFlag(v.ir.GetZeroFromOp(result));
91 v.SetSFlag(v.ir.GetSignFromOp(result));
92 v.SetCFlag(v.ir.GetCarryFromOp(result));
93 const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
94 v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
95 }
96}
97} // Anonymous namespace
98
99void TranslatorVisitor::IADD3_reg(u64 insn) {
100 union {
101 u64 insn;
102 BitField<37, 2, Shift> shift;
103 BitField<35, 2, Half> half_a;
104 BitField<33, 2, Half> half_b;
105 BitField<31, 2, Half> half_c;
106 } const iadd3{insn};
107
108 const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
109 const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
110 const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
111 IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
112}
113
114void TranslatorVisitor::IADD3_cbuf(u64 insn) {
115 IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
116}
117
118void TranslatorVisitor::IADD3_imm(u64 insn) {
119 IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
120}
121
122} // namespace Shader::Maxwell
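The Shift::Right branch above compensates for the intermediate a+b sum being 33 bits wide on hardware, one bit more than the 32-bit IR add keeps. On host integers the intended value is simply (sketch):

    u32 Shr16With33Bits(u32 a, u32 b) {
        const u64 sum{static_cast<u64>(a) + b}; // up to 33 significant bits
        return static_cast<u32>(sum >> 16);     // a carry reappears as +0x10000
    }

GetCarryFromOp recovers that 33rd bit from the IR add so the Select can re-insert it after the shift.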
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<48, 1, u64> is_signed;
18 BitField<49, 3, CompareOp> compare_op;
19 } const icmp{insn};
20
21 const IR::U32 zero{v.ir.Imm32(0)};
22 const bool is_signed{icmp.is_signed != 0};
23 const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
24
25 const IR::U32 src_reg{v.X(icmp.src_reg)};
26 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
27
28 v.X(icmp.dest_reg, result);
29}
30} // Anonymous namespace
31
32void TranslatorVisitor::ICMP_reg(u64 insn) {
33 ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
34}
35
36void TranslatorVisitor::ICMP_rc(u64 insn) {
37 ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
38}
39
40void TranslatorVisitor::ICMP_cr(u64 insn) {
41 ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
42}
43
44void TranslatorVisitor::ICMP_imm(u64 insn) {
45 ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
46}
47
48} // namespace Shader::Maxwell
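ICMP reduces to a conditional move keyed on a compare against zero; in scalar form:

    // dest = (operand <compare_op> 0) ? src_reg : src_a;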
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
19 union {
20 u64 insn;
21 BitField<0, 8, IR::Reg> dest_reg;
22 BitField<8, 8, IR::Reg> src_reg;
23 BitField<39, 3, IR::Pred> pred;
24 BitField<42, 1, u64> neg_pred;
25 BitField<43, 1, u64> x;
26 BitField<44, 1, u64> bf;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<47, 1, u64> cc;
29 BitField<48, 1, u64> is_signed;
30 BitField<49, 3, CompareOp> compare_op;
31 } const iset{insn};
32
33 const IR::U32 src_a{v.X(iset.src_reg)};
34 const bool is_signed{iset.is_signed != 0};
35 const IR::U32 zero{v.ir.Imm32(0)};
36 const bool x{iset.x != 0};
37 const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
38
39 IR::U1 pred{v.ir.GetPred(iset.pred)};
40 if (iset.neg_pred != 0) {
41 pred = v.ir.LogicalNot(pred);
42 }
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
48 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
49
50 v.X(iset.dest_reg, result);
51 if (iset.cc != 0) {
52 if (x) {
53 throw NotImplementedException("ISET.CC + X");
54 }
55 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
56 v.SetZFlag(is_zero);
57 if (iset.bf != 0) {
58 v.ResetSFlag();
59 } else {
60 v.SetSFlag(v.ir.LogicalNot(is_zero));
61 }
62 v.ResetCFlag();
63 v.ResetOFlag();
64 }
65}
66} // Anonymous namespace
67
68void TranslatorVisitor::ISET_reg(u64 insn) {
69 ISET(*this, insn, GetReg20(insn));
70}
71
72void TranslatorVisitor::ISET_cbuf(u64 insn) {
73 ISET(*this, insn, GetCbuf(insn));
74}
75
76void TranslatorVisitor::ISET_imm(u64 insn) {
77 ISET(*this, insn, GetImm20(insn));
78}
79
80} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class FloatFormat : u64 {
13 F16 = 1,
14 F32 = 2,
15 F64 = 3,
16};
17
18enum class IntFormat : u64 {
19 U8 = 0,
20 U16 = 1,
21 U32 = 2,
22 U64 = 3,
23};
24
25union Encoding {
26 u64 raw;
27 BitField<0, 8, IR::Reg> dest_reg;
28 BitField<8, 2, FloatFormat> float_format;
29 BitField<10, 2, IntFormat> int_format;
30 BitField<13, 1, u64> is_signed;
31 BitField<39, 2, FpRounding> fp_rounding;
32 BitField<41, 2, u64> selector;
33 BitField<47, 1, u64> cc;
34 BitField<45, 1, u64> neg;
35 BitField<49, 1, u64> abs;
36};
37
38bool Is64(u64 insn) {
39 return Encoding{insn}.int_format == IntFormat::U64;
40}
41
42int BitSize(FloatFormat format) {
43 switch (format) {
44 case FloatFormat::F16:
45 return 16;
46 case FloatFormat::F32:
47 return 32;
48 case FloatFormat::F64:
49 return 64;
50 }
51 throw NotImplementedException("Invalid float format {}", format);
52}
53
54IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
55 const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
56 const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
57 const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
58 const IR::U1 is_least{v.ir.IEqual(value, least_value)};
59 return IR::U32{v.ir.Select(is_least, value, absolute)};
60}
61
62void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
63 const Encoding i2f{insn};
64 if (i2f.cc != 0) {
65 throw NotImplementedException("I2F CC");
66 }
67 const bool is_signed{i2f.is_signed != 0};
68 int src_bitsize{};
69 switch (i2f.int_format) {
70 case IntFormat::U8:
71 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
72 v.ir.Imm32(8), is_signed);
73 if (i2f.abs != 0) {
74 src = SmallAbs(v, src, 8);
75 }
76 src_bitsize = 8;
77 break;
78 case IntFormat::U16:
79 if (i2f.selector == 1 || i2f.selector == 3) {
80 throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
81 }
82 src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
83 v.ir.Imm32(16), is_signed);
84 if (i2f.abs != 0) {
85 src = SmallAbs(v, src, 16);
86 }
87 src_bitsize = 16;
88 break;
89 case IntFormat::U32:
90 case IntFormat::U64:
91 if (i2f.selector != 0) {
92 throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
93 }
94 if (i2f.abs != 0 && is_signed) {
95 src = v.ir.IAbs(src);
96 }
97 src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
98 break;
99 }
100 const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
101 const int dst_bitsize{BitSize(i2f.float_format)};
102 const IR::FpControl fp_control{
103 .no_contraction = false,
104 .rounding = CastFpRounding(i2f.fp_rounding),
105 .fmz_mode = IR::FmzMode::DontCare,
106 };
107 auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
108 static_cast<size_t>(conversion_src_bitsize), is_signed, src,
109 fp_control)};
110 if (i2f.neg != 0) {
111 if (i2f.abs != 0 || !is_signed) {
112 // We know the value is positive
113 value = v.ir.FPNeg(value);
114 } else {
115 // Only negate if the input isn't the lowest value
116 IR::U1 is_least;
117 if (src_bitsize == 64) {
118 is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
119 } else if (src_bitsize == 32) {
120 is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
121 } else {
122 const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
123 is_least = v.ir.IEqual(src, least_value);
124 }
125 value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
126 }
127 }
128 switch (i2f.float_format) {
129 case FloatFormat::F16: {
130 const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
131 v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
132 break;
133 }
134 case FloatFormat::F32:
135 v.F(i2f.dest_reg, value);
136 break;
137 case FloatFormat::F64: {
138 if (!IR::IsAligned(i2f.dest_reg, 2)) {
139 throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
140 }
141 const IR::Value vector{v.ir.UnpackDouble2x32(value)};
142 for (int i = 0; i < 2; ++i) {
143 v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
144 }
145 break;
146 }
147 default:
148 throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
149 }
150}
151} // Anonymous namespace
152
153void TranslatorVisitor::I2F_reg(u64 insn) {
154 if (Is64(insn)) {
155 union {
156 u64 raw;
157 BitField<20, 8, IR::Reg> reg;
158 } const value{insn};
159 const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
160 I2F(*this, insn, ir.PackUint2x32(regs));
161 } else {
162 I2F(*this, insn, GetReg20(insn));
163 }
164}
165
166void TranslatorVisitor::I2F_cbuf(u64 insn) {
167 if (Is64(insn)) {
168 I2F(*this, insn, GetPackedCbuf(insn));
169 } else {
170 I2F(*this, insn, GetCbuf(insn));
171 }
172}
173
174void TranslatorVisitor::I2F_imm(u64 insn) {
175 if (Is64(insn)) {
176 I2F(*this, insn, GetPackedImm20(insn));
177 } else {
178 I2F(*this, insn, GetImm20(insn));
179 }
180}
181
182} // namespace Shader::Maxwell
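The SmallAbs helper above is the classic branchless absolute value; a standalone sketch of what it computes for the 8- and 16-bit cases (hypothetical name, assuming arithmetic right shift on signed values):

#include <cstdint>

// (value + mask) ^ mask negates the value exactly when mask is -1, i.e.
// when the sign bit of the sign-extended small integer is set. The most
// negative value is passed through unchanged, matching the Select guard.
int32_t SmallAbsModel(int32_t value, int bitsize) {
    const int32_t least = -(1 << (bitsize - 1));
    const int32_t mask = value >> (bitsize - 1); // 0 or -1 (arithmetic shift)
    const int32_t absolute = (value + mask) ^ mask;
    return value == least ? value : absolute;
}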
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class MaxShift : u64 {
12 U32,
13 Undefined,
14 U64,
15 S64,
16};
17
18IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
19 bool right_shift, bool is_signed) {
20 if (!right_shift) {
21 return ir.ShiftLeftLogical(packed_int, safe_shift);
22 }
23 if (is_signed) {
24 return ir.ShiftRightArithmetic(packed_int, safe_shift);
25 }
26 return ir.ShiftRightLogical(packed_int, safe_shift);
27}
28
29void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
30 bool right_shift) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<0, 8, IR::Reg> lo_bits_reg;
35 BitField<37, 2, MaxShift> max_shift;
36 BitField<47, 1, u64> cc;
37 BitField<48, 2, u64> x_mode;
38 BitField<50, 1, u64> wrap;
39 } const shf{insn};
40
41 if (shf.cc != 0) {
42 throw NotImplementedException("SHF CC");
43 }
44 if (shf.x_mode != 0) {
45 throw NotImplementedException("SHF X Mode");
46 }
47 if (shf.max_shift == MaxShift::Undefined) {
48 throw NotImplementedException("SHF Use of undefined MaxShift value");
49 }
50 const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
51 const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
52 const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
53 const IR::U32 safe_shift{shf.wrap != 0
54 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
55 : v.ir.UMin(shift, max_shift)};
56
57 const bool is_signed{shf.max_shift == MaxShift::S64};
58 const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
59 const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
60
61 const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
62 v.X(shf.dest_reg, result);
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::SHF_l_reg(u64 insn) {
67 SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
68}
69
70void TranslatorVisitor::SHF_l_imm(u64 insn) {
71 SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
72}
73
74void TranslatorVisitor::SHF_r_reg(u64 insn) {
75 SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
76}
77
78void TranslatorVisitor::SHF_r_imm(u64 insn) {
79 SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
80}
81
82} // namespace Shader::Maxwell
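A scalar sketch of the funnel-shift scheme used above: the two registers form one 64-bit value, and the result keeps the low word for right shifts or the high word for left shifts (hypothetical names; the wrap/clamp handling of the shift amount is simplified here):

#include <cstdint>

uint32_t FunnelShiftRight(uint32_t lo, uint32_t hi, uint32_t shift) {
    const uint64_t packed = (uint64_t{hi} << 32) | lo;
    return static_cast<uint32_t>(packed >> (shift & 63)); // keep low word
}

uint32_t FunnelShiftLeft(uint32_t lo, uint32_t hi, uint32_t shift) {
    const uint64_t packed = (uint64_t{hi} << 32) | lo;
    return static_cast<uint32_t>((packed << (shift & 63)) >> 32); // keep high word
}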
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 2, u64> mode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const imnmx{insn};
22
23 if (imnmx.cc != 0) {
24 throw NotImplementedException("IMNMX CC");
25 }
26
27 if (imnmx.mode != 0) {
28 throw NotImplementedException("IMNMX.MODE");
29 }
30
31 const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
32 const IR::U32 op_a{v.X(imnmx.src_reg)};
33 IR::U32 min;
34 IR::U32 max;
35
36 if (imnmx.is_signed != 0) {
37 min = IR::U32{v.ir.SMin(op_a, op_b)};
38 max = IR::U32{v.ir.SMax(op_a, op_b)};
39 } else {
40 min = IR::U32{v.ir.UMin(op_a, op_b)};
41 max = IR::U32{v.ir.UMax(op_a, op_b)};
42 }
43 if (imnmx.neg_pred != 0) {
44 std::swap(min, max);
45 }
46
47 const IR::U32 result{v.ir.Select(pred, min, max)};
48 v.X(imnmx.dest_reg, result);
49}
50} // Anonymous namespace
51
52void TranslatorVisitor::IMNMX_reg(u64 insn) {
53 IMNMX(*this, insn, GetReg20(insn));
54}
55
56void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
57 IMNMX(*this, insn, GetCbuf(insn));
58}
59
60void TranslatorVisitor::IMNMX_imm(u64 insn) {
61 IMNMX(*this, insn, GetImm20(insn));
62}
63
64} // namespace Shader::Maxwell
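In scalar terms, IMNMX selects between minimum and maximum with a predicate; negating the predicate is implemented above by swapping the two candidates, which is equivalent to inverting pred in this sketch (hypothetical name):

#include <algorithm>
#include <cstdint>

int32_t ImnmxModel(int32_t a, int32_t b, bool pred) {
    return pred ? std::min(a, b) : std::max(a, b);
}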
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 } const popc{insn};
17
18 const IR::U32 operand{popc.tilde == 0 ? src : v.ir.BitwiseNot(src)};
19 const IR::U32 result{v.ir.BitCount(operand)};
20 v.X(popc.dest_reg, result);
21}
22} // Anonymous namespace
23
24void TranslatorVisitor::POPC_reg(u64 insn) {
25 POPC(*this, insn, GetReg20(insn));
26}
27
28void TranslatorVisitor::POPC_cbuf(u64 insn) {
29 POPC(*this, insn, GetCbuf(insn));
30}
31
32void TranslatorVisitor::POPC_imm(u64 insn) {
33 POPC(*this, insn, GetImm20(insn));
34}
35
36} // namespace Shader::Maxwell
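The tilde modifier counts the bits of the complement instead of the source; a one-line scalar equivalent (hypothetical name):

#include <bit>
#include <cstdint>

uint32_t PopcModel(uint32_t src, bool tilde) {
    return static_cast<uint32_t>(std::popcount(tilde ? ~src : src));
}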
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
12 u64 scale_imm) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> op_a;
17 } const iscadd{insn};
18
19 const bool po{neg_a && neg_b};
20 IR::U32 op_a{v.X(iscadd.op_a)};
21 if (po) {
22 // When PO is present, add one
23 op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
24 } else {
25 // When PO is not present, the bits are interpreted as negation
26 if (neg_a) {
27 op_a = v.ir.INeg(op_a);
28 }
29 if (neg_b) {
30 op_b = v.ir.INeg(op_b);
31 }
32 }
33 // With the operands already processed, scale A
34 const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
35 const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};
36
37 const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
38 v.X(iscadd.dest_reg, result);
39
40 if (cc) {
41 v.SetZFlag(v.ir.GetZeroFromOp(result));
42 v.SetSFlag(v.ir.GetSignFromOp(result));
43 const IR::U1 carry{v.ir.GetCarryFromOp(result)};
44 const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
45 v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
46 v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
47 }
48}
49
50void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
51 union {
52 u64 raw;
53 BitField<47, 1, u64> cc;
54 BitField<48, 1, u64> neg_b;
55 BitField<49, 1, u64> neg_a;
56 BitField<39, 5, u64> scale;
57 } const iscadd{insn};
58
59 ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
60}
61
62} // Anonymous namespace
63
64void TranslatorVisitor::ISCADD_reg(u64 insn) {
65 ISCADD(*this, insn, GetReg20(insn));
66}
67
68void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
69 ISCADD(*this, insn, GetCbuf(insn));
70}
71
72void TranslatorVisitor::ISCADD_imm(u64 insn) {
73 ISCADD(*this, insn, GetImm20(insn));
74}
75
76void TranslatorVisitor::ISCADD32I(u64 insn) {
77 union {
78 u64 raw;
79 BitField<52, 1, u64> cc;
80 BitField<53, 5, u64> scale;
81 } const iscadd{insn};
82
83 ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
84}
85
86} // namespace Shader::Maxwell
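A scalar sketch of ISCADD's operand handling (hypothetical name): both negation bits set together mean PO ("plus one") rather than double negation, and A is scaled only after that step:

#include <cstdint>

uint32_t IscaddModel(uint32_t a, uint32_t b, uint32_t scale, bool neg_a, bool neg_b) {
    if (neg_a && neg_b) {
        b += 1; // PO: both bits together mean "plus one", not double negation
    } else {
        if (neg_a) {
            a = 0u - a;
        }
        if (neg_b) {
            b = 0u - b;
        }
    }
    return (a << scale) + b; // scale A after the negate/PO step, then add
}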
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
18void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
19 union {
20 u64 raw;
21 BitField<0, 3, IR::Pred> dest_pred_b;
22 BitField<3, 3, IR::Pred> dest_pred_a;
23 BitField<8, 8, IR::Reg> src_reg_a;
24 BitField<39, 3, IR::Pred> bop_pred;
25 BitField<42, 1, u64> neg_bop_pred;
26 BitField<43, 1, u64> x;
27 BitField<45, 2, BooleanOp> bop;
28 BitField<48, 1, u64> is_signed;
29 BitField<49, 3, CompareOp> compare_op;
30 } const isetp{insn};
31
32 const bool is_signed{isetp.is_signed != 0};
33 const bool x{isetp.x != 0};
34 const BooleanOp bop{isetp.bop};
35 const CompareOp compare_op{isetp.compare_op};
36 const IR::U32 op_a{v.X(isetp.src_reg_a)};
37 const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
38 const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
39 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
40 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
41 v.ir.SetPred(isetp.dest_pred_a, result_a);
42 v.ir.SetPred(isetp.dest_pred_b, result_b);
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::ISETP_reg(u64 insn) {
47 ISETP(*this, insn, GetReg20(insn));
48}
49
50void TranslatorVisitor::ISETP_cbuf(u64 insn) {
51 ISETP(*this, insn, GetCbuf(insn));
52}
53
54void TranslatorVisitor::ISETP_imm(u64 insn) {
55 ISETP(*this, insn, GetImm20(insn));
56}
57
58} // namespace Shader::Maxwell
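ISETP writes two predicates: the second receives the combination of the negated comparison with the same source predicate. A sketch with BooleanOp::AND hard-coded (hypothetical name):

#include <utility>

std::pair<bool, bool> IsetpModel(bool comparison, bool bop_pred) {
    const bool result_a = comparison && bop_pred;
    const bool result_b = !comparison && bop_pred;
    return {result_a, result_b}; // {dest_pred_a, dest_pred_b}
}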
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> w;
17 BitField<43, 1, u64> x;
18 BitField<47, 1, u64> cc;
19 } const shl{insn};
20
21 if (shl.x != 0) {
22 throw NotImplementedException("SHL.X");
23 }
24 if (shl.cc != 0) {
25 throw NotImplementedException("SHL.CC");
26 }
27 const IR::U32 base{v.X(shl.src_reg_a)};
28 IR::U32 result;
29 if (shl.w != 0) {
30 // When .W is set, the shift value is wrapped modulo 32.
31 // To emulate this, we wrap it ourselves.
32 const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
33 result = v.ir.ShiftLeftLogical(base, shift);
34 } else {
35 // When .W is not set, the shift value is clamped between 0 and 32.
36 // To emulate this we have to keep in mind the special shift of 32, which evaluates to 0.
37 // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
38 //
39 // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
40 // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
41 // or equal to the bit width of the components of Base."
42 //
43 // And per the GLASM specification it is also safe to evaluate out of bounds:
44 //
45 // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
46 // "The results of a shift operation ("<<") are undefined if the value of the second operand
47 // is negative, or greater than or equal to the number of bits in the first operand."
48 //
49 // Emphasis on undefined results in contrast to undefined behavior.
50 //
51 const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
52 const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
53 result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
54 }
55 v.X(shl.dest_reg, result);
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64 insn) {
60 SHL(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::SHL_cbuf(u64 insn) {
64 SHL(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
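The comment block above justifies evaluating the out-of-range shift and fixing it up with a select; in scalar terms the intended result is (hypothetical name):

#include <cstdint>

uint32_t ShlModel(uint32_t base, uint32_t shift, bool wrap) {
    if (wrap) {
        return base << (shift & 31); // .W: shift wraps modulo 32
    }
    return shift < 32 ? base << shift : 0u; // clamped: shifts >= 32 yield 0
}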
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg_a;
16 BitField<39, 1, u64> is_wrapped;
17 BitField<40, 1, u64> brev;
18 BitField<43, 1, u64> xmode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const shr{insn};
22
23 if (shr.xmode != 0) {
24 throw NotImplementedException("SHR.XMODE");
25 }
26 if (shr.cc != 0) {
27 throw NotImplementedException("SHR.CC");
28 }
29
30 IR::U32 base{v.X(shr.src_reg_a)};
31 if (shr.brev == 1) {
32 base = v.ir.BitReverse(base);
33 }
34 IR::U32 result;
35 const IR::U32 safe_shift{shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31))};
36 if (shr.is_signed == 1) {
37 result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
38 } else {
39 result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
40 }
41
42 if (shr.is_wrapped == 0) {
43 const IR::U32 zero{v.ir.Imm32(0)};
44 const IR::U32 safe_bits{v.ir.Imm32(32)};
45
46 const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
47 const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
48 const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
49 result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
50 }
51 v.X(shr.dest_reg, result);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::SHR_reg(u64 insn) {
56 SHR(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::SHR_cbuf(u64 insn) {
60 SHR(*this, insn, GetCbuf(insn));
61}
62
63void TranslatorVisitor::SHR_imm(u64 insn) {
64 SHR(*this, insn, GetImm20(insn));
65}
66} // namespace Shader::Maxwell
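A sketch of the saturating semantics the clamp above is meant to capture (hypothetical name; the IR version tests the sign of the shifted value rather than the operand's sign):

#include <cstdint>

uint32_t ShrModel(uint32_t base, uint32_t shift, bool is_signed) {
    if (shift >= 32) {
        // Arithmetic shifts saturate to the replicated sign bit, logical to 0
        return is_signed && (base >> 31) != 0 ? 0xffffffffu : 0u;
    }
    return is_signed ? static_cast<uint32_t>(static_cast<int32_t>(base) >> shift)
                     : base >> shift;
}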
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SelectMode : u64 {
12 Default,
13 CLO,
14 CHI,
15 CSFU,
16 CBCC,
17};
18
19enum class Half : u64 {
20 H0, // Least-significant bits (15:0)
21 H1, // Most-significant bits (31:16)
22};
23
24IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
25 const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
26 return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
27}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49 // .PSL shifts the product left by 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61 return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b's bits [15:0] into the result's bits [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc != 0) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64 insn) {
82 union {
83 u64 raw;
84 BitField<35, 1, Half> half_b;
85 BitField<36, 1, u64> psl;
86 BitField<37, 1, u64> mrg;
87 BitField<38, 1, u64> x;
88 BitField<50, 3, SelectMode> select_mode;
89 } const xmad{insn};
90
91 XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
92 xmad.mrg != 0, xmad.x != 0);
93}
94
95void TranslatorVisitor::XMAD_rc(u64 insn) {
96 union {
97 u64 raw;
98 BitField<50, 2, SelectMode> select_mode;
99 BitField<52, 1, Half> half_b;
100 BitField<54, 1, u64> x;
101 } const xmad{insn};
102
103 XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
104 xmad.x != 0);
105}
106
107void TranslatorVisitor::XMAD_cr(u64 insn) {
108 union {
109 u64 raw;
110 BitField<50, 2, SelectMode> select_mode;
111 BitField<52, 1, Half> half_b;
112 BitField<54, 1, u64> x;
113 BitField<55, 1, u64> psl;
114 BitField<56, 1, u64> mrg;
115 } const xmad{insn};
116
117 XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
118 xmad.mrg != 0, xmad.x != 0);
119}
120
121void TranslatorVisitor::XMAD_imm(u64 insn) {
122 union {
123 u64 raw;
124 BitField<20, 16, u64> src_b;
125 BitField<36, 1, u64> psl;
126 BitField<37, 1, u64> mrg;
127 BitField<38, 1, u64> x;
128 BitField<50, 3, SelectMode> select_mode;
129 } const xmad{insn};
130
131 XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
132 Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
133}
134
135} // namespace Shader::Maxwell
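A scalar sketch of XMAD's unsigned core, covering the .PSL product shift and the .MRG merge (hypothetical name; signed half extraction and the CLO/CHI/CBCC third-operand modes are omitted):

#include <cstdint>

uint32_t XmadModel(uint32_t a, uint32_t b, uint32_t c, bool half_a, bool half_b,
                   bool psl, bool mrg) {
    const uint32_t op_a = (half_a ? a >> 16 : a) & 0xffff;
    const uint32_t op_b = (half_b ? b >> 16 : b) & 0xffff;
    uint32_t product = op_a * op_b;
    if (psl) {
        product <<= 16; // .PSL shifts the 16x16 product into the high half
    }
    uint32_t result = product + c;
    if (mrg) {
        result = (result & 0xffff) | (b << 16); // .MRG: b[15:0] -> result[31:16]
    }
    return result;
}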
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class IntegerWidth : u64 {
12 Byte,
13 Short,
14 Word,
15};
16
17[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
18 switch (width) {
19 case IntegerWidth::Byte:
20 return ir.Imm32(8);
21 case IntegerWidth::Short:
22 return ir.Imm32(16);
23 case IntegerWidth::Word:
24 return ir.Imm32(32);
25 default:
26 throw NotImplementedException("Invalid width {}", width);
27 }
28}
29
30[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
31 IntegerWidth dst_width) {
32 const IR::U32 zero{ir.Imm32(0)};
33 const IR::U32 count{WidthSize(ir, dst_width)};
34 return ir.BitFieldExtract(src, zero, count, false);
35}
36
37[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
38 bool dst_signed, bool src_signed) {
39 IR::U32 min{};
40 IR::U32 max{};
41 const IR::U32 zero{ir.Imm32(0)};
42 switch (dst_width) {
43 case IntegerWidth::Byte:
44 min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero;
45 max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
46 break;
47 case IntegerWidth::Short:
48 min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero;
49 max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
50 break;
51 case IntegerWidth::Word:
52 min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero;
53 max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
54 break;
55 default:
56 throw NotImplementedException("Invalid width {}", dst_width);
57 }
58 const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
59 return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
60}
61
62void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
63 union {
64 u64 insn;
65 BitField<0, 8, IR::Reg> dest_reg;
66 BitField<8, 2, IntegerWidth> dst_fmt;
67 BitField<12, 1, u64> dst_fmt_sign;
68 BitField<10, 2, IntegerWidth> src_fmt;
69 BitField<13, 1, u64> src_fmt_sign;
70 BitField<41, 3, u64> selector;
71 BitField<45, 1, u64> neg;
72 BitField<47, 1, u64> cc;
73 BitField<49, 1, u64> abs;
74 BitField<50, 1, u64> sat;
75 } const i2i{insn};
76
77 if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
78 throw NotImplementedException("16-bit source format incompatible with selector {}",
79 i2i.selector.Value());
80 }
81 if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
82 throw NotImplementedException("32-bit source format incompatible with selector {}",
83 i2i.selector.Value());
84 }
85
86 const s32 selector{static_cast<s32>(i2i.selector)};
87 const IR::U32 offset{v.ir.Imm32(selector * 8)};
88 const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
89 const bool src_signed{i2i.src_fmt_sign != 0};
90 const bool dst_signed{i2i.dst_fmt_sign != 0};
91 const bool sat{i2i.sat != 0};
92
93 IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
94 if (i2i.abs != 0) {
95 src_values = v.ir.IAbs(src_values);
96 }
97 if (i2i.neg != 0) {
98 src_values = v.ir.INeg(src_values);
99 }
100 const IR::U32 result{
101 sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
102 : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};
103
104 v.X(i2i.dest_reg, result);
105 if (i2i.cc != 0) {
106 v.SetZFlag(v.ir.GetZeroFromOp(result));
107 v.SetSFlag(v.ir.GetSignFromOp(result));
108 v.ResetCFlag();
109 v.ResetOFlag();
110 }
111}
112} // Anonymous namespace
113
114void TranslatorVisitor::I2I_reg(u64 insn) {
115 I2I(*this, insn, GetReg20(insn));
116}
117
118void TranslatorVisitor::I2I_cbuf(u64 insn) {
119 I2I(*this, insn, GetCbuf(insn));
120}
121
122void TranslatorVisitor::I2I_imm(u64 insn) {
123 I2I(*this, insn, GetImm20(insn));
124}
125
126} // namespace Shader::Maxwell
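The difference between .SAT and plain conversion above, reduced to the byte-sized destination cases (hypothetical names):

#include <algorithm>
#include <cstdint>

// With .SAT the value is clamped to the destination range...
int32_t SaturateToS8(int32_t value) {
    return std::clamp(value, -128, 127);
}

// ...while without it the conversion simply truncates to the low bits.
uint32_t TruncateToU8(uint32_t value) {
    return value & 0xff;
}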
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 Patch,
14 Prim,
15 Attr,
16};
17
18enum class Shift : u64 {
19 Default,
20 U16,
21 B32,
22};
23
24} // Anonymous namespace
25
26void TranslatorVisitor::ISBERD(u64 insn) {
27 union {
28 u64 raw;
29 BitField<0, 8, IR::Reg> dest_reg;
30 BitField<8, 8, IR::Reg> src_reg;
31 BitField<31, 1, u64> skew;
32 BitField<32, 1, u64> o;
33 BitField<33, 2, Mode> mode;
34 BitField<47, 2, Shift> shift;
35 } const isberd{insn};
36
37 if (isberd.skew != 0) {
38 throw NotImplementedException("SKEW");
39 }
40 if (isberd.o != 0) {
41 throw NotImplementedException("O");
42 }
43 if (isberd.mode != Mode::Default) {
44 throw NotImplementedException("Mode {}", isberd.mode.Value());
45 }
46 if (isberd.shift != Shift::Default) {
47 throw NotImplementedException("Shift {}", isberd.shift.Value());
48 }
49 LOG_WARNING(Shader, "(STUBBED) called");
50 X(isberd.dest_reg, X(isberd.src_reg));
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
9
10namespace Shader::Maxwell {
11using namespace LDC;
12namespace {
13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
14 const IR::U32& reg, const IR::U32& imm) {
15 switch (mode) {
16 case Mode::Default:
17 return {imm_index, ir.IAdd(reg, imm)};
18 default:
19 break;
20 }
21 throw NotImplementedException("Mode {}", mode);
22}
23} // Anonymous namespace
24
25void TranslatorVisitor::LDC(u64 insn) {
26 const Encoding ldc{insn};
27 const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
28 const IR::U32 reg{X(ldc.src_reg)};
29 const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
30 const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
31 switch (ldc.size) {
32 case Size::U8:
33 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
34 break;
35 case Size::S8:
36 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
37 break;
38 case Size::U16:
39 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
40 break;
41 case Size::S16:
42 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
43 break;
44 case Size::B32:
45 X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
46 break;
47 case Size::B64: {
48 if (!IR::IsAligned(ldc.dest_reg, 2)) {
49 throw NotImplementedException("Unaligned destination register");
50 }
51 const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
52 for (int i = 0; i < 2; ++i) {
53 X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
54 }
55 break;
56 }
57 default:
58 throw NotImplementedException("Invalid size {}", ldc.size.Value());
59 }
60}
61
62} // namespace Shader::Maxwell
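In Default mode, Slot above resolves to a fixed constant buffer index and a register-plus-immediate byte offset; a minimal sketch (hypothetical name):

#include <cstdint>
#include <utility>

std::pair<uint32_t, uint32_t> LdcSlotModel(uint32_t imm_index, uint32_t reg, uint32_t imm) {
    return {imm_index, reg + imm}; // {cbuf index, byte offset}
}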
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/reg.h"
10
11namespace Shader::Maxwell::LDC {
12
13enum class Mode : u64 {
14 Default,
15 IL,
16 IS,
17 ISL,
18};
19
20enum class Size : u64 {
21 U8,
22 S8,
23 U16,
24 S16,
25 B32,
26 B64,
27};
28
29union Encoding {
30 u64 raw;
31 BitField<0, 8, IR::Reg> dest_reg;
32 BitField<8, 8, IR::Reg> src_reg;
33 BitField<20, 16, s64> offset;
34 BitField<36, 5, u64> index;
35 BitField<44, 2, Mode> mode;
36 BitField<48, 3, Size> size;
37};
38
39} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
12 bool neg, bool x) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> offset_lo_reg;
17 BitField<47, 1, u64> cc;
18 BitField<48, 3, IR::Pred> pred;
19 } const lea{insn};
20
21 if (x) {
22 throw NotImplementedException("LEA.HI X");
23 }
24 if (lea.pred != IR::Pred::PT) {
25 throw NotImplementedException("LEA.HI Pred");
26 }
27 if (lea.cc != 0) {
28 throw NotImplementedException("LEA.HI CC");
29 }
30
31 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
32 const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
33 const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};
34
35 const s32 hi_scale{32 - static_cast<s32>(scale)};
36 const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
37 const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};
38
39 IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
40 v.X(lea.dest_reg, result);
41}
42
43void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
44 union {
45 u64 insn;
46 BitField<0, 8, IR::Reg> dest_reg;
47 BitField<8, 8, IR::Reg> offset_lo_reg;
48 BitField<39, 5, u64> scale;
49 BitField<45, 1, u64> neg;
50 BitField<46, 1, u64> x;
51 BitField<47, 1, u64> cc;
52 BitField<48, 3, IR::Pred> pred;
53 } const lea{insn};
54 if (lea.x != 0) {
55 throw NotImplementedException("LEA.LO X");
56 }
57 if (lea.pred != IR::Pred::PT) {
58 throw NotImplementedException("LEA.LO Pred");
59 }
60 if (lea.cc != 0) {
61 throw NotImplementedException("LEA.LO CC");
62 }
63
64 const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
65 const s32 scale{static_cast<s32>(lea.scale)};
66 const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
67 const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};
68
69 IR::U32 result{v.ir.IAdd(base, scaled_offset)};
70 v.X(lea.dest_reg, result);
71}
72} // Anonymous namespace
73
74void TranslatorVisitor::LEA_hi_reg(u64 insn) {
75 union {
76 u64 insn;
77 BitField<28, 5, u64> scale;
78 BitField<37, 1, u64> neg;
79 BitField<38, 1, u64> x;
80 } const lea{insn};
81
82 LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
83}
84
85void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
86 union {
87 u64 insn;
88 BitField<51, 5, u64> scale;
89 BitField<56, 1, u64> neg;
90 BitField<57, 1, u64> x;
91 } const lea{insn};
92
93 LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
94}
95
96void TranslatorVisitor::LEA_lo_reg(u64 insn) {
97 LEA_lo(*this, insn, GetReg20(insn));
98}
99
100void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
101 LEA_lo(*this, insn, GetCbuf(insn));
102}
103
104void TranslatorVisitor::LEA_lo_imm(u64 insn) {
105 LEA_lo(*this, insn, GetImm20(insn));
106}
107
108} // namespace Shader::Maxwell
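LEA.HI adds to the base the bits that (offset << scale) would carry into the upper 32-bit word, hence the right shift by (32 - scale); a scalar sketch (hypothetical name, negation omitted):

#include <cstdint>

uint32_t LeaHiModel(uint32_t base, uint32_t offset_lo, uint32_t offset_hi, uint32_t scale) {
    const uint64_t offset = (uint64_t{offset_hi} << 32) | offset_lo;
    return base + static_cast<uint32_t>(offset >> (32 - scale));
}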
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/ir_emitter.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Size : u64 {
15 B32,
16 B64,
17 B96,
18 B128,
19};
20
21enum class InterpolationMode : u64 {
22 Pass,
23 Multiply,
24 Constant,
25 Sc,
26};
27
28enum class SampleMode : u64 {
29 Default,
30 Centroid,
31 Offset,
32};
33
34u32 NumElements(Size size) {
35 switch (size) {
36 case Size::B32:
37 return 1;
38 case Size::B64:
39 return 2;
40 case Size::B96:
41 return 3;
42 case Size::B128:
43 return 4;
44 }
45 throw InvalidArgument("Invalid size {}", size);
46}
47
48template <typename F>
49void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
50 const IR::U32 index_value{v.X(index_reg)};
51 for (u32 element = 0; element < num_elements; ++element) {
52 const IR::U32 final_offset{
53 element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
54 f(element, final_offset);
55 }
56}
57
58} // Anonymous namespace
59
60void TranslatorVisitor::ALD(u64 insn) {
61 union {
62 u64 raw;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> index_reg;
65 BitField<20, 10, u64> absolute_offset;
66 BitField<20, 11, s64> relative_offset;
67 BitField<39, 8, IR::Reg> vertex_reg;
68 BitField<32, 1, u64> o;
69 BitField<31, 1, u64> patch;
70 BitField<47, 2, Size> size;
71 } const ald{insn};
72
73 const u64 offset{ald.absolute_offset.Value()};
74 if (offset % 4 != 0) {
75 throw NotImplementedException("Unaligned absolute offset {}", offset);
76 }
77 const IR::U32 vertex{X(ald.vertex_reg)};
78 const u32 num_elements{NumElements(ald.size)};
79 if (ald.index_reg == IR::Reg::RZ) {
80 for (u32 element = 0; element < num_elements; ++element) {
81 if (ald.patch != 0) {
82 const IR::Patch patch{offset / 4 + element};
83 F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
84 } else {
85 const IR::Attribute attr{offset / 4 + element};
86 F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
87 }
88 }
89 return;
90 }
91 if (ald.patch != 0) {
92 throw NotImplementedException("Indirect patch read");
93 }
94 HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
95 F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
96 });
97}
98
99void TranslatorVisitor::AST(u64 insn) {
100 union {
101 u64 raw;
102 BitField<0, 8, IR::Reg> src_reg;
103 BitField<8, 8, IR::Reg> index_reg;
104 BitField<20, 10, u64> absolute_offset;
105 BitField<20, 11, s64> relative_offset;
106 BitField<31, 1, u64> patch;
107 BitField<39, 8, IR::Reg> vertex_reg;
108 BitField<47, 2, Size> size;
109 } const ast{insn};
110
111 // Indexed attribute stores reuse the per-element handling below; only
112 // the indexed tessellation patch store remains unimplemented and throws
113 // after the offset checks.
114 const u64 offset{ast.absolute_offset.Value()};
115 if (offset % 4 != 0) {
116 throw NotImplementedException("Unaligned absolute offset {}", offset);
117 }
118 const IR::U32 vertex{X(ast.vertex_reg)};
119 const u32 num_elements{NumElements(ast.size)};
120 if (ast.index_reg == IR::Reg::RZ) {
121 for (u32 element = 0; element < num_elements; ++element) {
122 if (ast.patch != 0) {
123 const IR::Patch patch{offset / 4 + element};
124 ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
125 } else {
126 const IR::Attribute attr{offset / 4 + element};
127 ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
128 }
129 }
130 return;
131 }
132 if (ast.patch != 0) {
133 throw NotImplementedException("Indexed tessellation patch store");
134 }
135 HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
136 ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
137 });
138}
139
140void TranslatorVisitor::IPA(u64 insn) {
141 // IPA is the instruction used to read varyings from a fragment shader.
142 // gl_FragCoord is mapped to the gl_Position attribute.
143 // It yields unknown results when used outside of the fragment shader stage.
144 union {
145 u64 raw;
146 BitField<0, 8, IR::Reg> dest_reg;
147 BitField<8, 8, IR::Reg> index_reg;
148 BitField<20, 8, IR::Reg> multiplier;
149 BitField<30, 8, IR::Attribute> attribute;
150 BitField<38, 1, u64> idx;
151 BitField<51, 1, u64> sat;
152 BitField<52, 2, SampleMode> sample_mode;
153 BitField<54, 2, InterpolationMode> interpolation_mode;
154 } const ipa{insn};
155
156 // Indexed IPAs are used for indexed varyings.
157 // For example:
158 //
159 // in vec4 colors[4];
160 // uniform int idx;
161 // void main() {
162 // gl_FragColor = colors[idx];
163 // }
164 const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
165 const IR::Attribute attribute{ipa.attribute};
166 IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
167 : ir.GetAttribute(attribute)};
168 if (IR::IsGeneric(attribute)) {
169 const ProgramHeader& sph{env.SPH()};
170 const u32 attr_index{IR::GenericAttributeIndex(attribute)};
171 const u32 element{static_cast<u32>(attribute) % 4};
172 const std::array input_map{sph.ps.GenericInputMap(attr_index)};
173 const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
174 if (is_perspective) {
175 const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
176 value = ir.FPMul(value, position_w);
177 }
178 }
179 if (ipa.interpolation_mode == InterpolationMode::Multiply) {
180 value = ir.FPMul(value, F(ipa.multiplier));
181 }
182
183 // Saturated IPAs are generally generated out of clamped varyings.
184 // For example: clamp(some_varying, 0.0, 1.0)
185 const bool is_saturated{ipa.sat != 0};
186 if (is_saturated) {
187 if (attribute == IR::Attribute::FrontFace) {
188 throw NotImplementedException("IPA.SAT on FrontFace");
189 }
190 value = ir.FPSaturate(value);
191 }
192
193 F(ipa.dest_reg, value);
194}
195
196} // namespace Shader::Maxwell
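The tail of IPA above is an optional multiply followed by an optional saturate; a scalar sketch (hypothetical name; the perspective fix-up and indexed reads are left out):

#include <algorithm>

float IpaModifiers(float value, float multiplier, bool multiply, bool saturate) {
    if (multiply) {
        value *= multiplier; // InterpolationMode::Multiply
    }
    return saturate ? std::clamp(value, 0.0f, 1.0f) : value; // .SAT
}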
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Size : u64 {
12 U8,
13 S8,
14 U16,
15 S16,
16 B32,
17 B64,
18 B128,
19};
20
21IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
22 union {
23 u64 raw;
24 BitField<8, 8, IR::Reg> offset_reg;
25 BitField<20, 24, u64> absolute_offset;
26 BitField<20, 24, s64> relative_offset;
27 } const encoding{insn};
28
29 if (encoding.offset_reg == IR::Reg::RZ) {
30 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
31 } else {
32 const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
33 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
34 }
35}
36
37std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
38 const IR::U32 offset{Offset(v, insn)};
39 if (offset.IsImmediate()) {
40 return {v.ir.Imm32(offset.U32() / 4), offset};
41 } else {
42 return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
43 }
44}
45
46std::pair<int, bool> GetSize(u64 insn) {
47 union {
48 u64 raw;
49 BitField<48, 3, Size> size;
50 } const encoding{insn};
51
52 switch (encoding.size) {
53 case Size::U8:
54 return {8, false};
55 case Size::S8:
56 return {8, true};
57 case Size::U16:
58 return {16, false};
59 case Size::S16:
60 return {16, true};
61 case Size::B32:
62 return {32, false};
63 case Size::B64:
64 return {64, false};
65 case Size::B128:
66 return {128, false};
67 default:
68 throw NotImplementedException("Invalid size {}", encoding.size.Value());
69 }
70}
71
72IR::Reg Reg(u64 insn) {
73 union {
74 u64 raw;
75 BitField<0, 8, IR::Reg> reg;
76 } const encoding{insn};
77
78 return encoding.reg;
79}
80
81IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
82 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
83}
84
85IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
86 return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
87}
88
89IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
90 const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
91 const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
92 return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
93}
94} // Anonymous namespace
95
96void TranslatorVisitor::LDL(u64 insn) {
97 const auto [word_offset, offset]{WordOffset(*this, insn)};
98 const IR::U32 word{LoadLocal(*this, word_offset, offset)};
99 const IR::Reg dest{Reg(insn)};
100 const auto [bit_size, is_signed]{GetSize(insn)};
101 switch (bit_size) {
102 case 8: {
103 const IR::U32 bit{ByteOffset(ir, offset)};
104 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
105 break;
106 }
107 case 16: {
108 const IR::U32 bit{ShortOffset(ir, offset)};
109 X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
110 break;
111 }
112 case 32:
113 case 64:
114 case 128:
115 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
116 throw NotImplementedException("Unaligned destination register {}", dest);
117 }
118 X(dest, word);
119 for (int i = 1; i < bit_size / 32; ++i) {
120 const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
121 const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
122 X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
123 }
124 break;
125 }
126}
127
128void TranslatorVisitor::LDS(u64 insn) {
129 const IR::U32 offset{Offset(*this, insn)};
130 const IR::Reg dest{Reg(insn)};
131 const auto [bit_size, is_signed]{GetSize(insn)};
132 const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
133 switch (bit_size) {
134 case 8:
135 case 16:
136 case 32:
137 X(dest, IR::U32{value});
138 break;
139 case 64:
140 case 128:
141 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
142 throw NotImplementedException("Unaligned destination register {}", dest);
143 }
144 for (int element = 0; element < bit_size / 32; ++element) {
145 X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
146 }
147 break;
148 }
149}
150
151void TranslatorVisitor::STL(u64 insn) {
152 const auto [word_offset, offset]{WordOffset(*this, insn)};
153 if (offset.IsImmediate()) {
154 // TODO: Support storing out of bounds at runtime
155 if (offset.U32() >= env.LocalMemorySize()) {
156 LOG_WARNING(Shader, "Store to local memory at 0x{:x} is beyond size 0x{:x}, dropping",
157 offset.U32(), env.LocalMemorySize());
158 return;
159 }
160 }
161 const IR::Reg reg{Reg(insn)};
162 const IR::U32 src{X(reg)};
163 const int bit_size{GetSize(insn).first};
164 switch (bit_size) {
165 case 8: {
166 const IR::U32 bit{ByteOffset(ir, offset)};
167 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
168 ir.WriteLocal(word_offset, value);
169 break;
170 }
171 case 16: {
172 const IR::U32 bit{ShortOffset(ir, offset)};
173 const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
174 ir.WriteLocal(word_offset, value);
175 break;
176 }
177 case 32:
178 case 64:
179 case 128:
180 if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
181 throw NotImplementedException("Unaligned source register");
182 }
183 ir.WriteLocal(word_offset, src);
184 for (int i = 1; i < bit_size / 32; ++i) {
185 ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
186 }
187 break;
188 }
189}
190
191void TranslatorVisitor::STS(u64 insn) {
192 const IR::U32 offset{Offset(*this, insn)};
193 const IR::Reg reg{Reg(insn)};
194 const int bit_size{GetSize(insn).first};
195 switch (bit_size) {
196 case 8:
197 case 16:
198 case 32:
199 ir.WriteShared(bit_size, offset, X(reg));
200 break;
201 case 64:
202 if (!IR::IsAligned(reg, 2)) {
203 throw NotImplementedException("Unaligned source register {}", reg);
204 }
205 ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
206 break;
207 case 128: {
208 if (!IR::IsAligned(reg, 2)) {
209 throw NotImplementedException("Unaligned source register {}", reg);
210 }
211 const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
212 ir.WriteShared(128, offset, vector);
213 break;
214 }
215 }
216}
217
218} // namespace Shader::Maxwell
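Sub-word local memory accesses above address a 32-bit word plus a bit position inside it; a scalar sketch of an 8-bit load (hypothetical names):

#include <cstdint>

uint32_t LoadLocalU8Model(const uint32_t* local_words, uint32_t byte_offset) {
    const uint32_t word = local_words[byte_offset / 4]; // WordOffset
    const uint32_t bit = (byte_offset * 8) & 24;        // ByteOffset: 0, 8, 16 or 24
    return (word >> bit) & 0xff;                        // BitFieldExtract
}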
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class LoadSize : u64 {
14 U8, // Zero-extend
15 S8, // Sign-extend
16 U16, // Zero-extend
17 S16, // Sign-extend
18 B32,
19 B64,
20 B128,
21 U128, // ???
22};
23
24enum class StoreSize : u64 {
25 U8, // Zero-extend
26 S8, // Sign-extend
27 U16, // Zero-extend
28 S16, // Sign-extend
29 B32,
30 B64,
31 B128,
32};
33
34// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
35enum class LoadCache : u64 {
36 CA, // Cache at all levels, likely to be accessed again
37 CG, // Cache at global level (cache in L2 and below, not L1)
38 CI, // ???
39 CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
40};
41
42// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
43enum class StoreCache : u64 {
44 WB, // Cache write-back all coherent levels
45 CG, // Cache at global level
46 CS, // Cache streaming, likely to be accessed once
47 WT, // Cache write-through (to system memory)
48};
49
50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
51 union {
52 u64 raw;
53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
56 BitField<45, 1, u64> e;
57 } const mem{insn};
58
59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register");
66 }
67 // Pack two registers to build the 64-bit address
68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::LDG(u64 insn) {
84 // LDG loads global memory into registers
85 union {
86 u64 raw;
87 BitField<0, 8, IR::Reg> dest_reg;
88 BitField<46, 2, LoadCache> cache;
89 BitField<48, 3, LoadSize> size;
90 } const ldg{insn};
91
92 // Pointer to load data from
93 const IR::U64 address{Address(*this, insn)};
94 const IR::Reg dest_reg{ldg.dest_reg};
95 switch (ldg.size) {
96 case LoadSize::U8:
97 X(dest_reg, ir.LoadGlobalU8(address));
98 break;
99 case LoadSize::S8:
100 X(dest_reg, ir.LoadGlobalS8(address));
101 break;
102 case LoadSize::U16:
103 X(dest_reg, ir.LoadGlobalU16(address));
104 break;
105 case LoadSize::S16:
106 X(dest_reg, ir.LoadGlobalS16(address));
107 break;
108 case LoadSize::B32:
109 X(dest_reg, ir.LoadGlobal32(address));
110 break;
111 case LoadSize::B64: {
112 if (!IR::IsAligned(dest_reg, 2)) {
113 throw NotImplementedException("Unaligned data registers");
114 }
115 const IR::Value vector{ir.LoadGlobal64(address)};
116 for (int i = 0; i < 2; ++i) {
117 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
118 }
119 break;
120 }
121 case LoadSize::B128:
122 case LoadSize::U128: {
123 if (!IR::IsAligned(dest_reg, 4)) {
124 throw NotImplementedException("Unaligned data registers");
125 }
126 const IR::Value vector{ir.LoadGlobal128(address)};
127 for (int i = 0; i < 4; ++i) {
128 X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
129 }
130 break;
131 }
132 default:
133 throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
134 }
135}
136
137void TranslatorVisitor::STG(u64 insn) {
138 // STG stores registers into global memory.
139 union {
140 u64 raw;
141 BitField<0, 8, IR::Reg> data_reg;
142 BitField<46, 2, StoreCache> cache;
143 BitField<48, 3, StoreSize> size;
144 } const stg{insn};
145
146 // Pointer to store data into
147 const IR::U64 address{Address(*this, insn)};
148 const IR::Reg data_reg{stg.data_reg};
149 switch (stg.size) {
150 case StoreSize::U8:
151 ir.WriteGlobalU8(address, X(data_reg));
152 break;
153 case StoreSize::S8:
154 ir.WriteGlobalS8(address, X(data_reg));
155 break;
156 case StoreSize::U16:
157 ir.WriteGlobalU16(address, X(data_reg));
158 break;
159 case StoreSize::S16:
160 ir.WriteGlobalS16(address, X(data_reg));
161 break;
162 case StoreSize::B32:
163 ir.WriteGlobal32(address, X(data_reg));
164 break;
165 case StoreSize::B64: {
166 if (!IR::IsAligned(data_reg, 2)) {
167 throw NotImplementedException("Unaligned data registers");
168 }
169 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
170 ir.WriteGlobal64(address, vector);
171 break;
172 }
173 case StoreSize::B128:
174 if (!IR::IsAligned(data_reg, 4)) {
175 throw NotImplementedException("Unaligned data registers");
176 }
177 const IR::Value vector{
178 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
179 ir.WriteGlobal128(address, vector);
180 break;
181 }
182}
183
184} // namespace Shader::Maxwell
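
A quick plain-integer sketch of the addressing math above may help; EffectiveAddress is a hypothetical name, not part of this change:

#include <cstdint>

// Mirrors Address(): without .E the pointer is the low 32 bits of Ra;
// with .E, Ra and Ra+1 form a 64-bit pointer. When Ra is RZ the 24-bit
// immediate is an unsigned absolute address, otherwise it is signed.
uint64_t EffectiveAddress(uint64_t ra_pair, bool e, bool ra_is_rz, int32_t imm24) {
    const uint64_t base = e ? ra_pair : (ra_pair & 0xffff'ffffULL);
    const uint64_t offset = ra_is_rz ? static_cast<uint64_t>(imm24 & 0xff'ffff)
                                     : static_cast<uint64_t>(static_cast<int64_t>(imm24));
    return base + offset;
}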
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class LogicalOp : u64 {
13 AND,
14 OR,
15 XOR,
16 PASS_B,
17};
18
19[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
20 const IR::U32& operand_2, LogicalOp op) {
21 switch (op) {
22 case LogicalOp::AND:
23 return ir.BitwiseAnd(operand_1, operand_2);
24 case LogicalOp::OR:
25 return ir.BitwiseOr(operand_1, operand_2);
26 case LogicalOp::XOR:
27 return ir.BitwiseXor(operand_1, operand_2);
28 case LogicalOp::PASS_B:
29 return operand_2;
30 default:
31        throw NotImplementedException("Invalid logical operation {}", op);
32 }
33}
34
35void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
36 LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
37 IR::Pred dest_pred = IR::Pred::PT) {
38 union {
39 u64 insn;
40 BitField<0, 8, IR::Reg> dest_reg;
41 BitField<8, 8, IR::Reg> src_reg;
42 } const lop{insn};
43
44 if (x) {
45 throw NotImplementedException("X");
46 }
47 IR::U32 op_a{v.X(lop.src_reg)};
48    if (inv_a) {
49 op_a = v.ir.BitwiseNot(op_a);
50 }
51    if (inv_b) {
52 op_b = v.ir.BitwiseNot(op_b);
53 }
54
55 const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
56 if (pred_op) {
57 const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
58 v.ir.SetPred(dest_pred, pred_result);
59 }
60 if (cc) {
61 if (bit_op == LogicalOp::PASS_B) {
62 v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
63 v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
64 } else {
65 v.SetZFlag(v.ir.GetZeroFromOp(result));
66 v.SetSFlag(v.ir.GetSignFromOp(result));
67 }
68 v.ResetCFlag();
69 v.ResetOFlag();
70 }
71 v.X(lop.dest_reg, result);
72}
73
74void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
75 union {
76 u64 insn;
77 BitField<39, 1, u64> inv_a;
78 BitField<40, 1, u64> inv_b;
79 BitField<41, 2, LogicalOp> bit_op;
80 BitField<43, 1, u64> x;
81 BitField<44, 2, PredicateOp> pred_op;
82 BitField<47, 1, u64> cc;
83 BitField<48, 3, IR::Pred> dest_pred;
84 } const lop{insn};
85
86 LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
87 lop.pred_op, lop.dest_pred);
88}
89} // Anonymous namespace
90
91void TranslatorVisitor::LOP_reg(u64 insn) {
92 LOP(*this, insn, GetReg20(insn));
93}
94
95void TranslatorVisitor::LOP_cbuf(u64 insn) {
96 LOP(*this, insn, GetCbuf(insn));
97}
98
99void TranslatorVisitor::LOP_imm(u64 insn) {
100 LOP(*this, insn, GetImm20(insn));
101}
102
103void TranslatorVisitor::LOP32I(u64 insn) {
104 union {
105 u64 raw;
106 BitField<53, 2, LogicalOp> bit_op;
107 BitField<57, 1, u64> x;
108 BitField<52, 1, u64> cc;
109 BitField<55, 1, u64> inv_a;
110 BitField<56, 1, u64> inv_b;
111 } const lop32i{insn};
112
113 LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
114 lop32i.inv_b != 0, lop32i.bit_op);
115}
116} // namespace Shader::Maxwell
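
For reference, the same datapath on plain 32-bit values: optional operand inversion followed by one of the four ops (ApplyLop is a hypothetical helper, not part of this change):

#include <cstdint>

enum class LogicalOp { AND, OR, XOR, PASS_B };

// PASS_B forwards operand B, which is why the translator above computes
// the Z/S flags for PASS_B directly instead of via GetZeroFromOp/GetSignFromOp.
uint32_t ApplyLop(uint32_t a, uint32_t b, bool inv_a, bool inv_b, LogicalOp op) {
    if (inv_a) {
        a = ~a;
    }
    if (inv_b) {
        b = ~b;
    }
    switch (op) {
    case LogicalOp::AND:
        return a & b;
    case LogicalOp::OR:
        return a | b;
    case LogicalOp::XOR:
        return a ^ b;
    case LogicalOp::PASS_B:
        return b;
    }
    return 0; // unreachable for valid encodings
}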
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)};
17 const IR::U32 not_a{ir.BitwiseNot(a)};
18 const IR::U32 not_b{ir.BitwiseNot(b)};
19 const IR::U32 not_c{ir.BitwiseNot(c)};
20 if (ttbl & 0x01) {
21 // r |= ~a & ~b & ~c;
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
24 r = ir.BitwiseOr(r, rhs);
25 }
26 if (ttbl & 0x02) {
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69}
70
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
72 union {
73 u64 insn;
74 BitField<0, 8, IR::Reg> dest_reg;
75 BitField<8, 8, IR::Reg> src_reg;
76 BitField<47, 1, u64> cc;
77 } const lop3{insn};
78
79 if (lop3.cc != 0) {
80 throw NotImplementedException("LOP3 CC");
81 }
82
83 const IR::U32 op_a{v.X(lop3.src_reg)};
84 const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
85 v.X(lop3.dest_reg, result);
86 return result;
87}
88
89u64 GetLut48(u64 insn) {
90 union {
91 u64 raw;
92 BitField<48, 8, u64> lut;
93 } const lut{insn};
94 return lut.lut;
95}
96} // Anonymous namespace
97
98void TranslatorVisitor::LOP3_reg(u64 insn) {
99 union {
100 u64 insn;
101 BitField<28, 8, u64> lut;
102 BitField<38, 1, u64> x;
103 BitField<36, 2, PredicateOp> pred_op;
104 BitField<48, 3, IR::Pred> pred;
105 } const lop3{insn};
106
107 if (lop3.x != 0) {
108 throw NotImplementedException("LOP3 X");
109 }
110 const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
111 const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
112 ir.SetPred(lop3.pred, pred_result);
113}
114
115void TranslatorVisitor::LOP3_cbuf(u64 insn) {
116 LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
117}
118
119void TranslatorVisitor::LOP3_imm(u64 insn) {
120 LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
121}
122} // namespace Shader::Maxwell
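
The eight conditionals in ApplyLUT enumerate the minterms of an 8-entry truth table indexed by (a, b, c). A bit-parallel reference model of the same lookup (Lop3Reference is a hypothetical name):

#include <cstdint>

// For each bit position the LUT index is (a << 2 | b << 1 | c); the
// result bit is that entry of the 8-bit truth table. ApplyLUT above
// expands the same table into IR minterms instead of looping.
uint32_t Lop3Reference(uint32_t a, uint32_t b, uint32_t c, uint8_t ttbl) {
    uint32_t r = 0;
    for (int bit = 0; bit < 32; ++bit) {
        const uint32_t idx =
            (((a >> bit) & 1) << 2) | (((b >> bit) & 1) << 1) | ((c >> bit) & 1);
        r |= ((static_cast<uint32_t>(ttbl) >> idx) & 1) << bit;
    }
    return r;
}

For example, ttbl 0xEA selects minterms {1, 3, 5, 6, 7} and computes (a & b) | c.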
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15} // Anonymous namespace
16
17void TranslatorVisitor::P2R_reg(u64) {
18 throw NotImplementedException("P2R (reg)");
19}
20
21void TranslatorVisitor::P2R_cbuf(u64) {
22 throw NotImplementedException("P2R (cbuf)");
23}
24
25void TranslatorVisitor::P2R_imm(u64 insn) {
26 union {
27 u64 raw;
28 BitField<0, 8, IR::Reg> dest_reg;
29 BitField<8, 8, IR::Reg> src;
30 BitField<40, 1, Mode> mode;
31 BitField<41, 2, u64> byte_selector;
32 } const p2r{insn};
33
34 const u32 mask{GetImm20(insn).U32()};
35 const bool pr_mode{p2r.mode == Mode::PR};
36 const u32 num_items{pr_mode ? 7U : 4U};
37 const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
38 IR::U32 insert{ir.Imm32(0)};
39 for (u32 index = 0; index < num_items; ++index) {
40 if (((mask >> index) & 1) == 0) {
41 continue;
42 }
43 const IR::U1 cond{[this, index, pr_mode] {
44 if (pr_mode) {
45 return ir.GetPred(IR::Pred{index});
46 }
47 switch (index) {
48 case 0:
49 return ir.GetZFlag();
50 case 1:
51 return ir.GetSFlag();
52 case 2:
53 return ir.GetCFlag();
54 case 3:
55 return ir.GetOFlag();
56 }
57 throw LogicError("Unreachable P2R index");
58 }()};
59 const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
60 insert = ir.BitwiseOr(insert, bit);
61 }
62 const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
63 X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
64}
65
66} // namespace Shader::Maxwell
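
P2R is a read-modify-write: gathered predicate or flag bits are spliced into the old register value under (mask << offset). A plain-integer model (P2rModel is hypothetical; the translator caps the loop at 7 predicates in PR mode or 4 flags in CC mode):

#include <cstdint>

uint32_t P2rModel(uint32_t old_value, uint32_t pred_bits, uint32_t mask,
                  uint32_t byte_selector, uint32_t num_items) {
    const uint32_t offset = byte_selector * 8;
    uint32_t insert = 0;
    for (uint32_t index = 0; index < num_items; ++index) {
        if ((mask >> index) & 1) {
            // Each selected predicate/flag lands at bit (index + offset)
            insert |= ((pred_bits >> index) & 1) << (index + offset);
        }
    }
    // Clear the target field in the old value, then merge the new bits
    return (old_value & ~(mask << offset)) | insert;
}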
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<39, 4, u64> mask;
18 BitField<12, 4, u64> mov32i_mask;
19 } const mov{insn};
20
21 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
22 throw NotImplementedException("Non-full move mask");
23 }
24 v.X(mov.dest_reg, src);
25}
26} // Anonymous namespace
27
28void TranslatorVisitor::MOV_reg(u64 insn) {
29 MOV(*this, insn, GetReg20(insn));
30}
31
32void TranslatorVisitor::MOV_cbuf(u64 insn) {
33 MOV(*this, insn, GetCbuf(insn));
34}
35
36void TranslatorVisitor::MOV_imm(u64 insn) {
37 MOV(*this, insn, GetImm20(insn));
38}
39
40void TranslatorVisitor::MOV32I(u64 insn) {
41 MOV(*this, insn, GetImm32(insn), true);
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 PR,
13 CC,
14};
15
16void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
17 switch (index) {
18 case 0:
19 return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
20 case 1:
21 return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
22 case 2:
23 return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
24 case 3:
25 return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
26 default:
27 throw LogicError("Unreachable R2P index");
28 }
29}
30
31void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
32 union {
33 u64 raw;
34 BitField<8, 8, IR::Reg> src_reg;
35 BitField<40, 1, Mode> mode;
36 BitField<41, 2, u64> byte_selector;
37 } const r2p{insn};
38 const IR::U32 src{v.X(r2p.src_reg)};
39 const IR::U32 count{v.ir.Imm32(1)};
40 const bool pr_mode{r2p.mode == Mode::PR};
41 const u32 num_items{pr_mode ? 7U : 4U};
42 const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
43 for (u32 index = 0; index < num_items; ++index) {
44 const IR::U32 offset{v.ir.Imm32(offset_base + index)};
45 const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
46 const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
47 const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
48 const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
49 if (pr_mode) {
50 const IR::Pred pred{index};
51 v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
52 } else {
53 SetFlag(v.ir, inv_mask_bit, src_bit, index);
54 }
55 }
56}
57} // Anonymous namespace
58
59void TranslatorVisitor::R2P_reg(u64 insn) {
60 R2P(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::R2P_cbuf(u64 insn) {
64 R2P(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::R2P_imm(u64 insn) {
68 R2P(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
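
R2P is the inverse operation: each destination predicate or flag keeps its old value where the mask bit is clear and takes the corresponding source bit where it is set. A per-bit model (R2pBit is hypothetical):

#include <cstdint>

bool R2pBit(bool old_bit, uint32_t src, uint32_t mask, uint32_t index, uint32_t offset_base) {
    const bool mask_bit = ((mask >> index) & 1) != 0;
    const bool src_bit = ((src >> (offset_base + index)) & 1) != 0;
    // Matches Select(inv_mask_bit, old, src_bit) in the translator above
    return mask_bit ? src_bit : old_bit;
}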
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class SpecialRegister : u64 {
12 SR_LANEID = 0,
13 SR_CLOCK = 1,
14 SR_VIRTCFG = 2,
15 SR_VIRTID = 3,
16 SR_PM0 = 4,
17 SR_PM1 = 5,
18 SR_PM2 = 6,
19 SR_PM3 = 7,
20 SR_PM4 = 8,
21 SR_PM5 = 9,
22 SR_PM6 = 10,
23 SR_PM7 = 11,
24 SR12 = 12,
25 SR13 = 13,
26 SR14 = 14,
27 SR_ORDERING_TICKET = 15,
28 SR_PRIM_TYPE = 16,
29 SR_INVOCATION_ID = 17,
30 SR_Y_DIRECTION = 18,
31 SR_THREAD_KILL = 19,
32 SM_SHADER_TYPE = 20,
33 SR_DIRECTCBEWRITEADDRESSLOW = 21,
34 SR_DIRECTCBEWRITEADDRESSHIGH = 22,
35 SR_DIRECTCBEWRITEENABLE = 23,
36 SR_MACHINE_ID_0 = 24,
37 SR_MACHINE_ID_1 = 25,
38 SR_MACHINE_ID_2 = 26,
39 SR_MACHINE_ID_3 = 27,
40 SR_AFFINITY = 28,
41 SR_INVOCATION_INFO = 29,
42 SR_WSCALEFACTOR_XY = 30,
43 SR_WSCALEFACTOR_Z = 31,
44 SR_TID = 32,
45 SR_TID_X = 33,
46 SR_TID_Y = 34,
47 SR_TID_Z = 35,
48 SR_CTA_PARAM = 36,
49 SR_CTAID_X = 37,
50 SR_CTAID_Y = 38,
51 SR_CTAID_Z = 39,
52 SR_NTID = 40,
53 SR_CirQueueIncrMinusOne = 41,
54 SR_NLATC = 42,
55 SR43 = 43,
56 SR_SM_SPA_VERSION = 44,
57 SR_MULTIPASSSHADERINFO = 45,
58 SR_LWINHI = 46,
59 SR_SWINHI = 47,
60 SR_SWINLO = 48,
61 SR_SWINSZ = 49,
62 SR_SMEMSZ = 50,
63 SR_SMEMBANKS = 51,
64 SR_LWINLO = 52,
65 SR_LWINSZ = 53,
66 SR_LMEMLOSZ = 54,
67 SR_LMEMHIOFF = 55,
68 SR_EQMASK = 56,
69 SR_LTMASK = 57,
70 SR_LEMASK = 58,
71 SR_GTMASK = 59,
72 SR_GEMASK = 60,
73 SR_REGALLOC = 61,
74 SR_BARRIERALLOC = 62,
75 SR63 = 63,
76 SR_GLOBALERRORSTATUS = 64,
77 SR65 = 65,
78 SR_WARPERRORSTATUS = 66,
79 SR_WARPERRORSTATUSCLEAR = 67,
80 SR68 = 68,
81 SR69 = 69,
82 SR70 = 70,
83 SR71 = 71,
84 SR_PM_HI0 = 72,
85 SR_PM_HI1 = 73,
86 SR_PM_HI2 = 74,
87 SR_PM_HI3 = 75,
88 SR_PM_HI4 = 76,
89 SR_PM_HI5 = 77,
90 SR_PM_HI6 = 78,
91 SR_PM_HI7 = 79,
92 SR_CLOCKLO = 80,
93 SR_CLOCKHI = 81,
94 SR_GLOBALTIMERLO = 82,
95 SR_GLOBALTIMERHI = 83,
96 SR84 = 84,
97 SR85 = 85,
98 SR86 = 86,
99 SR87 = 87,
100 SR88 = 88,
101 SR89 = 89,
102 SR90 = 90,
103 SR91 = 91,
104 SR92 = 92,
105 SR93 = 93,
106 SR94 = 94,
107 SR95 = 95,
108 SR_HWTASKID = 96,
109 SR_CIRCULARQUEUEENTRYINDEX = 97,
110 SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
111 SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
112};
113
114[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
115 switch (special_register) {
116 case SpecialRegister::SR_INVOCATION_ID:
117 return ir.InvocationId();
118 case SpecialRegister::SR_THREAD_KILL:
119 return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
120 case SpecialRegister::SR_INVOCATION_INFO:
121 LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
122 return ir.Imm32(0x00ff'0000);
123 case SpecialRegister::SR_TID: {
124 const IR::Value tid{ir.LocalInvocationId()};
125 return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
126 IR::U32{ir.CompositeExtract(tid, 1)},
127 ir.Imm32(16), ir.Imm32(8)),
128 IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
129 }
130 case SpecialRegister::SR_TID_X:
131 return ir.LocalInvocationIdX();
132 case SpecialRegister::SR_TID_Y:
133 return ir.LocalInvocationIdY();
134 case SpecialRegister::SR_TID_Z:
135 return ir.LocalInvocationIdZ();
136 case SpecialRegister::SR_CTAID_X:
137 return ir.WorkgroupIdX();
138 case SpecialRegister::SR_CTAID_Y:
139 return ir.WorkgroupIdY();
140 case SpecialRegister::SR_CTAID_Z:
141 return ir.WorkgroupIdZ();
142 case SpecialRegister::SR_WSCALEFACTOR_XY:
143 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
144 return ir.Imm32(Common::BitCast<u32>(1.0f));
145 case SpecialRegister::SR_WSCALEFACTOR_Z:
146 LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
147 return ir.Imm32(Common::BitCast<u32>(1.0f));
148 case SpecialRegister::SR_LANEID:
149 return ir.LaneId();
150 case SpecialRegister::SR_EQMASK:
151 return ir.SubgroupEqMask();
152 case SpecialRegister::SR_LTMASK:
153 return ir.SubgroupLtMask();
154 case SpecialRegister::SR_LEMASK:
155 return ir.SubgroupLeMask();
156 case SpecialRegister::SR_GTMASK:
157 return ir.SubgroupGtMask();
158 case SpecialRegister::SR_GEMASK:
159 return ir.SubgroupGeMask();
160 case SpecialRegister::SR_Y_DIRECTION:
161 return ir.BitCast<IR::U32>(ir.YDirection());
162 case SpecialRegister::SR_AFFINITY:
163 LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
164 return ir.Imm32(0); // This is the default value hardware returns.
165 default:
166 throw NotImplementedException("S2R special register {}", special_register);
167 }
168}
169} // Anonymous namespace
170
171void TranslatorVisitor::S2R(u64 insn) {
172 union {
173 u64 raw;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<20, 8, SpecialRegister> src_reg;
176 } const s2r{insn};
177
178 X(s2r.dest_reg, Read(ir, s2r.src_reg));
179}
180
181} // namespace Shader::Maxwell
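
The SR_TID case packs all three local invocation ID components into one register through two nested BitFieldInsert calls. The resulting layout on plain integers (PackSrTid is hypothetical):

#include <cstdint>

// bits  0..15  tid.x (plus x's bits 24..25, which neither insert touches)
// bits 16..23  tid.y (8 bits)
// bits 26..31  tid.z (6 bits)
uint32_t PackSrTid(uint32_t x, uint32_t y, uint32_t z) {
    uint32_t r = x;
    r = (r & ~(0xffu << 16)) | ((y & 0xffu) << 16); // insert y at [16, 24)
    r = (r & ~(0x3fu << 26)) | ((z & 0x3fu) << 26); // insert z at [26, 32)
    return r;
}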
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
13 throw NotImplementedException("Instruction {} is not implemented", opcode);
14}
15
16void TranslatorVisitor::ATOM_cas(u64) {
17 ThrowNotImplemented(Opcode::ATOM_cas);
18}
19
20void TranslatorVisitor::ATOMS_cas(u64) {
21 ThrowNotImplemented(Opcode::ATOMS_cas);
22}
23
24void TranslatorVisitor::B2R(u64) {
25 ThrowNotImplemented(Opcode::B2R);
26}
27
28void TranslatorVisitor::BPT(u64) {
29 ThrowNotImplemented(Opcode::BPT);
30}
31
32void TranslatorVisitor::BRA(u64) {
33 ThrowNotImplemented(Opcode::BRA);
34}
35
36void TranslatorVisitor::BRK(u64) {
37 ThrowNotImplemented(Opcode::BRK);
38}
39
40void TranslatorVisitor::CAL() {
41 // CAL is a no-op
42}
43
44void TranslatorVisitor::CCTL(u64) {
45 ThrowNotImplemented(Opcode::CCTL);
46}
47
48void TranslatorVisitor::CCTLL(u64) {
49 ThrowNotImplemented(Opcode::CCTLL);
50}
51
52void TranslatorVisitor::CONT(u64) {
53 ThrowNotImplemented(Opcode::CONT);
54}
55
56void TranslatorVisitor::CS2R(u64) {
57 ThrowNotImplemented(Opcode::CS2R);
58}
59
60void TranslatorVisitor::FCHK_reg(u64) {
61 ThrowNotImplemented(Opcode::FCHK_reg);
62}
63
64void TranslatorVisitor::FCHK_cbuf(u64) {
65 ThrowNotImplemented(Opcode::FCHK_cbuf);
66}
67
68void TranslatorVisitor::FCHK_imm(u64) {
69 ThrowNotImplemented(Opcode::FCHK_imm);
70}
71
72void TranslatorVisitor::GETCRSPTR(u64) {
73 ThrowNotImplemented(Opcode::GETCRSPTR);
74}
75
76void TranslatorVisitor::GETLMEMBASE(u64) {
77 ThrowNotImplemented(Opcode::GETLMEMBASE);
78}
79
80void TranslatorVisitor::IDE(u64) {
81 ThrowNotImplemented(Opcode::IDE);
82}
83
84void TranslatorVisitor::IDP_reg(u64) {
85 ThrowNotImplemented(Opcode::IDP_reg);
86}
87
88void TranslatorVisitor::IDP_imm(u64) {
89 ThrowNotImplemented(Opcode::IDP_imm);
90}
91
92void TranslatorVisitor::IMAD_reg(u64) {
93 ThrowNotImplemented(Opcode::IMAD_reg);
94}
95
96void TranslatorVisitor::IMAD_rc(u64) {
97 ThrowNotImplemented(Opcode::IMAD_rc);
98}
99
100void TranslatorVisitor::IMAD_cr(u64) {
101 ThrowNotImplemented(Opcode::IMAD_cr);
102}
103
104void TranslatorVisitor::IMAD_imm(u64) {
105 ThrowNotImplemented(Opcode::IMAD_imm);
106}
107
108void TranslatorVisitor::IMAD32I(u64) {
109 ThrowNotImplemented(Opcode::IMAD32I);
110}
111
112void TranslatorVisitor::IMADSP_reg(u64) {
113 ThrowNotImplemented(Opcode::IMADSP_reg);
114}
115
116void TranslatorVisitor::IMADSP_rc(u64) {
117 ThrowNotImplemented(Opcode::IMADSP_rc);
118}
119
120void TranslatorVisitor::IMADSP_cr(u64) {
121 ThrowNotImplemented(Opcode::IMADSP_cr);
122}
123
124void TranslatorVisitor::IMADSP_imm(u64) {
125 ThrowNotImplemented(Opcode::IMADSP_imm);
126}
127
128void TranslatorVisitor::IMUL_reg(u64) {
129 ThrowNotImplemented(Opcode::IMUL_reg);
130}
131
132void TranslatorVisitor::IMUL_cbuf(u64) {
133 ThrowNotImplemented(Opcode::IMUL_cbuf);
134}
135
136void TranslatorVisitor::IMUL_imm(u64) {
137 ThrowNotImplemented(Opcode::IMUL_imm);
138}
139
140void TranslatorVisitor::IMUL32I(u64) {
141 ThrowNotImplemented(Opcode::IMUL32I);
142}
143
144void TranslatorVisitor::JCAL(u64) {
145 ThrowNotImplemented(Opcode::JCAL);
146}
147
148void TranslatorVisitor::JMP(u64) {
149 ThrowNotImplemented(Opcode::JMP);
150}
151
152void TranslatorVisitor::KIL() {
153 // KIL is a no-op
154}
155
156void TranslatorVisitor::LD(u64) {
157 ThrowNotImplemented(Opcode::LD);
158}
159
160void TranslatorVisitor::LEPC(u64) {
161 ThrowNotImplemented(Opcode::LEPC);
162}
163
164void TranslatorVisitor::LONGJMP(u64) {
165 ThrowNotImplemented(Opcode::LONGJMP);
166}
167
168void TranslatorVisitor::NOP(u64) {
169    // NOP is a no-op
170}
171
172void TranslatorVisitor::PBK() {
173 // PBK is a no-op
174}
175
176void TranslatorVisitor::PCNT() {
177 // PCNT is a no-op
178}
179
180void TranslatorVisitor::PEXIT(u64) {
181 ThrowNotImplemented(Opcode::PEXIT);
182}
183
184void TranslatorVisitor::PLONGJMP(u64) {
185 ThrowNotImplemented(Opcode::PLONGJMP);
186}
187
188void TranslatorVisitor::PRET(u64) {
189 ThrowNotImplemented(Opcode::PRET);
190}
191
192void TranslatorVisitor::PRMT_reg(u64) {
193 ThrowNotImplemented(Opcode::PRMT_reg);
194}
195
196void TranslatorVisitor::PRMT_rc(u64) {
197 ThrowNotImplemented(Opcode::PRMT_rc);
198}
199
200void TranslatorVisitor::PRMT_cr(u64) {
201 ThrowNotImplemented(Opcode::PRMT_cr);
202}
203
204void TranslatorVisitor::PRMT_imm(u64) {
205 ThrowNotImplemented(Opcode::PRMT_imm);
206}
207
208void TranslatorVisitor::R2B(u64) {
209 ThrowNotImplemented(Opcode::R2B);
210}
211
212void TranslatorVisitor::RAM(u64) {
213 ThrowNotImplemented(Opcode::RAM);
214}
215
216void TranslatorVisitor::RET(u64) {
217 ThrowNotImplemented(Opcode::RET);
218}
219
220void TranslatorVisitor::RTT(u64) {
221 ThrowNotImplemented(Opcode::RTT);
222}
223
224void TranslatorVisitor::SAM(u64) {
225 ThrowNotImplemented(Opcode::SAM);
226}
227
228void TranslatorVisitor::SETCRSPTR(u64) {
229 ThrowNotImplemented(Opcode::SETCRSPTR);
230}
231
232void TranslatorVisitor::SETLMEMBASE(u64) {
233 ThrowNotImplemented(Opcode::SETLMEMBASE);
234}
235
236void TranslatorVisitor::SSY() {
237 // SSY is a no-op
238}
239
240void TranslatorVisitor::ST(u64) {
241 ThrowNotImplemented(Opcode::ST);
242}
243
244void TranslatorVisitor::STP(u64) {
245 ThrowNotImplemented(Opcode::STP);
246}
247
248void TranslatorVisitor::SUATOM_cas(u64) {
249 ThrowNotImplemented(Opcode::SUATOM_cas);
250}
251
252void TranslatorVisitor::SYNC(u64) {
253 ThrowNotImplemented(Opcode::SYNC);
254}
255
256void TranslatorVisitor::TXA(u64) {
257 ThrowNotImplemented(Opcode::TXA);
258}
259
260void TranslatorVisitor::VABSDIFF(u64) {
261 ThrowNotImplemented(Opcode::VABSDIFF);
262}
263
264void TranslatorVisitor::VABSDIFF4(u64) {
265 ThrowNotImplemented(Opcode::VABSDIFF4);
266}
267
268void TranslatorVisitor::VADD(u64) {
269 ThrowNotImplemented(Opcode::VADD);
270}
271
272void TranslatorVisitor::VSET(u64) {
273 ThrowNotImplemented(Opcode::VSET);
274}
275void TranslatorVisitor::VSHL(u64) {
276 ThrowNotImplemented(Opcode::VSHL);
277}
278
279void TranslatorVisitor::VSHR(u64) {
280 ThrowNotImplemented(Opcode::VSHR);
281}
282
283} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> output_reg; // Not needed on host
16 BitField<39, 1, u64> emit;
17 BitField<40, 1, u64> cut;
18 } const out{insn};
19
20 stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
21
22 if (out.emit != 0) {
23 v.ir.EmitVertex(stream_index);
24 }
25 if (out.cut != 0) {
26 v.ir.EndPrimitive(stream_index);
27 }
28 // Host doesn't need the output register, but we can write to it to avoid undefined reads
29 v.X(out.dest_reg, v.ir.Imm32(0));
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::OUT_reg(u64 insn) {
34 OUT(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::OUT_cbuf(u64 insn) {
38 OUT(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::OUT_imm(u64 insn) {
42 OUT(*this, insn, GetImm20(insn));
43}
44
45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class Mode : u64 {
12 Default,
13 CovMask,
14 Covered,
15 Offset,
16 CentroidOffset,
17 MyIndex,
18};
19} // Anonymous namespace
20
21void TranslatorVisitor::PIXLD(u64 insn) {
22 union {
23 u64 raw;
24 BitField<31, 3, Mode> mode;
25 BitField<0, 8, IR::Reg> dest_reg;
26 BitField<8, 8, IR::Reg> addr_reg;
27 BitField<20, 8, s64> addr_offset;
28 BitField<45, 3, IR::Pred> dest_pred;
29 } const pixld{insn};
30
31 if (pixld.dest_pred != IR::Pred::PT) {
32 throw NotImplementedException("Destination predicate");
33 }
34 if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
35        throw NotImplementedException("Non-zero address register or offset");
36 }
37 switch (pixld.mode) {
38 case Mode::MyIndex:
39 X(pixld.dest_reg, ir.SampleId());
40 break;
41 default:
42 throw NotImplementedException("Mode {}", pixld.mode.Value());
43 }
44}
45
46} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSETP(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 3, IR::Pred> dest_pred_b;
15 BitField<3, 3, IR::Pred> dest_pred_a;
16 BitField<12, 3, IR::Pred> pred_a;
17 BitField<15, 1, u64> neg_pred_a;
18 BitField<24, 2, BooleanOp> bop_1;
19 BitField<29, 3, IR::Pred> pred_b;
20 BitField<32, 1, u64> neg_pred_b;
21 BitField<39, 3, IR::Pred> pred_c;
22 BitField<42, 1, u64> neg_pred_c;
23 BitField<45, 2, BooleanOp> bop_2;
24 } const pset{insn};
25
26 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
27 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
28 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
29
30 const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
31 const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
32 const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
33 const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
34
35 ir.SetPred(pset.dest_pred_a, result_a);
36 ir.SetPred(pset.dest_pred_b, result_b);
37}
38} // namespace Shader::Maxwell
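
PSETP produces two predicates: the second repeats the combine with pred_a negated. A plain-bool model, assuming the usual AND/OR/XOR meaning of the 2-bit BooleanOp field (names here are illustrative):

enum class BooleanOp { AND, OR, XOR };

bool Combine(bool x, bool y, BooleanOp op) {
    switch (op) {
    case BooleanOp::AND:
        return x && y;
    case BooleanOp::OR:
        return x || y;
    case BooleanOp::XOR:
        return x != y;
    }
    return false; // unreachable for valid encodings
}

// dest_a = (a op1 b) op2 c;  dest_b = (!a op1 b) op2 c
void PsetpModel(bool a, bool b, bool c, BooleanOp op1, BooleanOp op2,
                bool& dest_a, bool& dest_b) {
    dest_a = Combine(Combine(a, b, op1), c, op2);
    dest_b = Combine(Combine(!a, b, op1), c, op2);
}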
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSET(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<12, 3, IR::Pred> pred_a;
16 BitField<15, 1, u64> neg_pred_a;
17 BitField<24, 2, BooleanOp> bop_1;
18 BitField<29, 3, IR::Pred> pred_b;
19 BitField<32, 1, u64> neg_pred_b;
20 BitField<39, 3, IR::Pred> pred_c;
21 BitField<42, 1, u64> neg_pred_c;
22 BitField<44, 1, u64> bf;
23 BitField<45, 2, BooleanOp> bop_2;
24 BitField<47, 1, u64> cc;
25 } const pset{insn};
26
27 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
28 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
29 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
30
31 const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
32 const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
33
34 const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
35 const IR::U32 zero{ir.Imm32(0)};
36
37 const IR::U32 result{ir.Select(res_2, true_result, zero)};
38
39 X(pset.dest_reg, result);
40 if (pset.cc != 0) {
41 const IR::U1 is_zero{ir.IEqual(result, zero)};
42 SetZFlag(is_zero);
43 if (pset.bf != 0) {
44 ResetSFlag();
45 } else {
46 SetSFlag(ir.LogicalNot(is_zero));
47 }
48 ResetOFlag();
49 ResetCFlag();
50 }
51}
52
53} // namespace Shader::Maxwell
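
The two "true" encodings PSET can write are float 1.0 with .BF and all ones without it. A compile-time sanity check of those constants (assumes C++20 std::bit_cast):

#include <bit>
#include <cstdint>

static_assert(std::bit_cast<float>(0x3f800000u) == 1.0f); // .BF true value
static_assert(static_cast<uint32_t>(-1) == 0xffff'ffffu); // plain true value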
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11
12void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 } const sel{insn};
20
21    const IR::U1 pred{v.ir.GetPred(sel.pred)};
22 IR::U32 op_a{v.X(sel.src_reg)};
23 IR::U32 op_b{src};
24 if (sel.neg_pred != 0) {
25 std::swap(op_a, op_b);
26 }
27 const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
28
29 v.X(sel.dest_reg, result);
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::SEL_reg(u64 insn) {
34 SEL(*this, insn, GetReg20(insn));
35}
36
37void TranslatorVisitor::SEL_cbuf(u64 insn) {
38 SEL(*this, insn, GetCbuf(insn));
39}
40
41void TranslatorVisitor::SEL_imm(u64 insn) {
42 SEL(*this, insn, GetImm20(insn));
43}
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24enum class Size : u64 {
25 U32,
26 S32,
27 U64,
28 S64,
29 F32FTZRN,
30 F16x2FTZRN,
31 SD32,
32 SD64,
33};
34
35enum class AtomicOp : u64 {
36 ADD,
37 MIN,
38 MAX,
39 INC,
40 DEC,
41 AND,
42 OR,
43 XOR,
44 EXCH,
45};
46
47enum class Clamp : u64 {
48 IGN,
49 Default,
50 TRAP,
51};
52
53TextureType GetType(Type type) {
54 switch (type) {
55 case Type::_1D:
56 return TextureType::Color1D;
57 case Type::BUFFER_1D:
58 return TextureType::Buffer;
59 case Type::ARRAY_1D:
60 return TextureType::ColorArray1D;
61 case Type::_2D:
62 return TextureType::Color2D;
63 case Type::ARRAY_2D:
64 return TextureType::ColorArray2D;
65 case Type::_3D:
66 return TextureType::Color3D;
67 }
68 throw NotImplementedException("Invalid type {}", type);
69}
70
71IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
72 switch (type) {
73 case Type::_1D:
74 case Type::BUFFER_1D:
75 return v.X(reg);
76 case Type::_2D:
77 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
78 case Type::_3D:
79 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
80 default:
81 break;
82 }
83 throw NotImplementedException("Invalid type {}", type);
84}
85
86IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
87 const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
88 bool is_signed) {
89 switch (op) {
90 case AtomicOp::ADD:
91 return ir.ImageAtomicIAdd(handle, coords, op_b, info);
92 case AtomicOp::MIN:
93 return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
94 case AtomicOp::MAX:
95 return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
96 case AtomicOp::INC:
97 return ir.ImageAtomicInc(handle, coords, op_b, info);
98 case AtomicOp::DEC:
99 return ir.ImageAtomicDec(handle, coords, op_b, info);
100 case AtomicOp::AND:
101 return ir.ImageAtomicAnd(handle, coords, op_b, info);
102 case AtomicOp::OR:
103 return ir.ImageAtomicOr(handle, coords, op_b, info);
104 case AtomicOp::XOR:
105 return ir.ImageAtomicXor(handle, coords, op_b, info);
106 case AtomicOp::EXCH:
107 return ir.ImageAtomicExchange(handle, coords, op_b, info);
108 default:
109 throw NotImplementedException("Atomic Operation {}", op);
110 }
111}
112
113ImageFormat Format(Size size) {
114 switch (size) {
115 case Size::U32:
116 case Size::S32:
117 case Size::SD32:
118 return ImageFormat::R32_UINT;
119 default:
120 break;
121 }
122 throw NotImplementedException("Invalid size {}", size);
123}
124
125bool IsSizeInt32(Size size) {
126 switch (size) {
127 case Size::U32:
128 case Size::S32:
129 case Size::SD32:
130 return true;
131 default:
132 return false;
133 }
134}
135
136void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
137 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
138 u64 bound_offset, bool is_bindless, bool write_result) {
139 if (clamp != Clamp::IGN) {
140 throw NotImplementedException("Clamp {}", clamp);
141 }
142 if (!IsSizeInt32(size)) {
143 throw NotImplementedException("Size {}", size);
144 }
145 const bool is_signed{size == Size::S32};
146 const ImageFormat format{Format(size)};
147 const TextureType tex_type{GetType(type)};
148 const IR::Value coords{MakeCoords(v, coord_reg, type)};
149
150    const IR::U32 handle{is_bindless ? v.X(bindless_reg)
151                                      : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
152 IR::TextureInstInfo info{};
153 info.type.Assign(tex_type);
154 info.image_format.Assign(format);
155
156 // TODO: float/64-bit operand
157 const IR::Value op_b{v.X(operand_reg)};
158 const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};
159
160 if (write_result) {
161 v.X(dest_reg, IR::U32{color});
162 }
163}
164} // Anonymous namespace
165
166void TranslatorVisitor::SUATOM(u64 insn) {
167 union {
168 u64 raw;
169 BitField<54, 1, u64> is_bindless;
170 BitField<29, 4, AtomicOp> op;
171 BitField<33, 3, Type> type;
172 BitField<51, 3, Size> size;
173 BitField<49, 2, Clamp> clamp;
174 BitField<0, 8, IR::Reg> dest_reg;
175 BitField<8, 8, IR::Reg> coord_reg;
176 BitField<20, 8, IR::Reg> operand_reg;
177 BitField<36, 13, u64> bound_offset; // !is_bindless
178 BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
179 } const suatom{insn};
180
181 ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
182 suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
183 suatom.is_bindless != 0, true);
184}
185
186void TranslatorVisitor::SURED(u64 insn) {
187 // TODO: confirm offsets
188 union {
189 u64 raw;
190 BitField<51, 1, u64> is_bound;
191 BitField<21, 3, AtomicOp> op;
192 BitField<33, 3, Type> type;
193 BitField<20, 3, Size> size;
194 BitField<49, 2, Clamp> clamp;
195 BitField<0, 8, IR::Reg> operand_reg;
196 BitField<8, 8, IR::Reg> coord_reg;
197 BitField<36, 13, u64> bound_offset; // is_bound
198 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
199 } const sured{insn};
200 ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
201 sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
202 sured.is_bound == 0, false);
203}
204
205} // namespace Shader::Maxwell
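
Both SUATOM and SURED resolve the image handle the same way: bound accesses turn the 13-bit field into a constant-buffer byte offset, bindless accesses read the handle from a register. A one-line model (SurfaceHandle is hypothetical):

#include <cstdint>

uint32_t SurfaceHandle(bool is_bindless, uint32_t bindless_reg_value, uint32_t bound_offset) {
    // bound_offset is a word index; the IR consumes a byte offset
    return is_bindless ? bindless_reg_value : bound_offset * 4;
}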
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14namespace {
15enum class Type : u64 {
16 _1D,
17 BUFFER_1D,
18 ARRAY_1D,
19 _2D,
20 ARRAY_2D,
21 _3D,
22};
23
24constexpr unsigned R = 1 << 0;
25constexpr unsigned G = 1 << 1;
26constexpr unsigned B = 1 << 2;
27constexpr unsigned A = 1 << 3;
28
29constexpr std::array MASK{
30 0U, //
31 R, //
32 G, //
33 R | G, //
34 B, //
35 R | B, //
36 G | B, //
37 R | G | B, //
38 A, //
39 R | A, //
40 G | A, //
41 R | G | A, //
42 B | A, //
43 R | B | A, //
44 G | B | A, //
45 R | G | B | A, //
46};
47
48enum class Size : u64 {
49 U8,
50 S8,
51 U16,
52 S16,
53 B32,
54 B64,
55 B128,
56};
57
58enum class Clamp : u64 {
59 IGN,
60 Default,
61 TRAP,
62};
63
64// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
65enum class LoadCache : u64 {
66 CA, // Cache at all levels, likely to be accessed again
67 CG, // Cache at global level (L2 and below, not L1)
68 CI, // ???
69 CV, // Don't cache and fetch again (volatile)
70};
71
72enum class StoreCache : u64 {
73 WB, // Cache write-back all coherent levels
74 CG, // Cache at global level (L2 and below, not L1)
75 CS, // Cache streaming, likely to be accessed once
76 WT, // Cache write-through (to system memory, volatile?)
77};
78
79ImageFormat Format(Size size) {
80 switch (size) {
81 case Size::U8:
82 return ImageFormat::R8_UINT;
83 case Size::S8:
84 return ImageFormat::R8_SINT;
85 case Size::U16:
86 return ImageFormat::R16_UINT;
87 case Size::S16:
88 return ImageFormat::R16_SINT;
89 case Size::B32:
90 return ImageFormat::R32_UINT;
91 case Size::B64:
92 return ImageFormat::R32G32_UINT;
93 case Size::B128:
94 return ImageFormat::R32G32B32A32_UINT;
95 }
96 throw NotImplementedException("Invalid size {}", size);
97}
98
99int SizeInRegs(Size size) {
100 switch (size) {
101 case Size::U8:
102 case Size::S8:
103 case Size::U16:
104 case Size::S16:
105 case Size::B32:
106 return 1;
107 case Size::B64:
108 return 2;
109 case Size::B128:
110 return 4;
111 }
112 throw NotImplementedException("Invalid size {}", size);
113}
114
115TextureType GetType(Type type) {
116 switch (type) {
117 case Type::_1D:
118 return TextureType::Color1D;
119 case Type::BUFFER_1D:
120 return TextureType::Buffer;
121 case Type::ARRAY_1D:
122 return TextureType::ColorArray1D;
123 case Type::_2D:
124 return TextureType::Color2D;
125 case Type::ARRAY_2D:
126 return TextureType::ColorArray2D;
127 case Type::_3D:
128 return TextureType::Color3D;
129 }
130 throw NotImplementedException("Invalid type {}", type);
131}
132
133IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
134 const auto array{[&](int index) {
135 return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
136 }};
137 switch (type) {
138 case Type::_1D:
139 case Type::BUFFER_1D:
140 return v.X(reg);
141 case Type::ARRAY_1D:
142 return v.ir.CompositeConstruct(v.X(reg), array(1));
143 case Type::_2D:
144 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
145 case Type::ARRAY_2D:
146 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
147 case Type::_3D:
148 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
149 }
150 throw NotImplementedException("Invalid type {}", type);
151}
152
153unsigned SwizzleMask(u64 swizzle) {
154 if (swizzle == 0 || swizzle >= MASK.size()) {
155 throw NotImplementedException("Invalid swizzle {}", swizzle);
156 }
157 return MASK[swizzle];
158}
159
160IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
161 std::array<IR::U32, 4> colors;
162 for (int i = 0; i < num_regs; ++i) {
163 colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
164 }
165 for (int i = num_regs; i < 4; ++i) {
166 colors[static_cast<size_t>(i)] = ir.Imm32(0);
167 }
168 return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
169}
170} // Anonymous namespace
171
172void TranslatorVisitor::SULD(u64 insn) {
173 union {
174 u64 raw;
175 BitField<51, 1, u64> is_bound;
176 BitField<52, 1, u64> d;
177 BitField<23, 1, u64> ba;
178 BitField<33, 3, Type> type;
179 BitField<24, 2, LoadCache> cache;
180 BitField<20, 3, Size> size; // .D
181 BitField<20, 4, u64> swizzle; // .P
182 BitField<49, 2, Clamp> clamp;
183 BitField<0, 8, IR::Reg> dest_reg;
184 BitField<8, 8, IR::Reg> coord_reg;
185 BitField<36, 13, u64> bound_offset; // is_bound
186 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
187 } const suld{insn};
188
189 if (suld.clamp != Clamp::IGN) {
190 throw NotImplementedException("Clamp {}", suld.clamp.Value());
191 }
192 if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
193 throw NotImplementedException("Cache {}", suld.cache.Value());
194 }
195 const bool is_typed{suld.d != 0};
196 if (is_typed && suld.ba != 0) {
197 throw NotImplementedException("BA");
198 }
199
200 const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
201 const TextureType type{GetType(suld.type)};
202 const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
203 const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
204 : X(suld.bindless_reg)};
205 IR::TextureInstInfo info{};
206 info.type.Assign(type);
207 info.image_format.Assign(format);
208
209 const IR::Value result{ir.ImageRead(handle, coords, info)};
210 IR::Reg dest_reg{suld.dest_reg};
211 if (is_typed) {
212 const int num_regs{SizeInRegs(suld.size)};
213 for (int i = 0; i < num_regs; ++i) {
214 X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
215 }
216 } else {
217 const unsigned mask{SwizzleMask(suld.swizzle)};
218 const int bits{std::popcount(mask)};
219 if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
220 throw NotImplementedException("Unaligned destination register");
221 }
222 for (unsigned component = 0; component < 4; ++component) {
223 if (((mask >> component) & 1) == 0) {
224 continue;
225 }
226 X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
227 ++dest_reg;
228 }
229 }
230}
231
232void TranslatorVisitor::SUST(u64 insn) {
233 union {
234 u64 raw;
235 BitField<51, 1, u64> is_bound;
236 BitField<52, 1, u64> d;
237 BitField<23, 1, u64> ba;
238 BitField<33, 3, Type> type;
239 BitField<24, 2, StoreCache> cache;
240 BitField<20, 3, Size> size; // .D
241 BitField<20, 4, u64> swizzle; // .P
242 BitField<49, 2, Clamp> clamp;
243 BitField<0, 8, IR::Reg> data_reg;
244 BitField<8, 8, IR::Reg> coord_reg;
245 BitField<36, 13, u64> bound_offset; // is_bound
246 BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
247 } const sust{insn};
248
249 if (sust.clamp != Clamp::IGN) {
250 throw NotImplementedException("Clamp {}", sust.clamp.Value());
251 }
252 if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
253 throw NotImplementedException("Cache {}", sust.cache.Value());
254 }
255 const bool is_typed{sust.d != 0};
256 if (is_typed && sust.ba != 0) {
257 throw NotImplementedException("BA");
258 }
259 const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
260 const TextureType type{GetType(sust.type)};
261 const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
262 const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
263 : X(sust.bindless_reg)};
264 IR::TextureInstInfo info{};
265 info.type.Assign(type);
266 info.image_format.Assign(format);
267
268 IR::Value color;
269 if (is_typed) {
270 color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
271 } else {
272 const unsigned mask{SwizzleMask(sust.swizzle)};
273 if (mask != 0xf) {
274 throw NotImplementedException("Non-full mask");
275 }
276 color = MakeColor(ir, sust.data_reg, 4);
277 }
278 ir.ImageWrite(handle, coords, color, info);
279}
280
281} // namespace Shader::Maxwell
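
In the untyped (.P) path only the swizzle-selected components are written, packed into consecutive destination registers, and a 3-component mask still requires 4-register alignment. A small host-side model of the scatter and the alignment rule (helper names are hypothetical):

#include <array>
#include <bit>
#include <cstddef>

unsigned RequiredAlignment(unsigned mask) {
    const int bits = std::popcount(mask);
    return bits == 3 ? 4u : static_cast<unsigned>(bits); // mask 0 is rejected earlier
}

int ScatterSwizzled(unsigned mask, int dest_reg, const std::array<unsigned, 4>& color,
                    std::array<unsigned, 256>& regs) {
    for (unsigned component = 0; component < 4; ++component) {
        if ((mask >> component) & 1) {
            regs[static_cast<std::size_t>(dest_reg++)] = color[component];
        }
    }
    return dest_reg; // first register past the written ones
}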
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Blod : u64 {
15 None,
16 LZ,
17 LB,
18 LL,
19 INVALIDBLOD4,
20 INVALIDBLOD5,
21 LBA,
22 LLA,
23};
24
25enum class TextureType : u64 {
26 _1D,
27 ARRAY_1D,
28 _2D,
29 ARRAY_2D,
30 _3D,
31 ARRAY_3D,
32 CUBE,
33 ARRAY_CUBE,
34};
35
36Shader::TextureType GetType(TextureType type) {
37 switch (type) {
38 case TextureType::_1D:
39 return Shader::TextureType::Color1D;
40 case TextureType::ARRAY_1D:
41 return Shader::TextureType::ColorArray1D;
42 case TextureType::_2D:
43 return Shader::TextureType::Color2D;
44 case TextureType::ARRAY_2D:
45 return Shader::TextureType::ColorArray2D;
46 case TextureType::_3D:
47 return Shader::TextureType::Color3D;
48 case TextureType::ARRAY_3D:
49 throw NotImplementedException("3D array texture type");
50 case TextureType::CUBE:
51 return Shader::TextureType::ColorCube;
52 case TextureType::ARRAY_CUBE:
53 return Shader::TextureType::ColorArrayCube;
54 }
55 throw NotImplementedException("Invalid texture type {}", type);
56}
57
58IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
59 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
60 switch (type) {
61 case TextureType::_1D:
62 return v.F(reg);
63 case TextureType::ARRAY_1D:
64 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
65 case TextureType::_2D:
66 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
67 case TextureType::ARRAY_2D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
69 case TextureType::_3D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
71 case TextureType::ARRAY_3D:
72 throw NotImplementedException("3D array texture type");
73 case TextureType::CUBE:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_CUBE:
76 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
77 }
78 throw NotImplementedException("Invalid texture type {}", type);
79}
80
81IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
82 switch (blod) {
83 case Blod::None:
84 return v.ir.Imm32(0.0f);
85 case Blod::LZ:
86 return v.ir.Imm32(0.0f);
87 case Blod::LB:
88 case Blod::LL:
89 case Blod::LBA:
90 case Blod::LLA:
91 return v.F(reg++);
92 case Blod::INVALIDBLOD4:
93 case Blod::INVALIDBLOD5:
94 break;
95 }
96 throw NotImplementedException("Invalid blod {}", blod);
97}
98
99IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
100 const IR::U32 value{v.X(reg++)};
101 switch (type) {
102 case TextureType::_1D:
103 case TextureType::ARRAY_1D:
104 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
105 case TextureType::_2D:
106 case TextureType::ARRAY_2D:
107 return v.ir.CompositeConstruct(
108 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
109 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
110 case TextureType::_3D:
111 case TextureType::ARRAY_3D:
112 return v.ir.CompositeConstruct(
113 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
114 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
116 case TextureType::CUBE:
117 case TextureType::ARRAY_CUBE:
118 throw NotImplementedException("Illegal offset on CUBE sample");
119 }
120 throw NotImplementedException("Invalid texture type {}", type);
121}
122
123bool HasExplicitLod(Blod blod) {
124 switch (blod) {
125 case Blod::LL:
126 case Blod::LLA:
127 case Blod::LZ:
128 return true;
129 default:
130 return false;
131 }
132}
133
134void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
135 std::optional<u32> cbuf_offset) {
136 union {
137 u64 raw;
138 BitField<35, 1, u64> ndv;
139 BitField<49, 1, u64> nodep;
140 BitField<50, 1, u64> dc;
141 BitField<51, 3, IR::Pred> sparse_pred;
142 BitField<0, 8, IR::Reg> dest_reg;
143 BitField<8, 8, IR::Reg> coord_reg;
144 BitField<20, 8, IR::Reg> meta_reg;
145 BitField<28, 3, TextureType> type;
146 BitField<31, 4, u64> mask;
147 } const tex{insn};
148
149 if (lc) {
150 throw NotImplementedException("LC");
151 }
152 const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
153
154 IR::Reg meta_reg{tex.meta_reg};
155 IR::Value handle;
156 IR::Value offset;
157 IR::F32 dref;
158 IR::F32 lod_clamp;
159 if (cbuf_offset) {
160 handle = v.ir.Imm32(*cbuf_offset);
161 } else {
162 handle = v.X(meta_reg++);
163 }
164 const IR::F32 lod{MakeLod(v, meta_reg, blod)};
165 if (aoffi) {
166 offset = MakeOffset(v, meta_reg, tex.type);
167 }
168 if (tex.dc != 0) {
169 dref = v.F(meta_reg++);
170 }
171 IR::TextureInstInfo info{};
172 info.type.Assign(GetType(tex.type));
173 info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
174 info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
175 info.has_lod_clamp.Assign(lc ? 1 : 0);
176
177 const IR::Value sample{[&]() -> IR::Value {
178 if (tex.dc == 0) {
179 if (HasExplicitLod(blod)) {
180 return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
181 } else {
182 return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
183 }
184 }
185 if (HasExplicitLod(blod)) {
186 return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
187 } else {
188 return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
189 info);
190 }
191 }()};
192
193 IR::Reg dest_reg{tex.dest_reg};
194 for (int element = 0; element < 4; ++element) {
195 if (((tex.mask >> element) & 1) == 0) {
196 continue;
197 }
198 IR::F32 value;
199 if (tex.dc != 0) {
200 value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
201 } else {
202 value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
203 }
204 v.F(dest_reg, value);
205 ++dest_reg;
206 }
207 if (tex.sparse_pred != IR::Pred::PT) {
208 v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
209 }
210}
211} // Anonymous namespace
212
213void TranslatorVisitor::TEX(u64 insn) {
214 union {
215 u64 raw;
216 BitField<54, 1, u64> aoffi;
217 BitField<55, 3, Blod> blod;
218 BitField<58, 1, u64> lc;
219 BitField<36, 13, u64> cbuf_offset;
220 } const tex{insn};
221
222 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
223}
224
225void TranslatorVisitor::TEX_b(u64 insn) {
226 union {
227 u64 raw;
228 BitField<36, 1, u64> aoffi;
229 BitField<37, 3, Blod> blod;
230 BitField<40, 1, u64> lc;
231 } const tex{insn};
232
233 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
234}
235
236} // namespace Shader::Maxwell
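
The unions above lean on common/bit_field.h to slice fields out of the 64-bit instruction word, and cbuf_offset counts 32-bit constant-buffer words, which is why TEX multiplies it by 4 (tex.cbuf_offset * 4) to form a byte offset into the constant buffer. A minimal standalone sketch of that decode, assuming an illustrative ExtractBits helper rather than the BitField template:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-in for BitField<Position, Bits, u64>::Value().
    constexpr std::uint64_t ExtractBits(std::uint64_t insn, unsigned position, unsigned bits) {
        return (insn >> position) & ((std::uint64_t{1} << bits) - 1);
    }

    int main() {
        // Hypothetical instruction word whose cbuf_offset field (bits [36, 49)) is 0x123 words.
        const std::uint64_t insn = std::uint64_t{0x123} << 36;
        const std::uint64_t words = ExtractBits(insn, 36, 13);
        std::printf("cbuf byte offset = %llu\n", static_cast<unsigned long long>(words * 4));
    }
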
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19union Encoding {
20 u64 raw;
21 BitField<59, 1, Precision> precision;
22 BitField<53, 4, u64> encoding;
23 BitField<49, 1, u64> nodep;
24 BitField<28, 8, IR::Reg> dest_reg_b;
25 BitField<0, 8, IR::Reg> dest_reg_a;
26 BitField<8, 8, IR::Reg> src_reg_a;
27 BitField<20, 8, IR::Reg> src_reg_b;
28 BitField<36, 13, u64> cbuf_offset;
29 BitField<50, 3, u64> swizzle;
30};
31
32constexpr unsigned R = 1;
33constexpr unsigned G = 2;
34constexpr unsigned B = 4;
35constexpr unsigned A = 8;
36
37constexpr std::array RG_LUT{
38 R, //
39 G, //
40 B, //
41 A, //
42 R | G, //
43 R | A, //
44 G | A, //
45 B | A, //
46};
47
48constexpr std::array RGBA_LUT{
49 R | G | B, //
50 R | G | A, //
51 R | B | A, //
52 G | B | A, //
53 R | G | B | A, //
54};
55
56void CheckAlignment(IR::Reg reg, size_t alignment) {
57 if (!IR::IsAligned(reg, alignment)) {
58 throw NotImplementedException("Unaligned source register {}", reg);
59 }
60}
61
62template <typename... Args>
63IR::Value Composite(TranslatorVisitor& v, Args... regs) {
64 return v.ir.CompositeConstruct(v.F(regs)...);
65}
66
67IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
68 return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding texs{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
74 const IR::F32 zero{v.ir.Imm32(0.0f)};
75 const IR::Reg reg_a{texs.src_reg_a};
76 const IR::Reg reg_b{texs.src_reg_b};
77 IR::TextureInstInfo info{};
78 if (texs.precision == Precision::F16) {
79 info.relaxed_precision.Assign(1);
80 }
81 switch (texs.encoding) {
82 case 0: // 1D.LZ
83 info.type.Assign(TextureType::Color1D);
84 return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
85 case 1: // 2D
86 info.type.Assign(TextureType::Color2D);
87 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
88 case 2: // 2D.LZ
89 info.type.Assign(TextureType::Color2D);
90 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
91 case 3: // 2D.LL
92 CheckAlignment(reg_a, 2);
93 info.type.Assign(TextureType::Color2D);
94 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
95 info);
96 case 4: // 2D.DC
97 CheckAlignment(reg_a, 2);
98 info.type.Assign(TextureType::Color2D);
99 info.is_depth.Assign(1);
100 return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
101 {}, {}, {}, info);
102 case 5: // 2D.LL.DC
103 CheckAlignment(reg_a, 2);
104 CheckAlignment(reg_b, 2);
105 info.type.Assign(TextureType::Color2D);
106 info.is_depth.Assign(1);
107 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
108 v.F(reg_b + 1), v.F(reg_b), {}, info);
109 case 6: // 2D.LZ.DC
110 CheckAlignment(reg_a, 2);
111 info.type.Assign(TextureType::Color2D);
112 info.is_depth.Assign(1);
113 return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
114 zero, {}, info);
115 case 7: // ARRAY_2D
116 CheckAlignment(reg_a, 2);
117 info.type.Assign(TextureType::ColorArray2D);
118 return v.ir.ImageSampleImplicitLod(
119 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
120 {}, {}, {}, info);
121 case 8: // ARRAY_2D.LZ
122 CheckAlignment(reg_a, 2);
123 info.type.Assign(TextureType::ColorArray2D);
124 return v.ir.ImageSampleExplicitLod(
125 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
126 zero, {}, info);
127 case 9: // ARRAY_2D.LZ.DC
128 CheckAlignment(reg_a, 2);
129 CheckAlignment(reg_b, 2);
130 info.type.Assign(TextureType::ColorArray2D);
131 info.is_depth.Assign(1);
132 return v.ir.ImageSampleDrefExplicitLod(
133 handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
134 v.F(reg_b + 1), zero, {}, info);
135 case 10: // 3D
136 CheckAlignment(reg_a, 2);
137 info.type.Assign(TextureType::Color3D);
138 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
139 {}, info);
140 case 11: // 3D.LZ
141 CheckAlignment(reg_a, 2);
142 info.type.Assign(TextureType::Color3D);
143 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
144 info);
145 case 12: // CUBE
146 CheckAlignment(reg_a, 2);
147 info.type.Assign(TextureType::ColorCube);
148 return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
149 {}, info);
150 case 13: // CUBE.LL
151 CheckAlignment(reg_a, 2);
152 CheckAlignment(reg_b, 2);
153 info.type.Assign(TextureType::ColorCube);
154 return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
155 v.F(reg_b + 1), {}, info);
156 default:
157 throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
158 }
159}
160
161unsigned Swizzle(u64 insn) {
162 const Encoding texs{insn};
163 const size_t encoding{texs.swizzle};
164 if (texs.dest_reg_b == IR::Reg::RZ) {
165 if (encoding >= RG_LUT.size()) {
166 throw NotImplementedException("Illegal RG encoding {}", encoding);
167 }
168 return RG_LUT[encoding];
169 } else {
170 if (encoding >= RGBA_LUT.size()) {
171 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
172 }
173 return RGBA_LUT[encoding];
174 }
175}
176
177IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
178 const bool is_shadow{sample.Type() == IR::Type::F32};
179 if (is_shadow) {
180 const bool is_alpha{component == 3};
181 return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
182 } else {
183 return IR::F32{v.ir.CompositeExtract(sample, component)};
184 }
185}
186
187IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
188 const Encoding texs{insn};
189 switch (index) {
190 case 0:
191 return texs.dest_reg_a;
192 case 1:
193 CheckAlignment(texs.dest_reg_a, 2);
194 return texs.dest_reg_a + 1;
195 case 2:
196 return texs.dest_reg_b;
197 case 3:
198 CheckAlignment(texs.dest_reg_b, 2);
199 return texs.dest_reg_b + 1;
200 }
201 throw LogicError("Invalid store index {}", index);
202}
203
204void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
205 const unsigned swizzle{Swizzle(insn)};
206 unsigned store_index{0};
207 for (unsigned component = 0; component < 4; ++component) {
208 if (((swizzle >> component) & 1) == 0) {
209 continue;
210 }
211 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
212 v.F(dest, Extract(v, sample, component));
213 ++store_index;
214 }
215}
216
217IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
218 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
219}
220
221void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
222 const unsigned swizzle{Swizzle(insn)};
223 unsigned store_index{0};
224 std::array<IR::F32, 4> swizzled;
225 for (unsigned component = 0; component < 4; ++component) {
226 if (((swizzle >> component) & 1) == 0) {
227 continue;
228 }
229 swizzled[store_index] = Extract(v, sample, component);
230 ++store_index;
231 }
232 const IR::F32 zero{v.ir.Imm32(0.0f)};
233 const Encoding texs{insn};
234 switch (store_index) {
235 case 1:
236 v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
237 break;
238 case 2:
239 case 3:
240 case 4:
241 v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
242 switch (store_index) {
243 case 2:
244 break;
245 case 3:
246 v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
247 break;
248 case 4:
249 v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
250 break;
251 }
252 break;
253 }
254}
255} // Anonymous namespace
256
257void TranslatorVisitor::TEXS(u64 insn) {
258 const IR::Value sample{Sample(*this, insn)};
259 if (Encoding{insn}.precision == Precision::F32) {
260 Store32(*this, insn, sample);
261 } else {
262 Store16(*this, insn, sample);
263 }
264}
265
266} // namespace Shader::Maxwell
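
The TEXS swizzle above is a 3-bit index into one of two mask tables, chosen by whether dest_reg_b is RZ; Store32 then walks the mask and writes each selected sample component to the next store slot. A self-contained sketch of that routing, reusing the RG_LUT layout shown above (illustrative only):

    #include <array>
    #include <cstdio>

    constexpr unsigned R = 1, G = 2, B = 4, A = 8;
    constexpr std::array<unsigned, 8> rg_lut{R, G, B, A, R | G, R | A, G | A, B | A};

    int main() {
        const unsigned mask = rg_lut[5]; // swizzle encoding 5 selects R | A
        unsigned store_index = 0;
        for (unsigned component = 0; component < 4; ++component) {
            if (((mask >> component) & 1) == 0) {
                continue;
            }
            // Prints: component 0 -> slot 0, component 3 -> slot 1
            std::printf("sample component %u -> store slot %u\n", component, store_index);
            ++store_index;
        }
    }
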
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26enum class OffsetType : u64 {
27 None = 0,
28 AOFFI,
29 PTP,
30 Invalid,
31};
32
33enum class ComponentType : u64 {
34 R = 0,
35 G = 1,
36 B = 2,
37 A = 3,
38};
39
40Shader::TextureType GetType(TextureType type) {
41 switch (type) {
42 case TextureType::_1D:
43 return Shader::TextureType::Color1D;
44 case TextureType::ARRAY_1D:
45 return Shader::TextureType::ColorArray1D;
46 case TextureType::_2D:
47 return Shader::TextureType::Color2D;
48 case TextureType::ARRAY_2D:
49 return Shader::TextureType::ColorArray2D;
50 case TextureType::_3D:
51 return Shader::TextureType::Color3D;
52 case TextureType::ARRAY_3D:
53 throw NotImplementedException("3D array texture type");
54 case TextureType::CUBE:
55 return Shader::TextureType::ColorCube;
56 case TextureType::ARRAY_CUBE:
57 return Shader::TextureType::ColorArrayCube;
58 }
59 throw NotImplementedException("Invalid texture type {}", type);
60}
61
62IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
63 const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
64 switch (type) {
65 case TextureType::_1D:
66 return v.F(reg);
67 case TextureType::ARRAY_1D:
68 return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
69 case TextureType::_2D:
70 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
71 case TextureType::ARRAY_2D:
72 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
73 case TextureType::_3D:
74 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
75 case TextureType::ARRAY_3D:
76 throw NotImplementedException("3D array texture type");
77 case TextureType::CUBE:
78 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
79 case TextureType::ARRAY_CUBE:
80 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
81 }
82 throw NotImplementedException("Invalid texture type {}", type);
83}
84
85IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
86 const IR::U32 value{v.X(reg++)};
87 switch (type) {
88 case TextureType::_1D:
89 case TextureType::ARRAY_1D:
90 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
91 case TextureType::_2D:
92 case TextureType::ARRAY_2D:
93 return v.ir.CompositeConstruct(
94 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
95 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
96 case TextureType::_3D:
97 case TextureType::ARRAY_3D:
98 return v.ir.CompositeConstruct(
99 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
100 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
101 v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
102 case TextureType::CUBE:
103 case TextureType::ARRAY_CUBE:
104 throw NotImplementedException("Illegal offset on CUBE sample");
105 }
106 throw NotImplementedException("Invalid texture type {}", type);
107}
108
109std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
110 const IR::U32 value1{v.X(reg++)};
111 const IR::U32 value2{v.X(reg++)};
112 const IR::U32 bitsize{v.ir.Imm32(6)};
113 const auto make_vector{[&v, &bitsize](const IR::U32& value) {
114 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
116 v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
117 v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
118 }};
119 return {make_vector(value1), make_vector(value2)};
120}
121
122void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
123 bool is_bindless) {
124 union {
125 u64 raw;
126 BitField<35, 1, u64> ndv;
127 BitField<49, 1, u64> nodep;
128 BitField<50, 1, u64> dc;
129 BitField<51, 3, IR::Pred> sparse_pred;
130 BitField<0, 8, IR::Reg> dest_reg;
131 BitField<8, 8, IR::Reg> coord_reg;
132 BitField<20, 8, IR::Reg> meta_reg;
133 BitField<28, 3, TextureType> type;
134 BitField<31, 4, u64> mask;
135 BitField<36, 13, u64> cbuf_offset;
136 } const tld4{insn};
137
138 const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};
139
140 IR::Reg meta_reg{tld4.meta_reg};
141 IR::Value handle;
142 IR::Value offset;
143 IR::Value offset2;
144 IR::F32 dref;
145 if (!is_bindless) {
146 handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
147 } else {
148 handle = v.X(meta_reg++);
149 }
150 switch (offset_type) {
151 case OffsetType::None:
152 break;
153 case OffsetType::AOFFI:
154 offset = MakeOffset(v, meta_reg, tld4.type);
155 break;
156 case OffsetType::PTP:
157 std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
158 break;
159 default:
160 throw NotImplementedException("Invalid offset type {}", offset_type);
161 }
162 if (tld4.dc != 0) {
163 dref = v.F(meta_reg++);
164 }
165 IR::TextureInstInfo info{};
166 info.type.Assign(GetType(tld4.type));
167 info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
168 info.gather_component.Assign(static_cast<u32>(component_type));
169 const IR::Value sample{[&] {
170 if (tld4.dc == 0) {
171 return v.ir.ImageGather(handle, coords, offset, offset2, info);
172 }
173 return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
174 }()};
175
176 IR::Reg dest_reg{tld4.dest_reg};
177 for (size_t element = 0; element < 4; ++element) {
178 if (((tld4.mask >> element) & 1) == 0) {
179 continue;
180 }
181 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
182 ++dest_reg;
183 }
184 if (tld4.sparse_pred != IR::Pred::PT) {
185 v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
186 }
187}
188} // Anonymous namespace
189
190void TranslatorVisitor::TLD4(u64 insn) {
191 union {
192 u64 raw;
193 BitField<56, 2, ComponentType> component;
194 BitField<54, 2, OffsetType> offset;
195 } const tld4{insn};
196 Impl(*this, insn, tld4.component, tld4.offset, false);
197}
198
199void TranslatorVisitor::TLD4_b(u64 insn) {
200 union {
201 u64 raw;
202 BitField<38, 2, ComponentType> component;
203 BitField<36, 2, OffsetType> offset;
204 } const tld4{insn};
205 Impl(*this, insn, tld4.component, tld4.offset, true);
206}
207
208} // namespace Shader::Maxwell
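
MakeOffsetPTP above unpacks four signed 6-bit gather offsets per source register, at bit positions 0, 8, 16 and 24, with the signed BitFieldExtract sign-extending each field. A standalone sketch of that sign extension (an illustrative helper, not the codebase's API):

    #include <cstdint>
    #include <cstdio>

    // Sign-extend the 6-bit field of 'value' that starts at bit 'pos'.
    constexpr std::int32_t ExtractS6(std::uint32_t value, unsigned pos) {
        const auto shifted = static_cast<std::int32_t>(value << (26 - pos));
        return shifted >> 26; // arithmetic shift replicates the sign bit
    }

    int main() {
        // Four packed offsets: -1, 1, -32 and 31.
        const std::uint32_t packed = (0x3Fu << 0) | (0x01u << 8) | (0x20u << 16) | (0x1Fu << 24);
        const unsigned positions[] = {0, 8, 16, 24};
        for (const unsigned pos : positions) {
            std::printf("offset at bit %u = %d\n", pos, ExtractS6(packed, pos));
        }
    }
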
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F32,
16 F16,
17};
18
19enum class ComponentType : u64 {
20 R = 0,
21 G = 1,
22 B = 2,
23 A = 3,
24};
25
26union Encoding {
27 u64 raw;
28 BitField<55, 1, Precision> precision;
29 BitField<52, 2, ComponentType> component_type;
30 BitField<51, 1, u64> aoffi;
31 BitField<50, 1, u64> dc;
32 BitField<49, 1, u64> nodep;
33 BitField<28, 8, IR::Reg> dest_reg_b;
34 BitField<0, 8, IR::Reg> dest_reg_a;
35 BitField<8, 8, IR::Reg> src_reg_a;
36 BitField<20, 8, IR::Reg> src_reg_b;
37 BitField<36, 13, u64> cbuf_offset;
38};
39
40void CheckAlignment(IR::Reg reg, size_t alignment) {
41 if (!IR::IsAligned(reg, alignment)) {
42 throw NotImplementedException("Unaligned source register {}", reg);
43 }
44}
45
46IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
47 const IR::U32 value{v.X(reg)};
48 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
49 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
50}
51
52IR::Value Sample(TranslatorVisitor& v, u64 insn) {
53 const Encoding tld4s{insn};
54 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
55 const IR::Reg reg_a{tld4s.src_reg_a};
56 const IR::Reg reg_b{tld4s.src_reg_b};
57 IR::TextureInstInfo info{};
58 if (tld4s.precision == Precision::F16) {
59 info.relaxed_precision.Assign(1);
60 }
61 info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
62 info.type.Assign(Shader::TextureType::Color2D);
63 info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
64 IR::Value coords;
65 if (tld4s.aoffi != 0) {
66 CheckAlignment(reg_a, 2);
67 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
68        const IR::Value offset{MakeOffset(v, reg_b)};
69 if (tld4s.dc != 0) {
70 CheckAlignment(reg_b, 2);
71            const IR::F32 dref{v.F(reg_b + 1)};
72 return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
73 }
74 return v.ir.ImageGather(handle, coords, offset, {}, info);
75 }
76 if (tld4s.dc != 0) {
77 CheckAlignment(reg_a, 2);
78 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
79        const IR::F32 dref{v.F(reg_b)};
80 return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
81 }
82 coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
83 return v.ir.ImageGather(handle, coords, {}, {}, info);
84}
85
86IR::Reg RegStoreComponent32(u64 insn, size_t index) {
87    const Encoding tld4s{insn};
88    switch (index) {
89    case 0:
90        return tld4s.dest_reg_a;
91    case 1:
92        CheckAlignment(tld4s.dest_reg_a, 2);
93        return tld4s.dest_reg_a + 1;
94    case 2:
95        return tld4s.dest_reg_b;
96    case 3:
97        CheckAlignment(tld4s.dest_reg_b, 2);
98        return tld4s.dest_reg_b + 1;
99 }
100 throw LogicError("Invalid store index {}", index);
101}
102
103void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
104 for (size_t component = 0; component < 4; ++component) {
105 const IR::Reg dest{RegStoreComponent32(insn, component)};
106 v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
107 }
108}
109
110IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
111 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
112}
113
114void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
115 std::array<IR::F32, 4> swizzled;
116 for (size_t component = 0; component < 4; ++component) {
117 swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
118 }
119 const Encoding tld4s{insn};
120 v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
121 v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
122}
123} // Anonymous namespace
124
125void TranslatorVisitor::TLD4S(u64 insn) {
126 const IR::Value sample{Sample(*this, insn)};
127 if (Encoding{insn}.precision == Precision::F32) {
128 Store32(*this, insn, sample);
129 } else {
130 Store16(*this, insn, sample);
131 }
132}
133
134} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
49 const IR::U32 value{v.X(reg)};
50 const u32 base{has_lod_clamp ? 12U : 16U};
51 return v.ir.CompositeConstruct(
52 v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
53 v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
54}
55
56void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
57 union {
58 u64 raw;
59 BitField<49, 1, u64> nodep;
60 BitField<35, 1, u64> aoffi;
61 BitField<50, 1, u64> lc;
62 BitField<51, 3, IR::Pred> sparse_pred;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> coord_reg;
65 BitField<20, 8, IR::Reg> derivate_reg;
66 BitField<28, 3, TextureType> type;
67 BitField<31, 4, u64> mask;
68 BitField<36, 13, u64> cbuf_offset;
69 } const txd{insn};
70
71 const bool has_lod_clamp = txd.lc != 0;
72 if (has_lod_clamp) {
73 throw NotImplementedException("TXD.LC - CLAMP is not implemented");
74 }
75
76 IR::Value coords;
77 u32 num_derivates{};
78 IR::Reg base_reg{txd.coord_reg};
79 IR::Reg last_reg;
80 IR::Value handle;
81 if (is_bindless) {
82 handle = v.X(base_reg++);
83 } else {
84 handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
85 }
86
87 const auto read_array{[&]() -> IR::F32 {
88 const IR::U32 base{v.ir.Imm32(0)};
89 const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
90 const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
91 return v.ir.ConvertUToF(32, 16, array_index);
92 }};
93 switch (txd.type) {
94 case TextureType::_1D: {
95 coords = v.F(base_reg);
96 num_derivates = 1;
97 last_reg = base_reg + 1;
98 break;
99 }
100 case TextureType::ARRAY_1D: {
101 last_reg = base_reg + 1;
102 coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
103 num_derivates = 1;
104 break;
105 }
106 case TextureType::_2D: {
107 last_reg = base_reg + 2;
108 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
109 num_derivates = 2;
110 break;
111 }
112 case TextureType::ARRAY_2D: {
113 last_reg = base_reg + 2;
114 coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
115 num_derivates = 2;
116 break;
117 }
118 default:
119        throw NotImplementedException("Unsupported texture type {}", txd.type.Value());
120 }
121
122 const IR::Reg derivate_reg{txd.derivate_reg};
123 IR::Value derivates;
124 switch (num_derivates) {
125 case 1: {
126 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
127 break;
128 }
129 case 2: {
130 derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
131 v.F(derivate_reg + 2), v.F(derivate_reg + 3));
132 break;
133 }
134 default:
135        throw NotImplementedException("Invalid number of derivates {}", num_derivates);
136 }
137
138 IR::Value offset;
139 if (txd.aoffi != 0) {
140 offset = MakeOffset(v, last_reg, has_lod_clamp);
141 }
142
143 IR::F32 lod_clamp;
144 if (has_lod_clamp) {
145        // The LOD clamp is a 4.8 fixed-point value; convert it to float as
146        // float(value) / float(1 << fraction_bits), with fraction_bits = 8.
147        // Multiplying by the reciprocal performs that division.
148        const IR::F32 conv4_8fixp_f{v.ir.Imm32(1.0f / static_cast<f32>(1U << 8))};
149 const IR::F32 fixp_lc{v.ir.ConvertUToF(
150 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
151 lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
152 }
153
154 IR::TextureInstInfo info{};
155 info.type.Assign(GetType(txd.type));
156 info.num_derivates.Assign(num_derivates);
157 info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
158 const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};
159
160 IR::Reg dest_reg{txd.dest_reg};
161 for (size_t element = 0; element < 4; ++element) {
162 if (((txd.mask >> element) & 1) == 0) {
163 continue;
164 }
165 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
166 ++dest_reg;
167 }
168 if (txd.sparse_pred != IR::Pred::PT) {
169 v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
170 }
171}
172} // Anonymous namespace
173
174void TranslatorVisitor::TXD(u64 insn) {
175 Impl(*this, insn, false);
176}
177
178void TranslatorVisitor::TXD_b(u64 insn) {
179 Impl(*this, insn, true);
180}
181
182} // namespace Shader::Maxwell
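
The LOD clamp handled above is 4.8 fixed point: 4 integer bits and 8 fraction bits, so dividing the raw 12-bit field by 256 recovers the float value. A quick worked example (illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const std::uint32_t raw = 0x180;                      // 1.5 in 4.8 fixed point
        const float value = static_cast<float>(raw) / 256.0f; // same as raw * (1.0f / (1u << 8))
        std::printf("lod clamp = %f\n", value);               // prints 1.500000
    }
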
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 const auto read_array{
50 [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
51 switch (type) {
52 case TextureType::_1D:
53 return v.X(reg);
54 case TextureType::ARRAY_1D:
55 return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
73 const IR::U32 value{v.X(reg++)};
74 switch (type) {
75 case TextureType::_1D:
76 case TextureType::ARRAY_1D:
77 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
78 case TextureType::_2D:
79 case TextureType::ARRAY_2D:
80 return v.ir.CompositeConstruct(
81 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
82 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
83 case TextureType::_3D:
84 case TextureType::ARRAY_3D:
85 return v.ir.CompositeConstruct(
86 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
87 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
88 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
89 case TextureType::CUBE:
90 case TextureType::ARRAY_CUBE:
91 throw NotImplementedException("Illegal offset on CUBE sample");
92 }
93 throw NotImplementedException("Invalid texture type {}", type);
94}
95
96void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
97 union {
98 u64 raw;
99 BitField<49, 1, u64> nodep;
100 BitField<55, 1, u64> lod;
101 BitField<50, 1, u64> multisample;
102 BitField<35, 1, u64> aoffi;
103 BitField<54, 1, u64> clamp;
104 BitField<51, 3, IR::Pred> sparse_pred;
105 BitField<0, 8, IR::Reg> dest_reg;
106 BitField<8, 8, IR::Reg> coord_reg;
107 BitField<20, 8, IR::Reg> meta_reg;
108 BitField<28, 3, TextureType> type;
109 BitField<31, 4, u64> mask;
110 BitField<36, 13, u64> cbuf_offset;
111 } const tld{insn};
112
113 const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
114
115 IR::Reg meta_reg{tld.meta_reg};
116 IR::Value handle;
117 IR::Value offset;
118 IR::U32 lod;
119 IR::U32 multisample;
120 if (is_bindless) {
121 handle = v.X(meta_reg++);
122 } else {
123 handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
124 }
125 if (tld.lod != 0) {
126 lod = v.X(meta_reg++);
127 } else {
128 lod = v.ir.Imm32(0U);
129 }
130 if (tld.aoffi != 0) {
131 offset = MakeOffset(v, meta_reg, tld.type);
132 }
133 if (tld.multisample != 0) {
134 multisample = v.X(meta_reg++);
135 }
136 if (tld.clamp != 0) {
137        throw NotImplementedException("TLD.CL - CLAMP is not implemented");
138 }
139 IR::TextureInstInfo info{};
140 info.type.Assign(GetType(tld.type));
141 const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
142
143 IR::Reg dest_reg{tld.dest_reg};
144 for (size_t element = 0; element < 4; ++element) {
145 if (((tld.mask >> element) & 1) == 0) {
146 continue;
147 }
148 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
149 ++dest_reg;
150 }
151 if (tld.sparse_pred != IR::Pred::PT) {
152 v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
153 }
154}
155} // Anonymous namespace
156
157void TranslatorVisitor::TLD(u64 insn) {
158 Impl(*this, insn, false);
159}
160
161void TranslatorVisitor::TLD_b(u64 insn) {
162 Impl(*this, insn, true);
163}
164
165} // namespace Shader::Maxwell
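
TLD's AOFFI offsets are signed 4-bit fields, one per coordinate axis, so every component lies in [-8, 7]; MakeOffset above extracts them at bits 0, 4 and 8. A standalone decode sketch (illustrative helper):

    #include <cstdint>
    #include <cstdio>

    // Sign-extend the 4-bit field of 'value' that starts at bit 'pos'.
    constexpr std::int32_t ExtractS4(std::uint32_t value, unsigned pos) {
        return static_cast<std::int32_t>(value << (28 - pos)) >> 28;
    }

    int main() {
        const std::uint32_t packed = 0x9F4; // encodes x = 4, y = -1, z = -7
        std::printf("x=%d y=%d z=%d\n", ExtractS4(packed, 0), ExtractS4(packed, 4),
                    ExtractS4(packed, 8));
    }
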
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Precision : u64 {
15 F16,
16 F32,
17};
18
19constexpr unsigned R = 1;
20constexpr unsigned G = 2;
21constexpr unsigned B = 4;
22constexpr unsigned A = 8;
23
24constexpr std::array RG_LUT{
25 R, //
26 G, //
27 B, //
28 A, //
29 R | G, //
30 R | A, //
31 G | A, //
32 B | A, //
33};
34
35constexpr std::array RGBA_LUT{
36 R | G | B, //
37 R | G | A, //
38 R | B | A, //
39 G | B | A, //
40 R | G | B | A, //
41};
42
43union Encoding {
44 u64 raw;
45 BitField<59, 1, Precision> precision;
46 BitField<54, 1, u64> aoffi;
47 BitField<53, 1, u64> lod;
48 BitField<55, 1, u64> ms;
49 BitField<49, 1, u64> nodep;
50 BitField<28, 8, IR::Reg> dest_reg_b;
51 BitField<0, 8, IR::Reg> dest_reg_a;
52 BitField<8, 8, IR::Reg> src_reg_a;
53 BitField<20, 8, IR::Reg> src_reg_b;
54 BitField<36, 13, u64> cbuf_offset;
55 BitField<50, 3, u64> swizzle;
56 BitField<53, 4, u64> encoding;
57};
58
59void CheckAlignment(IR::Reg reg, size_t alignment) {
60 if (!IR::IsAligned(reg, alignment)) {
61 throw NotImplementedException("Unaligned source register {}", reg);
62 }
63}
64
65IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
66 const IR::U32 value{v.X(reg)};
67 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
68 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
69}
70
71IR::Value Sample(TranslatorVisitor& v, u64 insn) {
72 const Encoding tlds{insn};
73 const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
74 const IR::Reg reg_a{tlds.src_reg_a};
75 const IR::Reg reg_b{tlds.src_reg_b};
76 IR::Value coords;
77 IR::U32 lod{v.ir.Imm32(0U)};
78 IR::Value offsets;
79 IR::U32 multisample;
80 Shader::TextureType texture_type{};
81 switch (tlds.encoding) {
82 case 0:
83 texture_type = Shader::TextureType::Color1D;
84 coords = v.X(reg_a);
85 break;
86 case 1:
87 texture_type = Shader::TextureType::Color1D;
88 coords = v.X(reg_a);
89 lod = v.X(reg_b);
90 break;
91 case 2:
92 texture_type = Shader::TextureType::Color2D;
93 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
94 break;
95 case 4:
96 CheckAlignment(reg_a, 2);
97 texture_type = Shader::TextureType::Color2D;
98 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
99 offsets = MakeOffset(v, reg_b);
100 break;
101 case 5:
102 CheckAlignment(reg_a, 2);
103 texture_type = Shader::TextureType::Color2D;
104 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
105 lod = v.X(reg_b);
106 break;
107 case 6:
108 CheckAlignment(reg_a, 2);
109 texture_type = Shader::TextureType::Color2D;
110 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
111 multisample = v.X(reg_b);
112 break;
113 case 7:
114 CheckAlignment(reg_a, 2);
115 texture_type = Shader::TextureType::Color3D;
116 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
117 break;
118 case 8: {
119 CheckAlignment(reg_b, 2);
120 const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
121 texture_type = Shader::TextureType::ColorArray2D;
122 coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
123 break;
124 }
125 case 12:
126 CheckAlignment(reg_a, 2);
127 CheckAlignment(reg_b, 2);
128 texture_type = Shader::TextureType::Color2D;
129 coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
130 lod = v.X(reg_b);
131 offsets = MakeOffset(v, reg_b + 1);
132 break;
133 default:
134 throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
135 }
136 IR::TextureInstInfo info{};
137 if (tlds.precision == Precision::F16) {
138 info.relaxed_precision.Assign(1);
139 }
140 info.type.Assign(texture_type);
141 return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
142}
143
144unsigned Swizzle(u64 insn) {
145 const Encoding tlds{insn};
146 const size_t encoding{tlds.swizzle};
147 if (tlds.dest_reg_b == IR::Reg::RZ) {
148 if (encoding >= RG_LUT.size()) {
149 throw NotImplementedException("Illegal RG encoding {}", encoding);
150 }
151 return RG_LUT[encoding];
152 } else {
153 if (encoding >= RGBA_LUT.size()) {
154 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
155 }
156 return RGBA_LUT[encoding];
157 }
158}
159
160IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
161 return IR::F32{v.ir.CompositeExtract(sample, component)};
162}
163
164IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
165 const Encoding tlds{insn};
166 switch (index) {
167 case 0:
168 return tlds.dest_reg_a;
169 case 1:
170 CheckAlignment(tlds.dest_reg_a, 2);
171 return tlds.dest_reg_a + 1;
172 case 2:
173 return tlds.dest_reg_b;
174 case 3:
175 CheckAlignment(tlds.dest_reg_b, 2);
176 return tlds.dest_reg_b + 1;
177 }
178 throw LogicError("Invalid store index {}", index);
179}
180
181void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
182 const unsigned swizzle{Swizzle(insn)};
183 unsigned store_index{0};
184 for (unsigned component = 0; component < 4; ++component) {
185 if (((swizzle >> component) & 1) == 0) {
186 continue;
187 }
188 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
189 v.F(dest, Extract(v, sample, component));
190 ++store_index;
191 }
192}
193
194IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
195 return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
196}
197
198void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
199 const unsigned swizzle{Swizzle(insn)};
200 unsigned store_index{0};
201 std::array<IR::F32, 4> swizzled;
202 for (unsigned component = 0; component < 4; ++component) {
203 if (((swizzle >> component) & 1) == 0) {
204 continue;
205 }
206 swizzled[store_index] = Extract(v, sample, component);
207 ++store_index;
208 }
209 const IR::F32 zero{v.ir.Imm32(0.0f)};
210 const Encoding tlds{insn};
211 switch (store_index) {
212 case 1:
213 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
214 break;
215 case 2:
216 case 3:
217 case 4:
218 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
219 switch (store_index) {
220 case 2:
221 break;
222 case 3:
223 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
224 break;
225 case 4:
226 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
227 break;
228 }
229 break;
230 }
231}
232} // Anonymous namespace
233
234void TranslatorVisitor::TLDS(u64 insn) {
235 const IR::Value sample{Sample(*this, insn)};
236 if (Encoding{insn}.precision == Precision::F32) {
237 Store32(*this, insn, sample);
238 } else {
239 Store16(*this, insn, sample);
240 }
241}
242} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49    // The ISA reads an array component here, but high-level shading languages do
50    // not need it, so this information is dropped.
51 switch (type) {
52 case TextureType::_1D:
53 return v.F(reg);
54 case TextureType::ARRAY_1D:
55 return v.F(reg + 1);
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
73 union {
74 u64 raw;
75 BitField<49, 1, u64> nodep;
76 BitField<35, 1, u64> ndv;
77 BitField<0, 8, IR::Reg> dest_reg;
78 BitField<8, 8, IR::Reg> coord_reg;
79 BitField<20, 8, IR::Reg> meta_reg;
80 BitField<28, 3, TextureType> type;
81 BitField<31, 4, u64> mask;
82 BitField<36, 13, u64> cbuf_offset;
83 } const tmml{insn};
84
85 if ((tmml.mask & 0b1100) != 0) {
86        throw NotImplementedException("TMML BA results are not implemented");
87 }
88 const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
89
90 IR::U32 handle;
91 IR::Reg meta_reg{tmml.meta_reg};
92 if (is_bindless) {
93 handle = v.X(meta_reg++);
94 } else {
95 handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
96 }
97 IR::TextureInstInfo info{};
98 info.type.Assign(GetType(tmml.type));
99 const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
100
101 IR::Reg dest_reg{tmml.dest_reg};
102 for (size_t element = 0; element < 4; ++element) {
103 if (((tmml.mask >> element) & 1) == 0) {
104 continue;
105 }
106 IR::F32 value{v.ir.CompositeExtract(sample, element)};
107 if (element < 2) {
108 IR::U32 casted_value;
109 if (element == 0) {
110 casted_value = v.ir.ConvertFToU(32, value);
111 } else {
112 casted_value = v.ir.ConvertFToS(16, value);
113 }
114 v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
115 } else {
116 v.F(dest_reg, value);
117 }
118 ++dest_reg;
119 }
120}
121} // Anonymous namespace
122
123void TranslatorVisitor::TMML(u64 insn) {
124 Impl(*this, insn, false);
125}
126
127void TranslatorVisitor::TMML_b(u64 insn) {
128 Impl(*this, insn, true);
129}
130
131} // namespace Shader::Maxwell
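
TMML's first two result elements are written back shifted left by 8, i.e. as fixed point with 8 fraction bits; because ConvertFToU/ConvertFToS truncate before the shift, this translation drops the fractional part of the queried LOD. A small sketch of the final packing (illustrative):

    #include <cstdint>
    #include <cstdio>

    int main() {
        const float queried_lod = 3.0f; // any fraction would be truncated below
        const auto integer_lod = static_cast<std::uint32_t>(queried_lod);
        std::printf("packed = 0x%X\n", integer_lod << 8); // prints 0x300
    }
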
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14enum class Mode : u64 {
15 Dimension = 1,
16 TextureType = 2,
17 SamplePos = 5,
18};
19
20IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
21 switch (mode) {
22 case Mode::Dimension: {
23 const IR::U32 lod{v.X(src_reg)};
24 return v.ir.ImageQueryDimension(handle, lod);
25 }
26 case Mode::TextureType:
27 case Mode::SamplePos:
28 default:
29 throw NotImplementedException("Mode {}", mode);
30 }
31}
32
33void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
34 union {
35 u64 raw;
36 BitField<49, 1, u64> nodep;
37 BitField<0, 8, IR::Reg> dest_reg;
38 BitField<8, 8, IR::Reg> src_reg;
39 BitField<22, 3, Mode> mode;
40 BitField<31, 4, u64> mask;
41 } const txq{insn};
42
43 IR::Reg src_reg{txq.src_reg};
44 IR::U32 handle;
45 if (cbuf_offset) {
46 handle = v.ir.Imm32(*cbuf_offset);
47 } else {
48 handle = v.X(src_reg);
49 ++src_reg;
50 }
51 const IR::Value query{Query(v, handle, txq.mode, src_reg)};
52 IR::Reg dest_reg{txq.dest_reg};
53 for (int element = 0; element < 4; ++element) {
54 if (((txq.mask >> element) & 1) == 0) {
55 continue;
56 }
57 v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
58 ++dest_reg;
59 }
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::TXQ(u64 insn) {
64 union {
65 u64 raw;
66 BitField<36, 13, u64> cbuf_offset;
67 } const txq{insn};
68
69 Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
70}
71
72void TranslatorVisitor::TXQ_b(u64 insn) {
73 Impl(*this, insn, std::nullopt);
74}
75
76} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
7
8namespace Shader::Maxwell {
9
10IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
11 u32 selector, bool is_signed) {
12 switch (width) {
13 case VideoWidth::Byte:
14 case VideoWidth::Unknown:
15 return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
16 case VideoWidth::Short:
17 return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
18 case VideoWidth::Word:
19 return value;
20 default:
21 throw NotImplementedException("Unknown VideoWidth {}", width);
22 }
23}
24
25VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
26    // Immediates must be in 16-bit format.
27 return is_immediate ? VideoWidth::Short : width;
28}
29
30} // namespace Shader::Maxwell
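
ExtractVideoOperandValue selects one byte or halfword lane of the 32-bit source: the selector times the lane width gives the bit position, and the field is sign- or zero-extended according to is_signed. An equivalent standalone sketch for the 8- and 16-bit cases (illustrative names, not the codebase's API):

    #include <cstdint>
    #include <cstdio>

    std::uint32_t ExtractLane(std::uint32_t value, unsigned selector, unsigned width_bits,
                              bool is_signed) {
        const unsigned pos = selector * width_bits;
        if (is_signed) {
            const auto s = static_cast<std::int32_t>(value << (32 - width_bits - pos));
            return static_cast<std::uint32_t>(s >> (32 - width_bits)); // arithmetic shift
        }
        return (value >> pos) & ((1u << width_bits) - 1);
    }

    int main() {
        const std::uint32_t value = 0x80FF1234u;
        std::printf("byte 3, signed   = %d\n",
                    static_cast<std::int32_t>(ExtractLane(value, 3, 8, true))); // -128
        std::printf("half 0, unsigned = 0x%X\n", ExtractLane(value, 0, 16, false)); // 0x1234
    }
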
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11enum class VideoWidth : u64 {
12 Byte,
13 Unknown,
14 Short,
15 Word,
16};
17
18[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
19 VideoWidth width, u32 selector, bool is_signed);
20
21[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11namespace {
12enum class VideoMinMaxOps : u64 {
13 MRG_16H,
14 MRG_16L,
15 MRG_8B0,
16 MRG_8B2,
17 ACC,
18 MIN,
19 MAX,
20};
21
22[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
23 VideoMinMaxOps op, bool is_signed) {
24 switch (op) {
25 case VideoMinMaxOps::MIN:
26 return ir.IMin(lhs, rhs, is_signed);
27 case VideoMinMaxOps::MAX:
28 return ir.IMax(lhs, rhs, is_signed);
29 default:
30 throw NotImplementedException("VMNMX op {}", op);
31 }
32}
33} // Anonymous namespace
34
35void TranslatorVisitor::VMNMX(u64 insn) {
36 union {
37 u64 raw;
38 BitField<0, 8, IR::Reg> dest_reg;
39 BitField<20, 16, u64> src_b_imm;
40 BitField<28, 2, u64> src_b_selector;
41 BitField<29, 2, VideoWidth> src_b_width;
42 BitField<36, 2, u64> src_a_selector;
43 BitField<37, 2, VideoWidth> src_a_width;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> src_a_sign;
46 BitField<49, 1, u64> src_b_sign;
47 BitField<50, 1, u64> is_src_b_reg;
48 BitField<51, 3, VideoMinMaxOps> op;
49 BitField<54, 1, u64> dest_sign;
50 BitField<55, 1, u64> sat;
51 BitField<56, 1, u64> mx;
52 } const vmnmx{insn};
53
54 if (vmnmx.cc != 0) {
55 throw NotImplementedException("VMNMX CC");
56 }
57 if (vmnmx.sat != 0) {
58 throw NotImplementedException("VMNMX SAT");
59 }
60 // Selectors were shown to default to 2 in unit tests
61 if (vmnmx.src_a_selector != 2) {
62 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
63 }
64 if (vmnmx.src_b_selector != 2) {
65 throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
66 }
67 if (vmnmx.src_a_width != VideoWidth::Word) {
68 throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
69 }
70
71 const bool is_b_imm{vmnmx.is_src_b_reg == 0};
72 const IR::U32 src_a{GetReg8(insn)};
73 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
74 const IR::U32 src_c{GetReg39(insn)};
75
76 const VideoWidth a_width{vmnmx.src_a_width};
77 const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};
78
79 const bool src_a_signed{vmnmx.src_a_sign != 0};
80 const bool src_b_signed{vmnmx.src_b_sign != 0};
81 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
82 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};
83
84 // First operation's sign is only dependent on operand b's sign
85 const bool op_1_signed{src_b_signed};
86
87 const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
88 : ir.IMin(op_a, op_b, op_1_signed)};
89 X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
90}
91
92} // namespace Shader::Maxwell
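
As translated above, VMNMX is two stages: a min or max of the two extracted video operands, chosen by the mx bit, followed by the VideoMinMaxOps stage against the third source. A plain-integer walk-through of the implemented MIN/MAX paths (illustrative):

    #include <algorithm>
    #include <cstdio>

    int main() {
        const int op_a = -5, op_b = 3, src_c = 1;
        const bool mx = true; // first stage is max when the mx bit is set
        const int lhs = mx ? std::max(op_a, op_b) : std::min(op_a, op_b);
        const int result = std::min(lhs, src_c); // second stage: VideoMinMaxOps::MIN
        std::printf("result = %d\n", result);    // max(-5, 3) = 3, then min(3, 1) = 1
    }
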
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::VMAD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<20, 16, u64> src_b_imm;
16 BitField<28, 2, u64> src_b_selector;
17 BitField<29, 2, VideoWidth> src_b_width;
18 BitField<36, 2, u64> src_a_selector;
19 BitField<37, 2, VideoWidth> src_a_width;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> src_a_sign;
22 BitField<49, 1, u64> src_b_sign;
23 BitField<50, 1, u64> is_src_b_reg;
24 BitField<51, 2, u64> scale;
25 BitField<53, 1, u64> src_c_neg;
26 BitField<54, 1, u64> src_a_neg;
27 BitField<55, 1, u64> sat;
28 } const vmad{insn};
29
30 if (vmad.cc != 0) {
31 throw NotImplementedException("VMAD CC");
32 }
33 if (vmad.sat != 0) {
34 throw NotImplementedException("VMAD SAT");
35 }
36 if (vmad.scale != 0) {
37 throw NotImplementedException("VMAD SCALE");
38 }
39 if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
40 throw NotImplementedException("VMAD PO");
41 }
42 if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
43 throw NotImplementedException("VMAD NEG");
44 }
45 const bool is_b_imm{vmad.is_src_b_reg == 0};
46 const IR::U32 src_a{GetReg8(insn)};
47 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
48 const IR::U32 src_c{GetReg39(insn)};
49
50 const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
51 // Immediate values can't have a selector
52 const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
53 const VideoWidth a_width{vmad.src_a_width};
54 const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};
55
56 const bool src_a_signed{vmad.src_a_sign != 0};
57 const bool src_b_signed{vmad.src_b_sign != 0};
58 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
59 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
60
61 X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class VsetpCompareOp : u64 {
14 False = 0,
15 LessThan,
16 Equal,
17 LessThanEqual,
18 GreaterThan = 16,
19 NotEqual,
20 GreaterThanEqual,
21 True,
22};
23
24CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
25 switch (op) {
26 case VsetpCompareOp::False:
27 return CompareOp::False;
28 case VsetpCompareOp::LessThan:
29 return CompareOp::LessThan;
30 case VsetpCompareOp::Equal:
31 return CompareOp::Equal;
32 case VsetpCompareOp::LessThanEqual:
33 return CompareOp::LessThanEqual;
34 case VsetpCompareOp::GreaterThan:
35 return CompareOp::GreaterThan;
36 case VsetpCompareOp::NotEqual:
37 return CompareOp::NotEqual;
38 case VsetpCompareOp::GreaterThanEqual:
39 return CompareOp::GreaterThanEqual;
40 case VsetpCompareOp::True:
41 return CompareOp::True;
42 default:
43 throw NotImplementedException("Invalid compare op {}", op);
44 }
45}
46} // Anonymous namespace
47
48void TranslatorVisitor::VSETP(u64 insn) {
49 union {
50 u64 raw;
51 BitField<0, 3, IR::Pred> dest_pred_b;
52 BitField<3, 3, IR::Pred> dest_pred_a;
53 BitField<20, 16, u64> src_b_imm;
54 BitField<28, 2, u64> src_b_selector;
55 BitField<29, 2, VideoWidth> src_b_width;
56 BitField<36, 2, u64> src_a_selector;
57 BitField<37, 2, VideoWidth> src_a_width;
58 BitField<39, 3, IR::Pred> bop_pred;
59 BitField<42, 1, u64> neg_bop_pred;
60 BitField<43, 5, VsetpCompareOp> compare_op;
61 BitField<45, 2, BooleanOp> bop;
62 BitField<48, 1, u64> src_a_sign;
63 BitField<49, 1, u64> src_b_sign;
64 BitField<50, 1, u64> is_src_b_reg;
65 } const vsetp{insn};
66
67 const bool is_b_imm{vsetp.is_src_b_reg == 0};
68 const IR::U32 src_a{GetReg8(insn)};
69 const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};
70
71 const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
72 const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
73 const VideoWidth a_width{vsetp.src_a_width};
74 const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};
75
76 const bool src_a_signed{vsetp.src_a_sign != 0};
77 const bool src_b_signed{vsetp.src_b_sign != 0};
78 const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
79 const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};
80
81    // The signedness of the comparison depends only on operand b's sign
82 const bool compare_signed{src_b_signed};
83 const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
84 const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
85 const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
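    // dest_pred_a receives the comparison combined with the bop predicate,
    // dest_pred_b the same combination with the comparison negated.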
86 const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
87 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
88 ir.SetPred(vsetp.dest_pred_a, result_a);
89 ir.SetPred(vsetp.dest_pred_b, result_b);
90}
91
92} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class VoteOp : u64 {
12 ALL,
13 ANY,
14 EQ,
15};
16
17[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
18 switch (vote_op) {
19 case VoteOp::ALL:
20 return ir.VoteAll(pred);
21 case VoteOp::ANY:
22 return ir.VoteAny(pred);
23 case VoteOp::EQ:
24 return ir.VoteEqual(pred);
25 default:
26 throw NotImplementedException("Invalid VOTE op {}", vote_op);
27 }
28}
29
30void Vote(TranslatorVisitor& v, u64 insn) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<39, 3, IR::Pred> pred_a;
35 BitField<42, 1, u64> neg_pred_a;
36 BitField<45, 3, IR::Pred> pred_b;
37 BitField<48, 2, VoteOp> vote_op;
38 } const vote{insn};
39
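    // VOTE reduces the predicate across the subgroup and also writes the ballot
    // mask of the input predicate to the destination register.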
40 const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
41 v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
42 v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
43}
44} // Anonymous namespace
45
46void TranslatorVisitor::VOTE(u64 insn) {
47 Vote(*this, insn);
48}
49
50void TranslatorVisitor::VOTE_vtg(u64) {
51 LOG_WARNING(Shader, "(STUBBED) called");
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class ShuffleMode : u64 {
14 IDX,
15 UP,
16 DOWN,
17 BFLY,
18};
19
20[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
21 const IR::U32& index, const IR::U32& mask,
22 ShuffleMode shfl_op) {
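    // The mask operand packs the lane clamp in bits [4:0] and the segment mask in
    // bits [12:8]; both are extracted below and forwarded to the shuffle ops.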
23 const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
24 const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
25 switch (shfl_op) {
26 case ShuffleMode::IDX:
27 return ir.ShuffleIndex(value, index, clamp, seg_mask);
28 case ShuffleMode::UP:
29 return ir.ShuffleUp(value, index, clamp, seg_mask);
30 case ShuffleMode::DOWN:
31 return ir.ShuffleDown(value, index, clamp, seg_mask);
32 case ShuffleMode::BFLY:
33 return ir.ShuffleButterfly(value, index, clamp, seg_mask);
34 default:
35 throw NotImplementedException("Invalid SHFL op {}", shfl_op);
36 }
37}
38
39void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
40 union {
41 u64 insn;
42 BitField<0, 8, IR::Reg> dest_reg;
43 BitField<8, 8, IR::Reg> src_reg;
44 BitField<30, 2, ShuffleMode> mode;
45 BitField<48, 3, IR::Pred> pred;
46 } const shfl{insn};
47
48 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
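    // The predicate output reports whether the lane shuffled from was in bounds.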
49 v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
50 v.X(shfl.dest_reg, result);
51}
52} // Anonymous namespace
53
54void TranslatorVisitor::SHFL(u64 insn) {
55 union {
56 u64 insn;
57 BitField<20, 5, u64> src_a_imm;
58 BitField<28, 1, u64> src_a_flag;
59 BitField<29, 1, u64> src_b_flag;
60 BitField<34, 13, u64> src_b_imm;
61 } const flags{insn};
62 const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
63 : GetReg20(insn)};
64 const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
65 : GetReg39(insn)};
66 Shuffle(*this, insn, src_a, src_b);
67}
68
69} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/environment.h"
6#include "shader_recompiler/frontend/ir/basic_block.h"
7#include "shader_recompiler/frontend/maxwell/decode.h"
8#include "shader_recompiler/frontend/maxwell/location.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10#include "shader_recompiler/frontend/maxwell/translate/translate.h"
11
12namespace Shader::Maxwell {
13
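// Invokes the visitor method through whichever of the three supported signatures
// it declares: (Location, u64), (u64), or no arguments.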
14template <auto method>
15static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
16 using MethodType = decltype(method);
17 if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
18 (visitor.*method)(pc, insn);
19 } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
20 (visitor.*method)(insn);
21 } else {
22 (visitor.*method)();
23 }
24}
25
26void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
27 if (location_begin == location_end) {
28 return;
29 }
30 TranslatorVisitor visitor{env, *block};
31 for (Location pc = location_begin; pc != location_end; ++pc) {
32 const u64 insn{env.ReadInstruction(pc.Offset())};
33 try {
34 const Opcode opcode{Decode(insn)};
35 switch (opcode) {
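// maxwell.inc expands INST into one case label per opcode, dispatching to the
// matching TranslatorVisitor method.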
36#define INST(name, cute, mask) \
37 case Opcode::name: \
38 Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
39 break;
40#include "shader_recompiler/frontend/maxwell/maxwell.inc"
41#undef INST
42 default:
43 throw LogicError("Invalid opcode {}", opcode);
44 }
45 } catch (Exception& exception) {
46 exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
47 throw;
48 }
49 }
50}
51
52} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
10namespace Shader::Maxwell {
11
12void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);
13
14} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <vector>
8
9#include "common/settings.h"
10#include "shader_recompiler/exception.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/post_order.h"
13#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
14#include "shader_recompiler/frontend/maxwell/translate/translate.h"
15#include "shader_recompiler/frontend/maxwell/translate_program.h"
16#include "shader_recompiler/host_translate_info.h"
17#include "shader_recompiler/ir_opt/passes.h"
18
19namespace Shader::Maxwell {
20namespace {
21IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
22 size_t num_syntax_blocks{};
23 for (const auto& node : syntax_list) {
24 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
25 ++num_syntax_blocks;
26 }
27 }
28 IR::BlockList blocks;
29 blocks.reserve(num_syntax_blocks);
30 for (const auto& node : syntax_list) {
31 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
32 blocks.push_back(node.data.block);
33 }
34 }
35 return blocks;
36}
37
38void RemoveUnreachableBlocks(IR::Program& program) {
39    // Some blocks might be unreachable if a function call exists unconditionally.
40    // When that happens, the block list and the post-order block list differ in size.
41 if (program.blocks.size() == program.post_order_blocks.size()) {
42 return;
43 }
44 const auto begin{program.blocks.begin() + 1};
45 const auto end{program.blocks.end()};
46 const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
47 program.blocks.erase(std::remove_if(begin, end, pred), end);
48}
49
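// Fragment inputs declare an interpolation mode per component in the SPH;
// collapse them into a single mode per generic attribute.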
50void CollectInterpolationInfo(Environment& env, IR::Program& program) {
51 if (program.stage != Stage::Fragment) {
52 return;
53 }
54 const ProgramHeader& sph{env.SPH()};
55 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
56 std::optional<PixelImap> imap;
57 for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
58 if (value == PixelImap::Unused) {
59 continue;
60 }
61 if (imap && imap != value) {
62 throw NotImplementedException("Per component interpolation");
63 }
64 imap = value;
65 }
66 if (!imap) {
67 continue;
68 }
69 program.info.interpolation[index] = [&] {
70 switch (*imap) {
71 case PixelImap::Unused:
72 case PixelImap::Perspective:
73 return Interpolation::Smooth;
74 case PixelImap::Constant:
75 return Interpolation::Flat;
76 case PixelImap::ScreenLinear:
77 return Interpolation::NoPerspective;
78 }
79 throw NotImplementedException("Unknown interpolation {}", *imap);
80 }();
81 }
82}
83
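// NVN titles reserve a stage-dependent region of driver constant buffer 0 that
// holds 16 storage buffer descriptors of 0x10 bytes each; mirror the ones in use.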
84void AddNVNStorageBuffers(IR::Program& program) {
85 if (!program.info.uses_global_memory) {
86 return;
87 }
88 const u32 driver_cbuf{0};
89 const u32 descriptor_size{0x10};
90 const u32 num_buffers{16};
91 const u32 base{[&] {
92 switch (program.stage) {
93 case Stage::VertexA:
94 case Stage::VertexB:
95 return 0x110u;
96 case Stage::TessellationControl:
97 return 0x210u;
98 case Stage::TessellationEval:
99 return 0x310u;
100 case Stage::Geometry:
101 return 0x410u;
102 case Stage::Fragment:
103 return 0x510u;
104 case Stage::Compute:
105 return 0x310u;
106 }
107 throw InvalidArgument("Invalid stage {}", program.stage);
108 }()};
109 auto& descs{program.info.storage_buffers_descriptors};
110 for (u32 index = 0; index < num_buffers; ++index) {
111 if (!program.info.nvn_buffer_used[index]) {
112 continue;
113 }
114 const u32 offset{base + index * descriptor_size};
115 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
116 if (it != descs.end()) {
117 it->is_written |= program.info.stores_global_memory;
118 continue;
119 }
120 descs.push_back({
121 .cbuf_index = driver_cbuf,
122 .cbuf_offset = offset,
123 .count = 1,
124 .is_written = program.info.stores_global_memory,
125 });
126 }
127}
128} // Anonymous namespace
129
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage();
137 program.local_memory_size = env.LocalMemorySize();
138 switch (program.stage) {
139 case Stage::TessellationControl: {
140 const ProgramHeader& sph{env.SPH()};
141 program.invocations = sph.common2.threads_per_input_primitive;
142 break;
143 }
144 case Stage::Geometry: {
145 const ProgramHeader& sph{env.SPH()};
146 program.output_topology = sph.common3.output_topology;
147 program.output_vertices = sph.common4.max_output_vertices;
148 program.invocations = sph.common2.threads_per_input_primitive;
149 program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
150 if (program.is_geometry_passthrough) {
151 const auto& mask{env.GpPassthroughMask()};
152 for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
153 program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
154 }
155 }
156 break;
157 }
158 case Stage::Compute:
159 program.workgroup_size = env.WorkgroupSize();
160 program.shared_memory_size = env.SharedMemorySize();
161 break;
162 default:
163 break;
164 }
165 RemoveUnreachableBlocks(program);
166
167 // Replace instructions before the SSA rewrite
168 if (!host_info.support_float16) {
169 Optimization::LowerFp16ToFp32(program);
170 }
171 if (!host_info.support_int64) {
172 Optimization::LowerInt64ToInt32(program);
173 }
174 Optimization::SsaRewritePass(program);
175
176 Optimization::GlobalMemoryToStorageBufferPass(program);
177 Optimization::TexturePass(env, program);
178
179 Optimization::ConstantPropagationPass(program);
180 Optimization::DeadCodeEliminationPass(program);
181 if (Settings::values.renderer_debug) {
182 Optimization::VerificationPass(program);
183 }
184 Optimization::CollectShaderInfoPass(env, program);
185 CollectInterpolationInfo(env, program);
186 AddNVNStorageBuffers(program);
187 return program;
188}
189
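// Stitches a VertexA/VertexB pair into one program: VertexA's Return nodes are
// dropped so control flow falls through into VertexB's syntax list.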
190IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
191 Environment& env_vertex_b) {
192 IR::Program result{};
193 Optimization::VertexATransformPass(vertex_a);
194 Optimization::VertexBTransformPass(vertex_b);
195 for (const auto& term : vertex_a.syntax_list) {
196 if (term.type != IR::AbstractSyntaxNode::Type::Return) {
197 result.syntax_list.push_back(term);
198 }
199 }
200 result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
201 vertex_b.syntax_list.end());
202 result.blocks = GenerateBlocks(result.syntax_list);
203 result.post_order_blocks = vertex_b.post_order_blocks;
204 for (const auto& block : vertex_a.post_order_blocks) {
205 result.post_order_blocks.push_back(block);
206 }
207 result.stage = Stage::VertexB;
208 result.info = vertex_a.info;
209 result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
210 result.info.loads.mask |= vertex_b.info.loads.mask;
211 result.info.stores.mask |= vertex_b.info.stores.mask;
212
213 Optimization::JoinTextureInfo(result.info, vertex_b.info);
214 Optimization::JoinStorageInfo(result.info, vertex_b.info);
215 Optimization::DeadCodeEliminationPass(result);
216 if (Settings::values.renderer_debug) {
217 Optimization::VerificationPass(result);
218 }
219 Optimization::CollectShaderInfoPass(env_vertex_b, result);
220 return result;
221}
222
223} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/host_translate_info.h"
12#include "shader_recompiler/object_pool.h"
13
14namespace Shader::Maxwell {
15
16[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg, const HostTranslateInfo& host_info);
19
20[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
21 Environment& env_vertex_b);
22
23} // namespace Shader::Maxwell