summaryrefslogtreecommitdiff
path: root/src/shader_recompiler/frontend/maxwell
diff options
context:
space:
mode:
Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.cpp642
-rw-r--r--src/shader_recompiler/frontend/maxwell/control_flow.h169
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.cpp149
-rw-r--r--src/shader_recompiler/frontend/maxwell/decode.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/instruction.h63
-rw-r--r--src/shader_recompiler/frontend/maxwell/location.h112
-rw-r--r--src/shader_recompiler/frontend/maxwell/maxwell.inc286
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.cpp26
-rw-r--r--src/shader_recompiler/frontend/maxwell/opcodes.h30
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp883
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.h20
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp110
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp35
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp96
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp74
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h57
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp153
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h28
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp72
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp50
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp43
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp47
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp55
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp78
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp214
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp253
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp94
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp41
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp60
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp125
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp169
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h42
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp143
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp117
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp118
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp272
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/impl.h387
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp105
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp48
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp80
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp82
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp36
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp86
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp58
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp135
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp126
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp62
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h39
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp108
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp196
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp218
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp184
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp116
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp122
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp66
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp71
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp181
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp283
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp45
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp46
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp38
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp53
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp44
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp205
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp281
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp236
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp266
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp208
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp134
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp182
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp165
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp242
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp131
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp76
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp30
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h23
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp64
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp92
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp54
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp69
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.cpp52
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate/translate.h14
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp223
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.h23
108 files changed, 12603 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..1a954a509
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <optional>
8#include <string>
9#include <utility>
10
11#include <fmt/format.h>
12
13#include "shader_recompiler/exception.h"
14#include "shader_recompiler/frontend/maxwell/control_flow.h"
15#include "shader_recompiler/frontend/maxwell/decode.h"
16#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
17#include "shader_recompiler/frontend/maxwell/location.h"
18
19namespace Shader::Maxwell::Flow {
20namespace {
21struct Compare {
22 bool operator()(const Block& lhs, Location rhs) const noexcept {
23 return lhs.begin < rhs;
24 }
25
26 bool operator()(Location lhs, const Block& rhs) const noexcept {
27 return lhs < rhs.begin;
28 }
29
30 bool operator()(const Block& lhs, const Block& rhs) const noexcept {
31 return lhs.begin < rhs.begin;
32 }
33};
34
35u32 BranchOffset(Location pc, Instruction inst) {
36 return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
37}
38
/// Splits old_block at pc: new_block takes over [pc, old_end) along with the
/// old block's terminator state, while old_block is reduced to [old_begin, pc)
/// ending in an unconditional fall-through into new_block.
void Split(Block* old_block, Block* new_block, Location pc) {
    // The split point must be strictly inside the block
    if (pc <= old_block->begin || pc >= old_block->end) {
        throw InvalidArgument("Invalid address to split={}", pc);
    }
    *new_block = Block{};
    new_block->begin = pc;
    new_block->end = old_block->end;
    new_block->end_class = old_block->end_class;
    new_block->cond = old_block->cond;
    new_block->stack = old_block->stack;
    new_block->branch_true = old_block->branch_true;
    new_block->branch_false = old_block->branch_false;
    new_block->function_call = old_block->function_call;
    new_block->return_block = old_block->return_block;
    new_block->branch_reg = old_block->branch_reg;
    new_block->branch_offset = old_block->branch_offset;
    // Indirect targets are moved, not copied: only the tail keeps them
    new_block->indirect_branches = std::move(old_block->indirect_branches);

    // Save the fields that survive before wiping the old block
    const Location old_begin{old_block->begin};
    Stack old_stack{std::move(old_block->stack)};
    *old_block = Block{};
    old_block->begin = old_begin;
    old_block->end = pc;
    old_block->end_class = EndClass::Branch;
    old_block->cond = IR::Condition(true);
    old_block->stack = old_stack;
    // Unconditional fall-through into the split-off tail
    old_block->branch_true = new_block;
    old_block->branch_false = nullptr;
}
68
69Token OpcodeToken(Opcode opcode) {
70 switch (opcode) {
71 case Opcode::PBK:
72 case Opcode::BRK:
73 return Token::PBK;
74 case Opcode::PCNT:
75 case Opcode::CONT:
76 return Token::PBK;
77 case Opcode::PEXIT:
78 case Opcode::EXIT:
79 return Token::PEXIT;
80 case Opcode::PLONGJMP:
81 case Opcode::LONGJMP:
82 return Token::PLONGJMP;
83 case Opcode::PRET:
84 case Opcode::RET:
85 case Opcode::CAL:
86 return Token::PRET;
87 case Opcode::SSY:
88 case Opcode::SYNC:
89 return Token::SSY;
90 default:
91 throw InvalidArgument("{}", opcode);
92 }
93}
94
95bool IsAbsoluteJump(Opcode opcode) {
96 switch (opcode) {
97 case Opcode::JCAL:
98 case Opcode::JMP:
99 case Opcode::JMX:
100 return true;
101 default:
102 return false;
103 }
104}
105
106bool HasFlowTest(Opcode opcode) {
107 switch (opcode) {
108 case Opcode::BRA:
109 case Opcode::BRX:
110 case Opcode::EXIT:
111 case Opcode::JMP:
112 case Opcode::JMX:
113 case Opcode::KIL:
114 case Opcode::BRK:
115 case Opcode::CONT:
116 case Opcode::LONGJMP:
117 case Opcode::RET:
118 case Opcode::SYNC:
119 return true;
120 case Opcode::CAL:
121 case Opcode::JCAL:
122 return false;
123 default:
124 throw InvalidArgument("Invalid branch {}", opcode);
125 }
126}
127
128std::string NameOf(const Block& block) {
129 if (block.begin.IsVirtual()) {
130 return fmt::format("\"Virtual {}\"", block.begin);
131 } else {
132 return fmt::format("\"{}\"", block.begin);
133 }
134}
135} // Anonymous namespace
136
137void Stack::Push(Token token, Location target) {
138 entries.push_back({
139 .token = token,
140 .target{target},
141 });
142}
143
144std::pair<Location, Stack> Stack::Pop(Token token) const {
145 const std::optional<Location> pc{Peek(token)};
146 if (!pc) {
147 throw LogicError("Token could not be found");
148 }
149 return {*pc, Remove(token)};
150}
151
152std::optional<Location> Stack::Peek(Token token) const {
153 const auto it{std::find_if(entries.rbegin(), entries.rend(),
154 [token](const auto& entry) { return entry.token == token; })};
155 if (it == entries.rend()) {
156 return std::nullopt;
157 }
158 return it->target;
159}
160
161Stack Stack::Remove(Token token) const {
162 const auto it{std::find_if(entries.rbegin(), entries.rend(),
163 [token](const auto& entry) { return entry.token == token; })};
164 const auto pos{std::distance(entries.rbegin(), it)};
165 Stack result;
166 result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
167 return result;
168}
169
170bool Block::Contains(Location pc) const noexcept {
171 return pc >= begin && pc < end;
172}
173
174Function::Function(ObjectPool<Block>& block_pool, Location start_address)
175 : entrypoint{start_address} {
176 Label& label{labels.emplace_back()};
177 label.address = start_address;
178 label.block = block_pool.Create(Block{});
179 label.block->begin = start_address;
180 label.block->end = start_address;
181 label.block->end_class = EndClass::Branch;
182 label.block->cond = IR::Condition(true);
183 label.block->branch_true = nullptr;
184 label.block->branch_false = nullptr;
185}
186
/// Builds the control-flow graph of the program starting at start_address.
/// When exits_to_dispatcher is set, EXIT is modeled as a branch to a synthetic
/// dispatch block placed after the last analyzed block instead of terminating.
CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
         bool exits_to_dispatcher_)
    : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
                                                                           exits_to_dispatcher_} {
    if (exits_to_dispatcher) {
        // Synthetic exit block; its address range is patched after analysis
        dispatch_block = block_pool.Create(Block{});
        dispatch_block->begin = {};
        dispatch_block->end = {};
        dispatch_block->end_class = EndClass::Exit;
        dispatch_block->cond = IR::Condition(true);
        dispatch_block->stack = {};
        dispatch_block->branch_true = nullptr;
        dispatch_block->branch_false = nullptr;
    }
    functions.emplace_back(block_pool, start_address);
    // Analysis can append new functions (CAL targets), so iterate by index
    for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
        while (!functions[function_id].labels.empty()) {
            // Re-resolve the reference each iteration: AnalyzeLabel may grow
            // the functions vector, invalidating references into it
            Function& function{functions[function_id]};
            Label label{function.labels.back()};
            function.labels.pop_back();
            AnalyzeLabel(function_id, label);
        }
    }
    if (exits_to_dispatcher) {
        // Place the dispatch block just past the last block of the main function
        const auto last_block{functions[0].blocks.rbegin()};
        dispatch_block->begin = last_block->end + 1;
        dispatch_block->end = last_block->end + 1;
        functions[0].blocks.insert(*dispatch_block);
    }
}
217
/// Analyzes the code starting at a pending label, filling in the label's block
/// until an already visited block or a branch-like instruction is reached.
void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
    if (InspectVisitedBlocks(function_id, label)) {
        // Label address has been visited
        return;
    }
    // Try to find the next block
    Function* const function{&functions[function_id]};
    Location pc{label.address};
    const auto next_it{function->blocks.upper_bound(pc, Compare{})};
    const bool is_last{next_it == function->blocks.end()};
    Block* const next{is_last ? nullptr : &*next_it};
    // Insert before the next block
    Block* const block{label.block};
    // Analyze instructions until it reaches an already visited block or there's a branch
    bool is_branch{false};
    while (!next || pc < next->begin) {
        is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
        if (is_branch) {
            break;
        }
        ++pc;
    }
    if (!is_branch) {
        // If the block finished without a branch,
        // it means that the next instruction is already visited, jump to it
        block->end = pc;
        block->cond = IR::Condition{true};
        block->branch_true = next;
        block->branch_false = nullptr;
    }
    // Function's pointer might be invalid, resolve it again
    // Insert the new block
    functions[function_id].blocks.insert(*block);
}
252
/// If the label address falls inside an already analyzed block, splits that
/// block at the label and returns true; returns false when unvisited.
bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
    const Location pc{label.address};
    Function& function{functions[function_id]};
    const auto it{
        std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
    if (it == function.blocks.end()) {
        // Address has not been visited
        return false;
    }
    Block* const visited_block{&*it};
    if (visited_block->begin == pc) {
        // A label for an address that already starts a block should have been
        // resolved by AddLabel, never reaching this point
        throw LogicError("Dangling block");
    }
    // Reuse the label's preallocated block as the split-off tail
    Block* const new_block{label.block};
    Split(visited_block, new_block, pc);
    function.blocks.insert(it, *new_block);
    return true;
}
271
/// Analyzes a single instruction at pc, updating the block's control-flow
/// state. Returns Branch when the block ends at this instruction, or Continue
/// when analysis should proceed with the next instruction.
CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
    const Instruction inst{env.ReadInstruction(pc.Offset())};
    const Opcode opcode{Decode(inst.raw)};
    switch (opcode) {
    case Opcode::BRA:
    case Opcode::JMP:
    case Opcode::RET:
        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
            // Branch can never be taken: keep scanning the block
            return AnalysisState::Continue;
        }
        switch (opcode) {
        case Opcode::BRA:
        case Opcode::JMP:
            AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
            break;
        case Opcode::RET:
            block->end_class = EndClass::Return;
            break;
        default:
            break;
        }
        block->end = pc;
        return AnalysisState::Branch;
    case Opcode::BRK:
    case Opcode::CONT:
    case Opcode::LONGJMP:
    case Opcode::SYNC: {
        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
            return AnalysisState::Continue;
        }
        // These consume the token pushed by their matching setup instruction
        const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
        block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
        block->end = pc;
        return AnalysisState::Branch;
    }
    case Opcode::KIL: {
        const Predicate pred{inst.Pred()};
        const auto ir_pred{static_cast<IR::Pred>(pred.index)};
        const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
        AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
        return AnalysisState::Branch;
    }
    case Opcode::PBK:
    case Opcode::PCNT:
    case Opcode::PEXIT:
    case Opcode::PLONGJMP:
    case Opcode::SSY:
        // Setup instructions only push onto the simulated control-flow stack
        block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
        return AnalysisState::Continue;
    case Opcode::BRX:
    case Opcode::JMX:
        return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
    case Opcode::EXIT:
        return AnalyzeEXIT(block, function_id, pc, inst);
    case Opcode::PRET:
        throw NotImplementedException("PRET flow analysis");
    case Opcode::CAL:
    case Opcode::JCAL: {
        const bool is_absolute{IsAbsoluteJump(opcode)};
        const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
        // Technically CAL pushes into PRET, but that's implicit in the function call for us
        // Insert the function into the list if it doesn't exist
        const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
        const bool exists{it != functions.end()};
        const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
                                        : functions.size()};
        if (!exists) {
            functions.emplace_back(block_pool, cal_pc);
        }
        block->end_class = EndClass::Call;
        block->function_call = call_id;
        // Execution resumes at the instruction following the call
        block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
        block->end = pc;
        return AnalysisState::Branch;
    }
    default:
        break;
    }
    // Non-control-flow instruction: only predication can end the block here
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{true} || pred == Predicate{false}) {
        return AnalysisState::Continue;
    }
    const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
    return AnalysisState::Branch;
}
358
/// Handles an instruction that executes conditionally (predicated instruction,
/// EXIT or KIL): restructures the current block into a virtual block that
/// tests the condition plus a conditional block holding the instruction.
void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
                          EndClass insn_end_class, IR::Condition cond) {
    if (block->begin != pc) {
        // If the block doesn't start in the conditional instruction
        // mark it as a label to visit it later
        block->end = pc;
        block->cond = IR::Condition{true};
        block->branch_true = AddLabel(block, block->stack, pc, function_id);
        block->branch_false = nullptr;
        return;
    }
    // Create a virtual block and a conditional block
    Block* const conditional_block{block_pool.Create()};
    Block virtual_block{};
    virtual_block.begin = block->begin.Virtual();
    virtual_block.end = block->begin.Virtual();
    virtual_block.end_class = EndClass::Branch;
    virtual_block.stack = block->stack;
    virtual_block.cond = cond;
    virtual_block.branch_true = conditional_block;
    virtual_block.branch_false = nullptr;
    // Save the contents of the visited block in the conditional block
    *conditional_block = std::move(*block);
    // Impersonate the visited block with a virtual block
    // (the intrusive set keeps pointing at `block`, which now tests the cond)
    *block = std::move(virtual_block);
    // Set the end properties of the conditional instruction
    conditional_block->end = pc + 1;
    conditional_block->end_class = insn_end_class;
    // Add a label to the instruction after the conditional instruction
    Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
    // Branch to the next instruction from the virtual block
    block->branch_false = endif_block;
    // And branch to it from the conditional instruction if it is a branch or a kill instruction
    // Kill instructions are considered a branch because they demote to a helper invocation and
    // execution may continue.
    if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
        conditional_block->cond = IR::Condition{true};
        conditional_block->branch_true = endif_block;
        conditional_block->branch_false = nullptr;
    }
    // Finally insert the condition block into the list of blocks
    functions[function_id].blocks.insert(*conditional_block);
}
402
/// Common handling for branch-like instructions. Returns false when the branch
/// can never be taken; otherwise sets the block's condition (and, for
/// conditional branches, the false edge falling through to pc + 1).
bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                        Opcode opcode) {
    if (inst.branch.is_cbuf) {
        throw NotImplementedException("Branch with constant buffer offset");
    }
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{false}) {
        // Always-false predicate: the branch is dead
        return false;
    }
    const bool has_flow_test{HasFlowTest(opcode)};
    const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
        // Conditional branch: false edge continues at the next instruction
        block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
        block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
    } else {
        block->cond = IR::Condition{true};
    }
    return true;
}
422
423void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
424 bool is_absolute) {
425 const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
426 block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
427}
428
429CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
430 FunctionId function_id) {
431 const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
432 if (!brx_table) {
433 TrackIndirectBranchTable(env, pc, program_start);
434 throw NotImplementedException("Failed to track indirect branch");
435 }
436 const IR::FlowTest flow_test{inst.branch.flow_test};
437 const Predicate pred{inst.Pred()};
438 if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
439 throw NotImplementedException("Conditional indirect branch");
440 }
441 std::vector<u32> targets;
442 targets.reserve(brx_table->num_entries);
443 for (u32 i = 0; i < brx_table->num_entries; ++i) {
444 u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
445 if (!is_absolute) {
446 target += pc.Offset();
447 }
448 target += static_cast<u32>(brx_table->branch_offset);
449 target += 8;
450 targets.push_back(target);
451 }
452 std::ranges::sort(targets);
453 targets.erase(std::unique(targets.begin(), targets.end()), targets.end());
454
455 block->indirect_branches.reserve(targets.size());
456 for (const u32 target : targets) {
457 Block* const branch{AddLabel(block, block->stack, target, function_id)};
458 block->indirect_branches.push_back({
459 .block = branch,
460 .address = target,
461 });
462 }
463 block->cond = IR::Condition{true};
464 block->end = pc + 1;
465 block->end_class = EndClass::IndirectBranch;
466 block->branch_reg = brx_table->branch_reg;
467 block->branch_offset = brx_table->branch_offset + 8;
468 if (!is_absolute) {
469 block->branch_offset += pc.Offset();
470 }
471 return AnalysisState::Branch;
472}
473
/// Analyzes an EXIT instruction, handling predication, pending PEXIT tokens,
/// and the optional dispatch-block exit mode.
CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
                                    Instruction inst) {
    const IR::FlowTest flow_test{inst.branch.flow_test};
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
        // EXIT will never be taken
        return AnalysisState::Continue;
    }
    if (exits_to_dispatcher && function_id != 0) {
        throw NotImplementedException("Dispatch EXIT on external function");
    }
    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
        // Conditional EXIT
        if (block->stack.Peek(Token::PEXIT).has_value()) {
            throw NotImplementedException("Conditional EXIT with PEXIT token");
        }
        const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
        if (exits_to_dispatcher) {
            // Taken edge goes to the dispatch block, fall-through continues
            block->end = pc;
            block->end_class = EndClass::Branch;
            block->cond = cond;
            block->branch_true = dispatch_block;
            block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
            return AnalysisState::Branch;
        }
        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
        return AnalysisState::Branch;
    }
    if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
        // A PEXIT target is pending: EXIT branches there instead of exiting
        const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
        block->cond = IR::Condition{true};
        block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
        block->branch_false = nullptr;
        return AnalysisState::Branch;
    }
    if (exits_to_dispatcher) {
        // Unconditional exit routed through the dispatch block
        block->cond = IR::Condition{true};
        block->end = pc;
        block->end_class = EndClass::Branch;
        block->branch_true = dispatch_block;
        block->branch_false = nullptr;
        return AnalysisState::Branch;
    }
    // Plain unconditional exit
    block->end = pc + 1;
    block->end_class = EndClass::Exit;
    return AnalysisState::Branch;
}
520
/// Returns the block for the given address, creating a pending label (and its
/// preallocated block) for later analysis if it does not exist yet.
/// `stack` is taken by value: it is copied into the new block and moved into
/// the label.
Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
    Function& function{functions[function_id]};
    if (block->begin == pc) {
        // Jumps to itself
        return block;
    }
    if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
        // Block already exists and it has been visited
        if (function.blocks.begin() != it) {
            // Check if the previous node is the virtual variant of the label
            // This won't exist if a virtual node is not needed or it hasn't been visited
            // If it hasn't been visited and a virtual node is needed, this will still behave as
            // expected because the node impersonated with its virtual node.
            const auto prev{std::prev(it)};
            if (it->begin.Virtual() == prev->begin) {
                return &*prev;
            }
        }
        return &*it;
    }
    // Make sure we don't insert the same layer twice
    const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
    if (label_it != function.labels.end()) {
        return label_it->block;
    }
    // Unvisited address: preallocate an empty block and queue it for analysis
    Block* const new_block{block_pool.Create()};
    new_block->begin = pc;
    new_block->end = pc;
    new_block->end_class = EndClass::Branch;
    new_block->cond = IR::Condition(true);
    new_block->stack = stack;
    new_block->branch_true = nullptr;
    new_block->branch_false = nullptr;
    function.labels.push_back(Label{
        .address{pc},
        .block = new_block,
        .stack{std::move(stack)},
    });
    return new_block;
}
561
/// Returns a graphviz (DOT) representation of the CFG, intended for debugging.
std::string CFG::Dot() const {
    // Uid for the synthetic terminator nodes (Call/Exit/Return/Kill squares)
    int node_uid{0};

    std::string dot{"digraph shader {\n"};
    for (const Function& function : functions) {
        dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
        dot += fmt::format("\t\tnode [style=filled];\n");
        for (const Block& block : function.blocks) {
            const std::string name{NameOf(block)};
            // Emits one edge; the condition label is only printed on the
            // taken edge of conditional branches
            const auto add_branch = [&](Block* branch, bool add_label) {
                dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
                if (add_label && block.cond != IR::Condition{true} &&
                    block.cond != IR::Condition{false}) {
                    dot += fmt::format(" [label=\"{}\"]", block.cond);
                }
                dot += '\n';
            };
            dot += fmt::format("\t\t{};\n", name);
            switch (block.end_class) {
            case EndClass::Branch:
                if (block.cond != IR::Condition{false}) {
                    add_branch(block.branch_true, true);
                }
                if (block.cond != IR::Condition{true}) {
                    add_branch(block.branch_false, false);
                }
                break;
            case EndClass::IndirectBranch:
                for (const IndirectBranch& branch : block.indirect_branches) {
                    add_branch(branch.block, false);
                }
                break;
            case EndClass::Call:
                // NOTE(review): graphviz's style is spelled "striped";
                // "stripped" here (and below) looks like a typo — confirm
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
                dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n",
                                   node_uid, block.function_call);
                dot += '\n';
                ++node_uid;
                break;
            case EndClass::Exit:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n",
                                   node_uid);
                ++node_uid;
                break;
            case EndClass::Return:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n",
                                   node_uid);
                ++node_uid;
                break;
            case EndClass::Kill:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
                                   node_uid);
                ++node_uid;
                break;
            }
        }
        // assumes the main function's entrypoint is at offset 8 — TODO confirm
        if (function.entrypoint == 8) {
            dot += fmt::format("\t\tlabel = \"main\";\n");
        } else {
            dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
        }
        dot += "\t}\n";
    }
    if (!functions.empty()) {
        auto& function{functions.front()};
        if (function.blocks.empty()) {
            dot += "Start;\n";
        } else {
            dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
        }
        dot += fmt::format("\tStart [shape=diamond];\n");
    }
    dot += "}\n";
    return dot;
}
641
642} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <optional>
9#include <span>
10#include <string>
11#include <vector>
12
13#include <boost/container/small_vector.hpp>
14#include <boost/intrusive/set.hpp>
15
16#include "shader_recompiler/environment.h"
17#include "shader_recompiler/frontend/ir/condition.h"
18#include "shader_recompiler/frontend/maxwell/instruction.h"
19#include "shader_recompiler/frontend/maxwell/location.h"
20#include "shader_recompiler/frontend/maxwell/opcodes.h"
21#include "shader_recompiler/object_pool.h"
22
23namespace Shader::Maxwell::Flow {
24
25struct Block;
26
27using FunctionId = size_t;
28
/// Classifies how a basic block terminates.
enum class EndClass {
    Branch,         ///< Conditional or unconditional branch (uses cond/branch_true/branch_false)
    IndirectBranch, ///< Register-driven branch with a list of possible targets
    Call,           ///< Function call; execution resumes at return_block
    Exit,           ///< Ends the shader program
    Return,         ///< Returns from the current function
    Kill,           ///< KIL-style termination of the invocation
};

/// Control-flow stack tokens, named after the Maxwell instructions that push them.
enum class Token {
    SSY,
    PBK,
    PEXIT,
    PRET,
    PCNT,
    PLONGJMP,
};

/// One entry of the control-flow stack: which token pushed it and its join target.
struct StackEntry {
    auto operator<=>(const StackEntry&) const noexcept = default;

    Token token;     ///< Token of the instruction that pushed this entry
    Location target; ///< Location consumed when the matching pop instruction executes
};

/// Value-semantic model of the hardware control-flow stack.
/// Pop/Peek/Remove return new stacks (or values) instead of mutating in place.
class Stack {
public:
    /// Push an entry for the given token with the given join target.
    void Push(Token token, Location target);
    /// Return the target of the most recent entry for this token and the stack without it.
    [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
    /// Return the target of the most recent entry for this token, if any.
    [[nodiscard]] std::optional<Location> Peek(Token token) const;
    /// Return a copy of this stack with the most recent entry for this token removed.
    [[nodiscard]] Stack Remove(Token token) const;

private:
    boost::container::small_vector<StackEntry, 3> entries;
};

/// One possible destination of an indirect (BRX/JMX) branch.
struct IndirectBranch {
    Block* block; ///< Destination block
    u32 address;  ///< Raw code address of the destination
};
69
/// A basic block of the control flow graph, kept in an intrusive set keyed by
/// its start address.
struct Block : boost::intrusive::set_base_hook<
                   // Normal link is ~2.5% faster compared to safe link
                   boost::intrusive::link_mode<boost::intrusive::normal_link>> {
    /// Check whether the given program counter belongs to this block.
    [[nodiscard]] bool Contains(Location pc) const noexcept;

    /// Order blocks by start address for the intrusive set.
    bool operator<(const Block& rhs) const noexcept {
        return begin < rhs.begin;
    }

    Location begin;                ///< First instruction of the block
    Location end;                  ///< End location of the block
    EndClass end_class{};          ///< How the block terminates
    IR::Condition cond{};          ///< Branch condition (meaningful for EndClass::Branch)
    Stack stack;                   ///< Control-flow stack state associated with the block
    Block* branch_true{};          ///< Destination when cond holds
    Block* branch_false{};         ///< Destination when cond does not hold
    FunctionId function_call{};    ///< Callee function (EndClass::Call)
    Block* return_block{};         ///< Block resumed after the call returns (EndClass::Call)
    IR::Reg branch_reg{};          ///< Register driving an indirect branch
    s32 branch_offset{};           ///< Offset applied by the indirect branch instruction
    std::vector<IndirectBranch> indirect_branches; ///< Possible indirect branch targets
};

/// Pending analysis work item: a block to visit at an address with a stack state.
struct Label {
    Location address;
    Block* block;
    Stack stack;
};

/// A function discovered during CFG analysis.
struct Function {
    explicit Function(ObjectPool<Block>& block_pool, Location start_address);

    Location entrypoint;                              ///< Address of the first instruction
    boost::container::small_vector<Label, 16> labels; ///< Work list of labels left to analyze
    boost::intrusive::set<Block> blocks;              ///< Discovered blocks, ordered by address
};
106
/// Builds and owns the control flow graph of a shader program, starting the
/// analysis from a given address and discovering functions and basic blocks.
class CFG {
    /// Result of analyzing a single instruction.
    enum class AnalysisState {
        Branch,   ///< The instruction ended the current block
        Continue, ///< Analysis continues with the following instruction
    };

public:
    explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
                 bool exits_to_dispatcher = false);

    CFG& operator=(const CFG&) = delete;
    CFG(const CFG&) = delete;

    CFG& operator=(CFG&&) = delete;
    CFG(CFG&&) = delete;

    /// Render the graph in Graphviz DOT format, for debugging.
    [[nodiscard]] std::string Dot() const;

    /// All discovered functions; the front element is the entry function.
    [[nodiscard]] std::span<const Function> Functions() const noexcept {
        return std::span(functions.data(), functions.size());
    }
    [[nodiscard]] std::span<Function> Functions() noexcept {
        return std::span(functions.data(), functions.size());
    }

    /// True when EXIT should be treated as a branch back to a dispatcher block.
    [[nodiscard]] bool ExitsToDispatcher() const {
        return exits_to_dispatcher;
    }

private:
    /// Analyze all instructions reachable from a pending label.
    void AnalyzeLabel(FunctionId function_id, Label& label);

    /// Inspect already visited blocks.
    /// Return true when the block has already been visited
    bool InspectVisitedBlocks(FunctionId function_id, const Label& label);

    /// Analyze the instruction at pc and classify how it affects control flow.
    AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);

    /// Handle a conditionally executed control-flow instruction.
    void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
                         IR::Condition cond);

    /// Return true when the branch instruction is confirmed to be a branch
    bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                       Opcode opcode);

    void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                    bool is_absolute);
    AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
                             FunctionId function_id);
    AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);

    /// Return the branch target block id
    Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);

    Environment& env;                                    ///< Supplies raw instruction words
    ObjectPool<Block>& block_pool;                       ///< Allocator for Block nodes
    boost::container::small_vector<Function, 1> functions;
    Location program_start;
    bool exits_to_dispatcher{};                          ///< See ExitsToDispatcher()
    Block* dispatch_block{};                             ///< Dispatcher block, when enabled
};
168
169} // namespace Shader::Maxwell::Flow
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <array>
7#include <bit>
8#include <memory>
9#include <string_view>
10
11#include "common/common_types.h"
12#include "shader_recompiler/exception.h"
13#include "shader_recompiler/frontend/maxwell/decode.h"
14#include "shader_recompiler/frontend/maxwell/opcodes.h"
15
16namespace Shader::Maxwell {
17namespace {
/// A bit mask and the expected value of the masked bits for one instruction encoding.
struct MaskValue {
    u64 mask;  ///< Bits constrained by the encoding
    u64 value; ///< Expected value of the constrained bits
};
22
23constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
24 u64 mask{};
25 u64 value{};
26 u64 bit{u64(1) << 63};
27 while (*encoding) {
28 switch (*encoding) {
29 case '0':
30 mask |= bit;
31 break;
32 case '1':
33 mask |= bit;
34 value |= bit;
35 break;
36 case '-':
37 break;
38 case ' ':
39 break;
40 default:
41 throw LogicError("Invalid encoding character '{}'", *encoding);
42 }
43 ++encoding;
44 if (*encoding != ' ') {
45 bit >>= 1;
46 }
47 }
48 return MaskValue{.mask = mask, .value = value};
49}
50
/// Associates one encoding pattern with the opcode it decodes to.
struct InstEncoding {
    MaskValue mask_value;
    Opcode opcode;
};
/// Every encoding from maxwell.inc, in declaration order (not yet sorted).
constexpr std::array UNORDERED_ENCODINGS{
#define INST(name, cute, encode) \
    InstEncoding{ \
        .mask_value{MaskValueFromEncoding(encode)}, \
        .opcode = Opcode::name, \
    },
#include "maxwell.inc"
#undef INST
};

/// Sort encodings by descending mask popcount so that more specific encodings
/// are matched before less specific ones.
constexpr auto SortedEncodings() {
    std::array encodings{UNORDERED_ENCODINGS};
    std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
        return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
    });
    return encodings;
}
constexpr auto ENCODINGS{SortedEncodings()};

/// Number of bits, counted from the most significant bit, needed to reach the
/// lowest mask bit used by any encoding.
constexpr int WidestLeftBits() {
    int bits{64};
    for (const InstEncoding& encoding : ENCODINGS) {
        bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
    }
    return 64 - bits;
}
constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
// Right-shift that maps a full instruction word to its fast-lookup index.
constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};

/// Index into the fast lookup table for a raw instruction word (its top bits).
constexpr size_t ToFastLookupIndex(u64 value) {
    return static_cast<size_t>(value >> MASK_SHIFT);
}

/// Number of entries the fast lookup table needs to cover every encoding mask.
constexpr size_t FastLookupSize() {
    size_t max_width{};
    for (const InstEncoding& encoding : ENCODINGS) {
        max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
    }
    return max_width + 1;
}
constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};

/// Compressed encoding entry: only the high bits of mask/value are stored in
/// 16-bit fields, since the encodings only constrain the top of the word.
struct InstInfo {
    [[nodiscard]] u64 Mask() const noexcept {
        return static_cast<u64>(high_mask) << MASK_SHIFT;
    }

    [[nodiscard]] u64 Value() const noexcept {
        return static_cast<u64>(high_value) << MASK_SHIFT;
    }

    u16 high_mask;
    u16 high_value;
    Opcode opcode;
};
110
/// Build the (up to two) candidate encodings for one fast-lookup index.
/// std::array::at throws if more than two encodings share an index, signalling
/// that the per-entry candidate count would need to grow.
constexpr auto MakeFastLookupTableIndex(size_t index) {
    std::array<InstInfo, 2> encodings{};
    size_t element{};
    for (const auto& encoding : ENCODINGS) {
        const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
        const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
        if ((index & mask) == value) {
            encodings.at(element) = InstInfo{
                .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
                .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
                .opcode = encoding.opcode,
            };
            ++element;
        }
    }
    return encodings;
}

// Build the complete fast lookup table. Not constexpr because the table is
// heap-allocated through std::unique_ptr — presumably to keep it out of the
// binary image; confirm before changing.
/*constexpr*/ auto MakeFastLookupTable() {
    auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
    for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
        (*encodings)[index] = MakeFastLookupTableIndex(index);
    }
    return encodings;
}
// Built once at static initialization time; read-only afterwards.
const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
137} // Anonymous namespace
138
139Opcode Decode(u64 insn) {
140 const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
141 const auto it{std::ranges::find_if(
142 table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
143 if (it == table.end()) {
144 throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
145 }
146 return it->opcode;
147}
148
149} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
namespace Shader::Maxwell {

/// Decode a raw 64-bit Maxwell instruction word into its opcode.
/// Throws NotImplementedException when the encoding is unknown.
[[nodiscard]] Opcode Decode(u64 insn);

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/decode.h"
10#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
11#include "shader_recompiler/frontend/maxwell/opcodes.h"
12#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
13
14namespace Shader::Maxwell {
15namespace {
/// Bitfield views over the instructions inspected while tracking a BRX/JMX jump table.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;
    BitField<8, 8, IR::Reg> src_reg;
    BitField<20, 19, u64> immediate;
    BitField<56, 1, u64> is_negative;
    BitField<20, 24, s64> brx_offset;
};

/// Walk instructions backwards from pos down to block_begin and return the
/// first one for which func(insn, opcode) is true.
/// On return, pos points one instruction before the match (or before
/// block_begin when nothing matched).
template <typename Callable>
std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
    while (pos >= block_begin) {
        const u64 insn{env.ReadInstruction(pos.Offset())};
        // Location's prefix operator-- returns the old value; only the
        // in-place step-back side effect is used here.
        --pos;
        if (func(insn, Decode(insn))) {
            return insn;
        }
    }
    return std::nullopt;
}

/// Find the LDC (32-bit, default mode) that loaded the BRX/JMX source register.
std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
                            IR::Reg brx_reg) {
    return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
        const LDC::Encoding ldc{insn};
        return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
               ldc.mode == LDC::Mode::Default;
    });
}

/// Find the SHL (immediate) that produced the LDC's source register.
std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
                            IR::Reg ldc_reg) {
    return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
        const Encoding shl{insn};
        return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
    });
}

/// Find the IMNMX (immediate) that produced the SHL's source register.
std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
                              IR::Reg shl_reg) {
    return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
        const Encoding imnmx{insn};
        return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
    });
}
61} // Anonymous namespace
62
// Reconstruct the constant-buffer jump table behind the BRX/JMX at brx_pos by
// scanning backwards (no further than block_begin) for the chain that built
// its source register: IMNMX (clamps the index) -> SHL (scales it) ->
// LDC (loads the table entry) -> BRX/JMX.
// Returns std::nullopt when any link of the chain is missing.
std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
                                                                Location block_begin) {
    const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
    const Opcode brx_opcode{Decode(brx_insn)};
    if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
        throw LogicError("Tracked instruction is not BRX or JMX");
    }
    const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
    const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};

    // Find the LDC that produced the register the branch jumps through
    Location pos{brx_pos};
    const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
    if (!ldc_insn) {
        return std::nullopt;
    }
    const LDC::Encoding ldc{*ldc_insn};
    const u32 cbuf_index{static_cast<u32>(ldc.index)};
    const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
    const IR::Reg ldc_reg{ldc.src_reg};

    // Find the SHL that produced the LDC's source register
    const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
    if (!shl_insn) {
        return std::nullopt;
    }
    const Encoding shl{*shl_insn};
    const IR::Reg shl_reg{shl.src_reg};

    // Find the IMNMX whose immediate bounds the table index
    const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
    if (!imnmx_insn) {
        return std::nullopt;
    }
    const Encoding imnmx{*imnmx_insn};
    if (imnmx.is_negative != 0) {
        // Bit 56 set rejects the match — presumably a mode under which the
        // immediate is not a usable upper bound; confirm against the ISA.
        return std::nullopt;
    }
    const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
    // The clamp keeps indices in [0, immediate], hence immediate + 1 entries
    return IndirectBranchTableInfo{
        .cbuf_index = cbuf_index,
        .cbuf_offset = cbuf_offset,
        .num_entries = imnmx_immediate + 1,
        .branch_offset = brx_offset,
        .branch_reg = brx_reg,
    };
}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <optional>
8
9#include "common/bit_field.h"
10#include "common/common_types.h"
11#include "shader_recompiler/environment.h"
12#include "shader_recompiler/frontend/ir/reg.h"
13#include "shader_recompiler/frontend/maxwell/location.h"
14
15namespace Shader::Maxwell {
16
/// Result of tracking a BRX/JMX indirect branch: where its jump table lives in
/// the constant buffers and how the branch target is computed.
struct IndirectBranchTableInfo {
    u32 cbuf_index{};     ///< Constant buffer holding the branch table
    u32 cbuf_offset{};    ///< Byte offset of the table inside that constant buffer
    u32 num_entries{};    ///< Number of entries in the table
    s32 branch_offset{};  ///< Offset applied by the BRX/JMX instruction itself
    IR::Reg branch_reg{}; ///< Register the BRX/JMX branches through
};

/// Try to reconstruct the indirect branch table used by the BRX/JMX at brx_pos,
/// scanning backwards no further than block_begin.
/// Returns std::nullopt when the expected instruction pattern is not found.
std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
                                                                Location block_begin);
27
28} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/flow_test.h"
10#include "shader_recompiler/frontend/ir/reg.h"
11
12namespace Shader::Maxwell {
13
/// A Maxwell predicate reference: a predicate register index plus a negation flag.
struct Predicate {
    Predicate() = default;
    /// Reference predicate register index_, optionally negated.
    Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
    /// Constant predicate: index 7 acts as always-true (see the negation of value).
    Predicate(bool value) : index{7}, negated{!value} {}
    /// Decode from a raw 4-bit field: low 3 bits are the index, bit 3 the negation.
    Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}

    unsigned index; // Predicate register index (0-7)
    bool negated;   // True when the predicate's value is inverted
};

inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
    return lhs.index == rhs.index && lhs.negated == rhs.negated;
}

inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
    return !(lhs == rhs);
}
31
/// Raw 64-bit Maxwell instruction word with bitfield views for the fields the
/// control-flow analyzer needs (predicate and branch target).
union Instruction {
    Instruction(u64 raw_) : raw{raw_} {}

    u64 raw;

    union {
        // presumably: target comes from a constant buffer when set — confirm
        BitField<5, 1, u64> is_cbuf;
        BitField<0, 5, IR::FlowTest> flow_test;

        /// Absolute 32-bit branch target.
        [[nodiscard]] u32 Absolute() const noexcept {
            return static_cast<u32>(absolute);
        }

        /// Sign-extended 24-bit relative branch offset.
        [[nodiscard]] s32 Offset() const noexcept {
            return static_cast<s32>(offset);
        }

    private:
        BitField<20, 24, s64> offset;
        BitField<20, 32, u64> absolute;
    } branch;

    /// Decode the 4-bit predicate field (index + negation bit).
    [[nodiscard]] Predicate Pred() const noexcept {
        return Predicate{pred};
    }

private:
    BitField<16, 4, u64> pred;
};
// Must stay trivially copyable so instances can alias raw instruction data.
static_assert(std::is_trivially_copyable_v<Instruction>);
62
63} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <compare>
8#include <iterator>
9
10#include <fmt/format.h>
11
12#include "common/common_types.h"
13#include "shader_recompiler/exception.h"
14
15namespace Shader::Maxwell {
16
/// Strongly-typed program counter for Maxwell shader code.
/// Offsets advance in 8-byte steps; the first slot of every 32-byte bundle is
/// skipped by Align()/Step()/Back() — presumably the scheduling/control word
/// slot; confirm against the Maxwell ISA.
class Location {
    // Bias subtracted to form a "virtual" location; virtual locations are
    // recognizable because they are no longer 8-byte aligned.
    static constexpr u32 VIRTUAL_BIAS{4};

public:
    constexpr Location() = default;

    /// Construct from a raw byte offset, which must be a multiple of 8.
    constexpr Location(u32 initial_offset) : offset{initial_offset} {
        if (initial_offset % 8 != 0) {
            throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
        }
        Align();
    }

    /// Return a copy of this location biased into the virtual address space.
    constexpr Location Virtual() const noexcept {
        Location virtual_location;
        virtual_location.offset = offset - VIRTUAL_BIAS;
        return virtual_location;
    }

    /// Raw byte offset of this location.
    [[nodiscard]] constexpr u32 Offset() const noexcept {
        return offset;
    }

    /// True when this location was produced by Virtual().
    [[nodiscard]] constexpr bool IsVirtual() const {
        return offset % 8 == VIRTUAL_BIAS;
    }

    constexpr auto operator<=>(const Location&) const noexcept = default;

    // NOTE(review): the increment/decrement operators are swapped relative to
    // C++ convention — the prefix forms return the OLD value while the postfix
    // forms step and return the NEW value. Callers across the frontend use
    // them for the side effect only; audit every use before "fixing" this.
    constexpr Location operator++() noexcept {
        const Location copy{*this};
        Step();
        return copy;
    }

    constexpr Location operator++(int) noexcept {
        Step();
        return *this;
    }

    constexpr Location operator--() noexcept {
        const Location copy{*this};
        Back();
        return copy;
    }

    constexpr Location operator--(int) noexcept {
        Back();
        return *this;
    }

    /// Advance by a (possibly negative) number of instruction slots.
    constexpr Location operator+(int number) const {
        Location new_pc{*this};
        while (number > 0) {
            --number;
            ++new_pc;
        }
        while (number < 0) {
            ++number;
            --new_pc;
        }
        return new_pc;
    }

    constexpr Location operator-(int number) const {
        return operator+(-number);
    }

private:
    // Skip the first 8-byte slot of a 32-byte bundle.
    constexpr void Align() {
        offset += offset % 32 == 0 ? 8 : 0;
    }

    // Step forward one instruction, skipping the first slot of the next bundle.
    constexpr void Step() {
        offset += 8 + (offset % 32 == 24 ? 8 : 0);
    }

    // Step backward one instruction, skipping the first slot of a bundle.
    constexpr void Back() {
        offset -= 8 + (offset % 32 == 8 ? 8 : 0);
    }

    // Poison default value so use of a default-constructed location is obvious.
    u32 offset{0xcccccccc};
};
100
101} // namespace Shader::Maxwell
102
/// Allow Location to be used directly in fmt format strings.
template <>
struct fmt::formatter<Shader::Maxwell::Location> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
        // Always render as 4-digit lowercase hexadecimal, e.g. "02a0"
        return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
    }
};
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5INST(AL2P, "AL2P", "1110 1111 1010 0---")
6INST(ALD, "ALD", "1110 1111 1101 1---")
7INST(AST, "AST", "1110 1111 1111 0---")
8INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
9INST(ATOM, "ATOM", "1110 1101 ---- ----")
10INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
11INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
12INST(B2R, "B2R", "1111 0000 1011 1---")
13INST(BAR, "BAR", "1111 0000 1010 1---")
14INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
15INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
16INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
17INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
18INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
19INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
20INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
21INST(BPT, "BPT", "1110 0011 1010 ----")
22INST(BRA, "BRA", "1110 0010 0100 ----")
23INST(BRK, "BRK", "1110 0011 0100 ----")
24INST(BRX, "BRX", "1110 0010 0101 ----")
25INST(CAL, "CAL", "1110 0010 0110 ----")
26INST(CCTL, "CCTL", "1110 1111 011- ----")
27INST(CCTLL, "CCTLL", "1110 1111 100- ----")
28INST(CONT, "CONT", "1110 0011 0101 ----")
29INST(CS2R, "CS2R", "0101 0000 1100 1---")
30INST(CSET, "CSET", "0101 0000 1001 1---")
31INST(CSETP, "CSETP", "0101 0000 1010 0---")
32INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
33INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
34INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
35INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
36INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
37INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
38INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
39INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
40INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
41INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
42INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
43INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
44INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
45INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
46INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
47INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
48INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
49INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
50INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
51INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
52INST(EXIT, "EXIT", "1110 0011 0000 ----")
53INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
54INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
55INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
56INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
57INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
58INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
59INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
60INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
61INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
62INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
63INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
64INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
65INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
66INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
67INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
68INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
69INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
70INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
71INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
72INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
73INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
74INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
75INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
76INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
77INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
78INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
79INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
80INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
81INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
82INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
83INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
84INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
85INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
86INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
87INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
88INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
89INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
90INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
91INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
92INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
93INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
94INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
95INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
96INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
97INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
98INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
99INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----")
100INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----")
101INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----")
102INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----")
103INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---")
104INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----")
105INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----")
106INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----")
107INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---")
108INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----")
109INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----")
110INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---")
111INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----")
112INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----")
113INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---")
114INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---")
115INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---")
116INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---")
117INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---")
118INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---")
119INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---")
120INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---")
121INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---")
122INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----")
123INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----")
124INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----")
125INST(IADD32I, "IADD32I", "0001 110- ---- ----")
126INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----")
127INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----")
128INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----")
129INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----")
130INST(IDE, "IDE", "1110 0011 1001 ----")
131INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---")
132INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---")
133INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----")
134INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----")
135INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----")
136INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----")
137INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----")
138INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----")
139INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----")
140INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----")
141INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----")
142INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---")
143INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---")
144INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---")
145INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---")
146INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---")
147INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---")
148INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----")
149INST(IPA, "IPA", "1110 0000 ---- ----")
150INST(ISBERD, "ISBERD", "1110 1111 1101 0---")
151INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---")
152INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---")
153INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---")
154INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----")
155INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----")
156INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----")
157INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----")
158INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----")
159INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----")
160INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----")
161INST(JCAL, "JCAL", "1110 0010 0010 ----")
162INST(JMP, "JMP", "1110 0010 0001 ----")
163INST(JMX, "JMX", "1110 0010 0000 ----")
164INST(KIL, "KIL", "1110 0011 0011 ----")
165INST(LD, "LD", "100- ---- ---- ----")
166INST(LDC, "LDC", "1110 1111 1001 0---")
167INST(LDG, "LDG", "1110 1110 1101 0---")
168INST(LDL, "LDL", "1110 1111 0100 0---")
169INST(LDS, "LDS", "1110 1111 0100 1---")
170INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---")
171INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----")
172INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---")
173INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----")
174INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---")
175INST(LEPC, "LEPC", "0101 0000 1101 0---")
176INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----")
177INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---")
178INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---")
179INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---")
180INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---")
181INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----")
182INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----")
183INST(LOP32I, "LOP32I", "0000 01-- ---- ----")
184INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---")
185INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---")
186INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---")
187INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---")
188INST(MOV32I, "MOV32I", "0000 0001 0000 ----")
189INST(MUFU, "MUFU", "0101 0000 1000 0---")
190INST(NOP, "NOP", "0101 0000 1011 0---")
191INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---")
192INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---")
193INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---")
194INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---")
195INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---")
196INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---")
197INST(PBK, "PBK", "1110 0010 1010 ----")
198INST(PCNT, "PCNT", "1110 0010 1011 ----")
199INST(PEXIT, "PEXIT", "1110 0010 0011 ----")
200INST(PIXLD, "PIXLD", "1110 1111 1110 1---")
201INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----")
202INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---")
203INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---")
204INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---")
205INST(PRET, "PRET", "1110 0010 0111 ----")
206INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----")
207INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----")
208INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----")
209INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----")
210INST(PSET, "PSET", "0101 0000 1000 1---")
211INST(PSETP, "PSETP", "0101 0000 1001 0---")
212INST(R2B, "R2B", "1111 0000 1100 0---")
213INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---")
214INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---")
215INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---")
216INST(RAM, "RAM", "1110 0011 1000 ----")
217INST(RED, "RED", "1110 1011 1111 1---")
218INST(RET, "RET", "1110 0011 0010 ----")
219INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---")
220INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---")
221INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---")
222INST(RTT, "RTT", "1110 0011 0110 ----")
223INST(S2R, "S2R", "1111 0000 1100 1---")
224INST(SAM, "SAM", "1110 0011 0111 ----")
225INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---")
226INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---")
227INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---")
228INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----")
229INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----")
230INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---")
231INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---")
232INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---")
233INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---")
234INST(SHFL, "SHFL", "1110 1111 0001 0---")
235INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---")
236INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---")
237INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---")
238INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---")
239INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---")
240INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---")
241INST(SSY, "SSY", "1110 0010 1001 ----")
242INST(ST, "ST", "101- ---- ---- ----")
243INST(STG, "STG", "1110 1110 1101 1---")
244INST(STL, "STL", "1110 1111 0101 0---")
245INST(STP, "STP", "1110 1110 1010 0---")
246INST(STS, "STS", "1110 1111 0101 1---")
247INST(SUATOM, "SUATOM", "1110 1010 0--- ----")
248INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----")
249INST(SULD, "SULD", "1110 1011 000- ----")
250INST(SURED, "SURED", "1110 1011 010- ----")
251INST(SUST, "SUST", "1110 1011 001- ----")
252INST(SYNC, "SYNC", "1111 0000 1111 1---")
253INST(TEX, "TEX", "1100 0--- ---- ----")
254INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
255INST(TEXS, "TEXS", "1101 -00- ---- ----")
256INST(TLD, "TLD", "1101 1100 ---- ----")
257INST(TLD_b, "TLD (b)", "1101 1101 ---- ----")
258INST(TLD4, "TLD4", "1100 10-- ---- ----")
259INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----")
260INST(TLD4S, "TLD4S", "1101 1111 -0-- ----")
261INST(TLDS, "TLDS", "1101 -01- ---- ----")
262INST(TMML, "TMML", "1101 1111 0101 1---")
263INST(TMML_b, "TMML (b)", "1101 1111 0110 0---")
264INST(TXA, "TXA", "1101 1111 0100 0---")
265INST(TXD, "TXD", "1101 1110 00-- ----")
266INST(TXD_b, "TXD (b)", "1101 1110 01-- ----")
267INST(TXQ, "TXQ", "1101 1111 0100 1---")
268INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---")
269INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----")
270INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----")
271INST(VADD, "VADD", "0010 00-- ---- ----")
272INST(VMAD, "VMAD", "0101 1111 ---- ----")
273INST(VMNMX, "VMNMX", "0011 101- ---- ----")
274INST(VOTE, "VOTE", "0101 0000 1101 1---")
275INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---")
276INST(VSET, "VSET", "0100 000- ---- ----")
277INST(VSETP, "VSETP", "0101 0000 1111 0---")
278INST(VSHL, "VSHL", "0101 0111 ---- ----")
279INST(VSHR, "VSHR", "0101 0110 ---- ----")
280INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----")
281INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----")
282INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----")
283INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----")
284
285// Removed due to its weird formatting making fast tables larger
286// INST(CCTLT, "CCTLT", "1110 1011 1111 0--0")
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
new file mode 100644
index 000000000..ccc40c20c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp
@@ -0,0 +1,26 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9
10namespace Shader::Maxwell {
11namespace {
12constexpr std::array NAME_TABLE{
13#define INST(name, cute, encode) cute,
14#include "maxwell.inc"
15#undef INST
16};
17} // Anonymous namespace
18
19const char* NameOf(Opcode opcode) {
20 if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) {
21 throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode));
22 }
23 return NAME_TABLE[static_cast<size_t>(opcode)];
24}
25
26} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h
new file mode 100644
index 000000000..cd574f29d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/opcodes.h
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <fmt/format.h>
8
namespace Shader::Maxwell {

// One enumerator per instruction in maxwell.inc. The INST macro keeps this
// enum in lockstep with the encoding table and the name table in opcodes.cpp.
enum class Opcode {
#define INST(name, cute, encode) name,
#include "maxwell.inc"
#undef INST
};

// Human-readable name of an opcode; throws InvalidArgument for out-of-range
// raw values (see opcodes.cpp).
const char* NameOf(Opcode opcode);

} // namespace Shader::Maxwell

// Lets Opcode be used directly in fmt format strings, e.g. fmt::format("{}", op).
template <>
struct fmt::formatter<Shader::Maxwell::Opcode> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) {
        return format_to(ctx.out(), "{}", NameOf(opcode));
    }
};
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
new file mode 100644
index 000000000..8b3e0a15c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -0,0 +1,883 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <string>
8#include <unordered_map>
9#include <utility>
10#include <vector>
11#include <version>
12
13#include <fmt/format.h>
14
15#include <boost/intrusive/list.hpp>
16
17#include "shader_recompiler/environment.h"
18#include "shader_recompiler/frontend/ir/basic_block.h"
19#include "shader_recompiler/frontend/ir/ir_emitter.h"
20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/object_pool.h"
24
25namespace Shader::Maxwell {
26namespace {
struct Statement;

// Use normal_link because we are not guaranteed to destroy the tree in order
using ListBaseHook =
    boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>;

// Intrusive list of statements: nodes embed their own link hooks, so splicing
// subtrees between parents never allocates or copies elements.
using Tree = boost::intrusive::list<Statement,
                                    // Allow using Statement without a definition
                                    boost::intrusive::base_hook<ListBaseHook>,
                                    // Avoid linear complexity on splice, size is never called
                                    boost::intrusive::constant_time_size<false>>;
using Node = Tree::iterator;
39
// Kinds of nodes in the statement tree built by the goto-elimination pass.
// The first group are statements; the Identity/Not/Or/Variable/IndirectBranchCond
// group form the expression sub-language used as conditions.
enum class StatementType {
    Code,                      // Run of instructions from one flow block
    Goto,                      // Conditional jump to a Label (removed by GotoPass)
    Label,                     // Jump target identified by `id`
    If,
    Loop,
    Break,
    Return,
    Kill,
    Unreachable,
    Function,                  // Root of the tree
    Identity,                  // Expression: wraps a guest condition
    Not,
    Or,
    SetVariable,               // goto_L<id> = <expr>
    SetIndirectBranchVariable, // indirect_branch = reg + offset
    Variable,                  // Expression: reads goto_L<id>
    IndirectBranchCond,        // Expression: indirect_branch == location
};
59
60bool HasChildren(StatementType type) {
61 switch (type) {
62 case StatementType::If:
63 case StatementType::Loop:
64 case StatementType::Function:
65 return true;
66 default:
67 return false;
68 }
69}
70
// Empty tag types used to select the matching Statement constructor overload.
struct Goto {};
struct Label {};
struct If {};
struct Loop {};
struct Break {};
struct Return {};
struct Kill {};
struct Unreachable {};
struct FunctionTag {};
struct Identity {};
struct Not {};
struct Or {};
struct SetVariable {};
struct SetIndirectBranchVariable {};
struct Variable {};
struct IndirectBranchCond {};
87
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement
#endif
// Tagged-union tree node. `type` selects which members of the two anonymous
// unions are active; the tag-struct constructors keep tag and payload in sync.
struct Statement : ListBaseHook {
    Statement(const Flow::Block* block_, Statement* up_)
        : block{block_}, up{up_}, type{StatementType::Code} {}
    Statement(Goto, Statement* cond_, Node label_, Statement* up_)
        : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {}
    Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {}
    Statement(If, Statement* cond_, Tree&& children_, Statement* up_)
        : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {}
    Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_)
        : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {}
    Statement(Break, Statement* cond_, Statement* up_)
        : cond{cond_}, up{up_}, type{StatementType::Break} {}
    Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {}
    Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {}
    Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {}
    Statement(FunctionTag) : children{}, type{StatementType::Function} {}
    Statement(Identity, IR::Condition cond_, Statement* up_)
        : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {}
    Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {}
    Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_)
        : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {}
    Statement(SetVariable, u32 id_, Statement* op_, Statement* up_)
        : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {}
    Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_)
        : branch_offset{branch_offset_},
          branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {}
    Statement(Variable, u32 id_, Statement* up_)
        : id{id_}, up{up_}, type{StatementType::Variable} {}
    Statement(IndirectBranchCond, u32 location_, Statement* up_)
        : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {}

    ~Statement() {
        // Union members are never destroyed implicitly; Tree is the only
        // non-trivially-destructible member, so destroy it for the types
        // that actually hold children.
        if (HasChildren(type)) {
            std::destroy_at(&children);
        }
    }

    union {
        const Flow::Block* block;  // Code
        Node label;                // Goto
        Tree children;             // If, Loop, Function
        IR::Condition guest_cond;  // Identity
        Statement* op;             // Not, SetVariable
        Statement* op_a;           // Or
        u32 location;              // IndirectBranchCond
        s32 branch_offset;         // SetIndirectBranchVariable
    };
    union {
        Statement* cond;    // Goto, If, Loop, Break
        Statement* op_b;    // Or
        u32 id;             // Label, SetVariable, Variable
        IR::Reg branch_reg; // SetIndirectBranchVariable
    };
    Statement* up{};    // Parent statement; null only for the Function root
    StatementType type; // Discriminates both unions above
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif
151
// Debug helper: renders an expression statement (condition sub-language) as
// text. Only expression node types are meaningful here.
std::string DumpExpr(const Statement* stmt) {
    switch (stmt->type) {
    case StatementType::Identity:
        return fmt::format("{}", stmt->guest_cond);
    case StatementType::Not:
        return fmt::format("!{}", DumpExpr(stmt->op));
    case StatementType::Or:
        return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b));
    case StatementType::Variable:
        return fmt::format("goto_L{}", stmt->id);
    case StatementType::IndirectBranchCond:
        return fmt::format("(indirect_branch == {:x})", stmt->location);
    default:
        return "<invalid type>";
    }
}
168
// Debug pretty-printer for a statement (sub)tree in C-like pseudocode.
// Expression-only node types are illegal at statement position and throw.
[[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) {
    std::string ret;
    std::string indent(indentation, ' ');
    for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) {
        switch (stmt->type) {
        case StatementType::Code:
            ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent,
                               stmt->block->begin.Offset(), stmt->block->end.Offset(),
                               reinterpret_cast<uintptr_t>(stmt->block));
            break;
        case StatementType::Goto:
            ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond),
                               stmt->label->id);
            break;
        case StatementType::Label:
            ret += fmt::format("{}L{}:\n", indent, stmt->id);
            break;
        case StatementType::If:
            ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond));
            ret += DumpTree(stmt->children, indentation + 4);
            ret += fmt::format("{} }}\n", indent);
            break;
        case StatementType::Loop:
            ret += fmt::format("{} do {{\n", indent);
            ret += DumpTree(stmt->children, indentation + 4);
            ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond));
            break;
        case StatementType::Break:
            ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond));
            break;
        case StatementType::Return:
            ret += fmt::format("{} return;\n", indent);
            break;
        case StatementType::Kill:
            ret += fmt::format("{} kill;\n", indent);
            break;
        case StatementType::Unreachable:
            ret += fmt::format("{} unreachable;\n", indent);
            break;
        case StatementType::SetVariable:
            ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op));
            break;
        case StatementType::SetIndirectBranchVariable:
            ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg,
                               stmt->branch_offset);
            break;
        case StatementType::Function:
        case StatementType::Identity:
        case StatementType::Not:
        case StatementType::Or:
        case StatementType::Variable:
        case StatementType::IndirectBranchCond:
            throw LogicError("Statement can't be printed");
        }
    }
    return ret;
}
226
227void SanitizeNoBreaks(const Tree& tree) {
228 if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) {
229 throw NotImplementedException("Capturing statement with break nodes");
230 }
231}
232
233size_t Level(Node stmt) {
234 size_t level{0};
235 Statement* node{stmt->up};
236 while (node) {
237 ++level;
238 node = node->up;
239 }
240 return level;
241}
242
// Two nodes are directly related when, after walking the deeper one up to the
// shallower one's level, both share the same parent (i.e. one's scope chain
// passes through a sibling of the other).
bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) {
    const size_t goto_level{Level(goto_stmt)};
    const size_t label_level{Level(label_stmt)};
    size_t min_level;
    size_t max_level;
    Node min;
    Node max;
    if (label_level < goto_level) {
        min_level = label_level;
        max_level = goto_level;
        min = label_stmt;
        max = goto_stmt;
    } else { // goto_level < label_level
        min_level = goto_level;
        max_level = label_level;
        min = goto_stmt;
        max = label_stmt;
    }
    // Walk the deeper node up until both are at the same depth
    while (max_level > min_level) {
        --max_level;
        max = max->up;
    }
    return min->up == max->up;
}
267
268bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) {
269 return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt);
270}
271
// Debug-only check that both nodes live in the same parent list: scans
// backwards from goto_stmt to the front, then forwards to the end.
[[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept {
    Node it{goto_stmt};
    do {
        if (it == label_stmt) {
            return true;
        }
        --it;
    } while (it != goto_stmt->up->children.begin());
    // `it` is now at begin(); continue scanning forward to the end
    while (it != goto_stmt->up->children.end()) {
        if (it == label_stmt) {
            return true;
        }
        ++it;
    }
    return false;
}
288
289Node SiblingFromNephew(Node uncle, Node nephew) noexcept {
290 Statement* const parent{uncle->up};
291 Statement* it{&*nephew};
292 while (it->up != parent) {
293 it = it->up;
294 }
295 return Tree::s_iterator_to(*it);
296}
297
298bool AreOrdered(Node left_sibling, Node right_sibling) noexcept {
299 const Node end{right_sibling->up->children.end()};
300 for (auto it = right_sibling; it != end; ++it) {
301 if (it == left_sibling) {
302 return false;
303 }
304 }
305 return true;
306}
307
308bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept {
309 const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)};
310 return AreOrdered(sibling, goto_stmt);
311}
312
// Removes every Goto from the statement tree using structured goto-elimination:
// outward/inward movement and lifting transformations make each goto a sibling
// of its label, after which it is replaced by an If or a do-while Loop.
class GotoPass {
public:
    explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} {
        std::vector gotos{BuildTree(cfg)};
        const auto end{gotos.rend()};
        // Eliminate gotos in reverse insertion order
        for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) {
            RemoveGoto(*goto_stmt);
        }
    }

    Statement& RootStatement() noexcept {
        return root_stmt;
    }

private:
    // Applies movement/lifting until goto and label are siblings, then removes
    // the goto by deletion, conditional conversion, or loop conversion.
    void RemoveGoto(Node goto_stmt) {
        // Force goto_stmt and label_stmt to be directly related
        const Node label_stmt{goto_stmt->label};
        if (IsIndirectlyRelated(goto_stmt, label_stmt)) {
            // Move goto_stmt out using outward-movement transformation until it becomes
            // directly related to label_stmt
            while (!IsDirectlyRelated(goto_stmt, label_stmt)) {
                goto_stmt = MoveOutward(goto_stmt);
            }
        }
        // Force goto_stmt and label_stmt to be siblings
        if (IsDirectlyRelated(goto_stmt, label_stmt)) {
            const size_t label_level{Level(label_stmt)};
            size_t goto_level{Level(goto_stmt)};
            if (goto_level > label_level) {
                // Move goto_stmt out of its level using outward-movement transformations
                while (goto_level > label_level) {
                    goto_stmt = MoveOutward(goto_stmt);
                    --goto_level;
                }
            } else { // Level(goto_stmt) < Level(label_stmt)
                if (NeedsLift(goto_stmt, label_stmt)) {
                    // Lift goto_stmt to above stmt containing label_stmt using goto-lifting
                    // transformations
                    goto_stmt = Lift(goto_stmt);
                }
                // Move goto_stmt into label_stmt's level using inward-movement transformation
                while (goto_level < label_level) {
                    goto_stmt = MoveInward(goto_stmt);
                    ++goto_level;
                }
            }
        }
        // Expensive operation:
        // if (!AreSiblings(goto_stmt, label_stmt)) {
        //     throw LogicError("Goto is not a sibling with the label");
        // }
        // goto_stmt and label_stmt are guaranteed to be siblings, eliminate
        if (std::next(goto_stmt) == label_stmt) {
            // Simply eliminate the goto if the label is next to it
            goto_stmt->up->children.erase(goto_stmt);
        } else if (AreOrdered(goto_stmt, label_stmt)) {
            // Eliminate goto_stmt with a conditional
            EliminateAsConditional(goto_stmt, label_stmt);
        } else {
            // Eliminate goto_stmt with a loop
            EliminateAsLoop(goto_stmt, label_stmt);
        }
    }

    // Flattens the CFG's entry function into the root tree and returns every
    // Goto node created, in insertion order.
    std::vector<Node> BuildTree(Flow::CFG& cfg) {
        u32 label_id{0};
        std::vector<Node> gotos;
        Flow::Function& first_function{cfg.Functions().front()};
        BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt);
        return gotos;
    }

    // Lowers one CFG function into Label/Code/Goto statements inserted before
    // function_insert_point. Called functions are inlined recursively; their
    // Return end class jumps to return_label (set only for inlined calls).
    void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id,
                   std::vector<Node>& gotos, Node function_insert_point,
                   std::optional<Node> return_label) {
        Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)};
        Tree& root{root_stmt.children};
        std::unordered_map<Flow::Block*, Node> local_labels;
        local_labels.reserve(function.blocks.size());

        // First pass: create one label per block so forward branches resolve
        for (Flow::Block& block : function.blocks) {
            Statement* const label{pool.Create(Label{}, label_id, &root_stmt)};
            const Node label_it{root.insert(function_insert_point, *label)};
            local_labels.emplace(&block, label_it);
            ++label_id;
        }
        for (Flow::Block& block : function.blocks) {
            const Node label{local_labels.at(&block)};
            // Insertion point
            const Node ip{std::next(label)};

            // Reset goto variables before the first block and after its respective label
            const auto make_reset_variable{[&]() -> Statement& {
                return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt);
            }};
            root.push_front(make_reset_variable());
            root.insert(ip, make_reset_variable());
            root.insert(ip, *pool.Create(&block, &root_stmt));

            switch (block.end_class) {
            case Flow::EndClass::Branch: {
                Statement* const always_cond{
                    pool.Create(Identity{}, IR::Condition{true}, &root_stmt)};
                if (block.cond == IR::Condition{true}) {
                    const Node true_label{local_labels.at(block.branch_true)};
                    gotos.push_back(
                        root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt)));
                } else if (block.cond == IR::Condition{false}) {
                    const Node false_label{local_labels.at(block.branch_false)};
                    gotos.push_back(root.insert(
                        ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
                } else {
                    // Conditional branch: goto true-target when cond holds,
                    // otherwise fall into an unconditional goto to the false-target
                    const Node true_label{local_labels.at(block.branch_true)};
                    const Node false_label{local_labels.at(block.branch_false)};
                    Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
                    gotos.push_back(
                        root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt)));
                    gotos.push_back(root.insert(
                        ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt)));
                }
                break;
            }
            case Flow::EndClass::IndirectBranch:
                // One conditional goto per known indirect target, selected by
                // comparing the indirect-branch variable against each address
                root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg,
                                             block.branch_offset, &root_stmt));
                for (const Flow::IndirectBranch& indirect : block.indirect_branches) {
                    const Node indirect_label{local_labels.at(indirect.block)};
                    Statement* cond{
                        pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)};
                    Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)};
                    gotos.push_back(root.insert(ip, *goto_stmt));
                }
                root.insert(ip, *pool.Create(Unreachable{}, &root_stmt));
                break;
            case Flow::EndClass::Call: {
                // Inline the callee and make its returns jump back here
                Flow::Function& call{cfg.Functions()[block.function_call]};
                const Node call_return_label{local_labels.at(block.return_block)};
                BuildTree(cfg, call, label_id, gotos, ip, call_return_label);
                break;
            }
            case Flow::EndClass::Exit:
                root.insert(ip, *pool.Create(Return{}, &root_stmt));
                break;
            case Flow::EndClass::Return: {
                Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)};
                auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)};
                gotos.push_back(root.insert(ip, *goto_stmt));
                break;
            }
            case Flow::EndClass::Kill:
                root.insert(ip, *pool.Create(Kill{}, &root_stmt));
                break;
            }
        }
    }

    // Re-parents direct children after a subtree was spliced under `tree`
    void UpdateTreeUp(Statement* tree) {
        for (Statement& stmt : tree->children) {
            stmt.up = tree;
        }
    }

    // Forward goto with label in the same list: wrap the skipped range in
    // `if (!cond) { ... }` and drop the goto
    void EliminateAsConditional(Node goto_stmt, Node label_stmt) {
        Tree& body{goto_stmt->up->children};
        Tree if_body;
        if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt);
        Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)};
        Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)};
        UpdateTreeUp(if_stmt);
        body.insert(goto_stmt, *if_stmt);
        body.erase(goto_stmt);
    }

    // Backward goto with label in the same list: wrap the repeated range in
    // `do { ... } while (cond)` and drop the goto
    void EliminateAsLoop(Node goto_stmt, Node label_stmt) {
        Tree& body{goto_stmt->up->children};
        Tree loop_body;
        loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt);
        Statement* const cond{goto_stmt->cond};
        Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)};
        UpdateTreeUp(loop);
        body.insert(goto_stmt, *loop);
        body.erase(goto_stmt);
    }

    // Moves a goto one level out of its enclosing If or Loop
    [[nodiscard]] Node MoveOutward(Node goto_stmt) {
        switch (goto_stmt->up->type) {
        case StatementType::If:
            return MoveOutwardIf(goto_stmt);
        case StatementType::Loop:
            return MoveOutwardLoop(goto_stmt);
        default:
            throw LogicError("Invalid outward movement");
        }
    }

    // Moves a goto one level into the subtree that contains its label:
    // materialize the condition in goto_L<id>, guard the skipped range with
    // `if (!goto_L<id>)`, and re-emit the goto at the top of the nested scope
    [[nodiscard]] Node MoveInward(Node goto_stmt) {
        Statement* const parent{goto_stmt->up};
        Tree& body{parent->children};
        const Node label{goto_stmt->label};
        const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};
        const u32 label_id{label->id};

        Statement* const goto_cond{goto_stmt->cond};
        Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
        body.insert(goto_stmt, *set_var);

        Tree if_body;
        if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt);
        Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
        Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)};
        if (!if_body.empty()) {
            Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)};
            UpdateTreeUp(if_stmt);
            body.insert(goto_stmt, *if_stmt);
        }
        body.erase(goto_stmt);

        switch (label_nested_stmt->type) {
        case StatementType::If:
            // Update nested if condition so the moved goto can enter the scope
            label_nested_stmt->cond =
                pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt);
            break;
        case StatementType::Loop:
            break;
        default:
            throw LogicError("Invalid inward movement");
        }
        Tree& nested_tree{label_nested_stmt->children};
        Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)};
        return nested_tree.insert(nested_tree.begin(), *new_goto);
    }

    // Wraps the range from the label's containing sibling up to the goto in a
    // `do { ... } while (goto_L<id>)` loop so the goto can move backwards
    [[nodiscard]] Node Lift(Node goto_stmt) {
        Statement* const parent{goto_stmt->up};
        Tree& body{parent->children};
        const Node label{goto_stmt->label};
        const u32 label_id{label->id};
        const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)};

        Tree loop_body;
        loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt);
        SanitizeNoBreaks(loop_body);
        Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)};
        Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)};
        UpdateTreeUp(loop_stmt);
        body.insert(goto_stmt, *loop_stmt);

        Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)};
        loop_stmt->children.push_front(*new_goto);
        const Node new_goto_node{loop_stmt->children.begin()};

        Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)};
        loop_stmt->children.push_back(*set_var);

        body.erase(goto_stmt);
        return new_goto_node;
    }

    // Outward movement through an If: store the condition in goto_L<id>, guard
    // the remainder of the if body with `if (!goto_L<id>)`, and re-emit the
    // goto right after the enclosing If
    Node MoveOutwardIf(Node goto_stmt) {
        const Node parent{Tree::s_iterator_to(*goto_stmt->up)};
        Tree& body{parent->children};
        const u32 label_id{goto_stmt->label->id};
        Statement* const goto_cond{goto_stmt->cond};
        Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)};
        body.insert(goto_stmt, *set_goto_var);

        Tree if_body;
        if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end());
        // NOTE(review): pop_front drops the first statement spliced after the
        // goto from the tree entirely — verify against upstream that this is
        // the intended behavior here.
        if_body.pop_front();
        Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
        Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)};
        Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)};
        UpdateTreeUp(if_stmt);
        body.insert(goto_stmt, *if_stmt);

        // Erase only unlinks (normal_link) and the pool owns the storage, so
        // goto_stmt remains dereferenceable below
        body.erase(goto_stmt);

        Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)};
        Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)};
        Tree& parent_tree{parent->up->children};
        return parent_tree.insert(std::next(parent), *new_goto);
    }

    // Outward movement through a Loop: store the condition in goto_L<id>,
    // conditionally break out of the loop, and re-emit the goto after it
    Node MoveOutwardLoop(Node goto_stmt) {
        Statement* const parent{goto_stmt->up};
        Tree& body{parent->children};
        const u32 label_id{goto_stmt->label->id};
        Statement* const goto_cond{goto_stmt->cond};
        Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)};
        Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)};
        Statement* const break_stmt{pool.Create(Break{}, cond, parent)};
        body.insert(goto_stmt, *set_goto_var);
        body.insert(goto_stmt, *break_stmt);
        // As in MoveOutwardIf, the erased node stays valid (pool-owned storage)
        body.erase(goto_stmt);

        const Node loop{Tree::s_iterator_to(*goto_stmt->up)};
        Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)};
        Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)};
        Tree& parent_tree{loop->up->children};
        return parent_tree.insert(std::next(loop), *new_goto);
    }

    ObjectPool<Statement>& pool;
    Statement root_stmt{FunctionTag{}};
};
620
621[[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) {
622 Tree& tree{stmt.up->children};
623 const Node end{tree.end()};
624 Node forward_node{std::next(Tree::s_iterator_to(stmt))};
625 while (forward_node != end && !HasChildren(forward_node->type)) {
626 if (forward_node->type == StatementType::Code) {
627 return &*forward_node;
628 }
629 ++forward_node;
630 }
631 return nullptr;
632}
633
// Lowers an expression statement (the condition sub-language) to an IR boolean
// through the given emitter. Statement-position node types throw.
[[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) {
    switch (stmt.type) {
    case StatementType::Identity:
        return ir.Condition(stmt.guest_cond);
    case StatementType::Not:
        return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)});
    case StatementType::Or:
        return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b));
    case StatementType::Variable:
        return ir.GetGotoVariable(stmt.id);
    case StatementType::IndirectBranchCond:
        return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location));
    default:
        throw NotImplementedException("Statement type {}", stmt.type);
    }
}
650
// Walks the goto-free statement tree and emits the final product: IR basic
// blocks with branch edges, interleaved with structured syntax nodes
// (If/EndIf, Loop/Repeat, Break, Return, Unreachable) in syntax_list.
class TranslatePass {
public:
    TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
                  ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
                  IR::AbstractSyntaxList& syntax_list_)
        : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
          syntax_list{syntax_list_} {
        Visit(root_stmt, nullptr, nullptr);

        // Insert the prologue at the start of the program's first block
        IR::Block& first_block{*syntax_list.front().data.block};
        IR::IREmitter ir(first_block, first_block.begin());
        ir.Prologue();
    }

private:
    // Emits the children of `parent`. break_block is where an enclosing loop's
    // Break jumps; fallthrough_block is where control continues after the last
    // child (nullptr means the program ends and Unreachable is emitted).
    void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) {
        IR::Block* current_block{};
        // Lazily open a block so consecutive simple statements share one
        const auto ensure_block{[&] {
            if (current_block) {
                return;
            }
            current_block = block_pool.Create(inst_pool);
            auto& node{syntax_list.emplace_back()};
            node.type = IR::AbstractSyntaxNode::Type::Block;
            node.data.block = current_block;
        }};
        Tree& tree{parent.children};
        for (auto it = tree.begin(); it != tree.end(); ++it) {
            Statement& stmt{*it};
            switch (stmt.type) {
            case StatementType::Label:
                // Labels can be ignored
                break;
            case StatementType::Code: {
                ensure_block();
                Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset());
                break;
            }
            case StatementType::SetVariable: {
                ensure_block();
                IR::IREmitter ir{*current_block};
                ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op));
                break;
            }
            case StatementType::SetIndirectBranchVariable: {
                ensure_block();
                IR::IREmitter ir{*current_block};
                IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))};
                ir.SetIndirectBranchVariable(address);
                break;
            }
            case StatementType::If: {
                ensure_block();
                IR::Block* const merge_block{MergeBlock(parent, stmt)};

                // Implement if header block
                IR::IREmitter ir{*current_block};
                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};

                // Reserve the If node now; it is filled in after the body is
                // visited so it can point at the then-block
                const size_t if_node_index{syntax_list.size()};
                syntax_list.emplace_back();

                // Visit children
                const size_t then_block_index{syntax_list.size()};
                Visit(stmt, break_block, merge_block);

                IR::Block* const then_block{syntax_list.at(then_block_index).data.block};
                current_block->AddBranch(then_block);
                current_block->AddBranch(merge_block);
                current_block = merge_block;

                auto& if_node{syntax_list[if_node_index]};
                if_node.type = IR::AbstractSyntaxNode::Type::If;
                if_node.data.if_node.cond = cond;
                if_node.data.if_node.body = then_block;
                if_node.data.if_node.merge = merge_block;

                auto& endif_node{syntax_list.emplace_back()};
                endif_node.type = IR::AbstractSyntaxNode::Type::EndIf;
                endif_node.data.end_if.merge = merge_block;

                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = merge_block;
                break;
            }
            case StatementType::Loop: {
                IR::Block* const loop_header_block{block_pool.Create(inst_pool)};
                if (current_block) {
                    current_block->AddBranch(loop_header_block);
                }
                auto& header_node{syntax_list.emplace_back()};
                header_node.type = IR::AbstractSyntaxNode::Type::Block;
                header_node.data.block = loop_header_block;

                IR::Block* const continue_block{block_pool.Create(inst_pool)};
                IR::Block* const merge_block{MergeBlock(parent, stmt)};

                // Reserve the Loop node; filled in after the body is visited
                const size_t loop_node_index{syntax_list.size()};
                syntax_list.emplace_back();

                // Visit children
                const size_t body_block_index{syntax_list.size()};
                Visit(stmt, merge_block, continue_block);

                // The continue block is located at the end of the loop
                IR::IREmitter ir{*continue_block};
                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};

                IR::Block* const body_block{syntax_list.at(body_block_index).data.block};
                loop_header_block->AddBranch(body_block);

                continue_block->AddBranch(loop_header_block);
                continue_block->AddBranch(merge_block);

                current_block = merge_block;

                auto& loop{syntax_list[loop_node_index]};
                loop.type = IR::AbstractSyntaxNode::Type::Loop;
                loop.data.loop.body = body_block;
                loop.data.loop.continue_block = continue_block;
                loop.data.loop.merge = merge_block;

                auto& continue_block_node{syntax_list.emplace_back()};
                continue_block_node.type = IR::AbstractSyntaxNode::Type::Block;
                continue_block_node.data.block = continue_block;

                auto& repeat{syntax_list.emplace_back()};
                repeat.type = IR::AbstractSyntaxNode::Type::Repeat;
                repeat.data.repeat.cond = cond;
                repeat.data.repeat.loop_header = loop_header_block;
                repeat.data.repeat.merge = merge_block;

                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = merge_block;
                break;
            }
            case StatementType::Break: {
                ensure_block();
                IR::Block* const skip_block{MergeBlock(parent, stmt)};

                IR::IREmitter ir{*current_block};
                const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))};
                current_block->AddBranch(break_block);
                current_block->AddBranch(skip_block);
                current_block = skip_block;

                auto& break_node{syntax_list.emplace_back()};
                break_node.type = IR::AbstractSyntaxNode::Type::Break;
                break_node.data.break_node.cond = cond;
                break_node.data.break_node.merge = break_block;
                break_node.data.break_node.skip = skip_block;

                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = skip_block;
                break;
            }
            case StatementType::Return: {
                ensure_block();
                IR::IREmitter{*current_block}.Epilogue();
                current_block = nullptr;
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
                break;
            }
            case StatementType::Kill: {
                ensure_block();
                IR::Block* demote_block{MergeBlock(parent, stmt)};
                IR::IREmitter{*current_block}.DemoteToHelperInvocation();
                current_block->AddBranch(demote_block);
                current_block = demote_block;

                auto& merge{syntax_list.emplace_back()};
                merge.type = IR::AbstractSyntaxNode::Type::Block;
                merge.data.block = demote_block;
                break;
            }
            case StatementType::Unreachable: {
                ensure_block();
                current_block = nullptr;
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
                break;
            }
            default:
                throw NotImplementedException("Statement type {}", stmt.type);
            }
        }
        if (current_block) {
            if (fallthrough_block) {
                current_block->AddBranch(fallthrough_block);
            } else {
                syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable;
            }
        }
    }

    // Returns a fresh IR block used as a merge/skip target after `stmt`. When
    // no forward Code statement follows, a placeholder Code statement (with an
    // empty dummy flow block that translates no instructions) is inserted so
    // the visitation has a continuation point.
    IR::Block* MergeBlock(Statement& parent, Statement& stmt) {
        Statement* merge_stmt{TryFindForwardBlock(stmt)};
        if (!merge_stmt) {
            // Create a merge block we can visit later
            merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent);
            parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt);
        }
        return block_pool.Create(inst_pool);
    }

    ObjectPool<Statement>& stmt_pool;
    ObjectPool<IR::Inst>& inst_pool;
    ObjectPool<IR::Block>& block_pool;
    Environment& env;
    IR::AbstractSyntaxList& syntax_list;

// TODO: C++20 Remove this when all compilers support constexpr std::vector
#if __cpp_lib_constexpr_vector >= 201907
    static constexpr Flow::Block dummy_flow_block;
#else
    const Flow::Block dummy_flow_block;
#endif
};
871} // Anonymous namespace
872
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) {
875 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
880 return syntax_list;
881}
882
883} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
new file mode 100644
index 000000000..88b083649
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -0,0 +1,20 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/abstract_syntax_list.h"
9#include "shader_recompiler/frontend/ir/basic_block.h"
10#include "shader_recompiler/frontend/ir/value.h"
11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h"
13
namespace Shader::Maxwell {

/// Builds an abstract syntax list from a control flow graph, structuring the
/// control flow and translating the program into IR basic blocks.
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
                                              ObjectPool<IR::Block>& block_pool, Environment& env,
                                              Flow::CFG& cfg);

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
new file mode 100644
index 000000000..d9f999e05
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Atomic operation selector shared by the ATOM and RED encodings
enum class AtomOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
    SAFEADD, // Not implemented (see AtomOpNotApplicable TODO)
};

// Operand size/type interpretation for global atomics
enum class AtomSize : u64 {
    U32,
    S32,
    U64,
    F32,
    F16x2, // Packed pair of 16-bit floats
    S64,
};
32
// Emits the global-memory integer atomic matching `op` and returns its result.
// `is_signed` selects signed semantics for MIN/MAX.
// @throws NotImplementedException for ops without an integer mapping (e.g. SAFEADD).
IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b,
                              AtomOp op, bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.GlobalAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.GlobalAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.GlobalAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.GlobalAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.GlobalAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.GlobalAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.GlobalAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.GlobalAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.GlobalAtomicExchange(offset, op_b);
    default:
        throw NotImplementedException("Integer Atom Operation {}", op);
    }
}
58
// Emits a floating-point global atomic. F32 only reaches ADD (RN rounding with
// FTZ); F16x2 supports ADD/MIN/MAX on a packed half pair (RN, don't-care FMZ).
// @throws NotImplementedException for unsupported FP operations.
IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op,
                        AtomSize size) {
    // Rounding/flush behavior attached to the emitted atomics
    static constexpr IR::FpControl f16_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::RN,
        .fmz_mode = IR::FmzMode::DontCare,
    };
    static constexpr IR::FpControl f32_control{
        .no_contraction = false,
        .rounding = IR::FpRounding::RN,
        .fmz_mode = IR::FmzMode::FTZ,
    };
    switch (op) {
    case AtomOp::ADD:
        return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control)
                                     : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control);
    case AtomOp::MIN:
        return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control);
    case AtomOp::MAX:
        return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control);
    default:
        throw NotImplementedException("FP Atom Operation {}", op);
    }
}
83
84IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) {
85 union {
86 u64 raw;
87 BitField<8, 8, IR::Reg> addr_reg;
88 BitField<28, 20, s64> addr_offset;
89 BitField<28, 20, u64> rz_addr_offset;
90 BitField<48, 1, u64> e;
91 } const mem{insn};
92
93 const IR::U64 address{[&]() -> IR::U64 {
94 if (mem.e == 0) {
95 return v.ir.UConvert(64, v.X(mem.addr_reg));
96 }
97 return v.L(mem.addr_reg);
98 }()};
99 const u64 addr_offset{[&]() -> u64 {
100 if (mem.addr_reg == IR::Reg::RZ) {
101 // When RZ is used, the address is an absolute address
102 return static_cast<u64>(mem.rz_addr_offset.Value());
103 } else {
104 return static_cast<u64>(mem.addr_offset.Value());
105 }
106 }()};
107 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
108}
109
110bool AtomOpNotApplicable(AtomSize size, AtomOp op) {
111 // TODO: SAFEADD
112 switch (size) {
113 case AtomSize::S32:
114 case AtomSize::U64:
115 return (op == AtomOp::INC || op == AtomOp::DEC);
116 case AtomSize::S64:
117 return !(op == AtomOp::MIN || op == AtomOp::MAX);
118 case AtomSize::F32:
119 return op != AtomOp::ADD;
120 case AtomSize::F16x2:
121 return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX);
122 default:
123 return false;
124 }
125}
126
// Plain global load used when the atomic operation is not applicable for the
// given size. 64-bit loads are repacked into a scalar so callers see one value.
// @throws NotImplementedException for unknown sizes.
IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F32:
    case AtomSize::F16x2:
        return ir.LoadGlobal32(offset);
    case AtomSize::U64:
    case AtomSize::S64:
        return ir.PackUint2x32(ir.LoadGlobal64(offset));
    default:
        throw NotImplementedException("Atom Size {}", size);
    }
}
141
// Writes an atomic result to the destination register: 32-bit values (and the
// packed F16x2 pair) via X, 64-bit values via L, F32 through the float write.
// Unknown sizes are silently ignored.
void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
    case AtomSize::F16x2:
        return v.X(dest_reg, IR::U32{result});
    case AtomSize::U64:
    case AtomSize::S64:
        return v.L(dest_reg, IR::U64{result});
    case AtomSize::F32:
        return v.F(dest_reg, IR::F32{result});
    default:
        break;
    }
}
157
// Reads the operand register with the width implied by `size` and dispatches
// to the matching integer or floating-point atomic emitter.
// @throws NotImplementedException for unknown sizes.
IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset,
                      AtomSize size, AtomOp op) {
    switch (size) {
    case AtomSize::U32:
    case AtomSize::S32:
        return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32);
    case AtomSize::U64:
    case AtomSize::S64:
        return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64);
    case AtomSize::F32:
        return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size);
    case AtomSize::F16x2: {
        // The packed halves are unpacked before applying the FP operation
        return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size);
    }
    default:
        throw NotImplementedException("Atom Size {}", size);
    }
}
176
177void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg,
178 const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) {
179 IR::Value result;
180 if (AtomOpNotApplicable(size, op)) {
181 result = LoadGlobal(v.ir, offset, size);
182 } else {
183 result = ApplyAtomOp(v, operand_reg, offset, size, op);
184 }
185 if (write_dest) {
186 StoreResult(v, dest_reg, result, size);
187 }
188}
189} // Anonymous namespace
190
// ATOM: global memory atomic operation, writing the operation's result to the
// destination register.
void TranslatorVisitor::ATOM(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, IR::Reg> operand_reg;
        BitField<49, 3, AtomSize> size;
        BitField<52, 4, AtomOp> op;
    } const atom{insn};
    const IR::U64 offset{AtomOffset(*this, insn)};
    GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true);
}
202
203void TranslatorVisitor::RED(u64 insn) {
204 union {
205 u64 raw;
206 BitField<0, 8, IR::Reg> operand_reg;
207 BitField<20, 3, AtomSize> size;
208 BitField<23, 3, AtomOp> op;
209 } const red{insn};
210 const IR::U64 offset{AtomOffset(*this, insn)};
211 GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
new file mode 100644
index 000000000..8b974621e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp
@@ -0,0 +1,110 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Atomic operation selector for shared memory (no SAFEADD, unlike global ATOM)
enum class AtomOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
};

// Operand size/type interpretation for shared-memory atomics
enum class AtomsSize : u64 {
    U32,
    S32,
    U64,
};
28
// Emits the shared-memory atomic matching `op` and returns its result.
// `is_signed` selects signed semantics for MIN/MAX.
// @throws NotImplementedException for unknown operation encodings.
IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op,
                        bool is_signed) {
    switch (op) {
    case AtomOp::ADD:
        return ir.SharedAtomicIAdd(offset, op_b);
    case AtomOp::MIN:
        return ir.SharedAtomicIMin(offset, op_b, is_signed);
    case AtomOp::MAX:
        return ir.SharedAtomicIMax(offset, op_b, is_signed);
    case AtomOp::INC:
        return ir.SharedAtomicInc(offset, op_b);
    case AtomOp::DEC:
        return ir.SharedAtomicDec(offset, op_b);
    case AtomOp::AND:
        return ir.SharedAtomicAnd(offset, op_b);
    case AtomOp::OR:
        return ir.SharedAtomicOr(offset, op_b);
    case AtomOp::XOR:
        return ir.SharedAtomicXor(offset, op_b);
    case AtomOp::EXCH:
        return ir.SharedAtomicExchange(offset, op_b);
    default:
        throw NotImplementedException("Integer Atoms Operation {}", op);
    }
}
54
55IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) {
56 union {
57 u64 raw;
58 BitField<8, 8, IR::Reg> offset_reg;
59 BitField<30, 22, u64> absolute_offset;
60 BitField<30, 22, s64> relative_offset;
61 } const encoding{insn};
62
63 if (encoding.offset_reg == IR::Reg::RZ) {
64 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2));
65 } else {
66 const s32 relative{static_cast<s32>(encoding.relative_offset << 2)};
67 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
68 }
69}
70
71void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) {
72 switch (size) {
73 case AtomsSize::U32:
74 case AtomsSize::S32:
75 return v.X(dest_reg, IR::U32{result});
76 case AtomsSize::U64:
77 return v.L(dest_reg, IR::U64{result});
78 default:
79 break;
80 }
81}
82} // Anonymous namespace
83
// ATOMS: shared memory atomic operation with a destination register.
void TranslatorVisitor::ATOMS(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, IR::Reg> src_reg_b;
        BitField<28, 2, AtomsSize> size;
        BitField<52, 4, AtomOp> op;
    } const atoms{insn};

    const bool size_64{atoms.size == AtomsSize::U64};
    // Only exchange is implemented for 64-bit operands
    if (size_64 && atoms.op != AtomOp::EXCH) {
        throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value());
    }
    const bool is_signed{atoms.size == AtomsSize::S32};
    const IR::U32 offset{AtomsOffset(*this, insn)};

    IR::Value result;
    if (size_64) {
        result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed);
    } else {
        result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed);
    }
    StoreResult(*this, atoms.dest_reg, result, atoms.size);
}
109
110} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
new file mode 100644
index 000000000..fb3f00d3f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp
@@ -0,0 +1,35 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12enum class BitSize : u64 {
13 B32,
14 B64,
15 B96,
16 B128,
17};
18
19void TranslatorVisitor::AL2P(u64 inst) {
20 union {
21 u64 raw;
22 BitField<0, 8, IR::Reg> result_register;
23 BitField<8, 8, IR::Reg> indexing_register;
24 BitField<20, 11, s64> offset;
25 BitField<47, 2, BitSize> bitsize;
26 } al2p{inst};
27 if (al2p.bitsize != BitSize::B32) {
28 throw NotImplementedException("BitSize {}", al2p.bitsize.Value());
29 }
30 const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))};
31 const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)};
32 X(al2p.result_register, result);
33}
34
35} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
new file mode 100644
index 000000000..86e433e41
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp
@@ -0,0 +1,96 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/modifiers.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
namespace {
// Memory barrier scope encoded in MEMBAR. Seems to be in CUDA terminology.
enum class LocalScope : u64 {
    CTA, // Cooperative thread array (workgroup)
    GL,
    SYS,
    VC,
};
} // Anonymous namespace
21
22void TranslatorVisitor::MEMBAR(u64 inst) {
23 union {
24 u64 raw;
25 BitField<8, 2, LocalScope> scope;
26 } const membar{inst};
27
28 if (membar.scope == LocalScope::CTA) {
29 ir.WorkgroupMemoryBarrier();
30 } else {
31 ir.DeviceMemoryBarrier();
32 }
33}
34
// DEPBAR: dependency barrier — presumably unnecessary at the IR level, so it
// is intentionally a no-op (TODO confirm no scoreboard modeling is needed).
void TranslatorVisitor::DEPBAR() {
    // DEPBAR is a no-op
}
38
39void TranslatorVisitor::BAR(u64 insn) {
40 enum class Mode {
41 RedPopc,
42 Scan,
43 RedAnd,
44 RedOr,
45 Sync,
46 Arrive,
47 };
48 union {
49 u64 raw;
50 BitField<43, 1, u64> is_a_imm;
51 BitField<44, 1, u64> is_b_imm;
52 BitField<8, 8, u64> imm_a;
53 BitField<20, 12, u64> imm_b;
54 BitField<42, 1, u64> neg_pred;
55 BitField<39, 3, IR::Pred> pred;
56 } const bar{insn};
57
58 const Mode mode{[insn] {
59 switch (insn & 0x0000009B00000000ULL) {
60 case 0x0000000200000000ULL:
61 return Mode::RedPopc;
62 case 0x0000000300000000ULL:
63 return Mode::Scan;
64 case 0x0000000A00000000ULL:
65 return Mode::RedAnd;
66 case 0x0000001200000000ULL:
67 return Mode::RedOr;
68 case 0x0000008000000000ULL:
69 return Mode::Sync;
70 case 0x0000008100000000ULL:
71 return Mode::Arrive;
72 }
73 throw NotImplementedException("Invalid encoding");
74 }()};
75 if (mode != Mode::Sync) {
76 throw NotImplementedException("BAR mode {}", mode);
77 }
78 if (bar.is_a_imm == 0) {
79 throw NotImplementedException("Non-immediate input A");
80 }
81 if (bar.imm_a != 0) {
82 throw NotImplementedException("Non-zero input A");
83 }
84 if (bar.is_b_imm == 0) {
85 throw NotImplementedException("Non-immediate input B");
86 }
87 if (bar.imm_b != 0) {
88 throw NotImplementedException("Non-zero input B");
89 }
90 if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) {
91 throw NotImplementedException("Non-true input predicate");
92 }
93 ir.Barrier();
94}
95
96} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
new file mode 100644
index 000000000..9d5a87e52
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp
@@ -0,0 +1,74 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Common BFE implementation: extracts a bitfield from the base register.
// `src` packs the extraction offset in bits [0:8) and the count in [8:16).
// Hardware edge cases handled explicitly: zero count yields zero,
// offset + count past bit 31 pins the top bit (signed), and offset >= 32
// replicates the sign bit (signed).
void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<40, 1, u64> brev;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const bfe{insn};

    // Unpack the dynamic offset (bits 0-7) and count (bits 8-15) operands
    const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)};
    const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)};

    // Common constants
    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 one{v.ir.Imm32(1)};
    const IR::U32 max_size{v.ir.Imm32(32)};
    // Edge case conditions
    const IR::U1 zero_count{v.ir.IEqual(count, zero)};
    const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)};
    const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)};

    IR::U32 base{v.X(bfe.offset_reg)};
    if (bfe.brev != 0) {
        // BREV: extract from the bit-reversed base value
        base = v.ir.BitReverse(base);
    }
    IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)};
    if (bfe.is_signed != 0) {
        const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)};
        const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
        const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)};
        // Replicate condition
        result = IR::U32{v.ir.Select(replicate, replicated_bit, result)};
        // Exceeding condition
        const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)};
        result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)};
    }
    // Zero count condition
    result = IR::U32{v.ir.Select(zero_count, zero, result)};

    v.X(bfe.dest_reg, result);

    if (bfe.cc != 0) {
        // Update condition codes from the extracted value
        v.SetZFlag(v.ir.IEqual(result, zero));
        v.SetSFlag(v.ir.ILessThan(result, zero, true));
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
60} // Anonymous namespace
61
// BFE with the packed offset/count operand taken from a register
void TranslatorVisitor::BFE_reg(u64 insn) {
    BFE(*this, insn, GetReg20(insn));
}

// BFE with the packed offset/count operand taken from a constant buffer
void TranslatorVisitor::BFE_cbuf(u64 insn) {
    BFE(*this, insn, GetCbuf(insn));
}

// BFE with the packed offset/count operand taken from an immediate
void TranslatorVisitor::BFE_imm(u64 insn) {
    BFE(*this, insn, GetImm20(insn));
}
73
74} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
new file mode 100644
index 000000000..1e1ec2119
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Common BFI implementation: inserts the low bits of the insert register into
// `base`, with the insertion offset in src_a bits [0:8) and count in [8:16).
// Edge cases: an offset of 32 or more leaves the base untouched, and a count
// larger than 32 is clamped to the bits remaining after the offset.
void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> insert_reg;
        BitField<47, 1, u64> cc;
    } const bfi{insn};

    const IR::U32 zero{v.ir.Imm32(0)};
    const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)};
    const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)};
    const IR::U32 max_size{v.ir.Imm32(32)};

    // Edge case conditions
    const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)};
    const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)};

    // Clamp an out-of-range count to the width remaining after the offset
    const IR::U32 remaining_size{v.ir.ISub(max_size, offset)};
    const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)};

    const IR::U32 insert{v.X(bfi.insert_reg)};
    IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)};

    // Offsets past bit 31 leave the base value unmodified
    result = IR::U32{v.ir.Select(exceed_offset, base, result)};

    v.X(bfi.dest_reg, result);
    if (bfi.cc != 0) {
        // Update condition codes from the merged value
        v.SetZFlag(v.ir.IEqual(result, zero));
        v.SetSFlag(v.ir.ILessThan(result, zero, true));
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
44} // Anonymous namespace
45
// BFI with offset/count from register 20 and base from register 39
void TranslatorVisitor::BFI_reg(u64 insn) {
    BFI(*this, insn, GetReg20(insn), GetReg39(insn));
}

// BFI with offset/count from register 39 and base from a constant buffer
void TranslatorVisitor::BFI_rc(u64 insn) {
    BFI(*this, insn, GetReg39(insn), GetCbuf(insn));
}

// BFI with offset/count from a constant buffer and base from register 39
void TranslatorVisitor::BFI_cr(u64 insn) {
    BFI(*this, insn, GetCbuf(insn), GetReg39(insn));
}

// BFI with immediate offset/count and base from register 39
void TranslatorVisitor::BFI_imm(u64 insn) {
    BFI(*this, insn, GetImm20(insn), GetReg39(insn));
}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
new file mode 100644
index 000000000..371c0e0f7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
namespace {
// Rejects indirect-branch encodings that use unimplemented features.
void Check(u64 insn) {
    union {
        u64 raw;
        BitField<5, 1, u64> cbuf_mode;
        BitField<6, 1, u64> lmt;
    } const encoding{insn};

    if (encoding.cbuf_mode != 0) {
        throw NotImplementedException("Constant buffer mode");
    }
    if (encoding.lmt != 0) {
        throw NotImplementedException("LMT");
    }
}
} // Anonymous namespace
27
// BRX: indirect branch. Translation only validates the encoding here; the
// branch targets are presumably resolved during control-flow analysis
// (TODO confirm — see indirect_branch_table_track).
void TranslatorVisitor::BRX(u64 insn) {
    Check(insn);
}

// JMX: indirect jump. Same encoding validation as BRX.
void TranslatorVisitor::JMX(u64 insn) {
    Check(insn);
}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
new file mode 100644
index 000000000..fd73f656c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h
@@ -0,0 +1,57 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
11
12namespace Shader::Maxwell {
13
// Floating-point rounding mode field as encoded in Maxwell instructions
enum class FpRounding : u64 {
    RN, // Round to nearest, ties to even
    RM, // Round towards negative infinity
    RP, // Round towards positive infinity
    RZ, // Round towards zero
};

// Flush-to-zero mode field as encoded in Maxwell instructions
enum class FmzMode : u64 {
    None,
    FTZ,
    FMZ, // Lowered to FTZ; FMZ-specific behavior is handled per instruction
    INVALIDFMZ3,
};
27
28inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
29 switch (fp_rounding) {
30 case FpRounding::RN:
31 return IR::FpRounding::RN;
32 case FpRounding::RM:
33 return IR::FpRounding::RM;
34 case FpRounding::RP:
35 return IR::FpRounding::RP;
36 case FpRounding::RZ:
37 return IR::FpRounding::RZ;
38 }
39 throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
40}
41
42inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
43 switch (fmz_mode) {
44 case FmzMode::None:
45 return IR::FmzMode::None;
46 case FmzMode::FTZ:
47 return IR::FmzMode::FTZ;
48 case FmzMode::FMZ:
49 // FMZ is manually handled in the instruction
50 return IR::FmzMode::FTZ;
51 case FmzMode::INVALIDFMZ3:
52 break;
53 }
54 throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
55}
56
57} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
new file mode 100644
index 000000000..20458d2ad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp
@@ -0,0 +1,153 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
6
7namespace Shader::Maxwell {
// Emits a predicate for the encoded integer comparison of the two operands.
// `is_signed` selects signed ordering for the relational operations.
// @throws NotImplementedException on an invalid compare op encoding.
IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                      CompareOp compare_op, bool is_signed) {
    switch (compare_op) {
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::LessThan:
        return ir.ILessThan(operand_1, operand_2, is_signed);
    case CompareOp::Equal:
        return ir.IEqual(operand_1, operand_2);
    case CompareOp::LessThanEqual:
        return ir.ILessThanEqual(operand_1, operand_2, is_signed);
    case CompareOp::GreaterThan:
        return ir.IGreaterThan(operand_1, operand_2, is_signed);
    case CompareOp::NotEqual:
        return ir.INotEqual(operand_1, operand_2);
    case CompareOp::GreaterThanEqual:
        return ir.IGreaterThanEqual(operand_1, operand_2, is_signed);
    case CompareOp::True:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid compare op {}", compare_op);
    }
}
31
// Integer comparison for extended (multi-word) arithmetic: re-materializes the
// subtraction as operand_1 + ~operand_2 + carry-in and qualifies equality with
// the Z flag from the preceding partial comparison.
// NOTE(review): for unsigned compares, flip_logic inverts the ordered result
// when the operands' sign bits differ — assumed to model unsigned ordering on
// top of signed compares; confirm against hardware behavior.
// @throws NotImplementedException on an invalid compare op encoding.
IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
                              CompareOp compare_op, bool is_signed) {
    const IR::U32 zero{ir.Imm32(0)};
    // Carry-in from the previous word of the extended operation
    const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)};
    const IR::U1 z_flag{ir.GetZFlag()};
    const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)};
    const IR::U1 flip_logic{is_signed ? ir.Imm1(false)
                                      : ir.LogicalXor(ir.ILessThan(operand_1, zero, true),
                                                      ir.ILessThan(operand_2, zero, true))};
    switch (compare_op) {
    case CompareOp::False:
        return ir.Imm1(false);
    case CompareOp::LessThan:
        return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
                                ir.ILessThan(intermediate, zero, true))};
    case CompareOp::Equal:
        // Equal only when this word is zero AND all previous words were equal
        return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag);
    case CompareOp::LessThanEqual: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true),
                                        ir.ILessThan(intermediate, zero, true))};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
    }
    case CompareOp::GreaterThan: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true),
                                        ir.IGreaterThan(intermediate, zero, true))};
        const IR::U1 not_z{ir.LogicalNot(z_flag)};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z));
    }
    case CompareOp::NotEqual:
        return ir.LogicalOr(ir.INotEqual(intermediate, zero),
                            ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag)));
    case CompareOp::GreaterThanEqual: {
        const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true),
                                        ir.IGreaterThanEqual(intermediate, zero, true))};
        return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag));
    }
    case CompareOp::True:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid compare op {}", compare_op);
    }
}
74
75IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2,
76 BooleanOp bop) {
77 switch (bop) {
78 case BooleanOp::AND:
79 return ir.LogicalAnd(predicate_1, predicate_2);
80 case BooleanOp::OR:
81 return ir.LogicalOr(predicate_1, predicate_2);
82 case BooleanOp::XOR:
83 return ir.LogicalXor(predicate_1, predicate_2);
84 default:
85 throw NotImplementedException("Invalid bop {}", bop);
86 }
87}
88
89IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) {
90 switch (op) {
91 case PredicateOp::False:
92 return ir.Imm1(false);
93 case PredicateOp::True:
94 return ir.Imm1(true);
95 case PredicateOp::Zero:
96 return ir.IEqual(result, ir.Imm32(0));
97 case PredicateOp::NonZero:
98 return ir.INotEqual(result, ir.Imm32(0));
99 default:
100 throw NotImplementedException("Invalid Predicate operation {}", op);
101 }
102}
103
104bool IsCompareOpOrdered(FPCompareOp op) {
105 switch (op) {
106 case FPCompareOp::LTU:
107 case FPCompareOp::EQU:
108 case FPCompareOp::LEU:
109 case FPCompareOp::GTU:
110 case FPCompareOp::NEU:
111 case FPCompareOp::GEU:
112 return false;
113 default:
114 return true;
115 }
116}
117
// Emits a floating-point comparison predicate for the encoded compare op,
// forwarding the ordered/unordered distinction (see IsCompareOpOrdered) to
// the IR comparison. NUM/Nan test the operands for being ordered/unordered.
// @throws NotImplementedException on an invalid compare op encoding.
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
                            const IR::F16F32F64& operand_2, FPCompareOp compare_op,
                            IR::FpControl control) {
    const bool ordered{IsCompareOpOrdered(compare_op)};
    switch (compare_op) {
    case FPCompareOp::F:
        return ir.Imm1(false);
    case FPCompareOp::LT:
    case FPCompareOp::LTU:
        return ir.FPLessThan(operand_1, operand_2, control, ordered);
    case FPCompareOp::EQ:
    case FPCompareOp::EQU:
        return ir.FPEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::LE:
    case FPCompareOp::LEU:
        return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::GT:
    case FPCompareOp::GTU:
        return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
    case FPCompareOp::NE:
    case FPCompareOp::NEU:
        return ir.FPNotEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::GE:
    case FPCompareOp::GEU:
        return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
    case FPCompareOp::NUM:
        return ir.FPOrdered(operand_1, operand_2);
    case FPCompareOp::Nan:
        return ir.FPUnordered(operand_1, operand_2);
    case FPCompareOp::T:
        return ir.Imm1(true);
    default:
        throw NotImplementedException("Invalid FP compare op {}", compare_op);
    }
}
153} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
new file mode 100644
index 000000000..214d0af3c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h
@@ -0,0 +1,28 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
namespace Shader::Maxwell {
/// Emits a predicate for the encoded integer comparison of the two operands.
[[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
                                    const IR::U32& operand_2, CompareOp compare_op, bool is_signed);

/// Integer comparison for extended (multi-word) arithmetic, incorporating the
/// carry and zero flags of the preceding partial operation.
[[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1,
                                            const IR::U32& operand_2, CompareOp compare_op,
                                            bool is_signed);

/// Combines two predicates with the encoded boolean operation (AND/OR/XOR).
[[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1,
                                      const IR::U1& predicate_2, BooleanOp bop);

/// Tests a result value against the encoded predicate operation.
[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);

/// Returns whether the floating-point compare op has ordered semantics.
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);

/// Emits a floating-point comparison predicate for the encoded compare op.
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
                                          const IR::F16F32F64& operand_2, FPCompareOp compare_op,
                                          IR::FpControl control = {});
} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
new file mode 100644
index 000000000..420f2fb94
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11
12void TranslatorVisitor::CSET(u64 insn) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 5, IR::FlowTest> cc_test;
17 BitField<39, 3, IR::Pred> bop_pred;
18 BitField<42, 1, u64> neg_bop_pred;
19 BitField<44, 1, u64> bf;
20 BitField<45, 2, BooleanOp> bop;
21 BitField<47, 1, u64> cc;
22 } const cset{insn};
23
24 const IR::U32 one_mask{ir.Imm32(-1)};
25 const IR::U32 fp_one{ir.Imm32(0x3f800000)};
26 const IR::U32 zero{ir.Imm32(0)};
27 const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one};
28 const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)};
29 const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)};
30 const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)};
31 const IR::U32 result{ir.Select(pred_result, pass_result, zero)};
32 X(cset.dest_reg, result);
33 if (cset.cc != 0) {
34 const IR::U1 is_zero{ir.IEqual(result, zero)};
35 SetZFlag(is_zero);
36 if (cset.bf != 0) {
37 ResetSFlag();
38 } else {
39 SetSFlag(ir.LogicalNot(is_zero));
40 }
41 ResetOFlag();
42 ResetCFlag();
43 }
44}
45
46void TranslatorVisitor::CSETP(u64 insn) {
47 union {
48 u64 raw;
49 BitField<0, 3, IR::Pred> dest_pred_b;
50 BitField<3, 3, IR::Pred> dest_pred_a;
51 BitField<8, 5, IR::FlowTest> cc_test;
52 BitField<39, 3, IR::Pred> bop_pred;
53 BitField<42, 1, u64> neg_bop_pred;
54 BitField<45, 2, BooleanOp> bop;
55 } const csetp{insn};
56
57 const BooleanOp bop{csetp.bop};
58 const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)};
59 const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)};
60 const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)};
61 const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)};
62 ir.SetPred(csetp.dest_pred_a, result_a);
63 ir.SetPred(csetp.dest_pred_b, result_b);
64}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
new file mode 100644
index 000000000..5a1b3a8fc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<45, 1, u64> neg_b;
20 BitField<46, 1, u64> abs_a;
21 BitField<47, 1, u64> cc;
22 BitField<48, 1, u64> neg_a;
23 BitField<49, 1, u64> abs_b;
24 } const dadd{insn};
25 if (dadd.cc != 0) {
26 throw NotImplementedException("DADD CC");
27 }
28
29 const IR::F64 src_a{v.D(dadd.src_a_reg)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)};
32
33 const IR::FpControl control{
34 .no_contraction = true,
35 .rounding = CastFpRounding(dadd.fp_rounding),
36 .fmz_mode = IR::FmzMode::None,
37 };
38
39 v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DADD_reg(u64 insn) {
44 DADD(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DADD_cbuf(u64 insn) {
48 DADD(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DADD_imm(u64 insn) {
52 DADD(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
new file mode 100644
index 000000000..1173192e4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp
@@ -0,0 +1,72 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 } const dset{insn};
28
29 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)};
30 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)};
31
32 IR::U1 pred{v.ir.GetPred(dset.pred)};
33 if (dset.neg_pred != 0) {
34 pred = v.ir.LogicalNot(pred);
35 }
36 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)};
37 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)};
38
39 const IR::U32 one_mask{v.ir.Imm32(-1)};
40 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
41 const IR::U32 zero{v.ir.Imm32(0)};
42 const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one};
43 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
44
45 v.X(dset.dest_reg, result);
46 if (dset.cc != 0) {
47 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
48 v.SetZFlag(is_zero);
49 if (dset.bf != 0) {
50 v.ResetSFlag();
51 } else {
52 v.SetSFlag(v.ir.LogicalNot(is_zero));
53 }
54 v.ResetCFlag();
55 v.ResetOFlag();
56 }
57}
58} // Anonymous namespace
59
60void TranslatorVisitor::DSET_reg(u64 insn) {
61 DSET(*this, insn, GetDoubleReg20(insn));
62}
63
64void TranslatorVisitor::DSET_cbuf(u64 insn) {
65 DSET(*this, insn, GetDoubleCbuf(insn));
66}
67
68void TranslatorVisitor::DSET_imm(u64 insn) {
69 DSET(*this, insn, GetDoubleImm20(insn));
70}
71
72} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
new file mode 100644
index 000000000..f66097014
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<50, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg_b;
21 BitField<49, 1, u64> neg_c;
22 } const dfma{insn};
23
24 if (dfma.cc != 0) {
25 throw NotImplementedException("DFMA CC");
26 }
27
28 const IR::F64 src_a{v.D(dfma.src_a_reg)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)};
30 const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)};
31
32 const IR::FpControl control{
33 .no_contraction = true,
34 .rounding = CastFpRounding(dfma.fp_rounding),
35 .fmz_mode = IR::FmzMode::None,
36 };
37
38 v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control));
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DFMA_reg(u64 insn) {
43 DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn));
44}
45
46void TranslatorVisitor::DFMA_cr(u64 insn) {
47 DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn));
48}
49
50void TranslatorVisitor::DFMA_rc(u64 insn) {
51 DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn));
52}
53
54void TranslatorVisitor::DFMA_imm(u64 insn) {
55 DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
new file mode 100644
index 000000000..6b551847c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_a_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<45, 1, u64> negate_b;
19 BitField<46, 1, u64> abs_a;
20 BitField<47, 1, u64> cc;
21 BitField<48, 1, u64> negate_a;
22 BitField<49, 1, u64> abs_b;
23 } const dmnmx{insn};
24
25 if (dmnmx.cc != 0) {
26 throw NotImplementedException("DMNMX CC");
27 }
28
29 const IR::U1 pred{v.ir.GetPred(dmnmx.pred)};
30 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)};
31 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)};
32
33 IR::F64 max{v.ir.FPMax(op_a, op_b)};
34 IR::F64 min{v.ir.FPMin(op_a, op_b)};
35
36 if (dmnmx.neg_pred != 0) {
37 std::swap(min, max);
38 }
39 v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)});
40}
41} // Anonymous namespace
42
43void TranslatorVisitor::DMNMX_reg(u64 insn) {
44 DMNMX(*this, insn, GetDoubleReg20(insn));
45}
46
47void TranslatorVisitor::DMNMX_cbuf(u64 insn) {
48 DMNMX(*this, insn, GetDoubleCbuf(insn));
49}
50
51void TranslatorVisitor::DMNMX_imm(u64 insn) {
52 DMNMX(*this, insn, GetDoubleImm20(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
new file mode 100644
index 000000000..c0159fb65
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp
@@ -0,0 +1,50 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12
13void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a_reg;
18 BitField<39, 2, FpRounding> fp_rounding;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> neg;
21 } const dmul{insn};
22
23 if (dmul.cc != 0) {
24 throw NotImplementedException("DMUL CC");
25 }
26
27 const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)};
28 const IR::FpControl control{
29 .no_contraction = true,
30 .rounding = CastFpRounding(dmul.fp_rounding),
31 .fmz_mode = IR::FmzMode::None,
32 };
33
34 v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control));
35}
36} // Anonymous namespace
37
38void TranslatorVisitor::DMUL_reg(u64 insn) {
39 DMUL(*this, insn, GetDoubleReg20(insn));
40}
41
42void TranslatorVisitor::DMUL_cbuf(u64 insn) {
43 DMUL(*this, insn, GetDoubleCbuf(insn));
44}
45
46void TranslatorVisitor::DMUL_imm(u64 insn) {
47 DMUL(*this, insn, GetDoubleImm20(insn));
48}
49
50} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
new file mode 100644
index 000000000..b8e74ee44
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 3, IR::Pred> dest_pred_b;
16 BitField<3, 3, IR::Pred> dest_pred_a;
17 BitField<6, 1, u64> negate_b;
18 BitField<7, 1, u64> abs_a;
19 BitField<8, 8, IR::Reg> src_a_reg;
20 BitField<39, 3, IR::Pred> bop_pred;
21 BitField<42, 1, u64> neg_bop_pred;
22 BitField<43, 1, u64> negate_a;
23 BitField<44, 1, u64> abs_b;
24 BitField<45, 2, BooleanOp> bop;
25 BitField<48, 4, FPCompareOp> compare_op;
26 } const dsetp{insn};
27
28 const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)};
29 const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)};
30
31 const BooleanOp bop{dsetp.bop};
32 const FPCompareOp compare_op{dsetp.compare_op};
33 const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)};
34 const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)};
35 const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
36 const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
37 v.ir.SetPred(dsetp.dest_pred_a, result_a);
38 v.ir.SetPred(dsetp.dest_pred_b, result_b);
39}
40} // Anonymous namespace
41
42void TranslatorVisitor::DSETP_reg(u64 insn) {
43 DSETP(*this, insn, GetDoubleReg20(insn));
44}
45
46void TranslatorVisitor::DSETP_cbuf(u64 insn) {
47 DSETP(*this, insn, GetDoubleCbuf(insn));
48}
49
50void TranslatorVisitor::DSETP_imm(u64 insn) {
51 DSETP(*this, insn, GetDoubleImm20(insn));
52}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
new file mode 100644
index 000000000..c2443c886
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp
@@ -0,0 +1,43 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void ExitFragment(TranslatorVisitor& v) {
12 const ProgramHeader sph{v.env.SPH()};
13 IR::Reg src_reg{IR::Reg::R0};
14 for (u32 render_target = 0; render_target < 8; ++render_target) {
15 const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)};
16 for (u32 component = 0; component < 4; ++component) {
17 if (!mask[component]) {
18 continue;
19 }
20 v.ir.SetFragColor(render_target, component, v.F(src_reg));
21 ++src_reg;
22 }
23 }
24 if (sph.ps.omap.sample_mask != 0) {
25 v.ir.SetSampleMask(v.X(src_reg));
26 }
27 if (sph.ps.omap.depth != 0) {
28 v.ir.SetFragDepth(v.F(src_reg + 1));
29 }
30}
31} // Anonymous namespace
32
33void TranslatorVisitor::EXIT() {
34 switch (env.ShaderStage()) {
35 case Stage::Fragment:
36 ExitFragment(*this);
37 break;
38 default:
39 break;
40 }
41}
42
43} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
new file mode 100644
index 000000000..f0cb25d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp
@@ -0,0 +1,47 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 BitField<41, 1, u64> shift;
17 BitField<47, 1, u64> cc;
18 BitField<48, 1, u64> is_signed;
19 } const flo{insn};
20
21 if (flo.cc != 0) {
22 throw NotImplementedException("CC");
23 }
24 if (flo.tilde != 0) {
25 src = v.ir.BitwiseNot(src);
26 }
27 IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)};
28 if (flo.shift != 0) {
29 const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))};
30 result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))};
31 }
32 v.X(flo.dest_reg, result);
33}
34} // Anonymous namespace
35
36void TranslatorVisitor::FLO_reg(u64 insn) {
37 FLO(*this, insn, GetReg20(insn));
38}
39
40void TranslatorVisitor::FLO_cbuf(u64 insn) {
41 FLO(*this, insn, GetCbuf(insn));
42}
43
44void TranslatorVisitor::FLO_imm(u64 insn) {
45 FLO(*this, insn, GetImm20(insn));
46}
47} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
new file mode 100644
index 000000000..b8c89810c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
13 const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<8, 8, IR::Reg> src_a;
18 } const fadd{insn};
19
20 if (cc) {
21 throw NotImplementedException("FADD CC");
22 }
23 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
24 const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
25 IR::FpControl control{
26 .no_contraction = true,
27 .rounding = CastFpRounding(fp_rounding),
28 .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
29 };
30 IR::F32 value{v.ir.FPAdd(op_a, op_b, control)};
31 if (sat) {
32 value = v.ir.FPSaturate(value);
33 }
34 v.F(fadd.dest_reg, value);
35}
36
37void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
38 union {
39 u64 raw;
40 BitField<39, 2, FpRounding> fp_rounding;
41 BitField<44, 1, u64> ftz;
42 BitField<45, 1, u64> neg_b;
43 BitField<46, 1, u64> abs_a;
44 BitField<47, 1, u64> cc;
45 BitField<48, 1, u64> neg_a;
46 BitField<49, 1, u64> abs_b;
47 BitField<50, 1, u64> sat;
48 } const fadd{insn};
49
50 FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
51 fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
52}
53} // Anonymous namespace
54
55void TranslatorVisitor::FADD_reg(u64 insn) {
56 FADD(*this, insn, GetFloatReg20(insn));
57}
58
59void TranslatorVisitor::FADD_cbuf(u64 insn) {
60 FADD(*this, insn, GetFloatCbuf(insn));
61}
62
63void TranslatorVisitor::FADD_imm(u64 insn) {
64 FADD(*this, insn, GetFloatImm20(insn));
65}
66
67void TranslatorVisitor::FADD32I(u64 insn) {
68 union {
69 u64 raw;
70 BitField<55, 1, u64> ftz;
71 BitField<56, 1, u64> neg_a;
72 BitField<54, 1, u64> abs_a;
73 BitField<52, 1, u64> cc;
74 BitField<53, 1, u64> neg_b;
75 BitField<57, 1, u64> abs_b;
76 } const fadd32i{insn};
77
78 FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn),
79 fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
new file mode 100644
index 000000000..7127ebf54
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp
@@ -0,0 +1,55 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<47, 1, u64> ftz;
18 BitField<48, 4, FPCompareOp> compare_op;
19 } const fcmp{insn};
20
21 const IR::F32 zero{v.ir.Imm32(0.0f)};
22 const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)};
23 const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
24 const IR::U32 src_reg{v.X(fcmp.src_reg)};
25 const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
26
27 v.X(fcmp.dest_reg, result);
28}
29} // Anonymous namespace
30
31void TranslatorVisitor::FCMP_reg(u64 insn) {
32 FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
33}
34
35void TranslatorVisitor::FCMP_rc(u64 insn) {
36 FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
37}
38
39void TranslatorVisitor::FCMP_cr(u64 insn) {
40 FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
41}
42
43void TranslatorVisitor::FCMP_imm(u64 insn) {
44 union {
45 u64 raw;
46 BitField<20, 19, u64> value;
47 BitField<56, 1, u64> is_negative;
48 } const fcmp{insn};
49 const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0};
50 const u32 value{static_cast<u32>(fcmp.value) << 12};
51
52 FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn));
53}
54
55} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
new file mode 100644
index 000000000..eece4f28f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
@@ -0,0 +1,78 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
13 union {
14 u64 insn;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_a_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 BitField<43, 1, u64> negate_a;
20 BitField<44, 1, u64> abs_b;
21 BitField<45, 2, BooleanOp> bop;
22 BitField<47, 1, u64> cc;
23 BitField<48, 4, FPCompareOp> compare_op;
24 BitField<52, 1, u64> bf;
25 BitField<53, 1, u64> negate_b;
26 BitField<54, 1, u64> abs_a;
27 BitField<55, 1, u64> ftz;
28 } const fset{insn};
29
30 const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
31 const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0);
32 const IR::FpControl control{
33 .no_contraction = false,
34 .rounding = IR::FpRounding::DontCare,
35 .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
36 };
37
38 IR::U1 pred{v.ir.GetPred(fset.pred)};
39 if (fset.neg_pred != 0) {
40 pred = v.ir.LogicalNot(pred);
41 }
42 const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
43 const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};
44
45 const IR::U32 one_mask{v.ir.Imm32(-1)};
46 const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47 const IR::U32 zero{v.ir.Imm32(0)};
48 const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};
49 const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
50
51 v.X(fset.dest_reg, result);
52 if (fset.cc != 0) {
53 const IR::U1 is_zero{v.ir.IEqual(result, zero)};
54 v.SetZFlag(is_zero);
55 if (fset.bf != 0) {
56 v.ResetSFlag();
57 } else {
58 v.SetSFlag(v.ir.LogicalNot(is_zero));
59 }
60 v.ResetCFlag();
61 v.ResetOFlag();
62 }
63}
64} // Anonymous namespace
65
66void TranslatorVisitor::FSET_reg(u64 insn) {
67 FSET(*this, insn, GetFloatReg20(insn));
68}
69
70void TranslatorVisitor::FSET_cbuf(u64 insn) {
71 FSET(*this, insn, GetFloatCbuf(insn));
72}
73
74void TranslatorVisitor::FSET_imm(u64 insn) {
75 FSET(*this, insn, GetFloatImm20(insn));
76}
77
78} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
new file mode 100644
index 000000000..02ab023c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
@@ -0,0 +1,214 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
7
8namespace Shader::Maxwell {
9namespace {
10enum class FloatFormat : u64 {
11 F16 = 1,
12 F32 = 2,
13 F64 = 3,
14};
15
16enum class RoundingOp : u64 {
17 None = 0,
18 Pass = 3,
19 Round = 8,
20 Floor = 9,
21 Ceil = 10,
22 Trunc = 11,
23};
24
25[[nodiscard]] u32 WidthSize(FloatFormat width) {
26 switch (width) {
27 case FloatFormat::F16:
28 return 16;
29 case FloatFormat::F32:
30 return 32;
31 case FloatFormat::F64:
32 return 64;
33 default:
34 throw NotImplementedException("Invalid width {}", width);
35 }
36}
37
38void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
39 union {
40 u64 insn;
41 BitField<0, 8, IR::Reg> dest_reg;
42 BitField<44, 1, u64> ftz;
43 BitField<45, 1, u64> neg;
44 BitField<47, 1, u64> cc;
45 BitField<50, 1, u64> sat;
46 BitField<39, 4, u64> rounding_op;
47 BitField<39, 2, FpRounding> rounding;
48 BitField<10, 2, FloatFormat> src_size;
49 BitField<8, 2, FloatFormat> dst_size;
50
51 [[nodiscard]] RoundingOp RoundingOperation() const {
52 constexpr u64 rounding_mask = 0x0B;
53 return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask);
54 }
55 } const f2f{insn};
56
57 if (f2f.cc != 0) {
58 throw NotImplementedException("F2F CC");
59 }
60
61 IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
62
63 const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
64 IR::FpControl fp_control{
65 .no_contraction = false,
66 .rounding = IR::FpRounding::DontCare,
67 .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None),
68 };
69 if (f2f.src_size != f2f.dst_size) {
70 fp_control.rounding = CastFpRounding(f2f.rounding);
71 input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control);
72 } else {
73 switch (f2f.RoundingOperation()) {
74 case RoundingOp::None:
75 case RoundingOp::Pass:
76 // Make sure NANs are handled properly
77 switch (f2f.src_size) {
78 case FloatFormat::F16:
79 input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
80 break;
81 case FloatFormat::F32:
82 input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
83 break;
84 case FloatFormat::F64:
85 input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
86 break;
87 }
88 break;
89 case RoundingOp::Round:
90 input = v.ir.FPRoundEven(input, fp_control);
91 break;
92 case RoundingOp::Floor:
93 input = v.ir.FPFloor(input, fp_control);
94 break;
95 case RoundingOp::Ceil:
96 input = v.ir.FPCeil(input, fp_control);
97 break;
98 case RoundingOp::Trunc:
99 input = v.ir.FPTrunc(input, fp_control);
100 break;
101 default:
102 throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value());
103 }
104 }
105 if (f2f.sat != 0 && !any_fp64) {
106 input = v.ir.FPSaturate(input);
107 }
108
109 switch (f2f.dst_size) {
110 case FloatFormat::F16: {
111 const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
112 v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
113 break;
114 }
115 case FloatFormat::F32:
116 v.F(f2f.dest_reg, input);
117 break;
118 case FloatFormat::F64:
119 v.D(f2f.dest_reg, input);
120 break;
121 default:
122 throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value());
123 }
124}
125} // Anonymous namespace
126
127void TranslatorVisitor::F2F_reg(u64 insn) {
128 union {
129 u64 insn;
130 BitField<49, 1, u64> abs;
131 BitField<10, 2, FloatFormat> src_size;
132 BitField<41, 1, u64> selector;
133 } const f2f{insn};
134
135 IR::F16F32F64 src_a;
136 switch (f2f.src_size) {
137 case FloatFormat::F16: {
138 auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
139 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
140 break;
141 }
142 case FloatFormat::F32:
143 src_a = GetFloatReg20(insn);
144 break;
145 case FloatFormat::F64:
146 src_a = GetDoubleReg20(insn);
147 break;
148 default:
149 throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
150 }
151 F2F(*this, insn, src_a, f2f.abs != 0);
152}
153
154void TranslatorVisitor::F2F_cbuf(u64 insn) {
155 union {
156 u64 insn;
157 BitField<49, 1, u64> abs;
158 BitField<10, 2, FloatFormat> src_size;
159 BitField<41, 1, u64> selector;
160 } const f2f{insn};
161
162 IR::F16F32F64 src_a;
163 switch (f2f.src_size) {
164 case FloatFormat::F16: {
165 auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
166 src_a = f2f.selector != 0 ? rhs_a : lhs_a;
167 break;
168 }
169 case FloatFormat::F32:
170 src_a = GetFloatCbuf(insn);
171 break;
172 case FloatFormat::F64:
173 src_a = GetDoubleCbuf(insn);
174 break;
175 default:
176 throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
177 }
178 F2F(*this, insn, src_a, f2f.abs != 0);
179}
180
181void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
182 union {
183 u64 insn;
184 BitField<49, 1, u64> abs;
185 BitField<10, 2, FloatFormat> src_size;
186 BitField<41, 1, u64> selector;
187 BitField<20, 19, u64> imm;
188 BitField<56, 1, u64> imm_neg;
189 } const f2f{insn};
190
191 IR::F16F32F64 src_a;
192 switch (f2f.src_size) {
193 case FloatFormat::F16: {
194 const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
195 const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
196 src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
197 if (f2f.imm_neg != 0) {
198 throw NotImplementedException("Neg bit on F16");
199 }
200 break;
201 }
202 case FloatFormat::F32:
203 src_a = GetFloatImm20(insn);
204 break;
205 case FloatFormat::F64:
206 src_a = GetDoubleImm20(insn);
207 break;
208 default:
209 throw NotImplementedException("Invalid dest format {}", f2f.src_size.Value());
210 }
211 F2F(*this, insn, src_a, f2f.abs != 0);
212}
213
214} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
new file mode 100644
index 000000000..92b1ce015
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
@@ -0,0 +1,253 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <limits>
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// Integer destination width selector (instruction bits 8-9).
enum class DestFormat : u64 {
    Invalid,
    I16,
    I32,
    I64,
};
// Floating-point source width selector (instruction bits 10-11).
enum class SrcFormat : u64 {
    Invalid,
    F16,
    F32,
    F64,
};
// Rounding applied to the source before conversion (instruction bits 39-40).
enum class Rounding : u64 {
    Round,
    Floor,
    Ceil,
    Trunc,
};
32
// Decoded F2I (float-to-integer conversion) instruction word.
union F2I {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;       // destination register
    BitField<8, 2, DestFormat> dest_format; // integer destination width
    BitField<10, 2, SrcFormat> src_format;  // floating-point source width
    BitField<12, 1, u64> is_signed;         // signed vs. unsigned destination
    BitField<39, 2, Rounding> rounding;     // rounding mode
    BitField<41, 1, u64> half;              // which fp16 half of the source to use
    BitField<44, 1, u64> ftz;               // flush denormals to zero
    BitField<45, 1, u64> abs;               // take absolute value of the source
    BitField<47, 1, u64> cc;                // condition-code write (unimplemented)
    BitField<49, 1, u64> neg;               // negate the source
};
46
47size_t BitSize(DestFormat dest_format) {
48 switch (dest_format) {
49 case DestFormat::I16:
50 return 16;
51 case DestFormat::I32:
52 return 32;
53 case DestFormat::I64:
54 return 64;
55 default:
56 throw NotImplementedException("Invalid destination format {}", dest_format);
57 }
58}
59
60std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) {
61 if (is_signed) {
62 switch (format) {
63 case DestFormat::I16:
64 return {static_cast<f64>(std::numeric_limits<s16>::max()),
65 static_cast<f64>(std::numeric_limits<s16>::min())};
66 case DestFormat::I32:
67 return {static_cast<f64>(std::numeric_limits<s32>::max()),
68 static_cast<f64>(std::numeric_limits<s32>::min())};
69 case DestFormat::I64:
70 return {static_cast<f64>(std::numeric_limits<s64>::max()),
71 static_cast<f64>(std::numeric_limits<s64>::min())};
72 default:
73 break;
74 }
75 } else {
76 switch (format) {
77 case DestFormat::I16:
78 return {static_cast<f64>(std::numeric_limits<u16>::max()),
79 static_cast<f64>(std::numeric_limits<u16>::min())};
80 case DestFormat::I32:
81 return {static_cast<f64>(std::numeric_limits<u32>::max()),
82 static_cast<f64>(std::numeric_limits<u32>::min())};
83 case DestFormat::I64:
84 return {static_cast<f64>(std::numeric_limits<u64>::max()),
85 static_cast<f64>(std::numeric_limits<u64>::min())};
86 default:
87 break;
88 }
89 }
90 throw NotImplementedException("Invalid destination format {}", format);
91}
92
// Reads the upper 32 bits of an F64 constant buffer operand and widens it to
// a double whose low word is zero.
IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, s64> offset; // cbuf offset in 32-bit words
        BitField<34, 5, u64> binding; // constant buffer slot
    } const cbuf{insn};
    // Only bindings 0-17 are supported.
    if (cbuf.binding >= 18) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
    }
    if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4);
    }
    // F64 operands must start on an even word (8-byte alignment).
    if (cbuf.offset % 2 != 0) {
        throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4);
    }
    const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))};
    // +4 selects the second (high) word of the 8-byte operand.
    const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)};
    const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)};
    // Only the high word is read; the low mantissa word is filled with zero.
    // NOTE(review): presumably sufficient precision for F2I's purposes —
    // confirm against hardware behavior.
    const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)};
    return v.ir.PackDouble2x32(vector);
}
114
115void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
116 // F2I is used to convert from a floating point value to an integer
117 const F2I f2i{insn};
118
119 const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 &&
120 f2i.dest_format != DestFormat::I64};
121 IR::FmzMode fmz_mode{IR::FmzMode::DontCare};
122 if (denorm_cares) {
123 fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None;
124 }
125 const IR::FpControl fp_control{
126 .no_contraction = true,
127 .rounding = IR::FpRounding::DontCare,
128 .fmz_mode = fmz_mode,
129 };
130 const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
131 const IR::F16F32F64 rounded_value{[&] {
132 switch (f2i.rounding) {
133 case Rounding::Round:
134 return v.ir.FPRoundEven(op_a, fp_control);
135 case Rounding::Floor:
136 return v.ir.FPFloor(op_a, fp_control);
137 case Rounding::Ceil:
138 return v.ir.FPCeil(op_a, fp_control);
139 case Rounding::Trunc:
140 return v.ir.FPTrunc(op_a, fp_control);
141 default:
142 throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
143 }
144 }()};
145 const bool is_signed{f2i.is_signed != 0};
146 const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed);
147
148 IR::F16F32F64 intermediate;
149 switch (f2i.src_format) {
150 case SrcFormat::F16: {
151 const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))};
152 const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))};
153 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
154 break;
155 }
156 case SrcFormat::F32: {
157 const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))};
158 const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))};
159 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
160 break;
161 }
162 case SrcFormat::F64: {
163 const IR::F64 max_val{v.ir.Imm64(max_bound)};
164 const IR::F64 min_val{v.ir.Imm64(min_bound)};
165 intermediate = v.ir.FPClamp(rounded_value, min_val, max_val);
166 break;
167 }
168 default:
169 throw NotImplementedException("Invalid destination format {}", f2i.dest_format.Value());
170 }
171
172 const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))};
173 IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)};
174
175 bool handled_special_case = false;
176 const bool special_nan_cases =
177 (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64);
178 if (special_nan_cases) {
179 if (f2i.dest_format == DestFormat::I32) {
180 handled_special_case = true;
181 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)};
182 } else if (f2i.dest_format == DestFormat::I64) {
183 handled_special_case = true;
184 result = IR::U64{
185 v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)};
186 }
187 }
188 if (!handled_special_case && is_signed) {
189 if (bitsize != 64) {
190 result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)};
191 } else {
192 result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)};
193 }
194 }
195
196 if (bitsize == 64) {
197 v.L(f2i.dest_reg, result);
198 } else {
199 v.X(f2i.dest_reg, result);
200 }
201
202 if (f2i.cc != 0) {
203 throw NotImplementedException("F2I CC");
204 }
205}
206} // Anonymous namespace
207
208void TranslatorVisitor::F2I_reg(u64 insn) {
209 union {
210 u64 raw;
211 F2I base;
212 BitField<20, 8, IR::Reg> src_reg;
213 } const f2i{insn};
214
215 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
216 switch (f2i.base.src_format) {
217 case SrcFormat::F16:
218 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
219 case SrcFormat::F32:
220 return F(f2i.src_reg);
221 case SrcFormat::F64:
222 return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
223 default:
224 throw NotImplementedException("Invalid F2I source format {}",
225 f2i.base.src_format.Value());
226 }
227 }()};
228 TranslateF2I(*this, insn, op_a);
229}
230
231void TranslatorVisitor::F2I_cbuf(u64 insn) {
232 const F2I f2i{insn};
233 const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
234 switch (f2i.src_format) {
235 case SrcFormat::F16:
236 return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)};
237 case SrcFormat::F32:
238 return GetFloatCbuf(insn);
239 case SrcFormat::F64: {
240 return UnpackCbuf(*this, insn);
241 }
242 default:
243 throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value());
244 }
245 }()};
246 TranslateF2I(*this, insn, op_a);
247}
248
// Immediate encoding of F2I is unimplemented.
void TranslatorVisitor::F2I_imm(u64) {
    throw NotImplementedException("{}", Opcode::F2I_imm);
}
252
253} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fa2a7807b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
@@ -0,0 +1,94 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
// Shared FFMA implementation: dest = src_a * src_b + src_c with the given
// abs/neg, saturation, FMZ and rounding modifiers.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const ffma{insn};

    // Condition-code output is not implemented.
    if (cc) {
        throw NotImplementedException("FFMA CC");
    }
    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)};
    if (fmz_mode == FmzMode::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
        const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
        const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
        // When either multiplicand is zero the product contributes nothing,
        // so the result collapses to the addend.
        value = IR::F32{v.ir.Select(any_zero, op_c, value)};
    }
    if (sat) {
        value = v.ir.FPSaturate(value);
    }
    v.F(ffma.dest_reg, value);
}
46
47void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
48 union {
49 u64 raw;
50 BitField<47, 1, u64> cc;
51 BitField<48, 1, u64> neg_b;
52 BitField<49, 1, u64> neg_c;
53 BitField<50, 1, u64> sat;
54 BitField<51, 2, FpRounding> fp_rounding;
55 BitField<53, 2, FmzMode> fmz_mode;
56 } const ffma{insn};
57
58 FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0,
59 ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding);
60}
61} // Anonymous namespace
62
63void TranslatorVisitor::FFMA_reg(u64 insn) {
64 FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn));
65}
66
67void TranslatorVisitor::FFMA_rc(u64 insn) {
68 FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn));
69}
70
71void TranslatorVisitor::FFMA_cr(u64 insn) {
72 FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn));
73}
74
75void TranslatorVisitor::FFMA_imm(u64 insn) {
76 FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn));
77}
78
// FFMA with a 32-bit immediate multiplier. Rounding is fixed to
// round-to-nearest and this encoding has no neg_b modifier.
void TranslatorVisitor::FFMA32I(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register
        BitField<52, 1, u64> cc;
        BitField<53, 2, FmzMode> fmz_mode;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<57, 1, u64> neg_c;
    } const ffma32i{insn};

    FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false,
         ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN);
}
93
94} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
new file mode 100644
index 000000000..c0d6ee5af
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Shared FMNMX implementation: writes min(op_a, op_b) when the predicate is
// true and max(op_a, op_b) otherwise.
void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn; // aliases the raw instruction word (shadows the parameter)
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<44, 1, u64> ftz;
        BitField<45, 1, u64> negate_b;
        BitField<46, 1, u64> abs_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> negate_a;
        BitField<49, 1, u64> abs_b;
    } const fmnmx{insn};

    // Condition-code output is not implemented.
    if (fmnmx.cc) {
        throw NotImplementedException("FMNMX CC");
    }

    const IR::U1 pred{v.ir.GetPred(fmnmx.pred)};
    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)};
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)};

    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    // Both results are computed; the predicate selects one at runtime.
    IR::F32 max{v.ir.FPMax(op_a, op_b, control)};
    IR::F32 min{v.ir.FPMin(op_a, op_b, control)};

    // A negated predicate is realized by swapping the two candidate results
    // instead of inverting the predicate value.
    if (fmnmx.neg_pred != 0) {
        std::swap(min, max);
    }

    v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)});
}
48} // Anonymous namespace
49
50void TranslatorVisitor::FMNMX_reg(u64 insn) {
51 FMNMX(*this, insn, GetFloatReg20(insn));
52}
53
54void TranslatorVisitor::FMNMX_cbuf(u64 insn) {
55 FMNMX(*this, insn, GetFloatCbuf(insn));
56}
57
58void TranslatorVisitor::FMNMX_imm(u64 insn) {
59 FMNMX(*this, insn, GetFloatImm20(insn));
60}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
new file mode 100644
index 000000000..2f8605619
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
// MUFU function selector (instruction bits 20-23).
enum class Operation : u64 {
    Cos = 0,
    Sin = 1,
    Ex2 = 2,    // Base 2 exponent
    Lg2 = 3,    // Base 2 logarithm
    Rcp = 4,    // Reciprocal
    Rsq = 5,    // Reciprocal square root
    Rcp64H = 6, // 64-bit reciprocal
    Rsq64H = 7, // 64-bit reciprocal square root
    Sqrt = 8,
};
24} // Anonymous namespace
25
26void TranslatorVisitor::MUFU(u64 insn) {
27 // MUFU is used to implement a bunch of special functions. See Operation.
28 union {
29 u64 raw;
30 BitField<0, 8, IR::Reg> dest_reg;
31 BitField<8, 8, IR::Reg> src_reg;
32 BitField<20, 4, Operation> operation;
33 BitField<46, 1, u64> abs;
34 BitField<48, 1, u64> neg;
35 BitField<50, 1, u64> sat;
36 } const mufu{insn};
37
38 const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
39 IR::F32 value{[&]() -> IR::F32 {
40 switch (mufu.operation) {
41 case Operation::Cos:
42 return ir.FPCos(op_a);
43 case Operation::Sin:
44 return ir.FPSin(op_a);
45 case Operation::Ex2:
46 return ir.FPExp2(op_a);
47 case Operation::Lg2:
48 return ir.FPLog2(op_a);
49 case Operation::Rcp:
50 return ir.FPRecip(op_a);
51 case Operation::Rsq:
52 return ir.FPRecipSqrt(op_a);
53 case Operation::Rcp64H:
54 throw NotImplementedException("MUFU.RCP64H");
55 case Operation::Rsq64H:
56 throw NotImplementedException("MUFU.RSQ64H");
57 case Operation::Sqrt:
58 return ir.FPSqrt(op_a);
59 default:
60 throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value());
61 }
62 }()};
63
64 if (mufu.sat) {
65 value = ir.FPSaturate(value);
66 }
67
68 F(mufu.dest_reg, value);
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
new file mode 100644
index 000000000..06226b7ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp
@@ -0,0 +1,127 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/ir/ir_emitter.h"
8#include "shader_recompiler/frontend/ir/modifiers.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// Pre-multiply scale applied to FMUL's first operand (instruction bits 41-43).
enum class Scale : u64 {
    None,
    D2, // divide by 2
    D4, // divide by 4
    D8, // divide by 8
    M8, // multiply by 8
    M4, // multiply by 4
    M2, // multiply by 2
    INVALIDSCALE37,
};
24
25float ScaleFactor(Scale scale) {
26 switch (scale) {
27 case Scale::None:
28 return 1.0f;
29 case Scale::D2:
30 return 1.0f / 2.0f;
31 case Scale::D4:
32 return 1.0f / 4.0f;
33 case Scale::D8:
34 return 1.0f / 8.0f;
35 case Scale::M8:
36 return 8.0f;
37 case Scale::M4:
38 return 4.0f;
39 case Scale::M2:
40 return 2.0f;
41 case Scale::INVALIDSCALE37:
42 break;
43 }
44 throw NotImplementedException("Invalid FMUL scale {}", scale);
45}
46
// Shared FMUL implementation: dest = (src_a * scale) * src_b with the given
// rounding, FMZ and saturation modifiers.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const fmul{insn};

    // Condition-code output is not implemented.
    if (cc) {
        throw NotImplementedException("FMUL CC");
    }
    IR::F32 op_a{v.F(fmul.src_a)};
    if (scale != Scale::None) {
        // Scaling is only handled for the FTZ + round-to-nearest configuration.
        if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
            throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
        }
        op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
    }
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)};
    if (fmz_mode == FmzMode::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)};
        const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)};
        const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)};
        value = IR::F32{v.ir.Select(any_zero, zero, value)};
    }
    if (sat) {
        value = v.ir.FPSaturate(value);
    }
    v.F(fmul.dest_reg, value);
}
86
87void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
88 union {
89 u64 raw;
90 BitField<39, 2, FpRounding> fp_rounding;
91 BitField<41, 3, Scale> scale;
92 BitField<44, 2, FmzMode> fmz;
93 BitField<47, 1, u64> cc;
94 BitField<48, 1, u64> neg_b;
95 BitField<50, 1, u64> sat;
96 } const fmul{insn};
97
98 FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0,
99 fmul.neg_b != 0);
100}
101} // Anonymous namespace
102
103void TranslatorVisitor::FMUL_reg(u64 insn) {
104 return FMUL(*this, insn, GetFloatReg20(insn));
105}
106
107void TranslatorVisitor::FMUL_cbuf(u64 insn) {
108 return FMUL(*this, insn, GetFloatCbuf(insn));
109}
110
111void TranslatorVisitor::FMUL_imm(u64 insn) {
112 return FMUL(*this, insn, GetFloatImm20(insn));
113}
114
115void TranslatorVisitor::FMUL32I(u64 insn) {
116 union {
117 u64 raw;
118 BitField<52, 1, u64> cc;
119 BitField<53, 2, FmzMode> fmz;
120 BitField<55, 1, u64> sat;
121 } const fmul32i{insn};
122
123 FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None,
124 fmul32i.sat != 0, fmul32i.cc != 0, false);
125}
126
127} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
new file mode 100644
index 000000000..f91b93fad
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
@@ -0,0 +1,41 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// RRO target-function selector (instruction bit 39).
enum class Mode : u64 {
    SINCOS,
    EX2,
};
15
// RRO (range reduction operation), translated as a plain abs/neg move.
// NOTE(review): the SINCOS/EX2 mode bit is decoded but never used —
// presumably the IR implementations of sin/cos/exp2 accept unreduced
// operands; confirm against the MUFU translation.
void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<39, 1, Mode> mode;
        BitField<45, 1, u64> neg;
        BitField<49, 1, u64> abs;
    } const rro{insn};

    v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0));
}
27} // Anonymous namespace
28
29void TranslatorVisitor::RRO_reg(u64 insn) {
30 RRO(*this, insn, GetFloatReg20(insn));
31}
32
33void TranslatorVisitor::RRO_cbuf(u64 insn) {
34 RRO(*this, insn, GetFloatCbuf(insn));
35}
36
// Immediate encoding of RRO is unimplemented.
void TranslatorVisitor::RRO_imm(u64) {
    throw NotImplementedException("RRO (imm)");
}
40
41} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
new file mode 100644
index 000000000..5f93a1513
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
@@ -0,0 +1,60 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
// Shared FSETP implementation: compares op_a with op_b, combines the result
// (and its negation) with an input predicate via a boolean op, and writes the
// two outcomes to two destination predicates.
void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn; // aliases the raw instruction word (shadows the parameter)
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fsetp{insn};

    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)};
    const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0);
    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    const BooleanOp bop{fsetp.bop};
    const FPCompareOp compare_op{fsetp.compare_op};
    const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)};
    const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};
    // dest_pred_a gets bop(comparison, pred); dest_pred_b gets the same
    // combination with the comparison inverted.
    const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
    const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
    v.ir.SetPred(fsetp.dest_pred_a, result_a);
    v.ir.SetPred(fsetp.dest_pred_b, result_b);
}
46} // Anonymous namespace
47
48void TranslatorVisitor::FSETP_reg(u64 insn) {
49 FSETP(*this, insn, GetFloatReg20(insn));
50}
51
52void TranslatorVisitor::FSETP_cbuf(u64 insn) {
53 FSETP(*this, insn, GetFloatCbuf(insn));
54}
55
56void TranslatorVisitor::FSETP_imm(u64 insn) {
57 FSETP(*this, insn, GetFloatImm20(insn));
58}
59
60} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
new file mode 100644
index 000000000..7550a8d4c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::FSWZADD(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<28, 8, u64> swizzle;
16 BitField<38, 1, u64> ndv;
17 BitField<39, 2, FpRounding> round;
18 BitField<44, 1, u64> ftz;
19 BitField<47, 1, u64> cc;
20 } const fswzadd{insn};
21
22 if (fswzadd.ndv != 0) {
23 throw NotImplementedException("FSWZADD NDV");
24 }
25
26 const IR::F32 src_a{GetFloatReg8(insn)};
27 const IR::F32 src_b{GetFloatReg20(insn)};
28 const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))};
29
30 const IR::FpControl fp_control{
31 .no_contraction = false,
32 .rounding = CastFpRounding(fswzadd.round),
33 .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
34 };
35
36 const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)};
37 F(fswzadd.dest_reg, result);
38
39 if (fswzadd.cc != 0) {
40 throw NotImplementedException("FSWZADD CC");
41 }
42}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
new file mode 100644
index 000000000..f2738a93b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp
@@ -0,0 +1,125 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
// Shared HADD2 implementation: element-wise addition of two operand pairs
// (fp16 halves or an F32 broadcast), with abs/neg, FTZ, saturation and a
// merge mode for the packed result.
void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hadd2{insn};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    // If the swizzles yield mixed F16/F32 operands, promote the F16 side to
    // F32 so the addition happens in a single type.
    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };
    IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        // Convert back to F16 before packing the result pair.
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
}
53
54void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b,
55 const IR::U32& src_b) {
56 union {
57 u64 raw;
58 BitField<49, 2, Merge> merge;
59 BitField<39, 1, u64> ftz;
60 BitField<43, 1, u64> neg_a;
61 BitField<44, 1, u64> abs_a;
62 BitField<47, 2, Swizzle> swizzle_a;
63 } const hadd2{insn};
64
65 HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0,
66 hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b);
67}
68} // Anonymous namespace
69
70void TranslatorVisitor::HADD2_reg(u64 insn) {
71 union {
72 u64 raw;
73 BitField<32, 1, u64> sat;
74 BitField<31, 1, u64> neg_b;
75 BitField<30, 1, u64> abs_b;
76 BitField<28, 2, Swizzle> swizzle_b;
77 } const hadd2{insn};
78
79 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
80 GetReg20(insn));
81}
82
83void TranslatorVisitor::HADD2_cbuf(u64 insn) {
84 union {
85 u64 raw;
86 BitField<52, 1, u64> sat;
87 BitField<56, 1, u64> neg_b;
88 BitField<54, 1, u64> abs_b;
89 } const hadd2{insn};
90
91 HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
92 GetCbuf(insn));
93}
94
// HADD2 with two half-precision immediates packed into the instruction.
void TranslatorVisitor::HADD2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hadd2{insn};

    // Each 9-bit immediate supplies the upper exponent/mantissa bits of an
    // fp16 value (shifted into bits 6-14 / 22-30); the neg bits provide the
    // fp16 sign bits (15 and 31).
    const u32 imm{
        static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) |
        static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)};
    HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}
110
111void TranslatorVisitor::HADD2_32I(u64 insn) {
112 union {
113 u64 raw;
114 BitField<55, 1, u64> ftz;
115 BitField<52, 1, u64> sat;
116 BitField<56, 1, u64> neg_a;
117 BitField<53, 2, Swizzle> swizzle_a;
118 BitField<20, 32, u64> imm32;
119 } const hadd2{insn};
120
121 const u32 imm{static_cast<u32>(hadd2.imm32)};
122 HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
123 hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
124}
125} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
new file mode 100644
index 000000000..fd7986701
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp
@@ -0,0 +1,169 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
// Shared implementation for all HFMA2 variants: computes a * b + c on both
// lanes of the packed half-precision operands and merges the two results into
// the destination register according to `merge`.
void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c,
           Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c,
           bool sat, HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hfma2{insn};

    // Each operand unpacks into a (lhs, rhs) lane pair; an F32 swizzle yields
    // the same F32 scalar for both lanes.
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)};
    // When operand types mix F16 and F32, promote the F16 sides to F32 so the
    // FMA runs on one type; results are converted back to F16 at the end.
    const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
        if (lhs_c.Type() == IR::Type::F16) {
            lhs_c = v.ir.FPConvert(32, lhs_c);
            rhs_c = v.ir.FPConvert(32, rhs_c);
        }
    }

    // Only negation modifiers exist for b and c in this encoding (abs = false).
    lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b);

    lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c);
    rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)};
    if (precision == HalfPrecision::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        // (so a zero multiplicand forces the result to just the addend c).
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
        const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
        const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
        lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)};

        const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
        const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
        const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
        rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)};
    }
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        // Undo the F32 promotion before writing the packed destination.
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge));
}
74
75void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b,
76 Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat,
77 HalfPrecision precision) {
78 union {
79 u64 raw;
80 BitField<47, 2, Swizzle> swizzle_a;
81 BitField<49, 2, Merge> merge;
82 } const hfma2{insn};
83
84 HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c,
85 sat, precision);
86}
87} // Anonymous namespace
88
89void TranslatorVisitor::HFMA2_reg(u64 insn) {
90 union {
91 u64 raw;
92 BitField<28, 2, Swizzle> swizzle_b;
93 BitField<32, 1, u64> saturate;
94 BitField<31, 1, u64> neg_b;
95 BitField<30, 1, u64> neg_c;
96 BitField<35, 2, Swizzle> swizzle_c;
97 BitField<37, 2, HalfPrecision> precision;
98 } const hfma2{insn};
99
100 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c,
101 GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
102}
103
104void TranslatorVisitor::HFMA2_rc(u64 insn) {
105 union {
106 u64 raw;
107 BitField<51, 1, u64> neg_c;
108 BitField<52, 1, u64> saturate;
109 BitField<53, 2, Swizzle> swizzle_b;
110 BitField<56, 1, u64> neg_b;
111 BitField<57, 2, HalfPrecision> precision;
112 } const hfma2{insn};
113
114 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32,
115 GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision);
116}
117
118void TranslatorVisitor::HFMA2_cr(u64 insn) {
119 union {
120 u64 raw;
121 BitField<51, 1, u64> neg_c;
122 BitField<52, 1, u64> saturate;
123 BitField<53, 2, Swizzle> swizzle_c;
124 BitField<56, 1, u64> neg_b;
125 BitField<57, 2, HalfPrecision> precision;
126 } const hfma2{insn};
127
128 HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c,
129 GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
130}
131
132void TranslatorVisitor::HFMA2_imm(u64 insn) {
133 union {
134 u64 raw;
135 BitField<51, 1, u64> neg_c;
136 BitField<52, 1, u64> saturate;
137 BitField<53, 2, Swizzle> swizzle_c;
138
139 BitField<56, 1, u64> neg_high;
140 BitField<30, 9, u64> high;
141 BitField<29, 1, u64> neg_low;
142 BitField<20, 9, u64> low;
143 BitField<57, 2, HalfPrecision> precision;
144 } const hfma2{insn};
145
146 const u32 imm{
147 static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) |
148 static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)};
149
150 HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm),
151 GetReg39(insn), hfma2.saturate != 0, hfma2.precision);
152}
153
154void TranslatorVisitor::HFMA2_32I(u64 insn) {
155 union {
156 u64 raw;
157 BitField<0, 8, IR::Reg> src_c;
158 BitField<20, 32, u64> imm32;
159 BitField<52, 1, u64> neg_c;
160 BitField<53, 2, Swizzle> swizzle_a;
161 BitField<55, 2, HalfPrecision> precision;
162 } const hfma2{insn};
163
164 const u32 imm{static_cast<u32>(hfma2.imm32)};
165 HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0,
166 Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision);
167}
168
169} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
new file mode 100644
index 000000000..0dbeb7f56
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8
9IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) {
10 switch (precision) {
11 case HalfPrecision::None:
12 return IR::FmzMode::None;
13 case HalfPrecision::FTZ:
14 return IR::FmzMode::FTZ;
15 case HalfPrecision::FMZ:
16 return IR::FmzMode::FMZ;
17 default:
18 return IR::FmzMode::DontCare;
19 }
20}
21
// Unpacks a 32-bit operand into the two scalar lanes selected by the swizzle.
// H* swizzles produce F16 lanes; Swizzle::F32 reinterprets the whole register
// as a single F32 and replicates it into both lanes.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
    switch (swizzle) {
    case Swizzle::H1_H0: {
        // Low half feeds the first lane, high half the second.
        const IR::Value vector{ir.UnpackFloat2x16(value)};
        return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
    }
    case Swizzle::H0_H0: {
        // Broadcast the low half to both lanes.
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
        return {scalar, scalar};
    }
    case Swizzle::H1_H1: {
        // Broadcast the high half to both lanes.
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
        return {scalar, scalar};
    }
    case Swizzle::F32: {
        const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
        return {scalar, scalar};
    }
    }
    throw InvalidArgument("Invalid swizzle {}", swizzle);
}
43
// Combines the two result lanes into the 32-bit value to be written to the
// destination register, honoring the instruction's merge mode.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge) {
    switch (merge) {
    case Merge::H1_H0:
        // Both lanes packed: lhs in the low half, rhs in the high half.
        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
    case Merge::F32:
        // Only the first lane survives, widened to a full F32.
        return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
    case Merge::MRG_H0:
    case Merge::MRG_H1: {
        // Insert one lane into the destination register, preserving its other
        // half (read-modify-write of the current destination contents).
        const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
        const bool is_h0{merge == Merge::MRG_H0};
        const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)};
        return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1));
    }
    }
    throw InvalidArgument("Invalid merge {}", merge);
}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
new file mode 100644
index 000000000..59da56a7e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h
@@ -0,0 +1,42 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/exception.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
14
// Destination write-back mode for packed half instructions. Enumerator order
// matches the 2-bit hardware field.
enum class Merge : u64 {
    H1_H0,  // pack both lanes into the destination
    F32,    // write the first lane widened to F32
    MRG_H0, // insert the first lane into the low half, keep the high half
    MRG_H1, // insert the second lane into the high half, keep the low half
};

// Source-operand lane selection. Enumerator order matches the 2-bit hardware
// field.
enum class Swizzle : u64 {
    H1_H0, // low half -> lane 0, high half -> lane 1
    F32,   // whole register reinterpreted as one F32, used for both lanes
    H0_H0, // low half broadcast to both lanes
    H1_H1, // high half broadcast to both lanes
};

// Flush/denormal handling mode encoded in the instruction.
enum class HalfPrecision : u64 {
    None = 0,
    FTZ = 1,
    FMZ = 2,
};

// Maps a HalfPrecision mode to the IR's FmzMode.
IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision);

// Unpacks a 32-bit operand into its two swizzled scalar lanes.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle);

// Combines two result lanes into the value written to the destination.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge);
41
42} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
new file mode 100644
index 000000000..3f548ce76
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp
@@ -0,0 +1,143 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
// Shared implementation for all HMUL2 variants: multiplies both lanes of the
// packed half-precision operands a and b and merges the results into the
// destination register according to `merge`.
void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b,
           HalfPrecision precision) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hmul2{insn};

    // Each operand unpacks into a (lhs, rhs) lane pair; an F32 swizzle yields
    // the same F32 scalar for both lanes.
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    // Mixed F16/F32 operands are promoted to F32 for the multiply and
    // converted back to F16 at the end.
    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = HalfPrecision2FmzMode(precision),
    };
    IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)};
    if (precision == HalfPrecision::FMZ && !sat) {
        // Do not implement FMZ if SAT is enabled, as it does the logic for us.
        // On D3D9 mode, anything * 0 is zero, even NAN and infinity
        // (so a zero factor forces the lane result to exactly zero).
        const IR::F32 zero{v.ir.Imm32(0.0f)};
        const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)};
        const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)};
        const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)};
        lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)};

        const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)};
        const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)};
        const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)};
        rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)};
    }
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        // Undo the F32 promotion before writing the packed destination.
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge));
}
68
69void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b,
70 Swizzle swizzle_b, const IR::U32& src_b) {
71 union {
72 u64 raw;
73 BitField<49, 2, Merge> merge;
74 BitField<47, 2, Swizzle> swizzle_a;
75 BitField<39, 2, HalfPrecision> precision;
76 } const hmul2{insn};
77
78 HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b,
79 hmul2.precision);
80}
81} // Anonymous namespace
82
83void TranslatorVisitor::HMUL2_reg(u64 insn) {
84 union {
85 u64 raw;
86 BitField<32, 1, u64> sat;
87 BitField<31, 1, u64> neg_b;
88 BitField<30, 1, u64> abs_b;
89 BitField<44, 1, u64> abs_a;
90 BitField<28, 2, Swizzle> swizzle_b;
91 } const hmul2{insn};
92
93 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0,
94 hmul2.swizzle_b, GetReg20(insn));
95}
96
97void TranslatorVisitor::HMUL2_cbuf(u64 insn) {
98 union {
99 u64 raw;
100 BitField<52, 1, u64> sat;
101 BitField<54, 1, u64> abs_b;
102 BitField<43, 1, u64> neg_a;
103 BitField<44, 1, u64> abs_a;
104 } const hmul2{insn};
105
106 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false,
107 Swizzle::F32, GetCbuf(insn));
108}
109
110void TranslatorVisitor::HMUL2_imm(u64 insn) {
111 union {
112 u64 raw;
113 BitField<52, 1, u64> sat;
114 BitField<56, 1, u64> neg_high;
115 BitField<30, 9, u64> high;
116 BitField<29, 1, u64> neg_low;
117 BitField<20, 9, u64> low;
118 BitField<43, 1, u64> neg_a;
119 BitField<44, 1, u64> abs_a;
120 } const hmul2{insn};
121
122 const u32 imm{
123 static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) |
124 static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)};
125 HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false,
126 Swizzle::H1_H0, ir.Imm32(imm));
127}
128
129void TranslatorVisitor::HMUL2_32I(u64 insn) {
130 union {
131 u64 raw;
132 BitField<55, 2, HalfPrecision> precision;
133 BitField<52, 1, u64> sat;
134 BitField<53, 2, Swizzle> swizzle_a;
135 BitField<20, 32, u64> imm32;
136 } const hmul2{insn};
137
138 const u32 imm{static_cast<u32>(hmul2.imm32)};
139 HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false,
140 Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision);
141}
142
143} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
new file mode 100644
index 000000000..cca5b831f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp
@@ -0,0 +1,117 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
// Shared implementation for all HSET2 variants: compares both lanes of the
// packed operands a and b, combines each lane's result with a predicate, and
// writes a per-half mask (or boolean-float) into the destination register.
void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b,
           bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
    } const hset2{insn};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    // Mixed F16/F32 lanes are promoted to F32 so the comparison operates on a
    // single type.
    if (lhs_a.Type() != lhs_b.Type()) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }

    lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
    rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 pred{v.ir.GetPred(hset2.pred)};
    if (hset2.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    // Per-lane compare, then combine each lane's result with the predicate
    // through the encoded boolean operation.
    const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
    const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
    const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)};
    const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)};

    // bf = boolean-float: a true lane yields half 1.0 (0x3c00) instead of an
    // all-ones 16-bit mask. The lhs lane fills the low half, the rhs lane the
    // high half of the destination.
    const u32 true_value = bf ? 0x3c00 : 0xffff;
    const IR::U32 true_val_lhs{v.ir.Imm32(true_value)};
    const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)};
    const IR::U32 fail_result{v.ir.Imm32(0)};
    const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)};
    const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)};

    v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)});
}
67} // Anonymous namespace
68
69void TranslatorVisitor::HSET2_reg(u64 insn) {
70 union {
71 u64 insn;
72 BitField<30, 1, u64> abs_b;
73 BitField<49, 1, u64> bf;
74 BitField<31, 1, u64> neg_b;
75 BitField<50, 1, u64> ftz;
76 BitField<35, 4, FPCompareOp> compare_op;
77 BitField<28, 2, Swizzle> swizzle_b;
78 } const hset2{insn};
79
80 HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0,
81 hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b);
82}
83
84void TranslatorVisitor::HSET2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> bf;
88 BitField<56, 1, u64> neg_b;
89 BitField<54, 1, u64> ftz;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hset2{insn};
92
93 HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false,
94 hset2.compare_op, Swizzle::F32);
95}
96
97void TranslatorVisitor::HSET2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> bf;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hset2{insn};
108
109 const u32 imm{
110 static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)};
112
113 HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
114 Swizzle::H1_H0);
115}
116
117} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
new file mode 100644
index 000000000..b3931dae3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp
@@ -0,0 +1,118 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"
6
7namespace Shader::Maxwell {
8namespace {
// Shared implementation for all HSETP2 variants: compares both lanes of the
// packed operands a and b, combines each lane's result with a predicate, and
// writes the outcome into two destination predicates.
void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b,
            Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) {
    union {
        u64 insn;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> neg_a;
        BitField<45, 2, BooleanOp> bop;
        BitField<44, 1, u64> abs_a;
        BitField<6, 1, u64> ftz;
        BitField<47, 2, Swizzle> swizzle_a;
    } const hsetp2{insn};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    // Mixed F16/F32 lanes are promoted to F32 so the comparison operates on a
    // single type.
    if (lhs_a.Type() != lhs_b.Type()) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }

    lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
    rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl control{
        .no_contraction = false,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None),
    };

    IR::U1 pred{v.ir.GetPred(hsetp2.pred)};
    if (hsetp2.neg_pred != 0) {
        pred = v.ir.LogicalNot(pred);
    }
    // Per-lane compare, then combine each lane's result with the predicate
    // through the encoded boolean operation.
    const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)};
    const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)};
    const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)};
    const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)};

    if (h_and) {
        // H_AND: both lanes must pass; pred_a gets the AND, pred_b its
        // complement.
        auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs);
        v.ir.SetPred(hsetp2.dest_pred_a, result);
        v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result));
    } else {
        // Otherwise each lane writes its own predicate.
        v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs);
        v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs);
    }
}
69} // Anonymous namespace
70
71void TranslatorVisitor::HSETP2_reg(u64 insn) {
72 union {
73 u64 insn;
74 BitField<30, 1, u64> abs_b;
75 BitField<49, 1, u64> h_and;
76 BitField<31, 1, u64> neg_b;
77 BitField<35, 4, FPCompareOp> compare_op;
78 BitField<28, 2, Swizzle> swizzle_b;
79 } const hsetp2{insn};
80 HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b,
81 hsetp2.compare_op, hsetp2.h_and != 0);
82}
83
84void TranslatorVisitor::HSETP2_cbuf(u64 insn) {
85 union {
86 u64 insn;
87 BitField<53, 1, u64> h_and;
88 BitField<54, 1, u64> abs_b;
89 BitField<56, 1, u64> neg_b;
90 BitField<49, 4, FPCompareOp> compare_op;
91 } const hsetp2{insn};
92
93 HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32,
94 hsetp2.compare_op, hsetp2.h_and != 0);
95}
96
97void TranslatorVisitor::HSETP2_imm(u64 insn) {
98 union {
99 u64 insn;
100 BitField<53, 1, u64> h_and;
101 BitField<54, 1, u64> ftz;
102 BitField<49, 4, FPCompareOp> compare_op;
103 BitField<56, 1, u64> neg_high;
104 BitField<30, 9, u64> high;
105 BitField<29, 1, u64> neg_low;
106 BitField<20, 9, u64> low;
107 } const hsetp2{insn};
108
109 const u32 imm{static_cast<u32>(hsetp2.low << 6) |
110 static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) |
111 static_cast<u32>(hsetp2.high << 22) |
112 static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)};
113
114 HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op,
115 hsetp2.h_and != 0);
116}
117
118} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
new file mode 100644
index 000000000..b446aae0e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp
@@ -0,0 +1,272 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/frontend/ir/ir_emitter.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11[[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding,
12 u32 offset) {
13 if (unaligned) {
14 return ir.Imm32(0);
15 }
16 return ir.GetCbuf(binding, IR::U32{IR::Value{offset}});
17}
18} // Anonymous namespace
19
// Reads a general-purpose register as a raw 32-bit value.
IR::U32 TranslatorVisitor::X(IR::Reg reg) {
    return ir.GetReg(reg);
}

// Reads an even-aligned register pair as a 64-bit value (reg = low word,
// reg + 1 = high word).
IR::U64 TranslatorVisitor::L(IR::Reg reg) {
    if (!IR::IsAligned(reg, 2)) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
    return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
}

// Reads a register reinterpreted as a 32-bit float.
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
    return ir.BitCast<IR::F32>(X(reg));
}

// Reads an even-aligned register pair as a 64-bit float.
IR::F64 TranslatorVisitor::D(IR::Reg reg) {
    if (!IR::IsAligned(reg, 2)) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
    return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))};
}

// Writes a raw 32-bit value to a general-purpose register.
void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
    ir.SetReg(dest_reg, value);
}

// Writes a 64-bit value to an even-aligned register pair (low word first).
void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) {
    if (!IR::IsAligned(dest_reg, 2)) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value result{ir.UnpackUint2x32(value)};
    for (int i = 0; i < 2; i++) {
        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
    }
}

// Writes a 32-bit float to a register via bit reinterpretation.
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
    X(dest_reg, ir.BitCast<IR::U32>(value));
}

// Writes a 64-bit float to an even-aligned register pair (low word first).
void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) {
    if (!IR::IsAligned(dest_reg, 2)) {
        throw NotImplementedException("Unaligned destination register {}", dest_reg);
    }
    const IR::Value result{ir.UnpackDouble2x32(value)};
    for (int i = 0; i < 2; i++) {
        X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
    }
}
69
// Reads the register encoded at bit 8 of the instruction word.
IR::U32 TranslatorVisitor::GetReg8(u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}

// Reads the register encoded at bit 20 of the instruction word.
IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}

// Reads the register encoded at bit 39 of the instruction word.
IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const reg{insn};
    return X(reg.index);
}

// Float-typed views of the same register fields.
IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) {
    return ir.BitCast<IR::F32>(GetReg8(insn));
}

IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) {
    return ir.BitCast<IR::F32>(GetReg20(insn));
}

IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) {
    return ir.BitCast<IR::F32>(GetReg39(insn));
}

// Double-typed reads of the aligned register pair encoded at bit 20.
IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const reg{insn};
    return D(reg.index);
}

// Double-typed reads of the aligned register pair encoded at bit 39.
IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const reg{insn};
    return D(reg.index);
}
121
// Decodes the constant buffer slot and word offset common to all cbuf
// operands and returns {binding, byte offset} as immediate IR values.
static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) {
    union {
        u64 raw;
        BitField<20, 14, u64> offset;
        BitField<34, 5, u64> binding;
    } const cbuf{insn};

    if (cbuf.binding >= 18) {
        throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding);
    }
    // Defensive check: the 14-bit offset field cannot actually reach 0x10'000.
    if (cbuf.offset >= 0x10'000) {
        throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset);
    }
    const IR::Value binding{static_cast<u32>(cbuf.binding)};
    // The encoded offset counts 32-bit words; convert it to bytes.
    const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4};
    return {IR::U32{binding}, IR::U32{byte_offset}};
}
139
// Reads the 32-bit constant buffer operand addressed by the instruction.
IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
    const auto [binding, byte_offset]{CbufAddr(insn)};
    return ir.GetCbuf(binding, byte_offset);
}

// Float-typed read of the same constant buffer operand.
IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) {
    const auto [binding, byte_offset]{CbufAddr(insn)};
    return ir.GetFloatCbuf(binding, byte_offset);
}
149
// Reads a 64-bit double from the constant buffer. Aligned reads fetch both
// 32-bit words; unaligned reads only fetch the high word and zero-fill the
// low word (see CbufLowerBits).
IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const cbuf{insn};

    const auto [binding, offset_value]{CbufAddr(insn)};
    const bool unaligned{cbuf.unaligned != 0};
    const u32 offset{offset_value.U32()};
    // High word address: next word for unaligned reads, otherwise the upper
    // half of the 8-byte-aligned slot containing the offset.
    const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u};

    const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})};
    const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)};
    return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value));
}
165
// Reads two consecutive constant buffer words as a packed 64-bit integer
// (low word first). Only aligned reads are supported.
IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) {
    union {
        u64 raw;
        BitField<20, 1, u64> unaligned;
    } const cbuf{insn};

    if (cbuf.unaligned != 0) {
        throw NotImplementedException("Unaligned packed constant buffer read");
    }
    const auto [binding, lower_offset]{CbufAddr(insn)};
    const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)};
    const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)};
    const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)};
    return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value));
}
181
// Decodes the signed 20-bit immediate (19-bit magnitude at bit 20 plus a
// sign bit at bit 56) as a sign-extended 32-bit value.
IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const imm{insn};

    if (imm.is_negative != 0) {
        // Reconstruct the two's-complement value: raw - 2^19.
        const s64 raw{static_cast<s64>(imm.value)};
        return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw));
    } else {
        return ir.Imm32(static_cast<u32>(imm.value));
    }
}
196
// Decodes the 20-bit immediate as an f32: the 19-bit field holds the top
// non-sign bits of the float (shifted into place), with the sign supplied
// separately by bit 56.
IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const imm{insn};
    const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)};
    const u32 value{static_cast<u32>(imm.value) << 12};
    return ir.Imm32(Common::BitCast<f32>(value | sign_bit));
}

// Decodes the 20-bit immediate as an f64: same scheme, shifted into the top
// non-sign bits of the double.
IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 19, u64> value;
        BitField<56, 1, u64> is_negative;
    } const imm{insn};
    const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0};
    const u64 value{imm.value << 44};
    return ir.Imm64(Common::BitCast<f64>(value | sign_bit));
}
218
// Returns the signed 20-bit immediate placed in the upper 32 bits of a
// 64-bit value, as used by instructions that consume the immediate as the
// high word of a packed operand.
IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) {
    const s64 value{GetImm20(insn).U32()};
    return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32));
}

// Reads the raw 32-bit immediate at bit 20.
IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const imm{insn};
    return ir.Imm32(static_cast<u32>(imm.value));
}

// Reads the 32-bit immediate at bit 20 reinterpreted as an f32.
IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const imm{insn};
    return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value)));
}
239
// Condition-code flag helpers: thin wrappers over the IR emitter, kept so
// translators read uniformly.
void TranslatorVisitor::SetZFlag(const IR::U1& value) {
    ir.SetZFlag(value);
}

void TranslatorVisitor::SetSFlag(const IR::U1& value) {
    ir.SetSFlag(value);
}

void TranslatorVisitor::SetCFlag(const IR::U1& value) {
    ir.SetCFlag(value);
}

void TranslatorVisitor::SetOFlag(const IR::U1& value) {
    ir.SetOFlag(value);
}

// NOTE(review): named ResetZero rather than ResetZFlag, inconsistent with the
// ResetSFlag/ResetCFlag/ResetOFlag siblings; renaming would break callers.
void TranslatorVisitor::ResetZero() {
    SetZFlag(ir.Imm1(false));
}

void TranslatorVisitor::ResetSFlag() {
    SetSFlag(ir.Imm1(false));
}

void TranslatorVisitor::ResetCFlag() {
    SetCFlag(ir.Imm1(false));
}

void TranslatorVisitor::ResetOFlag() {
    SetOFlag(ir.Imm1(false));
}
271
272} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
new file mode 100644
index 000000000..335e4f24f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h
@@ -0,0 +1,387 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/ir_emitter.h"
10#include "shader_recompiler/frontend/maxwell/instruction.h"
11
12namespace Shader::Maxwell {
13
// Comparison condition encoded in SASS set/compare instructions; the 3-bit
// encoding runs from always-false (0) to always-true (7).
14enum class CompareOp : u64 {
15    False,
16    LessThan,
17    Equal,
18    LessThanEqual,
19    GreaterThan,
20    NotEqual,
21    GreaterThanEqual,
22    True,
23};
24
// Boolean combiner applied between a comparison result and a source predicate.
25enum class BooleanOp : u64 {
26    AND,
27    OR,
28    XOR,
29};
30
31enum class PredicateOp : u64 {
32    False,
33    True,
34    Zero,
35    NonZero,
36};
37
// Floating-point comparison modes. The trailing-U variants presumably denote
// "unordered" comparisons (also true on NaN inputs) and NUM/Nan test for
// ordered/NaN operands — TODO confirm against the translator implementations.
38enum class FPCompareOp : u64 {
39    F,
40    LT,
41    EQ,
42    LE,
43    GT,
44    NE,
45    GE,
46    NUM,
47    Nan,
48    LTU,
49    EQU,
50    LEU,
51    GTU,
52    NEU,
53    GEU,
54    T,
55};
56
// Translates individual Maxwell (SASS) instructions into IR, emitting into the
// block given at construction. Each opcode handler receives the raw 64-bit
// instruction word and decodes its fields locally with BitField unions; the
// shared helpers at the bottom implement the common operand/flag plumbing.
57class TranslatorVisitor {
58public:
59    explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}
60
61    Environment& env;
62    IR::IREmitter ir;
63
    // One handler per decoded opcode form. Suffixes name the operand source of
    // the variable operand: _reg (register), _cbuf (constant buffer),
    // _imm/_32I (immediate), _rc/_cr (register/cbuf operand-order variants).
64    void AL2P(u64 insn);
65    void ALD(u64 insn);
66    void AST(u64 insn);
67    void ATOM_cas(u64 insn);
68    void ATOM(u64 insn);
69    void ATOMS_cas(u64 insn);
70    void ATOMS(u64 insn);
71    void B2R(u64 insn);
72    void BAR(u64 insn);
73    void BFE_reg(u64 insn);
74    void BFE_cbuf(u64 insn);
75    void BFE_imm(u64 insn);
76    void BFI_reg(u64 insn);
77    void BFI_rc(u64 insn);
78    void BFI_cr(u64 insn);
79    void BFI_imm(u64 insn);
80    void BPT(u64 insn);
81    void BRA(u64 insn);
82    void BRK(u64 insn);
83    void BRX(u64 insn);
84    void CAL();
85    void CCTL(u64 insn);
86    void CCTLL(u64 insn);
87    void CONT(u64 insn);
88    void CS2R(u64 insn);
89    void CSET(u64 insn);
90    void CSETP(u64 insn);
91    void DADD_reg(u64 insn);
92    void DADD_cbuf(u64 insn);
93    void DADD_imm(u64 insn);
94    void DEPBAR();
95    void DFMA_reg(u64 insn);
96    void DFMA_rc(u64 insn);
97    void DFMA_cr(u64 insn);
98    void DFMA_imm(u64 insn);
99    void DMNMX_reg(u64 insn);
100    void DMNMX_cbuf(u64 insn);
101    void DMNMX_imm(u64 insn);
102    void DMUL_reg(u64 insn);
103    void DMUL_cbuf(u64 insn);
104    void DMUL_imm(u64 insn);
105    void DSET_reg(u64 insn);
106    void DSET_cbuf(u64 insn);
107    void DSET_imm(u64 insn);
108    void DSETP_reg(u64 insn);
109    void DSETP_cbuf(u64 insn);
110    void DSETP_imm(u64 insn);
111    void EXIT();
112    void F2F_reg(u64 insn);
113    void F2F_cbuf(u64 insn);
114    void F2F_imm(u64 insn);
115    void F2I_reg(u64 insn);
116    void F2I_cbuf(u64 insn);
117    void F2I_imm(u64 insn);
118    void FADD_reg(u64 insn);
119    void FADD_cbuf(u64 insn);
120    void FADD_imm(u64 insn);
121    void FADD32I(u64 insn);
122    void FCHK_reg(u64 insn);
123    void FCHK_cbuf(u64 insn);
124    void FCHK_imm(u64 insn);
125    void FCMP_reg(u64 insn);
126    void FCMP_rc(u64 insn);
127    void FCMP_cr(u64 insn);
128    void FCMP_imm(u64 insn);
129    void FFMA_reg(u64 insn);
130    void FFMA_rc(u64 insn);
131    void FFMA_cr(u64 insn);
132    void FFMA_imm(u64 insn);
133    void FFMA32I(u64 insn);
134    void FLO_reg(u64 insn);
135    void FLO_cbuf(u64 insn);
136    void FLO_imm(u64 insn);
137    void FMNMX_reg(u64 insn);
138    void FMNMX_cbuf(u64 insn);
139    void FMNMX_imm(u64 insn);
140    void FMUL_reg(u64 insn);
141    void FMUL_cbuf(u64 insn);
142    void FMUL_imm(u64 insn);
143    void FMUL32I(u64 insn);
144    void FSET_reg(u64 insn);
145    void FSET_cbuf(u64 insn);
146    void FSET_imm(u64 insn);
147    void FSETP_reg(u64 insn);
148    void FSETP_cbuf(u64 insn);
149    void FSETP_imm(u64 insn);
150    void FSWZADD(u64 insn);
151    void GETCRSPTR(u64 insn);
152    void GETLMEMBASE(u64 insn);
153    void HADD2_reg(u64 insn);
154    void HADD2_cbuf(u64 insn);
155    void HADD2_imm(u64 insn);
156    void HADD2_32I(u64 insn);
157    void HFMA2_reg(u64 insn);
158    void HFMA2_rc(u64 insn);
159    void HFMA2_cr(u64 insn);
160    void HFMA2_imm(u64 insn);
161    void HFMA2_32I(u64 insn);
162    void HMUL2_reg(u64 insn);
163    void HMUL2_cbuf(u64 insn);
164    void HMUL2_imm(u64 insn);
165    void HMUL2_32I(u64 insn);
166    void HSET2_reg(u64 insn);
167    void HSET2_cbuf(u64 insn);
168    void HSET2_imm(u64 insn);
169    void HSETP2_reg(u64 insn);
170    void HSETP2_cbuf(u64 insn);
171    void HSETP2_imm(u64 insn);
172    void I2F_reg(u64 insn);
173    void I2F_cbuf(u64 insn);
174    void I2F_imm(u64 insn);
175    void I2I_reg(u64 insn);
176    void I2I_cbuf(u64 insn);
177    void I2I_imm(u64 insn);
178    void IADD_reg(u64 insn);
179    void IADD_cbuf(u64 insn);
180    void IADD_imm(u64 insn);
181    void IADD3_reg(u64 insn);
182    void IADD3_cbuf(u64 insn);
183    void IADD3_imm(u64 insn);
184    void IADD32I(u64 insn);
185    void ICMP_reg(u64 insn);
186    void ICMP_rc(u64 insn);
187    void ICMP_cr(u64 insn);
188    void ICMP_imm(u64 insn);
189    void IDE(u64 insn);
190    void IDP_reg(u64 insn);
191    void IDP_imm(u64 insn);
192    void IMAD_reg(u64 insn);
193    void IMAD_rc(u64 insn);
194    void IMAD_cr(u64 insn);
195    void IMAD_imm(u64 insn);
196    void IMAD32I(u64 insn);
197    void IMADSP_reg(u64 insn);
198    void IMADSP_rc(u64 insn);
199    void IMADSP_cr(u64 insn);
200    void IMADSP_imm(u64 insn);
201    void IMNMX_reg(u64 insn);
202    void IMNMX_cbuf(u64 insn);
203    void IMNMX_imm(u64 insn);
204    void IMUL_reg(u64 insn);
205    void IMUL_cbuf(u64 insn);
206    void IMUL_imm(u64 insn);
207    void IMUL32I(u64 insn);
208    void IPA(u64 insn);
209    void ISBERD(u64 insn);
210    void ISCADD_reg(u64 insn);
211    void ISCADD_cbuf(u64 insn);
212    void ISCADD_imm(u64 insn);
213    void ISCADD32I(u64 insn);
214    void ISET_reg(u64 insn);
215    void ISET_cbuf(u64 insn);
216    void ISET_imm(u64 insn);
217    void ISETP_reg(u64 insn);
218    void ISETP_cbuf(u64 insn);
219    void ISETP_imm(u64 insn);
220    void JCAL(u64 insn);
221    void JMP(u64 insn);
222    void JMX(u64 insn);
223    void KIL();
224    void LD(u64 insn);
225    void LDC(u64 insn);
226    void LDG(u64 insn);
227    void LDL(u64 insn);
228    void LDS(u64 insn);
229    void LEA_hi_reg(u64 insn);
230    void LEA_hi_cbuf(u64 insn);
231    void LEA_lo_reg(u64 insn);
232    void LEA_lo_cbuf(u64 insn);
233    void LEA_lo_imm(u64 insn);
234    void LEPC(u64 insn);
235    void LONGJMP(u64 insn);
236    void LOP_reg(u64 insn);
237    void LOP_cbuf(u64 insn);
238    void LOP_imm(u64 insn);
239    void LOP3_reg(u64 insn);
240    void LOP3_cbuf(u64 insn);
241    void LOP3_imm(u64 insn);
242    void LOP32I(u64 insn);
243    void MEMBAR(u64 insn);
244    void MOV_reg(u64 insn);
245    void MOV_cbuf(u64 insn);
246    void MOV_imm(u64 insn);
247    void MOV32I(u64 insn);
248    void MUFU(u64 insn);
249    void NOP(u64 insn);
250    void OUT_reg(u64 insn);
251    void OUT_cbuf(u64 insn);
252    void OUT_imm(u64 insn);
253    void P2R_reg(u64 insn);
254    void P2R_cbuf(u64 insn);
255    void P2R_imm(u64 insn);
256    void PBK();
257    void PCNT();
258    void PEXIT(u64 insn);
259    void PIXLD(u64 insn);
260    void PLONGJMP(u64 insn);
261    void POPC_reg(u64 insn);
262    void POPC_cbuf(u64 insn);
263    void POPC_imm(u64 insn);
264    void PRET(u64 insn);
265    void PRMT_reg(u64 insn);
266    void PRMT_rc(u64 insn);
267    void PRMT_cr(u64 insn);
268    void PRMT_imm(u64 insn);
269    void PSET(u64 insn);
270    void PSETP(u64 insn);
271    void R2B(u64 insn);
272    void R2P_reg(u64 insn);
273    void R2P_cbuf(u64 insn);
274    void R2P_imm(u64 insn);
275    void RAM(u64 insn);
276    void RED(u64 insn);
277    void RET(u64 insn);
278    void RRO_reg(u64 insn);
279    void RRO_cbuf(u64 insn);
280    void RRO_imm(u64 insn);
281    void RTT(u64 insn);
282    void S2R(u64 insn);
283    void SAM(u64 insn);
284    void SEL_reg(u64 insn);
285    void SEL_cbuf(u64 insn);
286    void SEL_imm(u64 insn);
287    void SETCRSPTR(u64 insn);
288    void SETLMEMBASE(u64 insn);
289    void SHF_l_reg(u64 insn);
290    void SHF_l_imm(u64 insn);
291    void SHF_r_reg(u64 insn);
292    void SHF_r_imm(u64 insn);
293    void SHFL(u64 insn);
294    void SHL_reg(u64 insn);
295    void SHL_cbuf(u64 insn);
296    void SHL_imm(u64 insn);
297    void SHR_reg(u64 insn);
298    void SHR_cbuf(u64 insn);
299    void SHR_imm(u64 insn);
300    void SSY();
301    void ST(u64 insn);
302    void STG(u64 insn);
303    void STL(u64 insn);
304    void STP(u64 insn);
305    void STS(u64 insn);
306    void SUATOM(u64 insn);
307    void SUATOM_cas(u64 insn);
308    void SULD(u64 insn);
309    void SURED(u64 insn);
310    void SUST(u64 insn);
311    void SYNC(u64 insn);
312    void TEX(u64 insn);
313    void TEX_b(u64 insn);
314    void TEXS(u64 insn);
315    void TLD(u64 insn);
316    void TLD_b(u64 insn);
317    void TLD4(u64 insn);
318    void TLD4_b(u64 insn);
319    void TLD4S(u64 insn);
320    void TLDS(u64 insn);
321    void TMML(u64 insn);
322    void TMML_b(u64 insn);
323    void TXA(u64 insn);
324    void TXD(u64 insn);
325    void TXD_b(u64 insn);
326    void TXQ(u64 insn);
327    void TXQ_b(u64 insn);
328    void VABSDIFF(u64 insn);
329    void VABSDIFF4(u64 insn);
330    void VADD(u64 insn);
331    void VMAD(u64 insn);
332    void VMNMX(u64 insn);
333    void VOTE(u64 insn);
334    void VOTE_vtg(u64 insn);
335    void VSET(u64 insn);
336    void VSETP(u64 insn);
337    void VSHL(u64 insn);
338    void VSHR(u64 insn);
339    void XMAD_reg(u64 insn);
340    void XMAD_rc(u64 insn);
341    void XMAD_cr(u64 insn);
342    void XMAD_imm(u64 insn);
343
    // Register accessors: X/L/F/D read a register as u32/u64/f32/f64; the
    // overloads below write the corresponding view back.
344    [[nodiscard]] IR::U32 X(IR::Reg reg);
345    [[nodiscard]] IR::U64 L(IR::Reg reg);
346    [[nodiscard]] IR::F32 F(IR::Reg reg);
347    [[nodiscard]] IR::F64 D(IR::Reg reg);
348
349    void X(IR::Reg dest_reg, const IR::U32& value);
350    void L(IR::Reg dest_reg, const IR::U64& value);
351    void F(IR::Reg dest_reg, const IR::F32& value);
352    void D(IR::Reg dest_reg, const IR::F64& value);
353
    // Operand decoders; the numeric suffix names the bit offset of the
    // register/immediate field inside the 64-bit instruction word.
354    [[nodiscard]] IR::U32 GetReg8(u64 insn);
355    [[nodiscard]] IR::U32 GetReg20(u64 insn);
356    [[nodiscard]] IR::U32 GetReg39(u64 insn);
357    [[nodiscard]] IR::F32 GetFloatReg8(u64 insn);
358    [[nodiscard]] IR::F32 GetFloatReg20(u64 insn);
359    [[nodiscard]] IR::F32 GetFloatReg39(u64 insn);
360    [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn);
361    [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn);
362
363    [[nodiscard]] IR::U32 GetCbuf(u64 insn);
364    [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn);
365    [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn);
366    [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn);
367
368    [[nodiscard]] IR::U32 GetImm20(u64 insn);
369    [[nodiscard]] IR::F32 GetFloatImm20(u64 insn);
370    [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn);
371    [[nodiscard]] IR::U64 GetPackedImm20(u64 insn);
372
373    [[nodiscard]] IR::U32 GetImm32(u64 insn);
374    [[nodiscard]] IR::F32 GetFloatImm32(u64 insn);
375
    // Condition-code flag helpers; Reset* store a constant false.
376    void SetZFlag(const IR::U1& value);
377    void SetSFlag(const IR::U1& value);
378    void SetCFlag(const IR::U1& value);
379    void SetOFlag(const IR::U1& value);
380
381    void ResetZero();
382    void ResetSFlag();
383    void ResetCFlag();
384    void ResetOFlag();
385};
386
387} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
new file mode 100644
index 000000000..8ffd84867
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp
@@ -0,0 +1,105 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Core IADD path. op_b arrives already decoded (and, for the non-PO forms,
// already negated by the caller); this applies negation of A, the optional
// carry-in (X), the plus-one (PO) mode, and CC flag writeback, then stores
// the sum into the destination register (bits 0-7).
11void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
12          bool cc) {
13    union {
14        u64 raw;
15        BitField<0, 8, IR::Reg> dest_reg;
16        BitField<8, 8, IR::Reg> src_a;
17    } const iadd{insn};
18
19    if (sat) {
20        throw NotImplementedException("IADD SAT");
21    }
22    if (x && po) {
23        throw NotImplementedException("IADD X+PO");
24    }
25    // Operand A is always read from here, negated if needed
26    IR::U32 op_a{v.X(iadd.src_a)};
27    if (neg_a) {
28        op_a = v.ir.INeg(op_a);
29    }
30    // Add both operands
31    IR::U32 result{v.ir.IAdd(op_a, op_b)};
32    if (x) {
        // Extended add: fold in the previous carry flag as a 0/1 carry-in.
33        const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
34        result = v.ir.IAdd(result, carry);
35    }
36    if (po) {
37        // .PO adds one to the result
38        result = v.ir.IAdd(result, v.ir.Imm32(1));
39    }
40    if (cc) {
41        // Store flags
42        // TODO: Does this grab the result pre-PO or after?
43        if (po) {
44            throw NotImplementedException("IADD CC+PO");
45        }
46        // TODO: How does CC behave when X is set?
47        if (x) {
48            throw NotImplementedException("IADD X+CC");
49        }
        // Z/S/C/O are taken from the final IAdd's pseudo-operands.
50        v.SetZFlag(v.ir.GetZeroFromOp(result));
51        v.SetSFlag(v.ir.GetSignFromOp(result));
52        v.SetCFlag(v.ir.GetCarryFromOp(result));
53        v.SetOFlag(v.ir.GetOverflowFromOp(result));
54    }
55    // Store result
56    v.X(iadd.dest_reg, result);
57}
58
// Decodes the mode bits shared by the reg/cbuf/imm forms. Bits 48-49 both set
// (== 3) selects PO mode, in which case the bit-48 negate-B flag is not
// applied; otherwise bit 48 negates operand B before the common path runs.
59void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
60    union {
61        u64 insn;
62        BitField<43, 1, u64> x;
63        BitField<47, 1, u64> cc;
64        BitField<48, 2, u64> three_for_po;
65        BitField<48, 1, u64> neg_b;
66        BitField<49, 1, u64> neg_a;
67        BitField<50, 1, u64> sat;
68    } const iadd{insn};
69
70    const bool po{iadd.three_for_po == 3};
71    if (!po && iadd.neg_b != 0) {
72        op_b = v.ir.INeg(op_b);
73    }
74    IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
75}
76} // Anonymous namespace
77
// Dispatchers: select where operand B comes from (register / cbuf / 20-bit
// immediate) and forward to the shared decode path above.
78void TranslatorVisitor::IADD_reg(u64 insn) {
79    IADD(*this, insn, GetReg20(insn));
80}
81
82void TranslatorVisitor::IADD_cbuf(u64 insn) {
83    IADD(*this, insn, GetCbuf(insn));
84}
85
86void TranslatorVisitor::IADD_imm(u64 insn) {
87    IADD(*this, insn, GetImm20(insn));
88}
89
// 32-bit-immediate form: the mode bits live at different offsets than in the
// short forms, and neg_a (bit 56) is suppressed when PO (bits 55-56 == 3).
90void TranslatorVisitor::IADD32I(u64 insn) {
91    union {
92        u64 raw;
93        BitField<52, 1, u64> cc;
94        BitField<53, 1, u64> x;
95        BitField<54, 1, u64> sat;
96        BitField<55, 2, u64> three_for_po;
97        BitField<56, 1, u64> neg_a;
98    } const iadd32i{insn};
99
100    const bool po{iadd32i.three_for_po == 3};
101    const bool neg_a{!po && iadd32i.neg_a != 0};
102    IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0);
103}
104
105} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
new file mode 100644
index 000000000..040cfc10f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Post-add shift applied between (A+B) and the final +C step of IADD3.
11enum class Shift : u64 {
12    None,
13    Right,
14    Left,
15};
// Which 16-bit half of a source operand participates in the addition.
16enum class Half : u64 {
17    All,
18    Lower,
19    Upper,
20};
21
// Extracts the selected half of `value`, zero-extended (is_signed is false).
22[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
23    constexpr bool is_signed{false};
24    switch (half) {
25    case Half::All:
26        return value;
27    case Half::Lower:
28        return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
29    case Half::Upper:
30        return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
31    }
32    throw NotImplementedException("Invalid half");
33}
34
// Applies the optional 16-bit shift. The right-shift case consults the carry
// of the add that produced `value` to re-inject the 33rd bit of the
// intermediate sum (added back as 0x10000 after the shift).
35[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
36    switch (shift) {
37    case Shift::None:
38        return value;
39    case Shift::Right: {
40        // 33-bit RS IADD3 edge case
41        const IR::U1 edge_case{ir.GetCarryFromOp(value)};
42        const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
43        return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
44    }
45    case Shift::Left:
46        return ir.ShiftLeftLogical(value, ir.Imm32(16));
47    }
48    throw NotImplementedException("Invalid shift");
49}
50
// Shared IADD3 path: negates each operand per bits 49-51, computes
// shift(A + B [+ carry]) + C, stores the result, and optionally writes CC.
51void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
52           Shift shift = Shift::None) {
53    union {
54        u64 insn;
55        BitField<0, 8, IR::Reg> dest_reg;
56        BitField<47, 1, u64> cc;
57        BitField<48, 1, u64> x;
58        BitField<49, 1, u64> neg_c;
59        BitField<50, 1, u64> neg_b;
60        BitField<51, 1, u64> neg_a;
61    } iadd3{insn};
    // NOTE(review): unlike the sibling decoders this union is not declared
    // const — presumably an oversight, behavior is unaffected.
62
63    if (iadd3.neg_a != 0) {
64        op_a = v.ir.INeg(op_a);
65    }
66    if (iadd3.neg_b != 0) {
67        op_b = v.ir.INeg(op_b);
68    }
69    if (iadd3.neg_c != 0) {
70        op_c = v.ir.INeg(op_c);
71    }
72    IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
73    if (iadd3.x != 0) {
74        // TODO: How does RS behave when X is set?
75        if (shift == Shift::Right) {
76            throw NotImplementedException("IADD3 X+RS");
77        }
78        const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
79        lhs_1 = v.ir.IAdd(lhs_1, carry);
80    }
81    const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)};
82    const IR::U32 result{v.ir.IAdd(lhs_2, op_c)};
83
84    v.X(iadd3.dest_reg, result);
85    if (iadd3.cc != 0) {
86        // TODO: How does CC behave when X is set?
87        if (iadd3.x != 0) {
88            throw NotImplementedException("IADD3 X+CC");
89        }
90        v.SetZFlag(v.ir.GetZeroFromOp(result));
91        v.SetSFlag(v.ir.GetSignFromOp(result));
92        v.SetCFlag(v.ir.GetCarryFromOp(result));
        // Overflow may come from either add: the final one, or an unsigned
        // wrap in the first sum (detected by lhs_1 < op_a).
93        const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)};
94        v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1));
95    }
96}
97} // Anonymous namespace
98
// Register form: the only variant that decodes per-operand half selection and
// the post-add shift mode.
99void TranslatorVisitor::IADD3_reg(u64 insn) {
100    union {
101        u64 insn;
102        BitField<37, 2, Shift> shift;
103        BitField<35, 2, Half> half_a;
104        BitField<33, 2, Half> half_b;
105        BitField<31, 2, Half> half_c;
106    } const iadd3{insn};
107
108    const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};
109    const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)};
110    const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)};
111    IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift);
112}
113
// cbuf/imm forms: no half selection or shift, defaulting to Shift::None.
114void TranslatorVisitor::IADD3_cbuf(u64 insn) {
115    IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn));
116}
117
118void TranslatorVisitor::IADD3_imm(u64 insn) {
119    IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn));
120}
121
122} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
new file mode 100644
index 000000000..ba6e01926
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp
@@ -0,0 +1,48 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
// ICMP: conditional select. Compares `operand` against zero with the encoded
// CompareOp (bits 49-51, signedness from bit 48) and writes src_reg when the
// comparison holds, otherwise src_a.
12void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) {
13    union {
14        u64 insn;
15        BitField<0, 8, IR::Reg> dest_reg;
16        BitField<8, 8, IR::Reg> src_reg;
17        BitField<48, 1, u64> is_signed;
18        BitField<49, 3, CompareOp> compare_op;
19    } const icmp{insn};
20
21    const IR::U32 zero{v.ir.Imm32(0)};
22    const bool is_signed{icmp.is_signed != 0};
23    const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)};
24
25    const IR::U32 src_reg{v.X(icmp.src_reg)};
26    const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
27
28    v.X(icmp.dest_reg, result);
29}
30} // Anonymous namespace
31
// Dispatchers: note the _rc/_cr forms swap which slot provides the selected
// value versus the compared operand.
32void TranslatorVisitor::ICMP_reg(u64 insn) {
33    ICMP(*this, insn, GetReg20(insn), GetReg39(insn));
34}
35
36void TranslatorVisitor::ICMP_rc(u64 insn) {
37    ICMP(*this, insn, GetReg39(insn), GetCbuf(insn));
38}
39
40void TranslatorVisitor::ICMP_cr(u64 insn) {
41    ICMP(*this, insn, GetCbuf(insn), GetReg39(insn));
42}
43
44void TranslatorVisitor::ICMP_imm(u64 insn) {
45    ICMP(*this, insn, GetImm20(insn), GetReg39(insn));
46}
47
48} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
new file mode 100644
index 000000000..8ce1aee04
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp
@@ -0,0 +1,80 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
// Chooses the extended (carry-aware, X mode) or plain integer comparison.
12IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13                   CompareOp compare_op, bool is_signed, bool x) {
14    return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15             : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
// ISET: compares src_reg against src_b, combines the result with a (possibly
// negated) predicate via the encoded BooleanOp, and writes either all-ones
// (or float 1.0 when .BF is set) or zero. Optionally updates Z/S/C/O.
18void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
19    union {
20        u64 insn;
21        BitField<0, 8, IR::Reg> dest_reg;
22        BitField<8, 8, IR::Reg> src_reg;
23        BitField<39, 3, IR::Pred> pred;
24        BitField<42, 1, u64> neg_pred;
25        BitField<43, 1, u64> x;
26        BitField<44, 1, u64> bf;
27        BitField<45, 2, BooleanOp> bop;
28        BitField<47, 1, u64> cc;
29        BitField<48, 1, u64> is_signed;
30        BitField<49, 3, CompareOp> compare_op;
31    } const iset{insn};
32
33    const IR::U32 src_a{v.X(iset.src_reg)};
34    const bool is_signed{iset.is_signed != 0};
35    const IR::U32 zero{v.ir.Imm32(0)};
36    const bool x{iset.x != 0};
37    const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)};
38
39    IR::U1 pred{v.ir.GetPred(iset.pred)};
40    if (iset.neg_pred != 0) {
41        pred = v.ir.LogicalNot(pred);
42    }
43    const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)};
44
    // Pass value: 0xFFFFFFFF normally, IEEE-754 1.0f when .BF (bool-as-float).
45    const IR::U32 one_mask{v.ir.Imm32(-1)};
46    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
47    const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one};
48    const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)};
49
50    v.X(iset.dest_reg, result);
51    if (iset.cc != 0) {
52        if (x) {
53            throw NotImplementedException("ISET.CC + X");
54        }
        // Flags reflect the stored result: Z from result==0, S from the
        // (non-.BF) pass value; C and O are always cleared.
55        const IR::U1 is_zero{v.ir.IEqual(result, zero)};
56        v.SetZFlag(is_zero);
57        if (iset.bf != 0) {
58            v.ResetSFlag();
59        } else {
60            v.SetSFlag(v.ir.LogicalNot(is_zero));
61        }
62        v.ResetCFlag();
63        v.ResetOFlag();
64    }
65}
66} // Anonymous namespace
67
// Dispatchers selecting the source of operand B.
68void TranslatorVisitor::ISET_reg(u64 insn) {
69    ISET(*this, insn, GetReg20(insn));
70}
71
72void TranslatorVisitor::ISET_cbuf(u64 insn) {
73    ISET(*this, insn, GetCbuf(insn));
74}
75
76void TranslatorVisitor::ISET_imm(u64 insn) {
77    ISET(*this, insn, GetImm20(insn));
78}
79
80} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
new file mode 100644
index 000000000..0b8119ddd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
// Destination float width selector (note: 0 is not a valid encoding here).
12enum class FloatFormat : u64 {
13    F16 = 1,
14    F32 = 2,
15    F64 = 3,
16};
17
// Source integer width selector; signedness comes from a separate bit.
18enum class IntFormat : u64 {
19    U8 = 0,
20    U16 = 1,
21    U32 = 2,
22    U64 = 3,
23};
24
// Field layout shared by all I2F forms.
25union Encoding {
26    u64 raw;
27    BitField<0, 8, IR::Reg> dest_reg;
28    BitField<8, 2, FloatFormat> float_format;
29    BitField<10, 2, IntFormat> int_format;
30    BitField<13, 1, u64> is_signed;
31    BitField<39, 2, FpRounding> fp_rounding;
32    BitField<41, 2, u64> selector;
33    BitField<47, 1, u64> cc;
34    BitField<45, 1, u64> neg;
35    BitField<49, 1, u64> abs;
36};
37
// True when the source integer is 64-bit, which changes operand decoding.
38bool Is64(u64 insn) {
39    return Encoding{insn}.int_format == IntFormat::U64;
40}
41
// Width in bits of the destination float format.
42int BitSize(FloatFormat format) {
43    switch (format) {
44    case FloatFormat::F16:
45        return 16;
46    case FloatFormat::F32:
47        return 32;
48    case FloatFormat::F64:
49        return 64;
50    }
51    throw NotImplementedException("Invalid float format {}", format);
52}
53
// Two's-complement absolute value of a sub-32-bit signed field, except that
// the most negative value is passed through unchanged (it has no positive
// counterpart at that width).
54IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) {
55    const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))};
56    const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))};
57    const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)};
58    const IR::U1 is_least{v.ir.IEqual(value, least_value)};
59    return IR::U32{v.ir.Select(is_least, value, absolute)};
60}
61
// Integer-to-float conversion. Extracts the selected byte/halfword (or whole
// word/dword), applies abs/neg modifiers, converts with the encoded rounding
// mode, and writes the result in the requested float format.
62void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
63    const Encoding i2f{insn};
64    if (i2f.cc != 0) {
65        throw NotImplementedException("I2F CC");
66    }
67    const bool is_signed{i2f.is_signed != 0};
68    int src_bitsize{};
69    switch (i2f.int_format) {
70    case IntFormat::U8:
        // Selector picks one of the four bytes of the source word.
71        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
72                                   v.ir.Imm32(8), is_signed);
73        if (i2f.abs != 0) {
74            src = SmallAbs(v, src, 8);
75        }
76        src_bitsize = 8;
77        break;
78    case IntFormat::U16:
        // Only byte-aligned halfword selectors (0 and 2) are valid.
79        if (i2f.selector == 1 || i2f.selector == 3) {
80            throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value());
81        }
82        src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8),
83                                   v.ir.Imm32(16), is_signed);
84        if (i2f.abs != 0) {
85            src = SmallAbs(v, src, 16);
86        }
87        src_bitsize = 16;
88        break;
89    case IntFormat::U32:
90    case IntFormat::U64:
91        if (i2f.selector != 0) {
92            throw NotImplementedException("Unexpected selector {}", i2f.selector.Value());
93        }
94        if (i2f.abs != 0 && is_signed) {
95            src = v.ir.IAbs(src);
96        }
97        src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32;
98        break;
99    }
    // Sub-32-bit sources were already widened by BitFieldExtract, so the IR
    // conversion always operates on a 32- or 64-bit value.
100    const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32};
101    const int dst_bitsize{BitSize(i2f.float_format)};
102    const IR::FpControl fp_control{
103        .no_contraction = false,
104        .rounding = CastFpRounding(i2f.fp_rounding),
105        .fmz_mode = IR::FmzMode::DontCare,
106    };
107    auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize),
108                                static_cast<size_t>(conversion_src_bitsize), is_signed, src,
109                                fp_control)};
110    if (i2f.neg != 0) {
111        if (i2f.abs != 0 || !is_signed) {
112            // We know the value is positive
113            value = v.ir.FPNeg(value);
114        } else {
115            // Only negate if the input isn't the lowest value
116            IR::U1 is_least;
117            if (src_bitsize == 64) {
118                is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min()));
119            } else if (src_bitsize == 32) {
120                is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min()));
121            } else {
122                const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))};
123                is_least = v.ir.IEqual(src, least_value);
124            }
125            value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))};
126        }
127    }
128    switch (i2f.float_format) {
129    case FloatFormat::F16: {
        // F16 results are packed into the low half of the register; the high
        // half is written as +0.0h.
130        const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
131        v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
132        break;
133    }
134    case FloatFormat::F32:
135        v.F(i2f.dest_reg, value);
136        break;
137    case FloatFormat::F64: {
        // F64 results occupy an even-aligned register pair (low word first).
138        if (!IR::IsAligned(i2f.dest_reg, 2)) {
139            throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
140        }
141        const IR::Value vector{v.ir.UnpackDouble2x32(value)};
142        for (int i = 0; i < 2; ++i) {
143            v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))});
144        }
145        break;
146    }
147    default:
148        throw NotImplementedException("Invalid float format {}", i2f.float_format.Value());
149    }
150}
151} // Anonymous namespace
152
// Dispatchers: 64-bit sources are assembled from a register pair / packed
// cbuf / packed immediate; 32-bit (and narrower) sources use the plain forms.
153void TranslatorVisitor::I2F_reg(u64 insn) {
154    if (Is64(insn)) {
155        union {
156            u64 raw;
157            BitField<20, 8, IR::Reg> reg;
158        } const value{insn};
159        const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))};
160        I2F(*this, insn, ir.PackUint2x32(regs));
161    } else {
162        I2F(*this, insn, GetReg20(insn));
163    }
164}
165
166void TranslatorVisitor::I2F_cbuf(u64 insn) {
167    if (Is64(insn)) {
168        I2F(*this, insn, GetPackedCbuf(insn));
169    } else {
170        I2F(*this, insn, GetCbuf(insn));
171    }
172}
173
174void TranslatorVisitor::I2F_imm(u64 insn) {
175    if (Is64(insn)) {
176        I2F(*this, insn, GetPackedImm20(insn));
177    } else {
178        I2F(*this, insn, GetImm20(insn));
179    }
180}
181
182} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
new file mode 100644
index 000000000..5feefc0ce
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp
@@ -0,0 +1,82 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Maximum shift amount / mode for SHF: 32-bit, 64-bit unsigned, or 64-bit
// signed (arithmetic right shift). Encoding 1 is documented as undefined.
11enum class MaxShift : u64 {
12    U32,
13    Undefined,
14    U64,
15    S64,
16};
17
// Performs the 64-bit shift: left-logical, right-arithmetic (signed mode), or
// right-logical, depending on direction and signedness.
18IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift,
19                    bool right_shift, bool is_signed) {
20    if (!right_shift) {
21        return ir.ShiftLeftLogical(packed_int, safe_shift);
22    }
23    if (is_signed) {
24        return ir.ShiftRightArithmetic(packed_int, safe_shift);
25    }
26    return ir.ShiftRightLogical(packed_int, safe_shift);
27}
28
// SHF funnel shift: packs {low (bits 0-7 register), high} into a u64, shifts
// it, and writes back one 32-bit half — the low word for right shifts, the
// high word for left shifts. Note the destination register field overlaps the
// low-bits source field (both bits 0-7).
29void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits,
30         bool right_shift) {
31    union {
32        u64 insn;
33        BitField<0, 8, IR::Reg> dest_reg;
34        BitField<0, 8, IR::Reg> lo_bits_reg;
35        BitField<37, 2, MaxShift> max_shift;
36        BitField<47, 1, u64> cc;
37        BitField<48, 2, u64> x_mode;
38        BitField<50, 1, u64> wrap;
39    } const shf{insn};
40
41    if (shf.cc != 0) {
42        throw NotImplementedException("SHF CC");
43    }
44    if (shf.x_mode != 0) {
45        throw NotImplementedException("SHF X Mode");
46    }
47    if (shf.max_shift == MaxShift::Undefined) {
48        throw NotImplementedException("SHF Use of undefined MaxShift value");
49    }
50    const IR::U32 low_bits{v.X(shf.lo_bits_reg)};
51    const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))};
    // Clamp or wrap the shift amount to the mode's maximum (32 or 63).
52    const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)};
53    const IR::U32 safe_shift{shf.wrap != 0
54                                 ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1)))
55                                 : v.ir.UMin(shift, max_shift)};
56
57    const bool is_signed{shf.max_shift == MaxShift::S64};
58    const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)};
59    const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)};
60
61    const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)};
62    v.X(shf.dest_reg, result);
63}
64} // Anonymous namespace
65
// Dispatchers: _l/_r select the direction; _reg/_imm select the shift source.
66void TranslatorVisitor::SHF_l_reg(u64 insn) {
67    SHF(*this, insn, GetReg20(insn), GetReg39(insn), false);
68}
69
70void TranslatorVisitor::SHF_l_imm(u64 insn) {
71    SHF(*this, insn, GetImm20(insn), GetReg39(insn), false);
72}
73
74void TranslatorVisitor::SHF_r_reg(u64 insn) {
75    SHF(*this, insn, GetReg20(insn), GetReg39(insn), true);
76}
77
78void TranslatorVisitor::SHF_r_imm(u64 insn) {
79    SHF(*this, insn, GetImm20(insn), GetReg39(insn), true);
80}
81
82} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
new file mode 100644
index 000000000..1badbacc4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
12 union {
13 u64 insn;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> src_reg;
16 BitField<39, 3, IR::Pred> pred;
17 BitField<42, 1, u64> neg_pred;
18 BitField<43, 2, u64> mode;
19 BitField<47, 1, u64> cc;
20 BitField<48, 1, u64> is_signed;
21 } const imnmx{insn};
22
23 if (imnmx.cc != 0) {
24 throw NotImplementedException("IMNMX CC");
25 }
26
27 if (imnmx.mode != 0) {
28 throw NotImplementedException("IMNMX.MODE");
29 }
30
31 const IR::U1 pred{v.ir.GetPred(imnmx.pred)};
32 const IR::U32 op_a{v.X(imnmx.src_reg)};
33 IR::U32 min;
34 IR::U32 max;
35
36 if (imnmx.is_signed != 0) {
37 min = IR::U32{v.ir.SMin(op_a, op_b)};
38 max = IR::U32{v.ir.SMax(op_a, op_b)};
39 } else {
40 min = IR::U32{v.ir.UMin(op_a, op_b)};
41 max = IR::U32{v.ir.UMax(op_a, op_b)};
42 }
43 if (imnmx.neg_pred != 0) {
44 std::swap(min, max);
45 }
46
47 const IR::U32 result{v.ir.Select(pred, min, max)};
48 v.X(imnmx.dest_reg, result);
49}
50} // Anonymous namespace
51
52void TranslatorVisitor::IMNMX_reg(u64 insn) {
53 IMNMX(*this, insn, GetReg20(insn));
54}
55
56void TranslatorVisitor::IMNMX_cbuf(u64 insn) {
57 IMNMX(*this, insn, GetCbuf(insn));
58}
59
60void TranslatorVisitor::IMNMX_imm(u64 insn) {
61 IMNMX(*this, insn, GetImm20(insn));
62}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
new file mode 100644
index 000000000..5ece7678d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp
@@ -0,0 +1,36 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<40, 1, u64> tilde;
16 } const popc{insn};
17
18 const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src);
19 const IR::U32 result = v.ir.BitCount(operand);
20 v.X(popc.dest_reg, result);
21}
22} // Anonymous namespace
23
24void TranslatorVisitor::POPC_reg(u64 insn) {
25 POPC(*this, insn, GetReg20(insn));
26}
27
28void TranslatorVisitor::POPC_cbuf(u64 insn) {
29 POPC(*this, insn, GetCbuf(insn));
30}
31
32void TranslatorVisitor::POPC_imm(u64 insn) {
33 POPC(*this, insn, GetImm20(insn));
34}
35
36} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
new file mode 100644
index 000000000..044671943
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp
@@ -0,0 +1,86 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// ISCADD: integer add with the first operand scaled by an immediate:
//   dest = (op_a << scale_imm) + op_b
// When both negation bits are set they are reinterpreted as the PO
// ("plus one") modifier instead of negating either operand.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b,
            u64 scale_imm) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> op_a;
    } const iscadd{insn};

    const bool po{neg_a && neg_b};
    IR::U32 op_a{v.X(iscadd.op_a)};
    if (po) {
        // When PO is present, add one
        op_b = v.ir.IAdd(op_b, v.ir.Imm32(1));
    } else {
        // When PO is not present, the bits are interpreted as negation
        if (neg_a) {
            op_a = v.ir.INeg(op_a);
        }
        if (neg_b) {
            op_b = v.ir.INeg(op_b);
        }
    }
    // With the operands already processed, scale A
    const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))};
    const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)};

    const IR::U32 result{v.ir.IAdd(scaled_a, op_b)};
    v.X(iscadd.dest_reg, result);

    if (cc) {
        // CC write-back: Z/S come from the final add. In PO mode the +1 was
        // folded into op_b by an earlier IAdd, so its carry/overflow are
        // OR-ed into the flags as well (op_b here refers to that IAdd op).
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        const IR::U1 carry{v.ir.GetCarryFromOp(result)};
        const IR::U1 overflow{v.ir.GetOverflowFromOp(result)};
        v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry);
        v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow);
    }
}
49
50void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
51 union {
52 u64 raw;
53 BitField<47, 1, u64> cc;
54 BitField<48, 1, u64> neg_b;
55 BitField<49, 1, u64> neg_a;
56 BitField<39, 5, u64> scale;
57 } const iscadd{insn};
58
59 ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale);
60}
61
62} // Anonymous namespace
63
64void TranslatorVisitor::ISCADD_reg(u64 insn) {
65 ISCADD(*this, insn, GetReg20(insn));
66}
67
68void TranslatorVisitor::ISCADD_cbuf(u64 insn) {
69 ISCADD(*this, insn, GetCbuf(insn));
70}
71
72void TranslatorVisitor::ISCADD_imm(u64 insn) {
73 ISCADD(*this, insn, GetImm20(insn));
74}
75
76void TranslatorVisitor::ISCADD32I(u64 insn) {
77 union {
78 u64 raw;
79 BitField<52, 1, u64> cc;
80 BitField<53, 5, u64> scale;
81 } const iscadd{insn};
82
83 return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale);
84}
85
86} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
new file mode 100644
index 000000000..bee10e5b9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp
@@ -0,0 +1,58 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2,
13 CompareOp compare_op, bool is_signed, bool x) {
14 return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed)
15 : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed);
16}
17
// ISETP: integer compare-and-set-predicate. Computes A cmp B, combines it
// with an auxiliary predicate through a boolean op, and writes two predicate
// registers: dest_a gets (cmp BOP p), dest_b gets (!cmp BOP p).
// Note the bit layout: bits 0-2 encode dest_pred_b and bits 3-5 dest_pred_a.
void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> x;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const isetp{insn};

    const bool is_signed{isetp.is_signed != 0};
    const bool x{isetp.x != 0};
    const BooleanOp bop{isetp.bop};
    const CompareOp compare_op{isetp.compare_op};
    const IR::U32 op_a{v.X(isetp.src_reg_a)};
    // .X selects the extended (carry-aware) comparison.
    const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)};
    const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)};
    const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)};
    // The second destination combines the negated comparison with the same
    // auxiliary predicate.
    const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)};
    v.ir.SetPred(isetp.dest_pred_a, result_a);
    v.ir.SetPred(isetp.dest_pred_b, result_b);
}
44} // Anonymous namespace
45
46void TranslatorVisitor::ISETP_reg(u64 insn) {
47 ISETP(*this, insn, GetReg20(insn));
48}
49
50void TranslatorVisitor::ISETP_cbuf(u64 insn) {
51 ISETP(*this, insn, GetCbuf(insn));
52}
53
54void TranslatorVisitor::ISETP_imm(u64 insn) {
55 ISETP(*this, insn, GetImm20(insn));
56}
57
58} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
new file mode 100644
index 000000000..20af68852
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// SHL: logical shift left.
// With .W the shift amount wraps modulo 32; without it, shifts >= 32 must
// produce 0, which is emulated with an explicit range check because both
// SPIR-V and GLASM leave out-of-range shifts undefined (see notes below).
void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> w;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
    } const shl{insn};

    // The .X and .CC modifiers are not implemented.
    if (shl.x != 0) {
        throw NotImplementedException("SHL.X");
    }
    if (shl.cc != 0) {
        throw NotImplementedException("SHL.CC");
    }
    const IR::U32 base{v.X(shl.src_reg_a)};
    IR::U32 result;
    if (shl.w != 0) {
        // When .W is set, the shift value is wrapped
        // To emulate this we just have to wrap it ourselves.
        const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
        result = v.ir.ShiftLeftLogical(base, shift);
    } else {
        // When .W is not set, the shift value is clamped between 0 and 32.
        // To emulate this we have to have in mind the special shift of 32, that evaluates as 0.
        // We can safely evaluate an out of bounds shift according to the SPIR-V specification:
        //
        // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
        // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than
        // or equal to the bit width of the components of Base."
        //
        // And on the GLASM specification it is also safe to evaluate out of bounds:
        //
        // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
        // "The results of a shift operation ("<<") are undefined if the value of the second operand
        // is negative, or greater than or equal to the number of bits in the first operand."
        //
        // Emphasis on undefined results in contrast to undefined behavior.
        //
        const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
        const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)};
        result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))};
    }
    v.X(shl.dest_reg, result);
}
57} // Anonymous namespace
58
59void TranslatorVisitor::SHL_reg(u64 insn) {
60 SHL(*this, insn, GetReg20(insn));
61}
62
63void TranslatorVisitor::SHL_cbuf(u64 insn) {
64 SHL(*this, insn, GetCbuf(insn));
65}
66
67void TranslatorVisitor::SHL_imm(u64 insn) {
68 SHL(*this, insn, GetImm20(insn));
69}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
new file mode 100644
index 000000000..be00bb605
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// SHR: shift right, logical or arithmetic (.is_signed), with optional bit
// reversal of the source (.brev) and optional wrapping of the shift amount
// modulo 32 (.W / is_wrapped).
void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> is_wrapped;
        BitField<40, 1, u64> brev;
        BitField<43, 1, u64> xmode;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_signed;
    } const shr{insn};

    // The .XMODE and .CC modifiers are not implemented.
    if (shr.xmode != 0) {
        throw NotImplementedException("SHR.XMODE");
    }
    if (shr.cc != 0) {
        throw NotImplementedException("SHR.CC");
    }

    IR::U32 base{v.X(shr.src_reg_a)};
    if (shr.brev == 1) {
        base = v.ir.BitReverse(base);
    }
    IR::U32 result;
    // .W wraps the shift into [0, 31]; otherwise the raw amount is used and
    // the out-of-range case is patched up below.
    const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31));
    if (shr.is_signed == 1) {
        result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)};
    } else {
        result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)};
    }

    if (shr.is_wrapped == 0) {
        // Clamp shifts >= 32: logical shifts yield 0, arithmetic shifts of a
        // negative value yield all ones. NOTE(review): the sign test reads
        // 'result', which is only well-defined for in-range shifts (where it
        // matches the sign of 'base' under arithmetic shift); for the
        // out-of-range case it assumes backends still propagate the sign —
        // confirm this holds on every backend.
        const IR::U32 zero{v.ir.Imm32(0)};
        const IR::U32 safe_bits{v.ir.Imm32(32)};

        const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)};
        const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)};
        const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)};
        result = IR::U32{v.ir.Select(is_safe, result, clamped_value)};
    }
    v.X(shr.dest_reg, result);
}
53} // Anonymous namespace
54
55void TranslatorVisitor::SHR_reg(u64 insn) {
56 SHR(*this, insn, GetReg20(insn));
57}
58
59void TranslatorVisitor::SHR_cbuf(u64 insn) {
60 SHR(*this, insn, GetCbuf(insn));
61}
62
63void TranslatorVisitor::SHR_imm(u64 insn) {
64 SHR(*this, insn, GetImm20(insn));
65}
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
new file mode 100644
index 000000000..2932cdc42
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
@@ -0,0 +1,135 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Source selection for the XMAD addend (operand C).
enum class SelectMode : u64 {
    Default,
    CLO,  // use only the low half of C
    CHI,  // use only the high half of C
    CSFU, // not implemented (rejected in XMAD)
    CBCC, // addend is (B << 16) + C
};

enum class Half : u64 {
    H0, // Least-significant bits (15:0)
    H1, // Most-significant bits (31:16)
};

// Extracts one 16-bit half of 'src' into a 32-bit value, sign- or
// zero-extending according to 'is_signed'.
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
    const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
    return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
}
28
29void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
30 SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
31 union {
32 u64 raw;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<8, 8, IR::Reg> src_reg_a;
35 BitField<47, 1, u64> cc;
36 BitField<48, 1, u64> is_a_signed;
37 BitField<49, 1, u64> is_b_signed;
38 BitField<53, 1, Half> half_a;
39 } const xmad{insn};
40
41 if (x) {
42 throw NotImplementedException("XMAD X");
43 }
44 const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
45 const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};
46
47 IR::U32 product{v.ir.IMul(op_a, op_b)};
48 if (psl) {
49 // .PSL shifts the product 16 bits
50 product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
51 }
52 const IR::U32 op_c{[&]() -> IR::U32 {
53 switch (select_mode) {
54 case SelectMode::Default:
55 return src_c;
56 case SelectMode::CLO:
57 return ExtractHalf(v, src_c, Half::H0, false);
58 case SelectMode::CHI:
59 return ExtractHalf(v, src_c, Half::H1, false);
60 case SelectMode::CBCC:
61 return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
62 case SelectMode::CSFU:
63 throw NotImplementedException("XMAD CSFU");
64 }
65 throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
66 }()};
67 IR::U32 result{v.ir.IAdd(product, op_c)};
68 if (mrg) {
69 // .MRG inserts src_b [15:0] into result's [31:16].
70 const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
71 result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
72 }
73 if (xmad.cc) {
74 throw NotImplementedException("XMAD CC");
75 }
76 // Store result
77 v.X(xmad.dest_reg, result);
78}
79} // Anonymous namespace
80
81void TranslatorVisitor::XMAD_reg(u64 insn) {
82 union {
83 u64 raw;
84 BitField<35, 1, Half> half_b;
85 BitField<36, 1, u64> psl;
86 BitField<37, 1, u64> mrg;
87 BitField<38, 1, u64> x;
88 BitField<50, 3, SelectMode> select_mode;
89 } const xmad{insn};
90
91 XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
92 xmad.mrg != 0, xmad.x != 0);
93}
94
95void TranslatorVisitor::XMAD_rc(u64 insn) {
96 union {
97 u64 raw;
98 BitField<50, 2, SelectMode> select_mode;
99 BitField<52, 1, Half> half_b;
100 BitField<54, 1, u64> x;
101 } const xmad{insn};
102
103 XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false,
104 xmad.x != 0);
105}
106
107void TranslatorVisitor::XMAD_cr(u64 insn) {
108 union {
109 u64 raw;
110 BitField<50, 2, SelectMode> select_mode;
111 BitField<52, 1, Half> half_b;
112 BitField<54, 1, u64> x;
113 BitField<55, 1, u64> psl;
114 BitField<56, 1, u64> mrg;
115 } const xmad{insn};
116
117 XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0,
118 xmad.mrg != 0, xmad.x != 0);
119}
120
121void TranslatorVisitor::XMAD_imm(u64 insn) {
122 union {
123 u64 raw;
124 BitField<20, 16, u64> src_b;
125 BitField<36, 1, u64> psl;
126 BitField<37, 1, u64> mrg;
127 BitField<38, 1, u64> x;
128 BitField<50, 3, SelectMode> select_mode;
129 } const xmad{insn};
130
131 XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
132 Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
133}
134
135} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
new file mode 100644
index 000000000..53e8d8923
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp
@@ -0,0 +1,126 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Operand widths for I2I: byte (8), short (16), or word (32).
enum class IntegerWidth : u64 {
    Byte,
    Short,
    Word,
};

// Returns the bit count of the given width as an immediate (8/16/32).
[[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) {
    switch (width) {
    case IntegerWidth::Byte:
        return ir.Imm32(8);
    case IntegerWidth::Short:
        return ir.Imm32(16);
    case IntegerWidth::Word:
        return ir.Imm32(32);
    default:
        throw NotImplementedException("Invalid width {}", width);
    }
}

// Truncates 'src' to the destination width by extracting its low bits
// (no saturation; the extracted field is zero-extended).
[[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src,
                                     IntegerWidth dst_width) {
    const IR::U32 zero{ir.Imm32(0)};
    const IR::U32 count{WidthSize(ir, dst_width)};
    return ir.BitFieldExtract(src, zero, count, false);
}
36
// Clamps 'src' to the representable range of the destination width.
// A signed destination fed from a signed source clamps to the full signed
// range; in every other combination the lower bound is 0. A signed source
// headed to an unsigned destination is first clamped to be non-negative.
[[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width,
                                      bool dst_signed, bool src_signed) {
    IR::U32 min{};
    IR::U32 max{};
    const IR::U32 zero{ir.Imm32(0)};
    switch (dst_width) {
    case IntegerWidth::Byte:
        min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; // -128, sign-extended
        max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff);
        break;
    case IntegerWidth::Short:
        min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; // -32768, sign-extended
        max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff);
        break;
    case IntegerWidth::Word:
        min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; // INT32_MIN
        max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff);
        break;
    default:
        throw NotImplementedException("Invalid width {}", dst_width);
    }
    const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src};
    return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max);
}
61
// I2I: integer-to-integer conversion with optional sub-word selection,
// absolute value, negation, saturation, and CC write-back.
void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 2, IntegerWidth> dst_fmt;
        BitField<12, 1, u64> dst_fmt_sign;
        BitField<10, 2, IntegerWidth> src_fmt;
        BitField<13, 1, u64> src_fmt_sign;
        BitField<41, 3, u64> selector;
        BitField<45, 1, u64> neg;
        BitField<47, 1, u64> cc;
        BitField<49, 1, u64> abs;
        BitField<50, 1, u64> sat;
    } const i2i{insn};

    // The selector picks which byte of the source the converted field starts
    // at; only offsets compatible with the source width are accepted.
    if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) {
        throw NotImplementedException("16-bit source format incompatible with selector {}",
                                      i2i.selector);
    }
    if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) {
        throw NotImplementedException("32-bit source format incompatible with selector {}",
                                      i2i.selector);
    }

    const s32 selector{static_cast<s32>(i2i.selector)};
    const IR::U32 offset{v.ir.Imm32(selector * 8)}; // selector counts bytes
    const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)};
    const bool src_signed{i2i.src_fmt_sign != 0};
    const bool dst_signed{i2i.dst_fmt_sign != 0};
    const bool sat{i2i.sat != 0};

    IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)};
    // ABS is applied before NEG, so .ABS.NEG yields -|x|.
    if (i2i.abs != 0) {
        src_values = v.ir.IAbs(src_values);
    }
    if (i2i.neg != 0) {
        src_values = v.ir.INeg(src_values);
    }
    const IR::U32 result{
        sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed)
            : ConvertInteger(v.ir, src_values, i2i.dst_fmt)};

    v.X(i2i.dest_reg, result);
    if (i2i.cc != 0) {
        // CC: zero/sign come from the result; carry and overflow are cleared.
        v.SetZFlag(v.ir.GetZeroFromOp(result));
        v.SetSFlag(v.ir.GetSignFromOp(result));
        v.ResetCFlag();
        v.ResetOFlag();
    }
}
112} // Anonymous namespace
113
114void TranslatorVisitor::I2I_reg(u64 insn) {
115 I2I(*this, insn, GetReg20(insn));
116}
117
118void TranslatorVisitor::I2I_cbuf(u64 insn) {
119 I2I(*this, insn, GetCbuf(insn));
120}
121
122void TranslatorVisitor::I2I_imm(u64 insn) {
123 I2I(*this, insn, GetImm20(insn));
124}
125
126} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
new file mode 100644
index 000000000..9b85f8059
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// ISBERD addressing mode (bits 33-34 of the encoding); only Default is
// implemented.
enum class Mode : u64 {
    Default,
    Patch,
    Prim,
    Attr,
};

// ISBERD output shift modifier (bits 47-48); only Default is implemented.
enum class Shift : u64 {
    Default,
    U16,
    B32,
};
23
24} // Anonymous namespace
25
// ISBERD: internal stage buffer entry read.
// Stub: every unimplemented modifier is rejected up front and the source
// register is simply copied through to the destination.
void TranslatorVisitor::ISBERD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<31, 1, u64> skew;
        BitField<32, 1, u64> o;
        BitField<33, 2, Mode> mode;
        BitField<47, 2, Shift> shift;
    } const isberd{insn};

    if (isberd.skew != 0) {
        throw NotImplementedException("SKEW");
    }
    if (isberd.o != 0) {
        throw NotImplementedException("O");
    }
    if (isberd.mode != Mode::Default) {
        throw NotImplementedException("Mode {}", isberd.mode.Value());
    }
    if (isberd.shift != Shift::Default) {
        throw NotImplementedException("Shift {}", isberd.shift.Value());
    }
    // TODO: implement the real buffer read; for now pass the value through.
    LOG_WARNING(Shader, "(STUBBED) called");
    X(isberd.dest_reg, X(isberd.src_reg));
}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
new file mode 100644
index 000000000..2300088e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp
@@ -0,0 +1,62 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"
9
10namespace Shader::Maxwell {
11using namespace LDC;
12namespace {
13std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index,
14 const IR::U32& reg, const IR::U32& imm) {
15 switch (mode) {
16 case Mode::Default:
17 return {imm_index, ir.IAdd(reg, imm)};
18 default:
19 break;
20 }
21 throw NotImplementedException("Mode {}", mode);
22}
23} // Anonymous namespace
24
// LDC: load from constant buffer. Resolves the addressing slot, then emits a
// GetCbuf of the encoded size; sub-word sizes select zero- (U) or sign- (S)
// extension, and B64 fills an aligned register pair from a two-element load.
void TranslatorVisitor::LDC(u64 insn) {
    const Encoding ldc{insn};
    const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))};
    const IR::U32 reg{X(ldc.src_reg)};
    const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))};
    const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
    switch (ldc.size) {
    case Size::U8:
        X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
        break;
    case Size::S8:
        X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
        break;
    case Size::U16:
        X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
        break;
    case Size::S16:
        X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
        break;
    case Size::B32:
        X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
        break;
    case Size::B64: {
        // 64-bit loads write two consecutive registers.
        if (!IR::IsAligned(ldc.dest_reg, 2)) {
            throw NotImplementedException("Unaligned destination register");
        }
        const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
        for (int i = 0; i < 2; ++i) {
            X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        break;
    }
    default:
        throw NotImplementedException("Invalid size {}", ldc.size.Value());
    }
}
61
62} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
new file mode 100644
index 000000000..3074ea0e3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h
@@ -0,0 +1,39 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/reg.h"
10
11namespace Shader::Maxwell::LDC {
12
// LDC addressing modes (bits 44-45); only Default is implemented by the
// translator.
enum class Mode : u64 {
    Default,
    IL,
    IS,
    ISL,
};

// LDC load sizes (bits 48-50). U/S prefixes request zero- or sign-extension
// of sub-word loads; B32/B64 are raw 32- and 64-bit loads.
enum class Size : u64 {
    U8,
    S8,
    U16,
    S16,
    B32,
    B64,
};

// Raw bitfield view of an LDC instruction word.
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;
    BitField<8, 8, IR::Reg> src_reg;
    BitField<20, 16, s64> offset; // signed immediate offset
    BitField<36, 5, u64> index;   // immediate constant buffer index
    BitField<44, 2, Mode> mode;
    BitField<48, 3, Size> size;
};
38
39} // namespace Shader::Maxwell::LDC
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
new file mode 100644
index 000000000..4a0f04e47
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp
@@ -0,0 +1,108 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// LEA.HI: effective address using the HIGH word of a scaled 64-bit offset.
// The pair {offset_hi:offset_lo} (optionally negated) is shifted right by
// (32 - scale); the low word of that result — i.e. the upper word of
// (offset64 << scale) — is added to 'base'.
void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale,
            bool neg, bool x) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const lea{insn};

    // Extended (.X), predicated, and CC-writing forms are not implemented.
    if (x) {
        throw NotImplementedException("LEA.HI X");
    }
    if (lea.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.HI Pred");
    }
    if (lea.cc != 0) {
        throw NotImplementedException("LEA.HI CC");
    }

    const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
    const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))};
    const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset};

    const s32 hi_scale{32 - static_cast<s32>(scale)};
    const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))};
    // Keep only the low word of the shifted 64-bit value.
    const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)};

    IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)};
    v.X(lea.dest_reg, result);
}
42
// LEA.LO: effective address from the LOW word of the scaled offset:
//   dest = base + ((optionally negated) offset_lo << scale)
void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> offset_lo_reg;
        BitField<39, 5, u64> scale;
        BitField<45, 1, u64> neg;
        BitField<46, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> pred;
    } const lea{insn};
    // Extended (.X), predicated, and CC-writing forms are not implemented.
    if (lea.x != 0) {
        throw NotImplementedException("LEA.LO X");
    }
    if (lea.pred != IR::Pred::PT) {
        throw NotImplementedException("LEA.LO Pred");
    }
    if (lea.cc != 0) {
        throw NotImplementedException("LEA.LO CC");
    }

    const IR::U32 offset_lo{v.X(lea.offset_lo_reg)};
    const s32 scale{static_cast<s32>(lea.scale)};
    // Negation is applied before scaling.
    const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo};
    const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))};

    IR::U32 result{v.ir.IAdd(base, scaled_offset)};
    v.X(lea.dest_reg, result);
}
72} // Anonymous namespace
73
74void TranslatorVisitor::LEA_hi_reg(u64 insn) {
75 union {
76 u64 insn;
77 BitField<28, 5, u64> scale;
78 BitField<37, 1, u64> neg;
79 BitField<38, 1, u64> x;
80 } const lea{insn};
81
82 LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
83}
84
85void TranslatorVisitor::LEA_hi_cbuf(u64 insn) {
86 union {
87 u64 insn;
88 BitField<51, 5, u64> scale;
89 BitField<56, 1, u64> neg;
90 BitField<57, 1, u64> x;
91 } const lea{insn};
92
93 LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0);
94}
95
96void TranslatorVisitor::LEA_lo_reg(u64 insn) {
97 LEA_lo(*this, insn, GetReg20(insn));
98}
99
100void TranslatorVisitor::LEA_lo_cbuf(u64 insn) {
101 LEA_lo(*this, insn, GetCbuf(insn));
102}
103
104void TranslatorVisitor::LEA_lo_imm(u64 insn) {
105 LEA_lo(*this, insn, GetImm20(insn));
106}
107
108} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
new file mode 100644
index 000000000..924fb7a40
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp
@@ -0,0 +1,196 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/ir/ir_emitter.h"
9#include "shader_recompiler/frontend/maxwell/opcodes.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// Number of consecutive 32-bit elements accessed by ALD/AST.
enum class Size : u64 {
    B32,
    B64,
    B96,
    B128,
};

// IPA interpolation modifier; Multiply scales the interpolated value by a
// register operand (see the InterpolationMode::Multiply branch in IPA).
enum class InterpolationMode : u64 {
    Pass,
    Multiply,
    Constant,
    Sc,
};

// IPA sampling location selector (decoded below but not acted upon yet).
enum class SampleMode : u64 {
    Default,
    Centroid,
    Offset,
};
33
34u32 NumElements(Size size) {
35 switch (size) {
36 case Size::B32:
37 return 1;
38 case Size::B64:
39 return 2;
40 case Size::B96:
41 return 3;
42 case Size::B128:
43 return 4;
44 }
45 throw InvalidArgument("Invalid size {}", size);
46}
47
48template <typename F>
49void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) {
50 const IR::U32 index_value{v.X(index_reg)};
51 for (u32 element = 0; element < num_elements; ++element) {
52 const IR::U32 final_offset{
53 element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}};
54 f(element, final_offset);
55 }
56}
57
58} // Anonymous namespace
59
60void TranslatorVisitor::ALD(u64 insn) {
61 union {
62 u64 raw;
63 BitField<0, 8, IR::Reg> dest_reg;
64 BitField<8, 8, IR::Reg> index_reg;
65 BitField<20, 10, u64> absolute_offset;
66 BitField<20, 11, s64> relative_offset;
67 BitField<39, 8, IR::Reg> vertex_reg;
68 BitField<32, 1, u64> o;
69 BitField<31, 1, u64> patch;
70 BitField<47, 2, Size> size;
71 } const ald{insn};
72
73 const u64 offset{ald.absolute_offset.Value()};
74 if (offset % 4 != 0) {
75 throw NotImplementedException("Unaligned absolute offset {}", offset);
76 }
77 const IR::U32 vertex{X(ald.vertex_reg)};
78 const u32 num_elements{NumElements(ald.size)};
79 if (ald.index_reg == IR::Reg::RZ) {
80 for (u32 element = 0; element < num_elements; ++element) {
81 if (ald.patch != 0) {
82 const IR::Patch patch{offset / 4 + element};
83 F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch));
84 } else {
85 const IR::Attribute attr{offset / 4 + element};
86 F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex));
87 }
88 }
89 return;
90 }
91 if (ald.patch != 0) {
92 throw NotImplementedException("Indirect patch read");
93 }
94 HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
95 F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex));
96 });
97}
98
99void TranslatorVisitor::AST(u64 insn) {
100 union {
101 u64 raw;
102 BitField<0, 8, IR::Reg> src_reg;
103 BitField<8, 8, IR::Reg> index_reg;
104 BitField<20, 10, u64> absolute_offset;
105 BitField<20, 11, s64> relative_offset;
106 BitField<31, 1, u64> patch;
107 BitField<39, 8, IR::Reg> vertex_reg;
108 BitField<47, 2, Size> size;
109 } const ast{insn};
110
111 if (ast.index_reg != IR::Reg::RZ) {
112 throw NotImplementedException("Indexed store");
113 }
114 const u64 offset{ast.absolute_offset.Value()};
115 if (offset % 4 != 0) {
116 throw NotImplementedException("Unaligned absolute offset {}", offset);
117 }
118 const IR::U32 vertex{X(ast.vertex_reg)};
119 const u32 num_elements{NumElements(ast.size)};
120 if (ast.index_reg == IR::Reg::RZ) {
121 for (u32 element = 0; element < num_elements; ++element) {
122 if (ast.patch != 0) {
123 const IR::Patch patch{offset / 4 + element};
124 ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element)));
125 } else {
126 const IR::Attribute attr{offset / 4 + element};
127 ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex);
128 }
129 }
130 return;
131 }
132 if (ast.patch != 0) {
133 throw NotImplementedException("Indexed tessellation patch store");
134 }
135 HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) {
136 ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex);
137 });
138}
139
void TranslatorVisitor::IPA(u64 insn) {
    // IPA is the instruction used to read varyings from a fragment shader.
    // gl_FragCoord is mapped to the gl_Position attribute.
    // It yields unknown results when used outside of the fragment shader stage.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> index_reg;
        BitField<20, 8, IR::Reg> multiplier;
        BitField<30, 8, IR::Attribute> attribute;
        BitField<38, 1, u64> idx;
        BitField<51, 1, u64> sat;
        BitField<52, 2, SampleMode> sample_mode;
        BitField<54, 2, InterpolationMode> interpolation_mode;
    } const ipa{insn};

    // Indexed IPAs are used for indexed varyings.
    // For example:
    //
    // in vec4 colors[4];
    // uniform int idx;
    // void main() {
    //     gl_FragColor = colors[idx];
    // }
    const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ};
    const IR::Attribute attribute{ipa.attribute};
    IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg))
                             : ir.GetAttribute(attribute)};
    if (IR::IsGeneric(attribute)) {
        // Generic attributes flagged as perspective-interpolated in the
        // program header are multiplied by gl_Position.w — presumably to undo
        // the perspective division applied during interpolation; confirm
        // against the rest of the pipeline. NOTE(review): the indexed path
        // above skips this correction since the attribute is not known at
        // compile time — verify this is intended.
        const ProgramHeader& sph{env.SPH()};
        const u32 attr_index{IR::GenericAttributeIndex(attribute)};
        const u32 element{static_cast<u32>(attribute) % 4};
        const std::array input_map{sph.ps.GenericInputMap(attr_index)};
        const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective};
        if (is_perspective) {
            const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)};
            value = ir.FPMul(value, position_w);
        }
    }
    if (ipa.interpolation_mode == InterpolationMode::Multiply) {
        value = ir.FPMul(value, F(ipa.multiplier));
    }

    // Saturated IPAs are generally generated out of clamped varyings.
    // For example: clamp(some_varying, 0.0, 1.0)
    const bool is_saturated{ipa.sat != 0};
    if (is_saturated) {
        if (attribute == IR::Attribute::FrontFace) {
            throw NotImplementedException("IPA.SAT on FrontFace");
        }
        value = ir.FPSaturate(value);
    }

    F(ipa.dest_reg, value);
}
195
196} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
new file mode 100644
index 000000000..d2a1dbf61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,218 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Access width for local/shared memory loads and stores; U*/S* select
// zero/sign extension for the sub-word sizes (see GetSize below).
enum class Size : u64 {
    U8,
    S8,
    U16,
    S16,
    B32,
    B64,
    B128,
};
20
21IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
22 union {
23 u64 raw;
24 BitField<8, 8, IR::Reg> offset_reg;
25 BitField<20, 24, u64> absolute_offset;
26 BitField<20, 24, s64> relative_offset;
27 } const encoding{insn};
28
29 if (encoding.offset_reg == IR::Reg::RZ) {
30 return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
31 } else {
32 const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
33 return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
34 }
35}
36
37std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) {
38 const IR::U32 offset{Offset(v, insn)};
39 if (offset.IsImmediate()) {
40 return {v.ir.Imm32(offset.U32() / 4), offset};
41 } else {
42 return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset};
43 }
44}
45
46std::pair<int, bool> GetSize(u64 insn) {
47 union {
48 u64 raw;
49 BitField<48, 3, Size> size;
50 } const encoding{insn};
51
52 switch (encoding.size) {
53 case Size::U8:
54 return {8, false};
55 case Size::S8:
56 return {8, true};
57 case Size::U16:
58 return {16, false};
59 case Size::S16:
60 return {16, true};
61 case Size::B32:
62 return {32, false};
63 case Size::B64:
64 return {64, false};
65 case Size::B128:
66 return {128, false};
67 default:
68 throw NotImplementedException("Invalid size {}", encoding.size.Value());
69 }
70}
71
72IR::Reg Reg(u64 insn) {
73 union {
74 u64 raw;
75 BitField<0, 8, IR::Reg> reg;
76 } const encoding{insn};
77
78 return encoding.reg;
79}
80
// Bit position of the addressed byte within its 32-bit word:
// (offset % 4) * 8, computed as (offset << 3) & 24.
IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
}

// Bit position of the addressed 16-bit half within its 32-bit word; the mask
// restricts the result to 0 or 16.
IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
}
88
// Load one word of local memory, yielding zero when the byte offset is at or
// past the declared local memory size.
IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) {
    const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())};
    const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)};
    return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))};
}
94} // Anonymous namespace
95
void TranslatorVisitor::LDL(u64 insn) {
    // Load from thread-local memory. Out-of-bounds reads yield zero (see
    // LoadLocal); sub-word loads extract the byte/half from its 32-bit word.
    const auto [word_offset, offset]{WordOffset(*this, insn)};
    const IR::U32 word{LoadLocal(*this, word_offset, offset)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    switch (bit_size) {
    case 8: {
        // Extract the addressed byte, sign- or zero-extending per the size.
        const IR::U32 bit{ByteOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed));
        break;
    }
    case 16: {
        // Extract the addressed half-word.
        const IR::U32 bit{ShortOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed));
        break;
    }
    case 32:
    case 64:
    case 128:
        if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        // First word goes to the base register; wider loads fill consecutive
        // registers with consecutive words, each individually bounds-checked.
        X(dest, word);
        for (int i = 1; i < bit_size / 32; ++i) {
            const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))};
            const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))};
            X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset));
        }
        break;
    }
}
127
128void TranslatorVisitor::LDS(u64 insn) {
129 const IR::U32 offset{Offset(*this, insn)};
130 const IR::Reg dest{Reg(insn)};
131 const auto [bit_size, is_signed]{GetSize(insn)};
132 const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
133 switch (bit_size) {
134 case 8:
135 case 16:
136 case 32:
137 X(dest, IR::U32{value});
138 break;
139 case 64:
140 case 128:
141 if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) {
142 throw NotImplementedException("Unaligned destination register {}", dest);
143 }
144 for (int element = 0; element < bit_size / 32; ++element) {
145 X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))});
146 }
147 break;
148 }
149}
150
void TranslatorVisitor::STL(u64 insn) {
    // Store to thread-local memory. Sub-word stores read-modify-write the
    // containing 32-bit word.
    const auto [word_offset, offset]{WordOffset(*this, insn)};
    if (offset.IsImmediate()) {
        // Compile-time constant offsets past the local memory size are
        // dropped with a warning instead of emitting a store.
        // TODO: Support storing out of bounds at runtime
        if (offset.U32() >= env.LocalMemorySize()) {
            LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping",
                        offset.U32(), env.LocalMemorySize());
            return;
        }
    }
    const IR::Reg reg{Reg(insn)};
    const IR::U32 src{X(reg)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8: {
        // Insert the low byte of src at the addressed byte position.
        const IR::U32 bit{ByteOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 16: {
        // Insert the low half of src at the addressed half-word position.
        const IR::U32 bit{ShortOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 32:
    case 64:
    case 128:
        if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) {
            throw NotImplementedException("Unaligned source register");
        }
        // Write the base register, then one word per additional register.
        ir.WriteLocal(word_offset, src);
        for (int i = 1; i < bit_size / 32; ++i) {
            ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
        }
        break;
    }
}
190
191void TranslatorVisitor::STS(u64 insn) {
192 const IR::U32 offset{Offset(*this, insn)};
193 const IR::Reg reg{Reg(insn)};
194 const int bit_size{GetSize(insn).first};
195 switch (bit_size) {
196 case 8:
197 case 16:
198 case 32:
199 ir.WriteShared(bit_size, offset, X(reg));
200 break;
201 case 64:
202 if (!IR::IsAligned(reg, 2)) {
203 throw NotImplementedException("Unaligned source register {}", reg);
204 }
205 ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
206 break;
207 case 128: {
208 if (!IR::IsAligned(reg, 2)) {
209 throw NotImplementedException("Unaligned source register {}", reg);
210 }
211 const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
212 ir.WriteShared(128, offset, vector);
213 break;
214 }
215 }
216}
217
218} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
new file mode 100644
index 000000000..36c5cff2f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
@@ -0,0 +1,184 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
// Width and extension mode of an LDG destination.
enum class LoadSize : u64 {
    U8,  // Zero-extend
    S8,  // Sign-extend
    U16, // Zero-extend
    S16, // Sign-extend
    B32,
    B64,
    B128,
    U128, // ???
};

// Width of an STG source.
enum class StoreSize : u64 {
    U8,  // Zero-extend
    S8,  // Sign-extend
    U16, // Zero-extend
    S16, // Sign-extend
    B32,
    B64,
    B128,
};

// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
// Load cache policy hints; decoded below but not acted upon in this translator.
enum class LoadCache : u64 {
    CA, // Cache at all levels, likely to be accessed again
    CG, // Cache at global level (cache in L2 and below, not L1)
    CI, // ???
    CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
};

// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
// Store cache policy hints; decoded below but not acted upon in this translator.
enum class StoreCache : u64 {
    WB, // Cache write-back all coherent levels
    CG, // Cache at global level
    CS, // Cache streaming, likely to be accessed once
    WT, // Cache write-through (to system memory)
};
49
50IR::U64 Address(TranslatorVisitor& v, u64 insn) {
51 union {
52 u64 raw;
53 BitField<8, 8, IR::Reg> addr_reg;
54 BitField<20, 24, s64> addr_offset;
55 BitField<20, 24, u64> rz_addr_offset;
56 BitField<45, 1, u64> e;
57 } const mem{insn};
58
59 const IR::U64 address{[&]() -> IR::U64 {
60 if (mem.e == 0) {
61 // LDG/STG without .E uses a 32-bit pointer, zero-extend it
62 return v.ir.UConvert(64, v.X(mem.addr_reg));
63 }
64 if (!IR::IsAligned(mem.addr_reg, 2)) {
65 throw NotImplementedException("Unaligned address register");
66 }
67 // Pack two registers to build the 64-bit address
68 return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
69 }()};
70 const u64 addr_offset{[&]() -> u64 {
71 if (mem.addr_reg == IR::Reg::RZ) {
72 // When RZ is used, the address is an absolute address
73 return static_cast<u64>(mem.rz_addr_offset.Value());
74 } else {
75 return static_cast<u64>(mem.addr_offset.Value());
76 }
77 }()};
78 // Apply the offset
79 return v.ir.IAdd(address, v.ir.Imm64(addr_offset));
80}
81} // Anonymous namespace
82
void TranslatorVisitor::LDG(u64 insn) {
    // LDG loads global memory into registers
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<46, 2, LoadCache> cache;
        BitField<48, 3, LoadSize> size;
    } const ldg{insn};

    // Pointer to load data from
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg dest_reg{ldg.dest_reg};
    switch (ldg.size) {
    case LoadSize::U8:
        X(dest_reg, ir.LoadGlobalU8(address));
        break;
    case LoadSize::S8:
        X(dest_reg, ir.LoadGlobalS8(address));
        break;
    case LoadSize::U16:
        X(dest_reg, ir.LoadGlobalU16(address));
        break;
    case LoadSize::S16:
        X(dest_reg, ir.LoadGlobalS16(address));
        break;
    case LoadSize::B32:
        X(dest_reg, ir.LoadGlobal32(address));
        break;
    case LoadSize::B64: {
        // 64-bit loads unpack into an even-aligned register pair.
        if (!IR::IsAligned(dest_reg, 2)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal64(address)};
        for (int i = 0; i < 2; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        break;
    }
    case LoadSize::B128:
    case LoadSize::U128: {
        // 128-bit loads fill four consecutive registers, base aligned to 4.
        if (!IR::IsAligned(dest_reg, 4)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal128(address)};
        for (int i = 0; i < 4; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))});
        }
        break;
    }
    default:
        throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
    }
}
136
137void TranslatorVisitor::STG(u64 insn) {
138 // STG stores registers into global memory.
139 union {
140 u64 raw;
141 BitField<0, 8, IR::Reg> data_reg;
142 BitField<46, 2, StoreCache> cache;
143 BitField<48, 3, StoreSize> size;
144 } const stg{insn};
145
146 // Pointer to store data into
147 const IR::U64 address{Address(*this, insn)};
148 const IR::Reg data_reg{stg.data_reg};
149 switch (stg.size) {
150 case StoreSize::U8:
151 ir.WriteGlobalU8(address, X(data_reg));
152 break;
153 case StoreSize::S8:
154 ir.WriteGlobalS8(address, X(data_reg));
155 break;
156 case StoreSize::U16:
157 ir.WriteGlobalU16(address, X(data_reg));
158 break;
159 case StoreSize::S16:
160 ir.WriteGlobalS16(address, X(data_reg));
161 break;
162 case StoreSize::B32:
163 ir.WriteGlobal32(address, X(data_reg));
164 break;
165 case StoreSize::B64: {
166 if (!IR::IsAligned(data_reg, 2)) {
167 throw NotImplementedException("Unaligned data registers");
168 }
169 const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
170 ir.WriteGlobal64(address, vector);
171 break;
172 }
173 case StoreSize::B128:
174 if (!IR::IsAligned(data_reg, 4)) {
175 throw NotImplementedException("Unaligned data registers");
176 }
177 const IR::Value vector{
178 ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
179 ir.WriteGlobal128(address, vector);
180 break;
181 }
182}
183
184} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
new file mode 100644
index 000000000..92cd27ed4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp
@@ -0,0 +1,116 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
// Two-input logical operation selector; PASS_B forwards the second operand.
enum class LogicalOp : u64 {
    AND,
    OR,
    XOR,
    PASS_B,
};

// Apply `op` to the two operands. PASS_B returns operand_2 untouched; the
// default branch is defensive since `op` is decoded from a 2-bit field.
[[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1,
                                       const IR::U32& operand_2, LogicalOp op) {
    switch (op) {
    case LogicalOp::AND:
        return ir.BitwiseAnd(operand_1, operand_2);
    case LogicalOp::OR:
        return ir.BitwiseOr(operand_1, operand_2);
    case LogicalOp::XOR:
        return ir.BitwiseXor(operand_1, operand_2);
    case LogicalOp::PASS_B:
        return operand_2;
    default:
        throw NotImplementedException("Invalid Logical operation {}", op);
    }
}
34
// Shared LOP implementation: applies bit_op to (optionally inverted) operands,
// optionally derives a predicate from the result, and updates the Z/S/C/O
// flags when cc is set.
void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b,
         LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt,
         IR::Pred dest_pred = IR::Pred::PT) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
    } const lop{insn};

    if (x) {
        throw NotImplementedException("X");
    }
    IR::U32 op_a{v.X(lop.src_reg)};
    if (inv_a != 0) {
        op_a = v.ir.BitwiseNot(op_a);
    }
    if (inv_b != 0) {
        op_b = v.ir.BitwiseNot(op_b);
    }

    const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)};
    if (pred_op) {
        const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)};
        v.ir.SetPred(dest_pred, pred_result);
    }
    if (cc) {
        if (bit_op == LogicalOp::PASS_B) {
            // PASS_B may forward op_b unchanged rather than the result of an
            // IR operation, so Z/S are computed with explicit comparisons —
            // presumably GetZeroFromOp/GetSignFromOp need an op result.
            v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0)));
            v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true));
        } else {
            v.SetZFlag(v.ir.GetZeroFromOp(result));
            v.SetSFlag(v.ir.GetSignFromOp(result));
        }
        v.ResetCFlag();
        v.ResetOFlag();
    }
    v.X(lop.dest_reg, result);
}
73
// Decode the modifier bits shared by the LOP register/cbuf/immediate forms
// and forward to the main LOP implementation above.
void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 insn;
        BitField<39, 1, u64> inv_a;
        BitField<40, 1, u64> inv_b;
        BitField<41, 2, LogicalOp> bit_op;
        BitField<43, 1, u64> x;
        BitField<44, 2, PredicateOp> pred_op;
        BitField<47, 1, u64> cc;
        BitField<48, 3, IR::Pred> dest_pred;
    } const lop{insn};

    LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op,
        lop.pred_op, lop.dest_pred);
}
89} // Anonymous namespace
90
// LOP with a register second operand.
void TranslatorVisitor::LOP_reg(u64 insn) {
    LOP(*this, insn, GetReg20(insn));
}

// LOP with a constant buffer second operand.
void TranslatorVisitor::LOP_cbuf(u64 insn) {
    LOP(*this, insn, GetCbuf(insn));
}

// LOP with a 20-bit immediate second operand.
void TranslatorVisitor::LOP_imm(u64 insn) {
    LOP(*this, insn, GetImm20(insn));
}
102
103void TranslatorVisitor::LOP32I(u64 insn) {
104 union {
105 u64 raw;
106 BitField<53, 2, LogicalOp> bit_op;
107 BitField<57, 1, u64> x;
108 BitField<52, 1, u64> cc;
109 BitField<55, 1, u64> inv_a;
110 BitField<56, 1, u64> inv_b;
111 } const lop32i{insn};
112
113 LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0,
114 lop32i.inv_b != 0, lop32i.bit_op);
115}
116} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
new file mode 100644
index 000000000..e0fe47912
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp
@@ -0,0 +1,122 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11namespace {
12// https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651
13// Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table)
14IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c,
15 u64 ttbl) {
16 IR::U32 r{ir.Imm32(0)};
17 const IR::U32 not_a{ir.BitwiseNot(a)};
18 const IR::U32 not_b{ir.BitwiseNot(b)};
19 const IR::U32 not_c{ir.BitwiseNot(c)};
20 if (ttbl & 0x01) {
21 // r |= ~a & ~b & ~c;
22 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
23 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
24 r = ir.BitwiseOr(r, rhs);
25 }
26 if (ttbl & 0x02) {
27 // r |= ~a & ~b & c;
28 const auto lhs{ir.BitwiseAnd(not_a, not_b)};
29 const auto rhs{ir.BitwiseAnd(lhs, c)};
30 r = ir.BitwiseOr(r, rhs);
31 }
32 if (ttbl & 0x04) {
33 // r |= ~a & b & ~c;
34 const auto lhs{ir.BitwiseAnd(not_a, b)};
35 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
36 r = ir.BitwiseOr(r, rhs);
37 }
38 if (ttbl & 0x08) {
39 // r |= ~a & b & c;
40 const auto lhs{ir.BitwiseAnd(not_a, b)};
41 const auto rhs{ir.BitwiseAnd(lhs, c)};
42 r = ir.BitwiseOr(r, rhs);
43 }
44 if (ttbl & 0x10) {
45 // r |= a & ~b & ~c;
46 const auto lhs{ir.BitwiseAnd(a, not_b)};
47 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
48 r = ir.BitwiseOr(r, rhs);
49 }
50 if (ttbl & 0x20) {
51 // r |= a & ~b & c;
52 const auto lhs{ir.BitwiseAnd(a, not_b)};
53 const auto rhs{ir.BitwiseAnd(lhs, c)};
54 r = ir.BitwiseOr(r, rhs);
55 }
56 if (ttbl & 0x40) {
57 // r |= a & b & ~c;
58 const auto lhs{ir.BitwiseAnd(a, b)};
59 const auto rhs{ir.BitwiseAnd(lhs, not_c)};
60 r = ir.BitwiseOr(r, rhs);
61 }
62 if (ttbl & 0x80) {
63 // r |= a & b & c;
64 const auto lhs{ir.BitwiseAnd(a, b)};
65 const auto rhs{ir.BitwiseAnd(lhs, c)};
66 r = ir.BitwiseOr(r, rhs);
67 }
68 return r;
69}
70
71IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) {
72 union {
73 u64 insn;
74 BitField<0, 8, IR::Reg> dest_reg;
75 BitField<8, 8, IR::Reg> src_reg;
76 BitField<47, 1, u64> cc;
77 } const lop3{insn};
78
79 if (lop3.cc != 0) {
80 throw NotImplementedException("LOP3 CC");
81 }
82
83 const IR::U32 op_a{v.X(lop3.src_reg)};
84 const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)};
85 v.X(lop3.dest_reg, result);
86 return result;
87}
88
// Extract the 8-bit LOP3 truth table stored at bit 48 (cbuf/imm encodings).
u64 GetLut48(u64 insn) {
    union {
        u64 raw;
        BitField<48, 8, u64> lut;
    } const lut{insn};
    return lut.lut;
}
96} // Anonymous namespace
97
98void TranslatorVisitor::LOP3_reg(u64 insn) {
99 union {
100 u64 insn;
101 BitField<28, 8, u64> lut;
102 BitField<38, 1, u64> x;
103 BitField<36, 2, PredicateOp> pred_op;
104 BitField<48, 3, IR::Pred> pred;
105 } const lop3{insn};
106
107 if (lop3.x != 0) {
108 throw NotImplementedException("LOP3 X");
109 }
110 const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)};
111 const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)};
112 ir.SetPred(lop3.pred, pred_result);
113}
114
// LOP3 with a constant buffer second operand; LUT encoded at bit 48.
void TranslatorVisitor::LOP3_cbuf(u64 insn) {
    LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn));
}

// LOP3 with a 20-bit immediate second operand; LUT encoded at bit 48.
void TranslatorVisitor::LOP3_imm(u64 insn) {
    LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn));
}
122} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
new file mode 100644
index 000000000..4324fd443
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp
@@ -0,0 +1,66 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Selects whether P2R reads the predicate file or the condition-code flags.
enum class Mode : u64 {
    PR,
    CC,
};
16
// P2R with a register mask is not implemented yet.
void TranslatorVisitor::P2R_reg(u64) {
    throw NotImplementedException("P2R (reg)");
}

// P2R with a constant buffer mask is not implemented yet.
void TranslatorVisitor::P2R_cbuf(u64) {
    throw NotImplementedException("P2R (cbuf)");
}
24
void TranslatorVisitor::P2R_imm(u64 insn) {
    // Pack predicate bits (or condition-code flags) into one byte of the
    // destination register, selected by byte_selector and filtered by the
    // 20-bit immediate mask.
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src;
        BitField<40, 1, Mode> mode;
        BitField<41, 2, u64> byte_selector;
    } const p2r{insn};

    const u32 mask{GetImm20(insn).U32()};
    const bool pr_mode{p2r.mode == Mode::PR};
    // PR mode exposes predicates P0-P6; CC mode exposes the four ALU flags.
    const u32 num_items{pr_mode ? 7U : 4U};
    const u32 offset{static_cast<u32>(p2r.byte_selector) * 8};
    IR::U32 insert{ir.Imm32(0)};
    for (u32 index = 0; index < num_items; ++index) {
        if (((mask >> index) & 1) == 0) {
            continue;
        }
        // Source bit for this item: a predicate in PR mode, or one of the
        // flags (Z, S, C, O in encoding order) in CC mode.
        const IR::U1 cond{[this, index, pr_mode] {
            if (pr_mode) {
                return ir.GetPred(IR::Pred{index});
            }
            switch (index) {
            case 0:
                return ir.GetZFlag();
            case 1:
                return ir.GetSFlag();
            case 2:
                return ir.GetCFlag();
            case 3:
                return ir.GetOFlag();
            }
            throw LogicError("Unreachable P2R index");
        }()};
        const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))};
        insert = ir.BitwiseOr(insert, bit);
    }
    // Clear the masked bits of the source register and merge in the gathered
    // predicate/flag bits.
    const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))};
    X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert));
}
65
66} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
new file mode 100644
index 000000000..6bb08db8a
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/exception.h"
8#include "shader_recompiler/frontend/maxwell/opcodes.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) {
14 union {
15 u64 raw;
16 BitField<0, 8, IR::Reg> dest_reg;
17 BitField<39, 4, u64> mask;
18 BitField<12, 4, u64> mov32i_mask;
19 } const mov{insn};
20
21 if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
22 throw NotImplementedException("Non-full move mask");
23 }
24 v.X(mov.dest_reg, src);
25}
26} // Anonymous namespace
27
// MOV with a register source.
void TranslatorVisitor::MOV_reg(u64 insn) {
    MOV(*this, insn, GetReg20(insn));
}

// MOV with a constant buffer source.
void TranslatorVisitor::MOV_cbuf(u64 insn) {
    MOV(*this, insn, GetCbuf(insn));
}

// MOV with a 20-bit immediate source.
void TranslatorVisitor::MOV_imm(u64 insn) {
    MOV(*this, insn, GetImm20(insn));
}

// MOV with a 32-bit immediate source; uses the MOV32I mask encoding.
void TranslatorVisitor::MOV32I(u64 insn) {
    MOV(*this, insn, GetImm32(insn), true);
}
43
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
new file mode 100644
index 000000000..eda5f177b
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp
@@ -0,0 +1,71 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Selects whether R2P writes the predicate file or the condition-code flags.
enum class Mode : u64 {
    PR,
    CC,
};

// Conditionally write one ALU flag: keep the current value when inv_mask_bit
// is set, otherwise store src_bit. Index order is Z, S, C, O.
void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) {
    switch (index) {
    case 0:
        return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)});
    case 1:
        return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)});
    case 2:
        return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)});
    case 3:
        return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)});
    default:
        throw LogicError("Unreachable R2P index");
    }
}
30
// Scatter bits of a register byte into predicates (PR mode) or the ALU flags
// (CC mode); `mask` selects which items are written.
void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<40, 1, Mode> mode;
        BitField<41, 2, u64> byte_selector;
    } const r2p{insn};
    const IR::U32 src{v.X(r2p.src_reg)};
    const IR::U32 count{v.ir.Imm32(1)};
    const bool pr_mode{r2p.mode == Mode::PR};
    // PR mode writes predicates P0-P6; CC mode writes the four ALU flags.
    const u32 num_items{pr_mode ? 7U : 4U};
    const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8};
    for (u32 index = 0; index < num_items; ++index) {
        // Source bit for this item, taken from the selected byte of src.
        const IR::U32 offset{v.ir.Imm32(offset_base + index)};
        const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))};
        const IR::U1 src_bit{v.ir.LogicalNot(src_zero)};
        // When the mask bit is clear, keep the previous predicate/flag value.
        const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)};
        const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)};
        if (pr_mode) {
            const IR::Pred pred{index};
            v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)});
        } else {
            SetFlag(v.ir, inv_mask_bit, src_bit, index);
        }
    }
}
57} // Anonymous namespace
58
// R2P with a register mask.
void TranslatorVisitor::R2P_reg(u64 insn) {
    R2P(*this, insn, GetReg20(insn));
}

// R2P with a constant buffer mask.
void TranslatorVisitor::R2P_cbuf(u64 insn) {
    R2P(*this, insn, GetCbuf(insn));
}

// R2P with a 20-bit immediate mask.
void TranslatorVisitor::R2P_imm(u64 insn) {
    R2P(*this, insn, GetImm20(insn));
}
70
71} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
new file mode 100644
index 000000000..20cb2674e
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp
@@ -0,0 +1,181 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// Hardware special-register indices readable through S2R (instruction bits
// 20-27). Entries named SRnn are numbered slots with no known mnemonic. Only a
// subset of these is actually implemented by Read() below.
enum class SpecialRegister : u64 {
    SR_LANEID = 0,
    SR_CLOCK = 1,
    SR_VIRTCFG = 2,
    SR_VIRTID = 3,
    SR_PM0 = 4,
    SR_PM1 = 5,
    SR_PM2 = 6,
    SR_PM3 = 7,
    SR_PM4 = 8,
    SR_PM5 = 9,
    SR_PM6 = 10,
    SR_PM7 = 11,
    SR12 = 12,
    SR13 = 13,
    SR14 = 14,
    SR_ORDERING_TICKET = 15,
    SR_PRIM_TYPE = 16,
    SR_INVOCATION_ID = 17,
    SR_Y_DIRECTION = 18,
    SR_THREAD_KILL = 19,
    SM_SHADER_TYPE = 20,
    SR_DIRECTCBEWRITEADDRESSLOW = 21,
    SR_DIRECTCBEWRITEADDRESSHIGH = 22,
    SR_DIRECTCBEWRITEENABLE = 23,
    SR_MACHINE_ID_0 = 24,
    SR_MACHINE_ID_1 = 25,
    SR_MACHINE_ID_2 = 26,
    SR_MACHINE_ID_3 = 27,
    SR_AFFINITY = 28,
    SR_INVOCATION_INFO = 29,
    SR_WSCALEFACTOR_XY = 30,
    SR_WSCALEFACTOR_Z = 31,
    SR_TID = 32,
    SR_TID_X = 33,
    SR_TID_Y = 34,
    SR_TID_Z = 35,
    SR_CTA_PARAM = 36,
    SR_CTAID_X = 37,
    SR_CTAID_Y = 38,
    SR_CTAID_Z = 39,
    SR_NTID = 40,
    SR_CirQueueIncrMinusOne = 41,
    SR_NLATC = 42,
    SR43 = 43,
    SR_SM_SPA_VERSION = 44,
    SR_MULTIPASSSHADERINFO = 45,
    SR_LWINHI = 46,
    SR_SWINHI = 47,
    SR_SWINLO = 48,
    SR_SWINSZ = 49,
    SR_SMEMSZ = 50,
    SR_SMEMBANKS = 51,
    SR_LWINLO = 52,
    SR_LWINSZ = 53,
    SR_LMEMLOSZ = 54,
    SR_LMEMHIOFF = 55,
    SR_EQMASK = 56,
    SR_LTMASK = 57,
    SR_LEMASK = 58,
    SR_GTMASK = 59,
    SR_GEMASK = 60,
    SR_REGALLOC = 61,
    SR_BARRIERALLOC = 62,
    SR63 = 63,
    SR_GLOBALERRORSTATUS = 64,
    SR65 = 65,
    SR_WARPERRORSTATUS = 66,
    SR_WARPERRORSTATUSCLEAR = 67,
    SR68 = 68,
    SR69 = 69,
    SR70 = 70,
    SR71 = 71,
    SR_PM_HI0 = 72,
    SR_PM_HI1 = 73,
    SR_PM_HI2 = 74,
    SR_PM_HI3 = 75,
    SR_PM_HI4 = 76,
    SR_PM_HI5 = 77,
    SR_PM_HI6 = 78,
    SR_PM_HI7 = 79,
    SR_CLOCKLO = 80,
    SR_CLOCKHI = 81,
    SR_GLOBALTIMERLO = 82,
    SR_GLOBALTIMERHI = 83,
    SR84 = 84,
    SR85 = 85,
    SR86 = 86,
    SR87 = 87,
    SR88 = 88,
    SR89 = 89,
    SR90 = 90,
    SR91 = 91,
    SR92 = 92,
    SR93 = 93,
    SR94 = 94,
    SR95 = 95,
    SR_HWTASKID = 96,
    SR_CIRCULARQUEUEENTRYINDEX = 97,
    SR_CIRCULARQUEUEENTRYADDRESSLOW = 98,
    SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99,
};
113
// Produces the IR value of one special register for S2R. Registers without a
// host equivalent are stubbed with a logged warning and a plausible constant;
// unhandled registers throw NotImplementedException.
[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
    switch (special_register) {
    case SpecialRegister::SR_INVOCATION_ID:
        return ir.InvocationId();
    case SpecialRegister::SR_THREAD_KILL:
        // All-ones when this is a helper invocation, zero otherwise.
        return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))};
    case SpecialRegister::SR_INVOCATION_INFO:
        LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO");
        return ir.Imm32(0x00ff'0000);
    case SpecialRegister::SR_TID: {
        // Pack the local invocation id components into one register:
        // x in the low bits, y inserted at bit 16 (8 bits), z at bit 26 (6 bits).
        const IR::Value tid{ir.LocalInvocationId()};
        return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
                                                   IR::U32{ir.CompositeExtract(tid, 1)},
                                                   ir.Imm32(16), ir.Imm32(8)),
                                 IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
    }
    case SpecialRegister::SR_TID_X:
        return ir.LocalInvocationIdX();
    case SpecialRegister::SR_TID_Y:
        return ir.LocalInvocationIdY();
    case SpecialRegister::SR_TID_Z:
        return ir.LocalInvocationIdZ();
    case SpecialRegister::SR_CTAID_X:
        return ir.WorkgroupIdX();
    case SpecialRegister::SR_CTAID_Y:
        return ir.WorkgroupIdY();
    case SpecialRegister::SR_CTAID_Z:
        return ir.WorkgroupIdZ();
    case SpecialRegister::SR_WSCALEFACTOR_XY:
        // Stubbed with the identity scale factor (bit pattern of 1.0f).
        LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY");
        return ir.Imm32(Common::BitCast<u32>(1.0f));
    case SpecialRegister::SR_WSCALEFACTOR_Z:
        LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z");
        return ir.Imm32(Common::BitCast<u32>(1.0f));
    case SpecialRegister::SR_LANEID:
        return ir.LaneId();
    case SpecialRegister::SR_EQMASK:
        return ir.SubgroupEqMask();
    case SpecialRegister::SR_LTMASK:
        return ir.SubgroupLtMask();
    case SpecialRegister::SR_LEMASK:
        return ir.SubgroupLeMask();
    case SpecialRegister::SR_GTMASK:
        return ir.SubgroupGtMask();
    case SpecialRegister::SR_GEMASK:
        return ir.SubgroupGeMask();
    case SpecialRegister::SR_Y_DIRECTION:
        // YDirection is a float in the IR; reinterpret its bits as u32.
        return ir.BitCast<IR::U32>(ir.YDirection());
    case SpecialRegister::SR_AFFINITY:
        LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY");
        return ir.Imm32(0); // This is the default value hardware returns.
    default:
        throw NotImplementedException("S2R special register {}", special_register);
    }
}
169} // Anonymous namespace
170
// S2R: copy the value of a special register into a general-purpose register.
void TranslatorVisitor::S2R(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, SpecialRegister> src_reg;
    } const s2r{insn};

    X(s2r.dest_reg, Read(ir, s2r.src_reg));
}
180
181} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
new file mode 100644
index 000000000..7e26ab359
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -0,0 +1,283 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/opcodes.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
namespace Shader::Maxwell {

// Stubs for Maxwell opcodes the translator does not implement yet: each one
// throws NotImplementedException so unsupported shaders fail loudly instead of
// miscompiling. A few opcodes (CAL, KIL, NOP, PBK, PCNT, SSY) are deliberate
// no-ops here; presumably their effects are resolved earlier during
// control-flow analysis — confirm against the control_flow pass.
[[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
    throw NotImplementedException("Instruction {} is not implemented", opcode);
}

void TranslatorVisitor::ATOM_cas(u64) {
    ThrowNotImplemented(Opcode::ATOM_cas);
}

void TranslatorVisitor::ATOMS_cas(u64) {
    ThrowNotImplemented(Opcode::ATOMS_cas);
}

void TranslatorVisitor::B2R(u64) {
    ThrowNotImplemented(Opcode::B2R);
}

void TranslatorVisitor::BPT(u64) {
    ThrowNotImplemented(Opcode::BPT);
}

void TranslatorVisitor::BRA(u64) {
    ThrowNotImplemented(Opcode::BRA);
}

void TranslatorVisitor::BRK(u64) {
    ThrowNotImplemented(Opcode::BRK);
}

void TranslatorVisitor::CAL() {
    // CAL is a no-op
}

void TranslatorVisitor::CCTL(u64) {
    ThrowNotImplemented(Opcode::CCTL);
}

void TranslatorVisitor::CCTLL(u64) {
    ThrowNotImplemented(Opcode::CCTLL);
}

void TranslatorVisitor::CONT(u64) {
    ThrowNotImplemented(Opcode::CONT);
}

void TranslatorVisitor::CS2R(u64) {
    ThrowNotImplemented(Opcode::CS2R);
}

void TranslatorVisitor::FCHK_reg(u64) {
    ThrowNotImplemented(Opcode::FCHK_reg);
}

void TranslatorVisitor::FCHK_cbuf(u64) {
    ThrowNotImplemented(Opcode::FCHK_cbuf);
}

void TranslatorVisitor::FCHK_imm(u64) {
    ThrowNotImplemented(Opcode::FCHK_imm);
}

void TranslatorVisitor::GETCRSPTR(u64) {
    ThrowNotImplemented(Opcode::GETCRSPTR);
}

void TranslatorVisitor::GETLMEMBASE(u64) {
    ThrowNotImplemented(Opcode::GETLMEMBASE);
}

void TranslatorVisitor::IDE(u64) {
    ThrowNotImplemented(Opcode::IDE);
}

void TranslatorVisitor::IDP_reg(u64) {
    ThrowNotImplemented(Opcode::IDP_reg);
}

void TranslatorVisitor::IDP_imm(u64) {
    ThrowNotImplemented(Opcode::IDP_imm);
}

void TranslatorVisitor::IMAD_reg(u64) {
    ThrowNotImplemented(Opcode::IMAD_reg);
}

void TranslatorVisitor::IMAD_rc(u64) {
    ThrowNotImplemented(Opcode::IMAD_rc);
}

void TranslatorVisitor::IMAD_cr(u64) {
    ThrowNotImplemented(Opcode::IMAD_cr);
}

void TranslatorVisitor::IMAD_imm(u64) {
    ThrowNotImplemented(Opcode::IMAD_imm);
}

void TranslatorVisitor::IMAD32I(u64) {
    ThrowNotImplemented(Opcode::IMAD32I);
}

void TranslatorVisitor::IMADSP_reg(u64) {
    ThrowNotImplemented(Opcode::IMADSP_reg);
}

void TranslatorVisitor::IMADSP_rc(u64) {
    ThrowNotImplemented(Opcode::IMADSP_rc);
}

void TranslatorVisitor::IMADSP_cr(u64) {
    ThrowNotImplemented(Opcode::IMADSP_cr);
}

void TranslatorVisitor::IMADSP_imm(u64) {
    ThrowNotImplemented(Opcode::IMADSP_imm);
}

void TranslatorVisitor::IMUL_reg(u64) {
    ThrowNotImplemented(Opcode::IMUL_reg);
}

void TranslatorVisitor::IMUL_cbuf(u64) {
    ThrowNotImplemented(Opcode::IMUL_cbuf);
}

void TranslatorVisitor::IMUL_imm(u64) {
    ThrowNotImplemented(Opcode::IMUL_imm);
}

void TranslatorVisitor::IMUL32I(u64) {
    ThrowNotImplemented(Opcode::IMUL32I);
}

void TranslatorVisitor::JCAL(u64) {
    ThrowNotImplemented(Opcode::JCAL);
}

void TranslatorVisitor::JMP(u64) {
    ThrowNotImplemented(Opcode::JMP);
}

void TranslatorVisitor::KIL() {
    // KIL is a no-op
}

void TranslatorVisitor::LD(u64) {
    ThrowNotImplemented(Opcode::LD);
}

void TranslatorVisitor::LEPC(u64) {
    ThrowNotImplemented(Opcode::LEPC);
}

void TranslatorVisitor::LONGJMP(u64) {
    ThrowNotImplemented(Opcode::LONGJMP);
}

void TranslatorVisitor::NOP(u64) {
    // NOP is No-Op.
}

void TranslatorVisitor::PBK() {
    // PBK is a no-op
}

void TranslatorVisitor::PCNT() {
    // PCNT is a no-op
}

void TranslatorVisitor::PEXIT(u64) {
    ThrowNotImplemented(Opcode::PEXIT);
}

void TranslatorVisitor::PLONGJMP(u64) {
    ThrowNotImplemented(Opcode::PLONGJMP);
}

void TranslatorVisitor::PRET(u64) {
    ThrowNotImplemented(Opcode::PRET);
}

void TranslatorVisitor::PRMT_reg(u64) {
    ThrowNotImplemented(Opcode::PRMT_reg);
}

void TranslatorVisitor::PRMT_rc(u64) {
    ThrowNotImplemented(Opcode::PRMT_rc);
}

void TranslatorVisitor::PRMT_cr(u64) {
    ThrowNotImplemented(Opcode::PRMT_cr);
}

void TranslatorVisitor::PRMT_imm(u64) {
    ThrowNotImplemented(Opcode::PRMT_imm);
}

void TranslatorVisitor::R2B(u64) {
    ThrowNotImplemented(Opcode::R2B);
}

void TranslatorVisitor::RAM(u64) {
    ThrowNotImplemented(Opcode::RAM);
}

void TranslatorVisitor::RET(u64) {
    ThrowNotImplemented(Opcode::RET);
}

void TranslatorVisitor::RTT(u64) {
    ThrowNotImplemented(Opcode::RTT);
}

void TranslatorVisitor::SAM(u64) {
    ThrowNotImplemented(Opcode::SAM);
}

void TranslatorVisitor::SETCRSPTR(u64) {
    ThrowNotImplemented(Opcode::SETCRSPTR);
}

void TranslatorVisitor::SETLMEMBASE(u64) {
    ThrowNotImplemented(Opcode::SETLMEMBASE);
}

void TranslatorVisitor::SSY() {
    // SSY is a no-op
}

void TranslatorVisitor::ST(u64) {
    ThrowNotImplemented(Opcode::ST);
}

void TranslatorVisitor::STP(u64) {
    ThrowNotImplemented(Opcode::STP);
}

void TranslatorVisitor::SUATOM_cas(u64) {
    ThrowNotImplemented(Opcode::SUATOM_cas);
}

void TranslatorVisitor::SYNC(u64) {
    ThrowNotImplemented(Opcode::SYNC);
}

void TranslatorVisitor::TXA(u64) {
    ThrowNotImplemented(Opcode::TXA);
}

void TranslatorVisitor::VABSDIFF(u64) {
    ThrowNotImplemented(Opcode::VABSDIFF);
}

void TranslatorVisitor::VABSDIFF4(u64) {
    ThrowNotImplemented(Opcode::VABSDIFF4);
}

void TranslatorVisitor::VADD(u64) {
    ThrowNotImplemented(Opcode::VADD);
}

void TranslatorVisitor::VSET(u64) {
    ThrowNotImplemented(Opcode::VSET);
}
void TranslatorVisitor::VSHL(u64) {
    ThrowNotImplemented(Opcode::VSHL);
}

void TranslatorVisitor::VSHR(u64) {
    ThrowNotImplemented(Opcode::VSHR);
}

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
new file mode 100644
index 000000000..01cfad88d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp
@@ -0,0 +1,45 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<8, 8, IR::Reg> output_reg; // Not needed on host
16 BitField<39, 1, u64> emit;
17 BitField<40, 1, u64> cut;
18 } const out{insn};
19
20 stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11));
21
22 if (out.emit != 0) {
23 v.ir.EmitVertex(stream_index);
24 }
25 if (out.cut != 0) {
26 v.ir.EndPrimitive(stream_index);
27 }
28 // Host doesn't need the output register, but we can write to it to avoid undefined reads
29 v.X(out.dest_reg, v.ir.Imm32(0));
30}
31} // Anonymous namespace
32
// OUT variant taking the stream index from the register operand.
void TranslatorVisitor::OUT_reg(u64 insn) {
    OUT(*this, insn, GetReg20(insn));
}
36
// OUT variant taking the stream index from a constant buffer operand.
void TranslatorVisitor::OUT_cbuf(u64 insn) {
    OUT(*this, insn, GetCbuf(insn));
}
40
// OUT variant taking the stream index from the 20-bit immediate operand.
void TranslatorVisitor::OUT_imm(u64 insn) {
    OUT(*this, insn, GetImm20(insn));
}
44
45} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
new file mode 100644
index 000000000..b4767afb5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp
@@ -0,0 +1,46 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
// PIXLD query modes (instruction bits 31-33); only MyIndex is handled below.
enum class Mode : u64 {
    Default,
    CovMask,
    Covered,
    Offset,
    CentroidOffset,
    MyIndex,
};
19} // Anonymous namespace
20
// PIXLD: pixel/fragment information query. Only the MyIndex mode (current
// sample id) is implemented; other modes, a destination predicate, or a
// non-zero address operand throw NotImplementedException.
void TranslatorVisitor::PIXLD(u64 insn) {
    union {
        u64 raw;
        BitField<31, 3, Mode> mode;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 8, s64> addr_offset;
        BitField<45, 3, IR::Pred> dest_pred;
    } const pixld{insn};

    if (pixld.dest_pred != IR::Pred::PT) {
        throw NotImplementedException("Destination predicate");
    }
    if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) {
        throw NotImplementedException("Non-zero source register");
    }
    switch (pixld.mode) {
    case Mode::MyIndex:
        X(pixld.dest_reg, ir.SampleId());
        break;
    default:
        throw NotImplementedException("Mode {}", pixld.mode.Value());
    }
}
45
46} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
new file mode 100644
index 000000000..75d1fa8c1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp
@@ -0,0 +1,38 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSETP(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 3, IR::Pred> dest_pred_b;
15 BitField<3, 3, IR::Pred> dest_pred_a;
16 BitField<12, 3, IR::Pred> pred_a;
17 BitField<15, 1, u64> neg_pred_a;
18 BitField<24, 2, BooleanOp> bop_1;
19 BitField<29, 3, IR::Pred> pred_b;
20 BitField<32, 1, u64> neg_pred_b;
21 BitField<39, 3, IR::Pred> pred_c;
22 BitField<42, 1, u64> neg_pred_c;
23 BitField<45, 2, BooleanOp> bop_2;
24 } const pset{insn};
25
26 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
27 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
28 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
29
30 const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
31 const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)};
32 const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)};
33 const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)};
34
35 ir.SetPred(pset.dest_pred_a, result_a);
36 ir.SetPred(pset.dest_pred_b, result_b);
37}
38} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
new file mode 100644
index 000000000..b02789874
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp
@@ -0,0 +1,53 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
11void TranslatorVisitor::PSET(u64 insn) {
12 union {
13 u64 raw;
14 BitField<0, 8, IR::Reg> dest_reg;
15 BitField<12, 3, IR::Pred> pred_a;
16 BitField<15, 1, u64> neg_pred_a;
17 BitField<24, 2, BooleanOp> bop_1;
18 BitField<29, 3, IR::Pred> pred_b;
19 BitField<32, 1, u64> neg_pred_b;
20 BitField<39, 3, IR::Pred> pred_c;
21 BitField<42, 1, u64> neg_pred_c;
22 BitField<44, 1, u64> bf;
23 BitField<45, 2, BooleanOp> bop_2;
24 BitField<47, 1, u64> cc;
25 } const pset{insn};
26
27 const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)};
28 const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)};
29 const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)};
30
31 const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)};
32 const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)};
33
34 const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)};
35 const IR::U32 zero{ir.Imm32(0)};
36
37 const IR::U32 result{ir.Select(res_2, true_result, zero)};
38
39 X(pset.dest_reg, result);
40 if (pset.cc != 0) {
41 const IR::U1 is_zero{ir.IEqual(result, zero)};
42 SetZFlag(is_zero);
43 if (pset.bf != 0) {
44 ResetSFlag();
45 } else {
46 SetSFlag(ir.LogicalNot(is_zero));
47 }
48 ResetOFlag();
49 ResetCFlag();
50 }
51}
52
53} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
new file mode 100644
index 000000000..93baa75a9
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp
@@ -0,0 +1,44 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11
12void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) {
13 union {
14 u64 raw;
15 BitField<0, 8, IR::Reg> dest_reg;
16 BitField<8, 8, IR::Reg> src_reg;
17 BitField<39, 3, IR::Pred> pred;
18 BitField<42, 1, u64> neg_pred;
19 } const sel{insn};
20
21 const IR::U1 pred = v.ir.GetPred(sel.pred);
22 IR::U32 op_a{v.X(sel.src_reg)};
23 IR::U32 op_b{src};
24 if (sel.neg_pred != 0) {
25 std::swap(op_a, op_b);
26 }
27 const IR::U32 result{v.ir.Select(pred, op_a, op_b)};
28
29 v.X(sel.dest_reg, result);
30}
31} // Anonymous namespace
32
// SEL variant taking the second source from the register operand.
void TranslatorVisitor::SEL_reg(u64 insn) {
    SEL(*this, insn, GetReg20(insn));
}
36
// SEL variant taking the second source from a constant buffer operand.
void TranslatorVisitor::SEL_cbuf(u64 insn) {
    SEL(*this, insn, GetCbuf(insn));
}
40
// SEL variant taking the second source from the 20-bit immediate operand.
void TranslatorVisitor::SEL_imm(u64 insn) {
    SEL(*this, insn, GetImm20(insn));
}
44} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
new file mode 100644
index 000000000..63b588ad4
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp
@@ -0,0 +1,205 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
namespace {
// Surface dimensionality as encoded in SUATOM/SURED.
enum class Type : u64 {
    _1D,
    BUFFER_1D,
    ARRAY_1D,
    _2D,
    ARRAY_2D,
    _3D,
};

// Operand size/interpretation; only the 32-bit integer forms are handled.
enum class Size : u64 {
    U32,
    S32,
    U64,
    S64,
    F32FTZRN,
    F16x2FTZRN,
    SD32,
    SD64,
};

enum class AtomicOp : u64 {
    ADD,
    MIN,
    MAX,
    INC,
    DEC,
    AND,
    OR,
    XOR,
    EXCH,
};

// Out-of-bounds behavior; only IGN (ignore) is supported below.
enum class Clamp : u64 {
    IGN,
    Default,
    TRAP,
};

// Maps the encoded surface type onto the IR texture type.
TextureType GetType(Type type) {
    switch (type) {
    case Type::_1D:
        return TextureType::Color1D;
    case Type::BUFFER_1D:
        return TextureType::Buffer;
    case Type::ARRAY_1D:
        return TextureType::ColorArray1D;
    case Type::_2D:
        return TextureType::Color2D;
    case Type::ARRAY_2D:
        return TextureType::ColorArray2D;
    case Type::_3D:
        return TextureType::Color3D;
    }
    throw NotImplementedException("Invalid type {}", type);
}

// Builds the coordinate vector from consecutive registers starting at reg.
// Array surface types are not handled and fall through to the throw.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
    switch (type) {
    case Type::_1D:
    case Type::BUFFER_1D:
        return v.X(reg);
    case Type::_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case Type::_3D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    default:
        break;
    }
    throw NotImplementedException("Invalid type {}", type);
}

// Emits the image atomic IR instruction matching the encoded operation.
// Returns the value previously stored at the texel.
IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords,
                        const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op,
                        bool is_signed) {
    switch (op) {
    case AtomicOp::ADD:
        return ir.ImageAtomicIAdd(handle, coords, op_b, info);
    case AtomicOp::MIN:
        return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info);
    case AtomicOp::MAX:
        return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info);
    case AtomicOp::INC:
        return ir.ImageAtomicInc(handle, coords, op_b, info);
    case AtomicOp::DEC:
        return ir.ImageAtomicDec(handle, coords, op_b, info);
    case AtomicOp::AND:
        return ir.ImageAtomicAnd(handle, coords, op_b, info);
    case AtomicOp::OR:
        return ir.ImageAtomicOr(handle, coords, op_b, info);
    case AtomicOp::XOR:
        return ir.ImageAtomicXor(handle, coords, op_b, info);
    case AtomicOp::EXCH:
        return ir.ImageAtomicExchange(handle, coords, op_b, info);
    default:
        throw NotImplementedException("Atomic Operation {}", op);
    }
}

// Image format used for the atomic access; only 32-bit forms are valid.
ImageFormat Format(Size size) {
    switch (size) {
    case Size::U32:
    case Size::S32:
    case Size::SD32:
        return ImageFormat::R32_UINT;
    default:
        break;
    }
    throw NotImplementedException("Invalid size {}", size);
}

// True for sizes that operate on a single 32-bit integer.
bool IsSizeInt32(Size size) {
    switch (size) {
    case Size::U32:
    case Size::S32:
    case Size::SD32:
        return true;
    default:
        return false;
    }
}

// Shared implementation for SUATOM/SURED. write_result distinguishes SUATOM
// (previous value written to dest_reg) from SURED (pure reduction, no result).
void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg,
                 IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type,
                 u64 bound_offset, bool is_bindless, bool write_result) {
    if (clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", clamp);
    }
    if (!IsSizeInt32(size)) {
        throw NotImplementedException("Size {}", size);
    }
    const bool is_signed{size == Size::S32};
    const ImageFormat format{Format(size)};
    const TextureType tex_type{GetType(type)};
    const IR::Value coords{MakeCoords(v, coord_reg, type)};

    // Bound surfaces address a constant-buffer slot; the offset is scaled by 4.
    const IR::U32 handle{is_bindless != 0 ? v.X(bindless_reg)
                                          : v.ir.Imm32(static_cast<u32>(bound_offset * 4))};
    IR::TextureInstInfo info{};
    info.type.Assign(tex_type);
    info.image_format.Assign(format);

    // TODO: float/64-bit operand
    const IR::Value op_b{v.X(operand_reg)};
    const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)};

    if (write_result) {
        v.X(dest_reg, IR::U32{color});
    }
}
} // Anonymous namespace
165
// SUATOM: surface atomic; applies `op` to the addressed texel and writes the
// previous value to dest_reg.
void TranslatorVisitor::SUATOM(u64 insn) {
    union {
        u64 raw;
        BitField<54, 1, u64> is_bindless;
        BitField<29, 4, AtomicOp> op;
        BitField<33, 3, Type> type;
        BitField<51, 3, Size> size;
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> operand_reg;
        BitField<36, 13, u64> bound_offset;    // !is_bindless
        BitField<39, 8, IR::Reg> bindless_reg; // is_bindless
    } const suatom{insn};

    ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg,
                suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset,
                suatom.is_bindless != 0, true);
}
185
// SURED: surface reduction; like SUATOM but the previous value is discarded.
void TranslatorVisitor::SURED(u64 insn) {
    // TODO: confirm offsets
    union {
        u64 raw;
        BitField<51, 1, u64> is_bound;
        BitField<21, 3, AtomicOp> op;
        BitField<33, 3, Type> type;
        BitField<20, 3, Size> size;
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> operand_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
    } const sured{insn};
    ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg,
                sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset,
                sured.is_bound == 0, false);
}
204
205} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
new file mode 100644
index 000000000..681220a8d
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
@@ -0,0 +1,281 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6#include <bit>
7
8#include "common/bit_field.h"
9#include "common/common_types.h"
10#include "shader_recompiler/frontend/ir/modifiers.h"
11#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
12
13namespace Shader::Maxwell {
namespace {
// Surface dimensionality as encoded in SULD/SUST.
enum class Type : u64 {
    _1D,
    BUFFER_1D,
    ARRAY_1D,
    _2D,
    ARRAY_2D,
    _3D,
};

// Component bits for the .P swizzle mask.
constexpr unsigned R = 1 << 0;
constexpr unsigned G = 1 << 1;
constexpr unsigned B = 1 << 2;
constexpr unsigned A = 1 << 3;

// Maps the 4-bit encoded swizzle to a component mask; entry 0 is invalid.
constexpr std::array MASK{
    0U,            //
    R,             //
    G,             //
    R | G,         //
    B,             //
    R | B,         //
    G | B,         //
    R | G | B,     //
    A,             //
    R | A,         //
    G | A,         //
    R | G | A,     //
    B | A,         //
    R | B | A,     //
    G | B | A,     //
    R | G | B | A, //
};

// Element size for typed (.D) accesses.
enum class Size : u64 {
    U8,
    S8,
    U16,
    S16,
    B32,
    B64,
    B128,
};

// Out-of-bounds behavior; only IGN (ignore) is supported below.
enum class Clamp : u64 {
    IGN,
    Default,
    TRAP,
};

// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
enum class LoadCache : u64 {
    CA, // Cache at all levels, likely to be accessed again
    CG, // Cache at global level (L2 and below, not L1)
    CI, // ???
    CV, // Don't cache and fetch again (volatile)
};

enum class StoreCache : u64 {
    WB, // Cache write-back all coherent levels
    CG, // Cache at global level (L2 and below, not L1)
    CS, // Cache streaming, likely to be accessed once
    WT, // Cache write-through (to system memory, volatile?)
};

// Integer image format used for a typed (.D) access of the given size.
ImageFormat Format(Size size) {
    switch (size) {
    case Size::U8:
        return ImageFormat::R8_UINT;
    case Size::S8:
        return ImageFormat::R8_SINT;
    case Size::U16:
        return ImageFormat::R16_UINT;
    case Size::S16:
        return ImageFormat::R16_SINT;
    case Size::B32:
        return ImageFormat::R32_UINT;
    case Size::B64:
        return ImageFormat::R32G32_UINT;
    case Size::B128:
        return ImageFormat::R32G32B32A32_UINT;
    }
    throw NotImplementedException("Invalid size {}", size);
}

// Number of 32-bit registers a typed access of the given size occupies.
int SizeInRegs(Size size) {
    switch (size) {
    case Size::U8:
    case Size::S8:
    case Size::U16:
    case Size::S16:
    case Size::B32:
        return 1;
    case Size::B64:
        return 2;
    case Size::B128:
        return 4;
    }
    throw NotImplementedException("Invalid size {}", size);
}

// Maps the encoded surface type onto the IR texture type.
TextureType GetType(Type type) {
    switch (type) {
    case Type::_1D:
        return TextureType::Color1D;
    case Type::BUFFER_1D:
        return TextureType::Buffer;
    case Type::ARRAY_1D:
        return TextureType::ColorArray1D;
    case Type::_2D:
        return TextureType::Color2D;
    case Type::ARRAY_2D:
        return TextureType::ColorArray2D;
    case Type::_3D:
        return TextureType::Color3D;
    }
    throw NotImplementedException("Invalid type {}", type);
}

// Builds the coordinate vector from consecutive registers starting at reg.
// The array layer lives in the low 16 bits of its register.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) {
    const auto array{[&](int index) {
        return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16));
    }};
    switch (type) {
    case Type::_1D:
    case Type::BUFFER_1D:
        return v.X(reg);
    case Type::ARRAY_1D:
        return v.ir.CompositeConstruct(v.X(reg), array(1));
    case Type::_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
    case Type::ARRAY_2D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2));
    case Type::_3D:
        return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
    }
    throw NotImplementedException("Invalid type {}", type);
}

// Decodes the .P swizzle field into a component mask, rejecting the empty one.
unsigned SwizzleMask(u64 swizzle) {
    if (swizzle == 0 || swizzle >= MASK.size()) {
        throw NotImplementedException("Invalid swizzle {}", swizzle);
    }
    return MASK[swizzle];
}

// Packs num_regs consecutive registers into a 4-component vector, zero-filling
// the remaining components.
IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) {
    std::array<IR::U32, 4> colors;
    for (int i = 0; i < num_regs; ++i) {
        colors[static_cast<size_t>(i)] = ir.GetReg(reg + i);
    }
    for (int i = num_regs; i < 4; ++i) {
        colors[static_cast<size_t>(i)] = ir.Imm32(0);
    }
    return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]);
}
} // Anonymous namespace
171
// SULD: surface load. With .D (d bit) the load uses an explicit integer format
// sized by `size` and fills 1/2/4 consecutive registers; otherwise the
// surface's own format is used (Typeless) and `swizzle` selects which
// components are written.
void TranslatorVisitor::SULD(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> is_bound;
        BitField<52, 1, u64> d;
        BitField<23, 1, u64> ba;
        BitField<33, 3, Type> type;
        BitField<24, 2, LoadCache> cache;
        BitField<20, 3, Size> size;   // .D
        BitField<20, 4, u64> swizzle; // .P
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
    } const suld{insn};

    if (suld.clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", suld.clamp.Value());
    }
    if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
        throw NotImplementedException("Cache {}", suld.cache.Value());
    }
    const bool is_typed{suld.d != 0};
    if (is_typed && suld.ba != 0) {
        throw NotImplementedException("BA");
    }

    const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless};
    const TextureType type{GetType(suld.type)};
    const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)};
    // Bound surfaces address a constant-buffer slot; the offset is scaled by 4.
    const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4))
                                            : X(suld.bindless_reg)};
    IR::TextureInstInfo info{};
    info.type.Assign(type);
    info.image_format.Assign(format);

    const IR::Value result{ir.ImageRead(handle, coords, info)};
    IR::Reg dest_reg{suld.dest_reg};
    if (is_typed) {
        const int num_regs{SizeInRegs(suld.size)};
        for (int i = 0; i < num_regs; ++i) {
            X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))});
        }
    } else {
        const unsigned mask{SwizzleMask(suld.swizzle)};
        const int bits{std::popcount(mask)};
        // Three-component results still require four-register alignment.
        if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) {
            throw NotImplementedException("Unaligned destination register");
        }
        // Write only the swizzled components, packed into consecutive registers.
        for (unsigned component = 0; component < 4; ++component) {
            if (((mask >> component) & 1) == 0) {
                continue;
            }
            X(dest_reg, IR::U32{ir.CompositeExtract(result, component)});
            ++dest_reg;
        }
    }
}
231
// SUST: surface store. With .D (d bit) the store uses an explicit integer
// format sized by `size`; otherwise the surface's own format is used and only
// a full RGBA swizzle is supported.
void TranslatorVisitor::SUST(u64 insn) {
    union {
        u64 raw;
        BitField<51, 1, u64> is_bound;
        BitField<52, 1, u64> d;
        BitField<23, 1, u64> ba;
        BitField<33, 3, Type> type;
        BitField<24, 2, StoreCache> cache;
        BitField<20, 3, Size> size;   // .D
        BitField<20, 4, u64> swizzle; // .P
        BitField<49, 2, Clamp> clamp;
        BitField<0, 8, IR::Reg> data_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<36, 13, u64> bound_offset;    // is_bound
        BitField<39, 8, IR::Reg> bindless_reg; // !is_bound
    } const sust{insn};

    if (sust.clamp != Clamp::IGN) {
        throw NotImplementedException("Clamp {}", sust.clamp.Value());
    }
    if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
        throw NotImplementedException("Cache {}", sust.cache.Value());
    }
    const bool is_typed{sust.d != 0};
    if (is_typed && sust.ba != 0) {
        throw NotImplementedException("BA");
    }
    const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless};
    const TextureType type{GetType(sust.type)};
    const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)};
    // Bound surfaces address a constant-buffer slot; the offset is scaled by 4.
    const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4))
                                            : X(sust.bindless_reg)};
    IR::TextureInstInfo info{};
    info.type.Assign(type);
    info.image_format.Assign(format);

    IR::Value color;
    if (is_typed) {
        color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size));
    } else {
        const unsigned mask{SwizzleMask(sust.swizzle)};
        if (mask != 0xf) {
            throw NotImplementedException("Non-full mask");
        }
        color = MakeColor(ir, sust.data_reg, 4);
    }
    ir.ImageWrite(handle, coords, color, info);
}
280
281} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
new file mode 100644
index 000000000..0046b5edd
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
@@ -0,0 +1,236 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// LOD mode of TEX. None and LZ consume no extra operand register; LB/LL and
// their array variants LBA/LLA each consume one register (bias or explicit
// level, see MakeLod). Encodings 4 and 5 are invalid.
enum class Blod : u64 {
    None,
    LZ,
    LB,
    LL,
    INVALIDBLOD4,
    INVALIDBLOD5,
    LBA,
    LLA,
};

// Texture dimensionality as encoded in instruction bits 28-30.
enum class TextureType : u64 {
    _1D,
    ARRAY_1D,
    _2D,
    ARRAY_2D,
    _3D,
    ARRAY_3D,
    CUBE,
    ARRAY_CUBE,
};
35
36Shader::TextureType GetType(TextureType type) {
37 switch (type) {
38 case TextureType::_1D:
39 return Shader::TextureType::Color1D;
40 case TextureType::ARRAY_1D:
41 return Shader::TextureType::ColorArray1D;
42 case TextureType::_2D:
43 return Shader::TextureType::Color2D;
44 case TextureType::ARRAY_2D:
45 return Shader::TextureType::ColorArray2D;
46 case TextureType::_3D:
47 return Shader::TextureType::Color3D;
48 case TextureType::ARRAY_3D:
49 throw NotImplementedException("3D array texture type");
50 case TextureType::CUBE:
51 return Shader::TextureType::ColorCube;
52 case TextureType::ARRAY_CUBE:
53 return Shader::TextureType::ColorArrayCube;
54 }
55 throw NotImplementedException("Invalid texture type {}", type);
56}
57
// Builds the coordinate vector from consecutive registers starting at reg.
// For array types the first register holds the layer index as an integer
// (converted from a 16-bit unsigned value) and the spatial coordinates follow.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    // Layer index: 16-bit unsigned integer converted to float.
    const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
    switch (type) {
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
    case TextureType::_3D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
80
81IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
82 switch (blod) {
83 case Blod::None:
84 return v.ir.Imm32(0.0f);
85 case Blod::LZ:
86 return v.ir.Imm32(0.0f);
87 case Blod::LB:
88 case Blod::LL:
89 case Blod::LBA:
90 case Blod::LLA:
91 return v.F(reg++);
92 case Blod::INVALIDBLOD4:
93 case Blod::INVALIDBLOD5:
94 break;
95 }
96 throw NotImplementedException("Invalid blod {}", blod);
97}
98
// Unpacks the AOFFI operand: a single register holding packed 4-bit signed
// texel offsets, one nibble per dimension. Consumes the register (reg is
// advanced). Cube maps cannot take offsets.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 value{v.X(reg++)};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(
            v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
            v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(
            v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
            v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
            v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
122
123bool HasExplicitLod(Blod blod) {
124 switch (blod) {
125 case Blod::LL:
126 case Blod::LLA:
127 case Blod::LZ:
128 return true;
129 default:
130 return false;
131 }
132}
133
// Shared translation for TEX and TEX_b. cbuf_offset carries the constant
// buffer handle offset for the bound form; std::nullopt selects the bindless
// form, which reads the handle from the first meta register instead.
void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
          std::optional<u32> cbuf_offset) {
    union {
        u64 raw;
        BitField<35, 1, u64> ndv;
        BitField<49, 1, u64> nodep;
        BitField<50, 1, u64> dc;           // depth compare enable
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg; // first register of the extra operand list
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;         // destination component write mask
    } const tex{insn};

    if (lc) {
        throw NotImplementedException("LC");
    }
    const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};

    // Extra operands are consumed from consecutive registers starting at
    // meta_reg, in order: [bindless handle], [lod/bias], [aoffi word], [dref].
    IR::Reg meta_reg{tex.meta_reg};
    IR::Value handle;
    IR::Value offset;
    IR::F32 dref;
    IR::F32 lod_clamp; // never assigned: LC throws above, passed empty below
    if (cbuf_offset) {
        handle = v.ir.Imm32(*cbuf_offset);
    } else {
        handle = v.X(meta_reg++);
    }
    const IR::F32 lod{MakeLod(v, meta_reg, blod)};
    if (aoffi) {
        offset = MakeOffset(v, meta_reg, tex.type);
    }
    if (tex.dc != 0) {
        dref = v.F(meta_reg++);
    }
    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tex.type));
    info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
    info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
    info.has_lod_clamp.Assign(lc ? 1 : 0);

    // Depth-compare samples produce a scalar; color samples a 4-component
    // composite. Explicit-LOD and implicit-LOD variants take different args.
    const IR::Value sample{[&]() -> IR::Value {
        if (tex.dc == 0) {
            if (HasExplicitLod(blod)) {
                return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info);
            } else {
                return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
            }
        }
        if (HasExplicitLod(blod)) {
            return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info);
        } else {
            return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
                                                   info);
        }
    }()};

    // Store the masked components to consecutive destination registers.
    IR::Reg dest_reg{tex.dest_reg};
    for (int element = 0; element < 4; ++element) {
        if (((tex.mask >> element) & 1) == 0) {
            continue;
        }
        IR::F32 value;
        if (tex.dc != 0) {
            // Shadow result broadcasts to RGB; alpha reads as 1.0.
            value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
        } else {
            value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))};
        }
        v.F(dest_reg, value);
        ++dest_reg;
    }
    // Optionally expose the sparse residency result through a predicate.
    if (tex.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
211} // Anonymous namespace
212
213void TranslatorVisitor::TEX(u64 insn) {
214 union {
215 u64 raw;
216 BitField<54, 1, u64> aoffi;
217 BitField<55, 3, Blod> blod;
218 BitField<58, 1, u64> lc;
219 BitField<36, 13, u64> cbuf_offset;
220 } const tex{insn};
221
222 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4));
223}
224
225void TranslatorVisitor::TEX_b(u64 insn) {
226 union {
227 u64 raw;
228 BitField<36, 1, u64> aoffi;
229 BitField<37, 3, Blod> blod;
230 BitField<40, 1, u64> lc;
231 } const tex{insn};
232
233 Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
234}
235
236} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..154e7f1a1
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,266 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// Result precision of TEXS: F16 packs two half-precision components per
// destination register, F32 stores one float per register.
enum class Precision : u64 {
    F16,
    F32,
};

// Field layout of the TEXS instruction word.
union Encoding {
    u64 raw;
    BitField<59, 1, Precision> precision;
    BitField<53, 4, u64> encoding;       // sampling mode selector (see Sample)
    BitField<49, 1, u64> nodep;
    BitField<28, 8, IR::Reg> dest_reg_b; // RZ here selects the RG (two-slot) LUT
    BitField<0, 8, IR::Reg> dest_reg_a;
    BitField<8, 8, IR::Reg> src_reg_a;
    BitField<20, 8, IR::Reg> src_reg_b;
    BitField<36, 13, u64> cbuf_offset;
    BitField<50, 3, u64> swizzle;
};

// Component bit masks used by the swizzle lookup tables below.
constexpr unsigned R = 1;
constexpr unsigned G = 2;
constexpr unsigned B = 4;
constexpr unsigned A = 8;

// Swizzle LUT used when only dest_reg_a is written (up to two components).
constexpr std::array RG_LUT{
    R, //
    G, //
    B, //
    A, //
    R | G, //
    R | A, //
    G | A, //
    B | A, //
};

// Swizzle LUT used when both destination registers are written (3-4 components).
constexpr std::array RGBA_LUT{
    R | G | B, //
    R | G | A, //
    R | B | A, //
    G | B | A, //
    R | G | B | A, //
};
55
56void CheckAlignment(IR::Reg reg, size_t alignment) {
57 if (!IR::IsAligned(reg, alignment)) {
58 throw NotImplementedException("Unaligned source register {}", reg);
59 }
60}
61
// Builds a float composite from a variadic list of source registers.
template <typename... Args>
IR::Value Composite(TranslatorVisitor& v, Args... regs) {
    return v.ir.CompositeConstruct(v.F(regs)...);
}

// Reads an array layer: a 16-bit unsigned integer in the low bits of the
// register, converted to float for use in a coordinate vector.
IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
    return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
}
70
// Decodes the TEXS mode selector and emits the matching IR sample operation.
// Operand layout in reg_a/reg_b varies per mode; LZ variants sample with an
// explicit LOD of zero, LL variants read the LOD from a register, and DC
// variants add a depth-compare reference value.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding texs{insn};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))};
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const IR::Reg reg_a{texs.src_reg_a};
    const IR::Reg reg_b{texs.src_reg_b};
    IR::TextureInstInfo info{};
    if (texs.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    switch (texs.encoding) {
    case 0: // 1D.LZ
        info.type.Assign(TextureType::Color1D);
        return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info);
    case 1: // 2D
        info.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
    case 2: // 2D.LZ
        info.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info);
    case 3: // 2D.LL: both coordinates in the reg_a pair, LOD in reg_b
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
                                           info);
    case 4: // 2D.DC: depth compare reference in reg_b
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
                                               {}, {}, {}, info);
    case 5: // 2D.LL.DC: reference in reg_b, LOD in reg_b + 1
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        info.type.Assign(TextureType::Color2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
                                               v.F(reg_b + 1), v.F(reg_b), {}, info);
    case 6: // 2D.LZ.DC
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
                                               zero, {}, info);
    case 7: // ARRAY_2D: layer packed as integer in reg_a, coords in reg_a + 1 and reg_b
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::ColorArray2D);
        return v.ir.ImageSampleImplicitLod(
            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
            {}, {}, {}, info);
    case 8: // ARRAY_2D.LZ
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::ColorArray2D);
        return v.ir.ImageSampleExplicitLod(
            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
            zero, {}, info);
    case 9: // ARRAY_2D.LZ.DC
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        info.type.Assign(TextureType::ColorArray2D);
        info.is_depth.Assign(1);
        return v.ir.ImageSampleDrefExplicitLod(
            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
            v.F(reg_b + 1), zero, {}, info);
    case 10: // 3D
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color3D);
        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
                                           {}, info);
    case 11: // 3D.LZ
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::Color3D);
        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
                                           info);
    case 12: // CUBE
        CheckAlignment(reg_a, 2);
        info.type.Assign(TextureType::ColorCube);
        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
                                           {}, info);
    case 13: // CUBE.LL: LOD in reg_b + 1
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        info.type.Assign(TextureType::ColorCube);
        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
                                           v.F(reg_b + 1), {}, info);
    default:
        throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
    }
}
160
161unsigned Swizzle(u64 insn) {
162 const Encoding texs{insn};
163 const size_t encoding{texs.swizzle};
164 if (texs.dest_reg_b == IR::Reg::RZ) {
165 if (encoding >= RG_LUT.size()) {
166 throw NotImplementedException("Illegal RG encoding {}", encoding);
167 }
168 return RG_LUT[encoding];
169 } else {
170 if (encoding >= RGBA_LUT.size()) {
171 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
172 }
173 return RGBA_LUT[encoding];
174 }
175}
176
177IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
178 const bool is_shadow{sample.Type() == IR::Type::F32};
179 if (is_shadow) {
180 const bool is_alpha{component == 3};
181 return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample};
182 } else {
183 return IR::F32{v.ir.CompositeExtract(sample, component)};
184 }
185}
186
187IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
188 const Encoding texs{insn};
189 switch (index) {
190 case 0:
191 return texs.dest_reg_a;
192 case 1:
193 CheckAlignment(texs.dest_reg_a, 2);
194 return texs.dest_reg_a + 1;
195 case 2:
196 return texs.dest_reg_b;
197 case 3:
198 CheckAlignment(texs.dest_reg_b, 2);
199 return texs.dest_reg_b + 1;
200 }
201 throw LogicError("Invalid store index {}", index);
202}
203
204void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
205 const unsigned swizzle{Swizzle(insn)};
206 unsigned store_index{0};
207 for (unsigned component = 0; component < 4; ++component) {
208 if (((swizzle >> component) & 1) == 0) {
209 continue;
210 }
211 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
212 v.F(dest, Extract(v, sample, component));
213 ++store_index;
214 }
215}
216
// Packs two floats into one 32-bit register as two half-precision values
// (packHalf2x16 semantics).
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
}
220
221void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
222 const unsigned swizzle{Swizzle(insn)};
223 unsigned store_index{0};
224 std::array<IR::F32, 4> swizzled;
225 for (unsigned component = 0; component < 4; ++component) {
226 if (((swizzle >> component) & 1) == 0) {
227 continue;
228 }
229 swizzled[store_index] = Extract(v, sample, component);
230 ++store_index;
231 }
232 const IR::F32 zero{v.ir.Imm32(0.0f)};
233 const Encoding texs{insn};
234 switch (store_index) {
235 case 1:
236 v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
237 break;
238 case 2:
239 case 3:
240 case 4:
241 v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
242 switch (store_index) {
243 case 2:
244 break;
245 case 3:
246 v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
247 break;
248 case 4:
249 v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
250 break;
251 }
252 break;
253 }
254}
255} // Anonymous namespace
256
257void TranslatorVisitor::TEXS(u64 insn) {
258 const IR::Value sample{Sample(*this, insn)};
259 if (Encoding{insn}.precision == Precision::F32) {
260 Store32(*this, insn, sample);
261 } else {
262 Store16(*this, insn, sample);
263 }
264}
265
266} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
new file mode 100644
index 000000000..218cbc1a8
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp
@@ -0,0 +1,208 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
// Texture dimensionality as encoded in instruction bits 28-30.
enum class TextureType : u64 {
    _1D,
    ARRAY_1D,
    _2D,
    ARRAY_2D,
    _3D,
    ARRAY_3D,
    CUBE,
    ARRAY_CUBE,
};

// Offset operand kind for TLD4: no offset, one AOFFI word, or per-texel (PTP)
// offsets spanning two registers (see MakeOffsetPTP).
enum class OffsetType : u64 {
    None = 0,
    AOFFI,
    PTP,
    Invalid,
};

// Which texel component the gather reads; stored into gather_component.
enum class ComponentType : u64 {
    R = 0,
    G = 1,
    B = 2,
    A = 3,
};
39
// Maps the Maxwell texture-type encoding onto the IR texture type.
// 3D arrays have no IR representation; invalid encodings are rejected.
Shader::TextureType GetType(TextureType type) {
    switch (type) {
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
61
// Builds the coordinate vector from consecutive registers starting at reg.
// For array types the first register holds the layer index as an integer
// (converted from a 16-bit unsigned value) and the spatial coordinates follow.
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
    // Layer index: 16-bit unsigned integer converted to float.
    const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }};
    switch (type) {
    case TextureType::_1D:
        return v.F(reg);
    case TextureType::ARRAY_1D:
        return v.ir.CompositeConstruct(v.F(reg + 1), read_array());
    case TextureType::_2D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array());
    case TextureType::_3D:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
    case TextureType::ARRAY_CUBE:
        return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array());
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
84
// Unpacks the AOFFI operand for TLD4: packed 6-bit signed texel offsets at
// byte boundaries (bits 0, 8, 16). Consumes the register (reg is advanced).
// Note the wider 6-bit fields compared to the 4-bit offsets of TEX.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
    const IR::U32 value{v.X(reg++)};
    switch (type) {
    case TextureType::_1D:
    case TextureType::ARRAY_1D:
        return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true);
    case TextureType::_2D:
    case TextureType::ARRAY_2D:
        return v.ir.CompositeConstruct(
            v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
            v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
    case TextureType::_3D:
    case TextureType::ARRAY_3D:
        return v.ir.CompositeConstruct(
            v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
            v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true),
            v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true));
    case TextureType::CUBE:
    case TextureType::ARRAY_CUBE:
        throw NotImplementedException("Illegal offset on CUBE sample");
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
108
109std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
110 const IR::U32 value1{v.X(reg++)};
111 const IR::U32 value2{v.X(reg++)};
112 const IR::U32 bitsize{v.ir.Imm32(6)};
113 const auto make_vector{[&v, &bitsize](const IR::U32& value) {
114 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
115 v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
116 v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
117 v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
118 }};
119 return {make_vector(value1), make_vector(value2)};
120}
121
// Shared translation for TLD4 and TLD4_b (four-texel gather). is_bindless
// selects whether the handle comes from a register or the constant buffer.
void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
          bool is_bindless) {
    union {
        u64 raw;
        BitField<35, 1, u64> ndv;
        BitField<49, 1, u64> nodep;
        BitField<50, 1, u64> dc;           // depth compare enable
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> meta_reg; // first register of the extra operand list
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask;         // destination component write mask
        BitField<36, 13, u64> cbuf_offset;
    } const tld4{insn};

    const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)};

    // Extra operands are consumed in order from meta_reg:
    // [bindless handle], [AOFFI word | two PTP words], [dref].
    IR::Reg meta_reg{tld4.meta_reg};
    IR::Value handle;
    IR::Value offset;
    IR::Value offset2;
    IR::F32 dref;
    if (!is_bindless) {
        // The 13-bit encoded offset is scaled by four, as in the other bound paths.
        handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4));
    } else {
        handle = v.X(meta_reg++);
    }
    switch (offset_type) {
    case OffsetType::None:
        break;
    case OffsetType::AOFFI:
        offset = MakeOffset(v, meta_reg, tld4.type);
        break;
    case OffsetType::PTP:
        // Per-texel offsets: separate packed offsets for the gathered texels.
        std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
        break;
    default:
        throw NotImplementedException("Invalid offset type {}", offset_type);
    }
    if (tld4.dc != 0) {
        dref = v.F(meta_reg++);
    }
    IR::TextureInstInfo info{};
    info.type.Assign(GetType(tld4.type));
    info.is_depth.Assign(tld4.dc != 0 ? 1 : 0);
    info.gather_component.Assign(static_cast<u32>(component_type));
    const IR::Value sample{[&] {
        if (tld4.dc == 0) {
            return v.ir.ImageGather(handle, coords, offset, offset2, info);
        }
        return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info);
    }()};

    // Store the masked components to consecutive destination registers.
    IR::Reg dest_reg{tld4.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((tld4.mask >> element) & 1) == 0) {
            continue;
        }
        v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
        ++dest_reg;
    }
    // Optionally expose the sparse residency result through a predicate.
    if (tld4.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
188} // Anonymous namespace
189
190void TranslatorVisitor::TLD4(u64 insn) {
191 union {
192 u64 raw;
193 BitField<56, 2, ComponentType> component;
194 BitField<54, 2, OffsetType> offset;
195 } const tld4{insn};
196 Impl(*this, insn, tld4.component, tld4.offset, false);
197}
198
199void TranslatorVisitor::TLD4_b(u64 insn) {
200 union {
201 u64 raw;
202 BitField<38, 2, ComponentType> component;
203 BitField<36, 2, OffsetType> offset;
204 } const tld4{insn};
205 Impl(*this, insn, tld4.component, tld4.offset, true);
206}
207
208} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
new file mode 100644
index 000000000..34efa2d50
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
@@ -0,0 +1,134 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <utility>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// Result precision; note the encoding is inverted relative to TEXS in this
// file's sibling (here F32 = 0, F16 = 1).
enum class Precision : u64 {
    F32,
    F16,
};

// Which texel component the gather reads; stored into gather_component.
enum class ComponentType : u64 {
    R = 0,
    G = 1,
    B = 2,
    A = 3,
};

// Field layout of the TLD4S instruction word.
union Encoding {
    u64 raw;
    BitField<55, 1, Precision> precision;
    BitField<52, 2, ComponentType> component_type;
    BitField<51, 1, u64> aoffi; // offset word present in src_reg_b
    BitField<50, 1, u64> dc;    // depth compare enable
    BitField<49, 1, u64> nodep;
    BitField<28, 8, IR::Reg> dest_reg_b;
    BitField<0, 8, IR::Reg> dest_reg_a;
    BitField<8, 8, IR::Reg> src_reg_a;
    BitField<20, 8, IR::Reg> src_reg_b;
    BitField<36, 13, u64> cbuf_offset;
};
39
// Rejects register pairs that are not aligned to the required boundary.
void CheckAlignment(IR::Reg reg, size_t alignment) {
    if (!IR::IsAligned(reg, alignment)) {
        throw NotImplementedException("Unaligned source register {}", reg);
    }
}
45
// Unpacks the 2D AOFFI operand: two 6-bit signed texel offsets packed at
// bits 0 and 8 of the register.
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
    const IR::U32 value{v.X(reg)};
    return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
                                   v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
}
51
// Decodes TLD4S (scalar gather): always a 2D gather, optionally with an AOFFI
// offset word and/or a depth-compare reference value.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding tld4s{insn};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))};
    const IR::Reg reg_a{tld4s.src_reg_a};
    const IR::Reg reg_b{tld4s.src_reg_b};
    IR::TextureInstInfo info{};
    if (tld4s.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value()));
    info.type.Assign(Shader::TextureType::Color2D);
    info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0);
    IR::Value coords;
    if (tld4s.aoffi != 0) {
        // With AOFFI both coordinates live in the reg_a pair; reg_b holds the
        // packed offsets and (with DC) the reference value in reg_b + 1.
        CheckAlignment(reg_a, 2);
        coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
        IR::Value offset = MakeOffset(v, reg_b);
        if (tld4s.dc != 0) {
            CheckAlignment(reg_b, 2);
            IR::F32 dref = v.F(reg_b + 1);
            return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info);
        }
        return v.ir.ImageGather(handle, coords, offset, {}, info);
    }
    if (tld4s.dc != 0) {
        // DC without AOFFI: coordinates in the reg_a pair, reference in reg_b.
        CheckAlignment(reg_a, 2);
        coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
        IR::F32 dref = v.F(reg_b);
        return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info);
    }
    // Plain form: one coordinate per source register.
    coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b));
    return v.ir.ImageGather(handle, coords, {}, {}, info);
}
85
// Maps a store slot (0-3) to the destination register: slots 0/1 go to the
// dest_reg_a pair, slots 2/3 to the dest_reg_b pair. Pair use requires an
// even-aligned base register.
IR::Reg RegStoreComponent32(u64 insn, size_t index) {
    const Encoding tlds4{insn};
    switch (index) {
    case 0:
        return tlds4.dest_reg_a;
    case 1:
        CheckAlignment(tlds4.dest_reg_a, 2);
        return tlds4.dest_reg_a + 1;
    case 2:
        return tlds4.dest_reg_b;
    case 3:
        CheckAlignment(tlds4.dest_reg_b, 2);
        return tlds4.dest_reg_b + 1;
    }
    throw LogicError("Invalid store index {}", index);
}
102
103void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
104 for (size_t component = 0; component < 4; ++component) {
105 const IR::Reg dest{RegStoreComponent32(insn, component)};
106 v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)});
107 }
108}
109
// Packs two floats into one 32-bit register as two half-precision values
// (packHalf2x16 semantics).
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
}
113
114void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
115 std::array<IR::F32, 4> swizzled;
116 for (size_t component = 0; component < 4; ++component) {
117 swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)};
118 }
119 const Encoding tld4s{insn};
120 v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
121 v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
122}
123} // Anonymous namespace
124
125void TranslatorVisitor::TLD4S(u64 insn) {
126 const IR::Value sample{Sample(*this, insn)};
127 if (Encoding{insn}.precision == Precision::F32) {
128 Store32(*this, insn, sample);
129 } else {
130 Store16(*this, insn, sample);
131 }
132}
133
134} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
new file mode 100644
index 000000000..c3fe3ffda
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp
@@ -0,0 +1,182 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
// Texture dimensionality as encoded in instruction bits 28-30.
enum class TextureType : u64 {
    _1D,
    ARRAY_1D,
    _2D,
    ARRAY_2D,
    _3D,
    ARRAY_3D,
    CUBE,
    ARRAY_CUBE,
};
25
// Maps the Maxwell texture-type encoding onto the IR texture type.
// 3D arrays have no IR representation; invalid encodings are rejected.
Shader::TextureType GetType(TextureType type) {
    switch (type) {
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
47
48IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) {
49 const IR::U32 value{v.X(reg)};
50 const u32 base{has_lod_clamp ? 12U : 16U};
51 return v.ir.CompositeConstruct(
52 v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true),
53 v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true));
54}
55
// Shared translation for TXD and TXD_b (texture sample with explicit
// derivatives). is_bindless selects whether the handle is read from the first
// coordinate register or from the constant buffer.
void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<35, 1, u64> aoffi;
        BitField<50, 1, u64> lc; // LOD clamp enable (currently unimplemented)
        BitField<51, 3, IR::Pred> sparse_pred;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> coord_reg;
        BitField<20, 8, IR::Reg> derivate_reg;
        BitField<28, 3, TextureType> type;
        BitField<31, 4, u64> mask; // destination component write mask
        BitField<36, 13, u64> cbuf_offset;
    } const txd{insn};

    const bool has_lod_clamp = txd.lc != 0;
    if (has_lod_clamp) {
        throw NotImplementedException("TXD.LC - CLAMP is not implemented");
    }

    // last_reg ends up pointing at the register after the coordinates, which
    // packs the array index and, when present, AOFFI/LOD-clamp fields.
    IR::Value coords;
    u32 num_derivates{};
    IR::Reg base_reg{txd.coord_reg};
    IR::Reg last_reg;
    IR::Value handle;
    if (is_bindless) {
        handle = v.X(base_reg++);
    } else {
        // The 13-bit encoded offset is scaled by four, as in the other bound paths.
        handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4));
    }

    // Array index occupies the low bits of last_reg: 12 bits with LOD clamp,
    // 16 bits otherwise.
    const auto read_array{[&]() -> IR::F32 {
        const IR::U32 base{v.ir.Imm32(0)};
        const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)};
        const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)};
        return v.ir.ConvertUToF(32, 16, array_index);
    }};
    switch (txd.type) {
    case TextureType::_1D: {
        coords = v.F(base_reg);
        num_derivates = 1;
        last_reg = base_reg + 1;
        break;
    }
    case TextureType::ARRAY_1D: {
        last_reg = base_reg + 1;
        coords = v.ir.CompositeConstruct(v.F(base_reg), read_array());
        num_derivates = 1;
        break;
    }
    case TextureType::_2D: {
        last_reg = base_reg + 2;
        coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1));
        num_derivates = 2;
        break;
    }
    case TextureType::ARRAY_2D: {
        last_reg = base_reg + 2;
        coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array());
        num_derivates = 2;
        break;
    }
    default:
        throw NotImplementedException("Invalid texture type");
    }

    // Two derivative values per coordinate dimension are read from the
    // derivate register list.
    const IR::Reg derivate_reg{txd.derivate_reg};
    IR::Value derivates;
    switch (num_derivates) {
    case 1: {
        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1));
        break;
    }
    case 2: {
        derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1),
                                            v.F(derivate_reg + 2), v.F(derivate_reg + 3));
        break;
    }
    default:
        throw NotImplementedException("Invalid texture type");
    }

    IR::Value offset;
    if (txd.aoffi != 0) {
        offset = MakeOffset(v, last_reg, has_lod_clamp);
    }

    IR::F32 lod_clamp;
    if (has_lod_clamp) {
        // Lod Clamp is a Fixed Point 4.8, we need to transform it to float.
        // to convert a fixed point, float(value) / float(1 << fixed_point)
        // in this case the fixed_point is 8.
        // NOTE(review): the comment above describes a divide by 256 but the
        // code multiplies; this path is currently unreachable because TXD.LC
        // throws earlier — confirm the conversion before enabling LC.
        const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))};
        const IR::F32 fixp_lc{v.ir.ConvertUToF(
            32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))};
        lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f);
    }

    IR::TextureInstInfo info{};
    info.type.Assign(GetType(txd.type));
    info.num_derivates.Assign(num_derivates);
    info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0);
    const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)};

    // Store the masked components to consecutive destination registers.
    IR::Reg dest_reg{txd.dest_reg};
    for (size_t element = 0; element < 4; ++element) {
        if (((txd.mask >> element) & 1) == 0) {
            continue;
        }
        v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
        ++dest_reg;
    }
    // Optionally expose the sparse residency result through a predicate.
    if (txd.sparse_pred != IR::Pred::PT) {
        v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
    }
}
172} // Anonymous namespace
173
// TXD: texture sample with explicit derivatives, bound sampler handle.
void TranslatorVisitor::TXD(u64 insn) {
    Impl(*this, insn, false);
}
177
// TXD_b: bindless variant; the handle is read from the first operand register.
void TranslatorVisitor::TXD_b(u64 insn) {
    Impl(*this, insn, true);
}
181
182} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
new file mode 100644
index 000000000..983058303
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp
@@ -0,0 +1,165 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
26Shader::TextureType GetType(TextureType type) {
27 switch (type) {
28 case TextureType::_1D:
29 return Shader::TextureType::Color1D;
30 case TextureType::ARRAY_1D:
31 return Shader::TextureType::ColorArray1D;
32 case TextureType::_2D:
33 return Shader::TextureType::Color2D;
34 case TextureType::ARRAY_2D:
35 return Shader::TextureType::ColorArray2D;
36 case TextureType::_3D:
37 return Shader::TextureType::Color3D;
38 case TextureType::ARRAY_3D:
39 throw NotImplementedException("3D array texture type");
40 case TextureType::CUBE:
41 return Shader::TextureType::ColorCube;
42 case TextureType::ARRAY_CUBE:
43 return Shader::TextureType::ColorArrayCube;
44 }
45 throw NotImplementedException("Invalid texture type {}", type);
46}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 const auto read_array{
50 [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }};
51 switch (type) {
52 case TextureType::_1D:
53 return v.X(reg);
54 case TextureType::ARRAY_1D:
55 return v.ir.CompositeConstruct(v.X(reg + 1), read_array());
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array());
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array());
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
73 const IR::U32 value{v.X(reg++)};
74 switch (type) {
75 case TextureType::_1D:
76 case TextureType::ARRAY_1D:
77 return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true);
78 case TextureType::_2D:
79 case TextureType::ARRAY_2D:
80 return v.ir.CompositeConstruct(
81 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
82 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
83 case TextureType::_3D:
84 case TextureType::ARRAY_3D:
85 return v.ir.CompositeConstruct(
86 v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
87 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true),
88 v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true));
89 case TextureType::CUBE:
90 case TextureType::ARRAY_CUBE:
91 throw NotImplementedException("Illegal offset on CUBE sample");
92 }
93 throw NotImplementedException("Invalid texture type {}", type);
94}
95
96void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
97 union {
98 u64 raw;
99 BitField<49, 1, u64> nodep;
100 BitField<55, 1, u64> lod;
101 BitField<50, 1, u64> multisample;
102 BitField<35, 1, u64> aoffi;
103 BitField<54, 1, u64> clamp;
104 BitField<51, 3, IR::Pred> sparse_pred;
105 BitField<0, 8, IR::Reg> dest_reg;
106 BitField<8, 8, IR::Reg> coord_reg;
107 BitField<20, 8, IR::Reg> meta_reg;
108 BitField<28, 3, TextureType> type;
109 BitField<31, 4, u64> mask;
110 BitField<36, 13, u64> cbuf_offset;
111 } const tld{insn};
112
113 const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)};
114
115 IR::Reg meta_reg{tld.meta_reg};
116 IR::Value handle;
117 IR::Value offset;
118 IR::U32 lod;
119 IR::U32 multisample;
120 if (is_bindless) {
121 handle = v.X(meta_reg++);
122 } else {
123 handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4));
124 }
125 if (tld.lod != 0) {
126 lod = v.X(meta_reg++);
127 } else {
128 lod = v.ir.Imm32(0U);
129 }
130 if (tld.aoffi != 0) {
131 offset = MakeOffset(v, meta_reg, tld.type);
132 }
133 if (tld.multisample != 0) {
134 multisample = v.X(meta_reg++);
135 }
136 if (tld.clamp != 0) {
137 throw NotImplementedException("TLD.CL - CLAMP is not implmented");
138 }
139 IR::TextureInstInfo info{};
140 info.type.Assign(GetType(tld.type));
141 const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)};
142
143 IR::Reg dest_reg{tld.dest_reg};
144 for (size_t element = 0; element < 4; ++element) {
145 if (((tld.mask >> element) & 1) == 0) {
146 continue;
147 }
148 v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)});
149 ++dest_reg;
150 }
151 if (tld.sparse_pred != IR::Pred::PT) {
152 v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
153 }
154}
155} // Anonymous namespace
156
// TLD: texel fetch with a bound sampler (handle from a constant buffer offset).
void TranslatorVisitor::TLD(u64 insn) {
    Impl(*this, insn, false);
}

// TLD.B: bindless texel fetch (handle read from a register).
void TranslatorVisitor::TLD_b(u64 insn) {
    Impl(*this, insn, true);
}
164
165} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
new file mode 100644
index 000000000..5dd7e31b2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp
@@ -0,0 +1,242 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <array>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// Result precision of the fetch: F16 packs two half-precision components
// per destination register, F32 writes one float per register.
enum class Precision : u64 {
    F16,
    F32,
};

// Component bit flags used by the swizzle lookup tables below.
constexpr unsigned R = 1;
constexpr unsigned G = 2;
constexpr unsigned B = 4;
constexpr unsigned A = 8;

// Component masks selectable when only dest_reg_a is written
// (dest_reg_b is RZ): up to two components.
constexpr std::array RG_LUT{
    R, //
    G, //
    B, //
    A, //
    R | G, //
    R | A, //
    G | A, //
    B | A, //
};

// Component masks selectable when both destination registers are written:
// three or four components.
constexpr std::array RGBA_LUT{
    R | G | B, //
    R | G | A, //
    R | B | A, //
    G | B | A, //
    R | G | B | A, //
};

// Raw TLDS instruction encoding. Note that `encoding` (bits 53-56) overlaps
// the individual aoffi/lod/ms flags; Sample() switches on the combined value.
union Encoding {
    u64 raw;
    BitField<59, 1, Precision> precision;
    BitField<54, 1, u64> aoffi;
    BitField<53, 1, u64> lod;
    BitField<55, 1, u64> ms;
    BitField<49, 1, u64> nodep;
    BitField<28, 8, IR::Reg> dest_reg_b;
    BitField<0, 8, IR::Reg> dest_reg_a;
    BitField<8, 8, IR::Reg> src_reg_a;
    BitField<20, 8, IR::Reg> src_reg_b;
    BitField<36, 13, u64> cbuf_offset;
    BitField<50, 3, u64> swizzle;
    BitField<53, 4, u64> encoding;
};
58
59void CheckAlignment(IR::Reg reg, size_t alignment) {
60 if (!IR::IsAligned(reg, alignment)) {
61 throw NotImplementedException("Unaligned source register {}", reg);
62 }
63}
64
65IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
66 const IR::U32 value{v.X(reg)};
67 return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
68 v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
69}
70
// Decodes the combined TLDS operand encoding and emits the texel fetch.
// Each encoding value fixes the texture type and the meaning of the two
// source registers (coordinates, explicit LOD, packed offsets, or the
// multisample index).
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding tlds{insn};
    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))};
    const IR::Reg reg_a{tlds.src_reg_a};
    const IR::Reg reg_b{tlds.src_reg_b};
    IR::Value coords;
    IR::U32 lod{v.ir.Imm32(0U)}; // Implicit LOD zero unless overridden below
    IR::Value offsets;
    IR::U32 multisample;
    Shader::TextureType texture_type{};
    switch (tlds.encoding) {
    case 0:
        // 1D
        texture_type = Shader::TextureType::Color1D;
        coords = v.X(reg_a);
        break;
    case 1:
        // 1D with explicit LOD
        texture_type = Shader::TextureType::Color1D;
        coords = v.X(reg_a);
        lod = v.X(reg_b);
        break;
    case 2:
        // 2D; coordinates split across the two source registers
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b));
        break;
    case 4:
        // 2D with packed offsets in reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
        offsets = MakeOffset(v, reg_b);
        break;
    case 5:
        // 2D with explicit LOD in reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
        lod = v.X(reg_b);
        break;
    case 6:
        // 2D multisample; sample index in reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
        multisample = v.X(reg_b);
        break;
    case 7:
        // 3D; Z coordinate in reg_b
        CheckAlignment(reg_a, 2);
        texture_type = Shader::TextureType::Color3D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b));
        break;
    case 8: {
        // 2D array; layer index packed in the low 16 bits of reg_a
        CheckAlignment(reg_b, 2);
        const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))};
        texture_type = Shader::TextureType::ColorArray2D;
        coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array);
        break;
    }
    case 12:
        // 2D with explicit LOD and packed offsets
        CheckAlignment(reg_a, 2);
        CheckAlignment(reg_b, 2);
        texture_type = Shader::TextureType::Color2D;
        coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
        lod = v.X(reg_b);
        offsets = MakeOffset(v, reg_b + 1);
        break;
    default:
        throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
    }
    IR::TextureInstInfo info{};
    if (tlds.precision == Precision::F16) {
        info.relaxed_precision.Assign(1);
    }
    info.type.Assign(texture_type);
    return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
}
143
144unsigned Swizzle(u64 insn) {
145 const Encoding tlds{insn};
146 const size_t encoding{tlds.swizzle};
147 if (tlds.dest_reg_b == IR::Reg::RZ) {
148 if (encoding >= RG_LUT.size()) {
149 throw NotImplementedException("Illegal RG encoding {}", encoding);
150 }
151 return RG_LUT[encoding];
152 } else {
153 if (encoding >= RGBA_LUT.size()) {
154 throw NotImplementedException("Illegal RGBA encoding {}", encoding);
155 }
156 return RGBA_LUT[encoding];
157 }
158}
159
// Reads a single float component out of the fetched sample vector.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
    return IR::F32{v.ir.CompositeExtract(sample, component)};
}
163
164IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
165 const Encoding tlds{insn};
166 switch (index) {
167 case 0:
168 return tlds.dest_reg_a;
169 case 1:
170 CheckAlignment(tlds.dest_reg_a, 2);
171 return tlds.dest_reg_a + 1;
172 case 2:
173 return tlds.dest_reg_b;
174 case 3:
175 CheckAlignment(tlds.dest_reg_b, 2);
176 return tlds.dest_reg_b + 1;
177 }
178 throw LogicError("Invalid store index {}", index);
179}
180
181void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
182 const unsigned swizzle{Swizzle(insn)};
183 unsigned store_index{0};
184 for (unsigned component = 0; component < 4; ++component) {
185 if (((swizzle >> component) & 1) == 0) {
186 continue;
187 }
188 const IR::Reg dest{RegStoreComponent32(insn, store_index)};
189 v.F(dest, Extract(v, sample, component));
190 ++store_index;
191 }
192}
193
// Packs two 32-bit floats into one register as a pair of 16-bit halves.
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs));
}
197
198void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
199 const unsigned swizzle{Swizzle(insn)};
200 unsigned store_index{0};
201 std::array<IR::F32, 4> swizzled;
202 for (unsigned component = 0; component < 4; ++component) {
203 if (((swizzle >> component) & 1) == 0) {
204 continue;
205 }
206 swizzled[store_index] = Extract(v, sample, component);
207 ++store_index;
208 }
209 const IR::F32 zero{v.ir.Imm32(0.0f)};
210 const Encoding tlds{insn};
211 switch (store_index) {
212 case 1:
213 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero));
214 break;
215 case 2:
216 case 3:
217 case 4:
218 v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1]));
219 switch (store_index) {
220 case 2:
221 break;
222 case 3:
223 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero));
224 break;
225 case 4:
226 v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
227 break;
228 }
229 break;
230 }
231}
232} // Anonymous namespace
233
// TLDS: scalar texel fetch with swizzled result. Fetches the texel, then
// stores it either as full floats (F32) or packed half floats (F16).
void TranslatorVisitor::TLDS(u64 insn) {
    const IR::Value sample{Sample(*this, insn)};
    if (Encoding{insn}.precision == Precision::F32) {
        Store32(*this, insn, sample);
    } else {
        Store16(*this, insn, sample);
    }
}
242} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
new file mode 100644
index 000000000..aea3c0e62
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp
@@ -0,0 +1,131 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
14
15enum class TextureType : u64 {
16 _1D,
17 ARRAY_1D,
18 _2D,
19 ARRAY_2D,
20 _3D,
21 ARRAY_3D,
22 CUBE,
23 ARRAY_CUBE,
24};
25
// Maps the hardware texture type field to the IR texture type.
// ARRAY_3D has no IR representation and is rejected.
Shader::TextureType GetType(TextureType type) {
    switch (type) {
    case TextureType::_1D:
        return Shader::TextureType::Color1D;
    case TextureType::ARRAY_1D:
        return Shader::TextureType::ColorArray1D;
    case TextureType::_2D:
        return Shader::TextureType::Color2D;
    case TextureType::ARRAY_2D:
        return Shader::TextureType::ColorArray2D;
    case TextureType::_3D:
        return Shader::TextureType::Color3D;
    case TextureType::ARRAY_3D:
        throw NotImplementedException("3D array texture type");
    case TextureType::CUBE:
        return Shader::TextureType::ColorCube;
    case TextureType::ARRAY_CUBE:
        return Shader::TextureType::ColorArrayCube;
    }
    throw NotImplementedException("Invalid texture type {}", type);
}
47
48IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
49 // The ISA reads an array component here, but this is not needed on high level shading languages
50 // We are dropping this information.
51 switch (type) {
52 case TextureType::_1D:
53 return v.F(reg);
54 case TextureType::ARRAY_1D:
55 return v.F(reg + 1);
56 case TextureType::_2D:
57 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
58 case TextureType::ARRAY_2D:
59 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2));
60 case TextureType::_3D:
61 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
62 case TextureType::ARRAY_3D:
63 throw NotImplementedException("3D array texture type");
64 case TextureType::CUBE:
65 return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
66 case TextureType::ARRAY_CUBE:
67 return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
68 }
69 throw NotImplementedException("Invalid texture type {}", type);
70}
71
72void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) {
73 union {
74 u64 raw;
75 BitField<49, 1, u64> nodep;
76 BitField<35, 1, u64> ndv;
77 BitField<0, 8, IR::Reg> dest_reg;
78 BitField<8, 8, IR::Reg> coord_reg;
79 BitField<20, 8, IR::Reg> meta_reg;
80 BitField<28, 3, TextureType> type;
81 BitField<31, 4, u64> mask;
82 BitField<36, 13, u64> cbuf_offset;
83 } const tmml{insn};
84
85 if ((tmml.mask & 0b1100) != 0) {
86 throw NotImplementedException("TMML BA results are not implmented");
87 }
88 const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)};
89
90 IR::U32 handle;
91 IR::Reg meta_reg{tmml.meta_reg};
92 if (is_bindless) {
93 handle = v.X(meta_reg++);
94 } else {
95 handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4));
96 }
97 IR::TextureInstInfo info{};
98 info.type.Assign(GetType(tmml.type));
99 const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)};
100
101 IR::Reg dest_reg{tmml.dest_reg};
102 for (size_t element = 0; element < 4; ++element) {
103 if (((tmml.mask >> element) & 1) == 0) {
104 continue;
105 }
106 IR::F32 value{v.ir.CompositeExtract(sample, element)};
107 if (element < 2) {
108 IR::U32 casted_value;
109 if (element == 0) {
110 casted_value = v.ir.ConvertFToU(32, value);
111 } else {
112 casted_value = v.ir.ConvertFToS(16, value);
113 }
114 v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8)));
115 } else {
116 v.F(dest_reg, value);
117 }
118 ++dest_reg;
119 }
120}
121} // Anonymous namespace
122
// TMML: texture LOD query with a bound sampler (handle from constant buffer).
void TranslatorVisitor::TMML(u64 insn) {
    Impl(*this, insn, false);
}

// TMML.B: bindless LOD query (handle read from a register).
void TranslatorVisitor::TMML_b(u64 insn) {
    Impl(*this, insn, true);
}
130
131} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
new file mode 100644
index 000000000..0459e5473
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp
@@ -0,0 +1,76 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/ir/modifiers.h"
10#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
11
12namespace Shader::Maxwell {
13namespace {
// TXQ query mode, encoded in instruction bits 22-24.
// Only Dimension is currently implemented (see Query below).
enum class Mode : u64 {
    Dimension = 1,
    TextureType = 2,
    SamplePos = 5,
};
19
20IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) {
21 switch (mode) {
22 case Mode::Dimension: {
23 const IR::U32 lod{v.X(src_reg)};
24 return v.ir.ImageQueryDimension(handle, lod);
25 }
26 case Mode::TextureType:
27 case Mode::SamplePos:
28 default:
29 throw NotImplementedException("Mode {}", mode);
30 }
31}
32
// Shared TXQ translation. When cbuf_offset is provided the handle is a
// constant buffer byte offset; otherwise (bindless) the handle is read from
// the first source register and the query operand follows it.
void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) {
    union {
        u64 raw;
        BitField<49, 1, u64> nodep;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg;
        BitField<22, 3, Mode> mode;
        BitField<31, 4, u64> mask;
    } const txq{insn};

    IR::Reg src_reg{txq.src_reg};
    IR::U32 handle;
    if (cbuf_offset) {
        handle = v.ir.Imm32(*cbuf_offset);
    } else {
        handle = v.X(src_reg);
        ++src_reg;
    }
    const IR::Value query{Query(v, handle, txq.mode, src_reg)};
    // Store only the result components enabled in the mask.
    IR::Reg dest_reg{txq.dest_reg};
    for (int element = 0; element < 4; ++element) {
        if (((txq.mask >> element) & 1) == 0) {
            continue;
        }
        v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))});
        ++dest_reg;
    }
}
61} // Anonymous namespace
62
63void TranslatorVisitor::TXQ(u64 insn) {
64 union {
65 u64 raw;
66 BitField<36, 13, u64> cbuf_offset;
67 } const txq{insn};
68
69 Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4));
70}
71
// TXQ.B: bindless query; the handle is read from the source register.
void TranslatorVisitor::TXQ_b(u64 insn) {
    Impl(*this, insn, std::nullopt);
}
75
76} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
new file mode 100644
index 000000000..e1f4174cf
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp
@@ -0,0 +1,30 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/exception.h"
6#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
7
8namespace Shader::Maxwell {
9
10IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width,
11 u32 selector, bool is_signed) {
12 switch (width) {
13 case VideoWidth::Byte:
14 case VideoWidth::Unknown:
15 return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed);
16 case VideoWidth::Short:
17 return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed);
18 case VideoWidth::Word:
19 return value;
20 default:
21 throw NotImplementedException("Unknown VideoWidth {}", width);
22 }
23}
24
25VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) {
26 // immediates must be 16-bit format.
27 return is_immediate ? VideoWidth::Short : width;
28}
29
30} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
new file mode 100644
index 000000000..40c0b907c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "common/common_types.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9
10namespace Shader::Maxwell {
// Width of a packed video operand lane inside a 32-bit register.
enum class VideoWidth : u64 {
    Byte,
    Unknown, // Treated the same as Byte by ExtractVideoOperandValue
    Short,
    Word,
};

// Extracts lane `selector` of the given width from `value`,
// sign- or zero-extending it to 32 bits.
[[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value,
                                               VideoWidth width, u32 selector, bool is_signed);

// Returns the effective source width; immediates are always 16-bit.
[[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate);
22
23} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
new file mode 100644
index 000000000..78869601f
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
11namespace {
// Secondary operation applied between the first-stage min/max result and
// operand C. Only MIN and MAX are implemented (see ApplyVideoMinMaxOp).
enum class VideoMinMaxOps : u64 {
    MRG_16H,
    MRG_16L,
    MRG_8B0,
    MRG_8B2,
    ACC,
    MIN,
    MAX,
};
21
22[[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs,
23 VideoMinMaxOps op, bool is_signed) {
24 switch (op) {
25 case VideoMinMaxOps::MIN:
26 return ir.IMin(lhs, rhs, is_signed);
27 case VideoMinMaxOps::MAX:
28 return ir.IMax(lhs, rhs, is_signed);
29 default:
30 throw NotImplementedException("VMNMX op {}", op);
31 }
32}
33} // Anonymous namespace
34
// VMNMX: video min/max. First stage applies IMin or IMax (selected by `mx`)
// between the extracted a/b operands; the second stage combines that result
// with operand c through ApplyVideoMinMaxOp.
void TranslatorVisitor::VMNMX(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 3, VideoMinMaxOps> op;
        BitField<54, 1, u64> dest_sign;
        BitField<55, 1, u64> sat;
        BitField<56, 1, u64> mx;
    } const vmnmx{insn};

    // Condition-code and saturation variants are not supported.
    if (vmnmx.cc != 0) {
        throw NotImplementedException("VMNMX CC");
    }
    if (vmnmx.sat != 0) {
        throw NotImplementedException("VMNMX SAT");
    }
    // Selectors were shown to default to 2 in unit tests; other values are
    // rejected rather than silently mistranslated.
    if (vmnmx.src_a_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value());
    }
    if (vmnmx.src_b_selector != 2) {
        throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value());
    }
    if (vmnmx.src_a_width != VideoWidth::Word) {
        throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value());
    }

    const bool is_b_imm{vmnmx.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)};
    const IR::U32 src_c{GetReg39(insn)};

    const VideoWidth a_width{vmnmx.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)};

    const bool src_a_signed{vmnmx.src_a_sign != 0};
    const bool src_b_signed{vmnmx.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)};

    // First operation's sign is only dependent on operand b's sign
    const bool op_1_signed{src_b_signed};

    // mx selects max for the first stage, otherwise min.
    const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed)
                                    : ir.IMin(op_a, op_b, op_1_signed)};
    X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0));
}
91
92} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
new file mode 100644
index 000000000..cc2e6d6e6
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp
@@ -0,0 +1,64 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
9
10namespace Shader::Maxwell {
// VMAD: video multiply-add. Computes dest = extract(a) * extract(b) + c,
// where a and b are packed video operand lanes selected by width/selector.
void TranslatorVisitor::VMAD(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
        BitField<51, 2, u64> scale;
        BitField<53, 1, u64> src_c_neg;
        BitField<54, 1, u64> src_a_neg;
        BitField<55, 1, u64> sat;
    } const vmad{insn};

    // Condition-code, saturation and post-scale variants are not supported.
    if (vmad.cc != 0) {
        throw NotImplementedException("VMAD CC");
    }
    if (vmad.sat != 0) {
        throw NotImplementedException("VMAD SAT");
    }
    if (vmad.scale != 0) {
        throw NotImplementedException("VMAD SCALE");
    }
    // NOTE(review): both negate bits set appears to select PO mode — confirm
    // against the ISA documentation. Either bit alone is a plain negate.
    if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) {
        throw NotImplementedException("VMAD PO");
    }
    if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) {
        throw NotImplementedException("VMAD NEG");
    }
    const bool is_b_imm{vmad.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)};
    const IR::U32 src_c{GetReg39(insn)};

    const u32 a_selector{static_cast<u32>(vmad.src_a_selector)};
    // Immediate values can't have a selector
    const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)};
    const VideoWidth a_width{vmad.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)};

    const bool src_a_signed{vmad.src_a_sign != 0};
    const bool src_b_signed{vmad.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};

    X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c));
}
63
64} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
new file mode 100644
index 000000000..1b66abc33
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp
@@ -0,0 +1,92 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/common_types.h"
6#include "shader_recompiler/exception.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
8#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h"
10
11namespace Shader::Maxwell {
12namespace {
// VSETP comparison ops as encoded in the instruction. Values 0-3 are the
// less/equal family; the greater/not-equal family starts at 16.
enum class VsetpCompareOp : u64 {
    False = 0,
    LessThan,
    Equal,
    LessThanEqual,
    GreaterThan = 16,
    NotEqual,
    GreaterThanEqual,
    True,
};
23
// Translates the VSETP encoding into the shared CompareOp used by the
// common comparison helpers. Unlisted encodings are rejected.
CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) {
    switch (op) {
    case VsetpCompareOp::False:
        return CompareOp::False;
    case VsetpCompareOp::LessThan:
        return CompareOp::LessThan;
    case VsetpCompareOp::Equal:
        return CompareOp::Equal;
    case VsetpCompareOp::LessThanEqual:
        return CompareOp::LessThanEqual;
    case VsetpCompareOp::GreaterThan:
        return CompareOp::GreaterThan;
    case VsetpCompareOp::NotEqual:
        return CompareOp::NotEqual;
    case VsetpCompareOp::GreaterThanEqual:
        return CompareOp::GreaterThanEqual;
    case VsetpCompareOp::True:
        return CompareOp::True;
    default:
        throw NotImplementedException("Invalid compare op {}", op);
    }
}
46} // Anonymous namespace
47
// VSETP: video set-predicate. Compares the extracted a/b operand lanes,
// combines the comparison with bop_pred via the boolean op, and writes the
// result to dest_pred_a and the inverted-comparison combination to
// dest_pred_b.
void TranslatorVisitor::VSETP(u64 insn) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<20, 16, u64> src_b_imm;
        BitField<28, 2, u64> src_b_selector;
        BitField<29, 2, VideoWidth> src_b_width;
        BitField<36, 2, u64> src_a_selector;
        BitField<37, 2, VideoWidth> src_a_width;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 5, VsetpCompareOp> compare_op;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 1, u64> src_a_sign;
        BitField<49, 1, u64> src_b_sign;
        BitField<50, 1, u64> is_src_b_reg;
    } const vsetp{insn};

    const bool is_b_imm{vsetp.is_src_b_reg == 0};
    const IR::U32 src_a{GetReg8(insn)};
    const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)};

    const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)};
    const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)};
    const VideoWidth a_width{vsetp.src_a_width};
    const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)};

    const bool src_a_signed{vsetp.src_a_sign != 0};
    const bool src_b_signed{vsetp.src_b_sign != 0};
    const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)};
    const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)};

    // Compare operation's sign is only dependent on operand b's sign
    const bool compare_signed{src_b_signed};
    const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)};
    const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)};
    const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)};
    const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)};
    const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)};
    ir.SetPred(vsetp.dest_pred_a, result_a);
    ir.SetPred(vsetp.dest_pred_b, result_b);
}
91
92} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
new file mode 100644
index 000000000..7ce370f09
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp
@@ -0,0 +1,54 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/bit_field.h"
6#include "common/common_types.h"
7#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
8
9namespace Shader::Maxwell {
10namespace {
11enum class VoteOp : u64 {
12 ALL,
13 ANY,
14 EQ,
15};
16
17[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
18 switch (vote_op) {
19 case VoteOp::ALL:
20 return ir.VoteAll(pred);
21 case VoteOp::ANY:
22 return ir.VoteAny(pred);
23 case VoteOp::EQ:
24 return ir.VoteEqual(pred);
25 default:
26 throw NotImplementedException("Invalid VOTE op {}", vote_op);
27 }
28}
29
30void Vote(TranslatorVisitor& v, u64 insn) {
31 union {
32 u64 insn;
33 BitField<0, 8, IR::Reg> dest_reg;
34 BitField<39, 3, IR::Pred> pred_a;
35 BitField<42, 1, u64> neg_pred_a;
36 BitField<45, 3, IR::Pred> pred_b;
37 BitField<48, 2, VoteOp> vote_op;
38 } const vote{insn};
39
40 const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
41 v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
42 v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
43}
44} // Anonymous namespace
45
// VOTE: subgroup (warp) predicate vote; forwards to the shared handler.
void TranslatorVisitor::VOTE(u64 insn) {
    Vote(*this, insn);
}
49
// VOTE.VTG variant: not implemented; only logs a stub warning and emits no IR.
void TranslatorVisitor::VOTE_vtg(u64) {
    LOG_WARNING(Shader, "(STUBBED) called");
}
53
54} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
new file mode 100644
index 000000000..550fed55c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -0,0 +1,69 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <optional>
6
7#include "common/bit_field.h"
8#include "common/common_types.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10
11namespace Shader::Maxwell {
12namespace {
13enum class ShuffleMode : u64 {
14 IDX,
15 UP,
16 DOWN,
17 BFLY,
18};
19
20[[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value,
21 const IR::U32& index, const IR::U32& mask,
22 ShuffleMode shfl_op) {
23 const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))};
24 const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))};
25 switch (shfl_op) {
26 case ShuffleMode::IDX:
27 return ir.ShuffleIndex(value, index, clamp, seg_mask);
28 case ShuffleMode::UP:
29 return ir.ShuffleUp(value, index, clamp, seg_mask);
30 case ShuffleMode::DOWN:
31 return ir.ShuffleDown(value, index, clamp, seg_mask);
32 case ShuffleMode::BFLY:
33 return ir.ShuffleButterfly(value, index, clamp, seg_mask);
34 default:
35 throw NotImplementedException("Invalid SHFL op {}", shfl_op);
36 }
37}
38
39void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
40 union {
41 u64 insn;
42 BitField<0, 8, IR::Reg> dest_reg;
43 BitField<8, 8, IR::Reg> src_reg;
44 BitField<30, 2, ShuffleMode> mode;
45 BitField<48, 3, IR::Pred> pred;
46 } const shfl{insn};
47
48 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
49 v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
50 v.X(shfl.dest_reg, result);
51}
52} // Anonymous namespace
53
54void TranslatorVisitor::SHFL(u64 insn) {
55 union {
56 u64 insn;
57 BitField<20, 5, u64> src_a_imm;
58 BitField<28, 1, u64> src_a_flag;
59 BitField<29, 1, u64> src_b_flag;
60 BitField<34, 13, u64> src_b_imm;
61 } const flags{insn};
62 const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm))
63 : GetReg20(insn)};
64 const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm))
65 : GetReg39(insn)};
66 Shuffle(*this, insn, src_a, src_b);
67}
68
69} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
new file mode 100644
index 000000000..8e3c4c5d5
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp
@@ -0,0 +1,52 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "shader_recompiler/environment.h"
6#include "shader_recompiler/frontend/ir/basic_block.h"
7#include "shader_recompiler/frontend/maxwell/decode.h"
8#include "shader_recompiler/frontend/maxwell/location.h"
9#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
10#include "shader_recompiler/frontend/maxwell/translate/translate.h"
11
12namespace Shader::Maxwell {
13
// Invokes the translator method `method` for one instruction, forwarding only
// the arguments its signature accepts: (Location, u64), (u64), or none.
template <auto method>
static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) {
    using MethodType = decltype(method);
    if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) {
        // Handler wants the instruction's own location as well as its encoding
        (visitor.*method)(pc, insn);
    } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) {
        // Handler only needs the raw 64-bit instruction encoding
        (visitor.*method)(insn);
    } else {
        // Handler takes no operands at all
        (visitor.*method)();
    }
}
25
26void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) {
27 if (location_begin == location_end) {
28 return;
29 }
30 TranslatorVisitor visitor{env, *block};
31 for (Location pc = location_begin; pc != location_end; ++pc) {
32 const u64 insn{env.ReadInstruction(pc.Offset())};
33 try {
34 const Opcode opcode{Decode(insn)};
35 switch (opcode) {
36#define INST(name, cute, mask) \
37 case Opcode::name: \
38 Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \
39 break;
40#include "shader_recompiler/frontend/maxwell/maxwell.inc"
41#undef OPCODE
42 default:
43 throw LogicError("Invalid opcode {}", opcode);
44 }
45 } catch (Exception& exception) {
46 exception.Prepend(fmt::format("Translate {}: ", Decode(insn)));
47 throw;
48 }
49 }
50}
51
52} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h
new file mode 100644
index 000000000..a3edd2e46
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h
@@ -0,0 +1,14 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9
namespace Shader::Maxwell {

/// Translates the Maxwell instructions in [location_begin, location_end) into
/// IR instructions appended to the given basic block.
void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end);

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
new file mode 100644
index 000000000..c067d459c
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -0,0 +1,223 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <memory>
7#include <vector>
8
9#include "common/settings.h"
10#include "shader_recompiler/exception.h"
11#include "shader_recompiler/frontend/ir/basic_block.h"
12#include "shader_recompiler/frontend/ir/post_order.h"
13#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
14#include "shader_recompiler/frontend/maxwell/translate/translate.h"
15#include "shader_recompiler/frontend/maxwell/translate_program.h"
16#include "shader_recompiler/host_translate_info.h"
17#include "shader_recompiler/ir_opt/passes.h"
18
19namespace Shader::Maxwell {
20namespace {
21IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
22 size_t num_syntax_blocks{};
23 for (const auto& node : syntax_list) {
24 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
25 ++num_syntax_blocks;
26 }
27 }
28 IR::BlockList blocks;
29 blocks.reserve(num_syntax_blocks);
30 for (const auto& node : syntax_list) {
31 if (node.type == IR::AbstractSyntaxNode::Type::Block) {
32 blocks.push_back(node.data.block);
33 }
34 }
35 return blocks;
36}
37
38void RemoveUnreachableBlocks(IR::Program& program) {
39 // Some blocks might be unreachable if a function call exists unconditionally
40 // If this happens the number of blocks and post order blocks will mismatch
41 if (program.blocks.size() == program.post_order_blocks.size()) {
42 return;
43 }
44 const auto begin{program.blocks.begin() + 1};
45 const auto end{program.blocks.end()};
46 const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
47 program.blocks.erase(std::remove_if(begin, end, pred), end);
48}
49
50void CollectInterpolationInfo(Environment& env, IR::Program& program) {
51 if (program.stage != Stage::Fragment) {
52 return;
53 }
54 const ProgramHeader& sph{env.SPH()};
55 for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
56 std::optional<PixelImap> imap;
57 for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
58 if (value == PixelImap::Unused) {
59 continue;
60 }
61 if (imap && imap != value) {
62 throw NotImplementedException("Per component interpolation");
63 }
64 imap = value;
65 }
66 if (!imap) {
67 continue;
68 }
69 program.info.interpolation[index] = [&] {
70 switch (*imap) {
71 case PixelImap::Unused:
72 case PixelImap::Perspective:
73 return Interpolation::Smooth;
74 case PixelImap::Constant:
75 return Interpolation::Flat;
76 case PixelImap::ScreenLinear:
77 return Interpolation::NoPerspective;
78 }
79 throw NotImplementedException("Unknown interpolation {}", *imap);
80 }();
81 }
82}
83
84void AddNVNStorageBuffers(IR::Program& program) {
85 if (!program.info.uses_global_memory) {
86 return;
87 }
88 const u32 driver_cbuf{0};
89 const u32 descriptor_size{0x10};
90 const u32 num_buffers{16};
91 const u32 base{[&] {
92 switch (program.stage) {
93 case Stage::VertexA:
94 case Stage::VertexB:
95 return 0x110u;
96 case Stage::TessellationControl:
97 return 0x210u;
98 case Stage::TessellationEval:
99 return 0x310u;
100 case Stage::Geometry:
101 return 0x410u;
102 case Stage::Fragment:
103 return 0x510u;
104 case Stage::Compute:
105 return 0x310u;
106 }
107 throw InvalidArgument("Invalid stage {}", program.stage);
108 }()};
109 auto& descs{program.info.storage_buffers_descriptors};
110 for (u32 index = 0; index < num_buffers; ++index) {
111 if (!program.info.nvn_buffer_used[index]) {
112 continue;
113 }
114 const u32 offset{base + index * descriptor_size};
115 const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
116 if (it != descs.end()) {
117 it->is_written |= program.info.stores_global_memory;
118 continue;
119 }
120 descs.push_back({
121 .cbuf_index = driver_cbuf,
122 .cbuf_offset = offset,
123 .count = 1,
124 .is_written = program.info.stores_global_memory,
125 });
126 }
127}
128} // Anonymous namespace
129
// Builds an IR program from a Maxwell control flow graph, collects per-stage
// metadata from the shader program header, and runs the optimization pipeline.
// Pass order below is significant: lowering runs before the SSA rewrite, and
// info collection runs after all transforming passes.
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
                             Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
    IR::Program program;
    // Structurize the CFG and translate instructions into the syntax list
    program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
    program.blocks = GenerateBlocks(program.syntax_list);
    program.post_order_blocks = PostOrder(program.syntax_list.front());
    program.stage = env.ShaderStage();
    program.local_memory_size = env.LocalMemorySize();
    // Stage-specific metadata pulled from the shader program header (SPH)
    switch (program.stage) {
    case Stage::TessellationControl: {
        const ProgramHeader& sph{env.SPH()};
        program.invocations = sph.common2.threads_per_input_primitive;
        break;
    }
    case Stage::Geometry: {
        const ProgramHeader& sph{env.SPH()};
        program.output_topology = sph.common3.output_topology;
        program.output_vertices = sph.common4.max_output_vertices;
        program.invocations = sph.common2.threads_per_input_primitive;
        program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0;
        if (program.is_geometry_passthrough) {
            const auto& mask{env.GpPassthroughMask()};
            for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) {
                // NOTE(review): bits look inverted here (a cleared bit marks a
                // passthrough attribute) — confirm against GpPassthroughMask
                program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0;
            }
        }
        break;
    }
    case Stage::Compute:
        program.workgroup_size = env.WorkgroupSize();
        program.shared_memory_size = env.SharedMemorySize();
        break;
    default:
        break;
    }
    RemoveUnreachableBlocks(program);

    // Replace instructions before the SSA rewrite
    if (!host_info.support_float16) {
        Optimization::LowerFp16ToFp32(program);
    }
    if (!host_info.support_int64) {
        Optimization::LowerInt64ToInt32(program);
    }
    Optimization::SsaRewritePass(program);

    Optimization::GlobalMemoryToStorageBufferPass(program);
    Optimization::TexturePass(env, program);

    Optimization::ConstantPropagationPass(program);
    Optimization::DeadCodeEliminationPass(program);
    if (Settings::values.renderer_debug) {
        // Extra IR validation only in debug configurations
        Optimization::VerificationPass(program);
    }
    // Gather shader info after all passes so it reflects the final IR
    Optimization::CollectShaderInfoPass(env, program);
    CollectInterpolationInfo(env, program);
    AddNVNStorageBuffers(program);
    return program;
}
189
// Merges a dual-vertex pair (VertexA + VertexB) into a single vertex program
// whose info is the union of both halves.
IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
                                    Environment& env_vertex_b) {
    IR::Program result{};
    Optimization::VertexATransformPass(vertex_a);
    Optimization::VertexBTransformPass(vertex_b);
    // Append vertex A's syntax list without its Return nodes, so execution
    // continues into vertex B's code appended right after
    for (const auto& term : vertex_a.syntax_list) {
        if (term.type != IR::AbstractSyntaxNode::Type::Return) {
            result.syntax_list.push_back(term);
        }
    }
    result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(),
                              vertex_b.syntax_list.end());
    result.blocks = GenerateBlocks(result.syntax_list);
    // Post order: vertex B's blocks first, then vertex A's
    result.post_order_blocks = vertex_b.post_order_blocks;
    for (const auto& block : vertex_a.post_order_blocks) {
        result.post_order_blocks.push_back(block);
    }
    result.stage = Stage::VertexB;
    // Start from vertex A's info and fold vertex B's usage into it
    result.info = vertex_a.info;
    result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
    result.info.loads.mask |= vertex_b.info.loads.mask;
    result.info.stores.mask |= vertex_b.info.stores.mask;

    Optimization::JoinTextureInfo(result.info, vertex_b.info);
    Optimization::JoinStorageInfo(result.info, vertex_b.info);
    Optimization::DeadCodeEliminationPass(result);
    if (Settings::values.renderer_debug) {
        Optimization::VerificationPass(result);
    }
    Optimization::CollectShaderInfoPass(env_vertex_b, result);
    return result;
}
222
223} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h
new file mode 100644
index 000000000..a84814811
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.h
@@ -0,0 +1,23 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "shader_recompiler/environment.h"
8#include "shader_recompiler/frontend/ir/basic_block.h"
9#include "shader_recompiler/frontend/ir/program.h"
10#include "shader_recompiler/frontend/maxwell/control_flow.h"
11#include "shader_recompiler/host_translate_info.h"
12#include "shader_recompiler/object_pool.h"
13
namespace Shader::Maxwell {

/// Builds an IR program from the control flow graph of a Maxwell shader and
/// runs the optimization pipeline over the translated code.
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
                                           ObjectPool<IR::Block>& block_pool, Environment& env,
                                           Flow::CFG& cfg, const HostTranslateInfo& host_info);

/// Merges a dual-vertex pair (VertexA + VertexB) into a single vertex program.
[[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
                                                  Environment& env_vertex_b);

} // namespace Shader::Maxwell