Diffstat (limited to 'src/shader_recompiler/frontend/maxwell')
108 files changed, 12603 insertions, 0 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.cpp b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
new file mode 100644
index 000000000..1a954a509
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.cpp
@@ -0,0 +1,642 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <optional>
#include <string>
#include <utility>

#include <fmt/format.h>

#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
#include "shader_recompiler/frontend/maxwell/location.h"

namespace Shader::Maxwell::Flow {
namespace {
struct Compare {
    bool operator()(const Block& lhs, Location rhs) const noexcept {
        return lhs.begin < rhs;
    }

    bool operator()(Location lhs, const Block& rhs) const noexcept {
        return lhs < rhs.begin;
    }

    bool operator()(const Block& lhs, const Block& rhs) const noexcept {
        return lhs.begin < rhs.begin;
    }
};

u32 BranchOffset(Location pc, Instruction inst) {
    return pc.Offset() + static_cast<u32>(inst.branch.Offset()) + 8u;
}

void Split(Block* old_block, Block* new_block, Location pc) {
    if (pc <= old_block->begin || pc >= old_block->end) {
        throw InvalidArgument("Invalid address to split={}", pc);
    }
    *new_block = Block{};
    new_block->begin = pc;
    new_block->end = old_block->end;
    new_block->end_class = old_block->end_class;
    new_block->cond = old_block->cond;
    new_block->stack = old_block->stack;
    new_block->branch_true = old_block->branch_true;
    new_block->branch_false = old_block->branch_false;
    new_block->function_call = old_block->function_call;
    new_block->return_block = old_block->return_block;
    new_block->branch_reg = old_block->branch_reg;
    new_block->branch_offset = old_block->branch_offset;
    new_block->indirect_branches = std::move(old_block->indirect_branches);

    const Location old_begin{old_block->begin};
    Stack old_stack{std::move(old_block->stack)};
    *old_block = Block{};
    old_block->begin = old_begin;
    old_block->end = pc;
    old_block->end_class = EndClass::Branch;
    old_block->cond = IR::Condition(true);
    old_block->stack = old_stack;
    old_block->branch_true = new_block;
    old_block->branch_false = nullptr;
}

Token OpcodeToken(Opcode opcode) {
    switch (opcode) {
    case Opcode::PBK:
    case Opcode::BRK:
        return Token::PBK;
    case Opcode::PCNT:
    case Opcode::CONT:
        return Token::PCNT;
    case Opcode::PEXIT:
    case Opcode::EXIT:
        return Token::PEXIT;
    case Opcode::PLONGJMP:
    case Opcode::LONGJMP:
        return Token::PLONGJMP;
    case Opcode::PRET:
    case Opcode::RET:
    case Opcode::CAL:
        return Token::PRET;
    case Opcode::SSY:
    case Opcode::SYNC:
        return Token::SSY;
    default:
        throw InvalidArgument("{}", opcode);
    }
}

bool IsAbsoluteJump(Opcode opcode) {
    switch (opcode) {
    case Opcode::JCAL:
    case Opcode::JMP:
    case Opcode::JMX:
        return true;
    default:
        return false;
    }
}

bool HasFlowTest(Opcode opcode) {
    switch (opcode) {
    case Opcode::BRA:
    case Opcode::BRX:
    case Opcode::EXIT:
    case Opcode::JMP:
    case Opcode::JMX:
    case Opcode::KIL:
    case Opcode::BRK:
    case Opcode::CONT:
    case Opcode::LONGJMP:
    case Opcode::RET:
    case Opcode::SYNC:
        return true;
    case Opcode::CAL:
    case Opcode::JCAL:
        return false;
    default:
        throw InvalidArgument("Invalid branch {}", opcode);
    }
}

std::string NameOf(const Block& block) {
    if (block.begin.IsVirtual()) {
        return fmt::format("\"Virtual {}\"", block.begin);
    } else {
        return fmt::format("\"{}\"", block.begin);
    }
}
} // Anonymous namespace

void Stack::Push(Token token, Location target) {
    entries.push_back({
        .token = token,
        .target{target},
    });
}

std::pair<Location, Stack> Stack::Pop(Token token) const {
    const std::optional<Location> pc{Peek(token)};
    if (!pc) {
        throw LogicError("Token could not be found");
    }
    return {*pc, Remove(token)};
}

std::optional<Location> Stack::Peek(Token token) const {
    const auto it{std::find_if(entries.rbegin(), entries.rend(),
                               [token](const auto& entry) { return entry.token == token; })};
    if (it == entries.rend()) {
        return std::nullopt;
    }
    return it->target;
}

Stack Stack::Remove(Token token) const {
    const auto it{std::find_if(entries.rbegin(), entries.rend(),
                               [token](const auto& entry) { return entry.token == token; })};
    const auto pos{std::distance(entries.rbegin(), it)};
    Stack result;
    result.entries.insert(result.entries.end(), entries.begin(), entries.end() - pos - 1);
    return result;
}

bool Block::Contains(Location pc) const noexcept {
    return pc >= begin && pc < end;
}

Function::Function(ObjectPool<Block>& block_pool, Location start_address)
    : entrypoint{start_address} {
    Label& label{labels.emplace_back()};
    label.address = start_address;
    label.block = block_pool.Create(Block{});
    label.block->begin = start_address;
    label.block->end = start_address;
    label.block->end_class = EndClass::Branch;
    label.block->cond = IR::Condition(true);
    label.block->branch_true = nullptr;
    label.block->branch_false = nullptr;
}

CFG::CFG(Environment& env_, ObjectPool<Block>& block_pool_, Location start_address,
         bool exits_to_dispatcher_)
    : env{env_}, block_pool{block_pool_}, program_start{start_address}, exits_to_dispatcher{
                                                                            exits_to_dispatcher_} {
    if (exits_to_dispatcher) {
        dispatch_block = block_pool.Create(Block{});
        dispatch_block->begin = {};
        dispatch_block->end = {};
        dispatch_block->end_class = EndClass::Exit;
        dispatch_block->cond = IR::Condition(true);
        dispatch_block->stack = {};
        dispatch_block->branch_true = nullptr;
        dispatch_block->branch_false = nullptr;
    }
    functions.emplace_back(block_pool, start_address);
    for (FunctionId function_id = 0; function_id < functions.size(); ++function_id) {
        while (!functions[function_id].labels.empty()) {
            Function& function{functions[function_id]};
            Label label{function.labels.back()};
            function.labels.pop_back();
            AnalyzeLabel(function_id, label);
        }
    }
    if (exits_to_dispatcher) {
        const auto last_block{functions[0].blocks.rbegin()};
        dispatch_block->begin = last_block->end + 1;
        dispatch_block->end = last_block->end + 1;
        functions[0].blocks.insert(*dispatch_block);
    }
}

void CFG::AnalyzeLabel(FunctionId function_id, Label& label) {
    if (InspectVisitedBlocks(function_id, label)) {
        // Label address has been visited
        return;
    }
    // Try to find the next block
    Function* const function{&functions[function_id]};
    Location pc{label.address};
    const auto next_it{function->blocks.upper_bound(pc, Compare{})};
    const bool is_last{next_it == function->blocks.end()};
    Block* const next{is_last ? nullptr : &*next_it};
    // Insert before the next block
    Block* const block{label.block};
    // Analyze instructions until it reaches an already visited block or there's a branch
    bool is_branch{false};
    while (!next || pc < next->begin) {
        is_branch = AnalyzeInst(block, function_id, pc) == AnalysisState::Branch;
        if (is_branch) {
            break;
        }
        ++pc;
    }
    if (!is_branch) {
        // If the block finished without a branch,
        // it means that the next instruction is already visited, jump to it
        block->end = pc;
        block->cond = IR::Condition{true};
        block->branch_true = next;
        block->branch_false = nullptr;
    }
    // Function's pointer might be invalid, resolve it again
    // Insert the new block
    functions[function_id].blocks.insert(*block);
}

bool CFG::InspectVisitedBlocks(FunctionId function_id, const Label& label) {
    const Location pc{label.address};
    Function& function{functions[function_id]};
    const auto it{
        std::ranges::find_if(function.blocks, [pc](auto& block) { return block.Contains(pc); })};
    if (it == function.blocks.end()) {
        // Address has not been visited
        return false;
    }
    Block* const visited_block{&*it};
    if (visited_block->begin == pc) {
        throw LogicError("Dangling block");
    }
    Block* const new_block{label.block};
    Split(visited_block, new_block, pc);
    function.blocks.insert(it, *new_block);
    return true;
}

CFG::AnalysisState CFG::AnalyzeInst(Block* block, FunctionId function_id, Location pc) {
    const Instruction inst{env.ReadInstruction(pc.Offset())};
    const Opcode opcode{Decode(inst.raw)};
    switch (opcode) {
    case Opcode::BRA:
    case Opcode::JMP:
    case Opcode::RET:
        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
            return AnalysisState::Continue;
        }
        switch (opcode) {
        case Opcode::BRA:
        case Opcode::JMP:
            AnalyzeBRA(block, function_id, pc, inst, IsAbsoluteJump(opcode));
            break;
        case Opcode::RET:
            block->end_class = EndClass::Return;
            break;
        default:
            break;
        }
        block->end = pc;
        return AnalysisState::Branch;
    case Opcode::BRK:
    case Opcode::CONT:
    case Opcode::LONGJMP:
    case Opcode::SYNC: {
        if (!AnalyzeBranch(block, function_id, pc, inst, opcode)) {
            return AnalysisState::Continue;
        }
        const auto [stack_pc, new_stack]{block->stack.Pop(OpcodeToken(opcode))};
        block->branch_true = AddLabel(block, new_stack, stack_pc, function_id);
        block->end = pc;
        return AnalysisState::Branch;
    }
    case Opcode::KIL: {
        const Predicate pred{inst.Pred()};
        const auto ir_pred{static_cast<IR::Pred>(pred.index)};
        const IR::Condition cond{inst.branch.flow_test, ir_pred, pred.negated};
        AnalyzeCondInst(block, function_id, pc, EndClass::Kill, cond);
        return AnalysisState::Branch;
    }
    case Opcode::PBK:
    case Opcode::PCNT:
    case Opcode::PEXIT:
    case Opcode::PLONGJMP:
    case Opcode::SSY:
        block->stack.Push(OpcodeToken(opcode), BranchOffset(pc, inst));
        return AnalysisState::Continue;
    case Opcode::BRX:
    case Opcode::JMX:
        return AnalyzeBRX(block, pc, inst, IsAbsoluteJump(opcode), function_id);
    case Opcode::EXIT:
        return AnalyzeEXIT(block, function_id, pc, inst);
    case Opcode::PRET:
        throw NotImplementedException("PRET flow analysis");
    case Opcode::CAL:
    case Opcode::JCAL: {
        const bool is_absolute{IsAbsoluteJump(opcode)};
        const Location cal_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
        // Technically CAL pushes into PRET, but that's implicit in the function call for us
        // Insert the function into the list if it doesn't exist
        const auto it{std::ranges::find(functions, cal_pc, &Function::entrypoint)};
        const bool exists{it != functions.end()};
        const FunctionId call_id{exists ? static_cast<size_t>(std::distance(functions.begin(), it))
                                        : functions.size()};
        if (!exists) {
            functions.emplace_back(block_pool, cal_pc);
        }
        block->end_class = EndClass::Call;
        block->function_call = call_id;
        block->return_block = AddLabel(block, block->stack, pc + 1, function_id);
        block->end = pc;
        return AnalysisState::Branch;
    }
    default:
        break;
    }
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{true} || pred == Predicate{false}) {
        return AnalysisState::Continue;
    }
    const IR::Condition cond{static_cast<IR::Pred>(pred.index), pred.negated};
    AnalyzeCondInst(block, function_id, pc, EndClass::Branch, cond);
    return AnalysisState::Branch;
}

void CFG::AnalyzeCondInst(Block* block, FunctionId function_id, Location pc,
                          EndClass insn_end_class, IR::Condition cond) {
    if (block->begin != pc) {
        // If the block doesn't start in the conditional instruction
        // mark it as a label to visit it later
        block->end = pc;
        block->cond = IR::Condition{true};
        block->branch_true = AddLabel(block, block->stack, pc, function_id);
        block->branch_false = nullptr;
        return;
    }
    // Create a virtual block and a conditional block
    Block* const conditional_block{block_pool.Create()};
    Block virtual_block{};
    virtual_block.begin = block->begin.Virtual();
    virtual_block.end = block->begin.Virtual();
    virtual_block.end_class = EndClass::Branch;
    virtual_block.stack = block->stack;
    virtual_block.cond = cond;
    virtual_block.branch_true = conditional_block;
    virtual_block.branch_false = nullptr;
    // Save the contents of the visited block in the conditional block
    *conditional_block = std::move(*block);
    // Impersonate the visited block with a virtual block
    *block = std::move(virtual_block);
    // Set the end properties of the conditional instruction
    conditional_block->end = pc + 1;
    conditional_block->end_class = insn_end_class;
    // Add a label to the instruction after the conditional instruction
    Block* const endif_block{AddLabel(conditional_block, block->stack, pc + 1, function_id)};
    // Branch to the next instruction from the virtual block
    block->branch_false = endif_block;
    // And branch to it from the conditional instruction if it is a branch or a kill instruction
    // Kill instructions are considered a branch because they demote to a helper invocation and
    // execution may continue.
    if (insn_end_class == EndClass::Branch || insn_end_class == EndClass::Kill) {
        conditional_block->cond = IR::Condition{true};
        conditional_block->branch_true = endif_block;
        conditional_block->branch_false = nullptr;
    }
    // Finally insert the condition block into the list of blocks
    functions[function_id].blocks.insert(*conditional_block);
}

bool CFG::AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                        Opcode opcode) {
    if (inst.branch.is_cbuf) {
        throw NotImplementedException("Branch with constant buffer offset");
    }
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{false}) {
        return false;
    }
    const bool has_flow_test{HasFlowTest(opcode)};
    const IR::FlowTest flow_test{has_flow_test ? inst.branch.flow_test.Value() : IR::FlowTest::T};
    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
        block->cond = IR::Condition(flow_test, static_cast<IR::Pred>(pred.index), pred.negated);
        block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
    } else {
        block->cond = IR::Condition{true};
    }
    return true;
}

void CFG::AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                     bool is_absolute) {
    const Location bra_pc{is_absolute ? inst.branch.Absolute() : BranchOffset(pc, inst)};
    block->branch_true = AddLabel(block, block->stack, bra_pc, function_id);
}

CFG::AnalysisState CFG::AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
                                   FunctionId function_id) {
    const std::optional brx_table{TrackIndirectBranchTable(env, pc, program_start)};
    if (!brx_table) {
        TrackIndirectBranchTable(env, pc, program_start);
        throw NotImplementedException("Failed to track indirect branch");
    }
    const IR::FlowTest flow_test{inst.branch.flow_test};
    const Predicate pred{inst.Pred()};
    if (flow_test != IR::FlowTest::T || pred != Predicate{true}) {
        throw NotImplementedException("Conditional indirect branch");
    }
    std::vector<u32> targets;
    targets.reserve(brx_table->num_entries);
    for (u32 i = 0; i < brx_table->num_entries; ++i) {
        u32 target{env.ReadCbufValue(brx_table->cbuf_index, brx_table->cbuf_offset + i * 4)};
        if (!is_absolute) {
            target += pc.Offset();
        }
        target += static_cast<u32>(brx_table->branch_offset);
        target += 8;
        targets.push_back(target);
    }
    std::ranges::sort(targets);
    targets.erase(std::unique(targets.begin(), targets.end()), targets.end());

    block->indirect_branches.reserve(targets.size());
    for (const u32 target : targets) {
        Block* const branch{AddLabel(block, block->stack, target, function_id)};
        block->indirect_branches.push_back({
            .block = branch,
            .address = target,
        });
    }
    block->cond = IR::Condition{true};
    block->end = pc + 1;
    block->end_class = EndClass::IndirectBranch;
    block->branch_reg = brx_table->branch_reg;
    block->branch_offset = brx_table->branch_offset + 8;
    if (!is_absolute) {
        block->branch_offset += pc.Offset();
    }
    return AnalysisState::Branch;
}

CFG::AnalysisState CFG::AnalyzeEXIT(Block* block, FunctionId function_id, Location pc,
                                    Instruction inst) {
    const IR::FlowTest flow_test{inst.branch.flow_test};
    const Predicate pred{inst.Pred()};
    if (pred == Predicate{false} || flow_test == IR::FlowTest::F) {
        // EXIT will never be taken
        return AnalysisState::Continue;
    }
    if (exits_to_dispatcher && function_id != 0) {
        throw NotImplementedException("Dispatch EXIT on external function");
    }
    if (pred != Predicate{true} || flow_test != IR::FlowTest::T) {
        if (block->stack.Peek(Token::PEXIT).has_value()) {
            throw NotImplementedException("Conditional EXIT with PEXIT token");
        }
        const IR::Condition cond{flow_test, static_cast<IR::Pred>(pred.index), pred.negated};
        if (exits_to_dispatcher) {
            block->end = pc;
            block->end_class = EndClass::Branch;
            block->cond = cond;
            block->branch_true = dispatch_block;
            block->branch_false = AddLabel(block, block->stack, pc + 1, function_id);
            return AnalysisState::Branch;
        }
        AnalyzeCondInst(block, function_id, pc, EndClass::Exit, cond);
        return AnalysisState::Branch;
    }
    if (const std::optional<Location> exit_pc{block->stack.Peek(Token::PEXIT)}) {
        const Stack popped_stack{block->stack.Remove(Token::PEXIT)};
        block->cond = IR::Condition{true};
        block->branch_true = AddLabel(block, popped_stack, *exit_pc, function_id);
        block->branch_false = nullptr;
        return AnalysisState::Branch;
    }
    if (exits_to_dispatcher) {
        block->cond = IR::Condition{true};
        block->end = pc;
        block->end_class = EndClass::Branch;
        block->branch_true = dispatch_block;
        block->branch_false = nullptr;
        return AnalysisState::Branch;
    }
    block->end = pc + 1;
    block->end_class = EndClass::Exit;
    return AnalysisState::Branch;
}

Block* CFG::AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id) {
    Function& function{functions[function_id]};
    if (block->begin == pc) {
        // Jumps to itself
        return block;
    }
    if (const auto it{function.blocks.find(pc, Compare{})}; it != function.blocks.end()) {
        // Block already exists and it has been visited
        if (function.blocks.begin() != it) {
            // Check if the previous node is the virtual variant of the label
            // This won't exist if a virtual node is not needed or it hasn't been visited
            // If it hasn't been visited and a virtual node is needed, this will still behave as
            // expected because the node impersonated with its virtual node.
            const auto prev{std::prev(it)};
            if (it->begin.Virtual() == prev->begin) {
                return &*prev;
            }
        }
        return &*it;
    }
    // Make sure we don't insert the same layer twice
    const auto label_it{std::ranges::find(function.labels, pc, &Label::address)};
    if (label_it != function.labels.end()) {
        return label_it->block;
    }
    Block* const new_block{block_pool.Create()};
    new_block->begin = pc;
    new_block->end = pc;
    new_block->end_class = EndClass::Branch;
    new_block->cond = IR::Condition(true);
    new_block->stack = stack;
    new_block->branch_true = nullptr;
    new_block->branch_false = nullptr;
    function.labels.push_back(Label{
        .address{pc},
        .block = new_block,
        .stack{std::move(stack)},
    });
    return new_block;
}

std::string CFG::Dot() const {
    int node_uid{0};

    std::string dot{"digraph shader {\n"};
    for (const Function& function : functions) {
        dot += fmt::format("\tsubgraph cluster_{} {{\n", function.entrypoint);
        dot += fmt::format("\t\tnode [style=filled];\n");
        for (const Block& block : function.blocks) {
            const std::string name{NameOf(block)};
            const auto add_branch = [&](Block* branch, bool add_label) {
                dot += fmt::format("\t\t{}->{}", name, NameOf(*branch));
                if (add_label && block.cond != IR::Condition{true} &&
                    block.cond != IR::Condition{false}) {
                    dot += fmt::format(" [label=\"{}\"]", block.cond);
                }
                dot += '\n';
            };
            dot += fmt::format("\t\t{};\n", name);
            switch (block.end_class) {
            case EndClass::Branch:
                if (block.cond != IR::Condition{false}) {
                    add_branch(block.branch_true, true);
                }
                if (block.cond != IR::Condition{true}) {
                    add_branch(block.branch_false, false);
                }
                break;
            case EndClass::IndirectBranch:
                for (const IndirectBranch& branch : block.indirect_branches) {
                    add_branch(branch.block, false);
                }
                break;
            case EndClass::Call:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{}->{};\n", node_uid, NameOf(*block.return_block));
                dot += fmt::format("\t\tN{} [label=\"Call {}\"][shape=square][style=stripped];\n",
                                   node_uid, block.function_call);
                dot += '\n';
                ++node_uid;
                break;
            case EndClass::Exit:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Exit\"][shape=square][style=stripped];\n",
                                   node_uid);
                ++node_uid;
                break;
            case EndClass::Return:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Return\"][shape=square][style=stripped];\n",
                                   node_uid);
                ++node_uid;
                break;
            case EndClass::Kill:
                dot += fmt::format("\t\t{}->N{};\n", name, node_uid);
                dot += fmt::format("\t\tN{} [label=\"Kill\"][shape=square][style=stripped];\n",
                                   node_uid);
                ++node_uid;
                break;
            }
        }
        if (function.entrypoint == 8) {
            dot += fmt::format("\t\tlabel = \"main\";\n");
        } else {
            dot += fmt::format("\t\tlabel = \"Function {}\";\n", function.entrypoint);
        }
        dot += "\t}\n";
    }
    if (!functions.empty()) {
        auto& function{functions.front()};
        if (function.blocks.empty()) {
            dot += "Start;\n";
        } else {
            dot += fmt::format("\tStart -> {};\n", NameOf(*function.blocks.begin()));
        }
        dot += fmt::format("\tStart [shape=diamond];\n");
    }
    dot += "}\n";
    return dot;
}

} // namespace Shader::Maxwell::Flow
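
For orientation, a minimal sketch of how this pass might be driven. The function name DumpShaderCfg and the assumption that a concrete Environment implementation already exists are illustrative only and not part of this change; only the CFG, ObjectPool and Location interfaces come from the files added here.

// Hypothetical driver (not part of the diff): builds the CFG for one shader and
// prints its Graphviz dump for debugging.
#include <cstdio>
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"

void DumpShaderCfg(Shader::Environment& env, u32 start_address) {
    Shader::ObjectPool<Shader::Maxwell::Flow::Block> block_pool;
    // start_address must be a multiple of 8 or the Location constructor throws
    Shader::Maxwell::Flow::CFG cfg{env, block_pool, Shader::Maxwell::Location{start_address}};
    // Dot() renders the analyzed graph in Graphviz format
    std::puts(cfg.Dot().c_str());
}
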
diff --git a/src/shader_recompiler/frontend/maxwell/control_flow.h b/src/shader_recompiler/frontend/maxwell/control_flow.h
new file mode 100644
index 000000000..a6bd3e196
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/control_flow.h
@@ -0,0 +1,169 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <compare>
#include <optional>
#include <span>
#include <string>
#include <vector>

#include <boost/container/small_vector.hpp>
#include <boost/intrusive/set.hpp>

#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/condition.h"
#include "shader_recompiler/frontend/maxwell/instruction.h"
#include "shader_recompiler/frontend/maxwell/location.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/object_pool.h"

namespace Shader::Maxwell::Flow {

struct Block;

using FunctionId = size_t;

enum class EndClass {
    Branch,
    IndirectBranch,
    Call,
    Exit,
    Return,
    Kill,
};

enum class Token {
    SSY,
    PBK,
    PEXIT,
    PRET,
    PCNT,
    PLONGJMP,
};

struct StackEntry {
    auto operator<=>(const StackEntry&) const noexcept = default;

    Token token;
    Location target;
};

class Stack {
public:
    void Push(Token token, Location target);
    [[nodiscard]] std::pair<Location, Stack> Pop(Token token) const;
    [[nodiscard]] std::optional<Location> Peek(Token token) const;
    [[nodiscard]] Stack Remove(Token token) const;

private:
    boost::container::small_vector<StackEntry, 3> entries;
};

struct IndirectBranch {
    Block* block;
    u32 address;
};

struct Block : boost::intrusive::set_base_hook<
                   // Normal link is ~2.5% faster compared to safe link
                   boost::intrusive::link_mode<boost::intrusive::normal_link>> {
    [[nodiscard]] bool Contains(Location pc) const noexcept;

    bool operator<(const Block& rhs) const noexcept {
        return begin < rhs.begin;
    }

    Location begin;
    Location end;
    EndClass end_class{};
    IR::Condition cond{};
    Stack stack;
    Block* branch_true{};
    Block* branch_false{};
    FunctionId function_call{};
    Block* return_block{};
    IR::Reg branch_reg{};
    s32 branch_offset{};
    std::vector<IndirectBranch> indirect_branches;
};

struct Label {
    Location address;
    Block* block;
    Stack stack;
};

struct Function {
    explicit Function(ObjectPool<Block>& block_pool, Location start_address);

    Location entrypoint;
    boost::container::small_vector<Label, 16> labels;
    boost::intrusive::set<Block> blocks;
};

class CFG {
    enum class AnalysisState {
        Branch,
        Continue,
    };

public:
    explicit CFG(Environment& env, ObjectPool<Block>& block_pool, Location start_address,
                 bool exits_to_dispatcher = false);

    CFG& operator=(const CFG&) = delete;
    CFG(const CFG&) = delete;

    CFG& operator=(CFG&&) = delete;
    CFG(CFG&&) = delete;

    [[nodiscard]] std::string Dot() const;

    [[nodiscard]] std::span<const Function> Functions() const noexcept {
        return std::span(functions.data(), functions.size());
    }
    [[nodiscard]] std::span<Function> Functions() noexcept {
        return std::span(functions.data(), functions.size());
    }

    [[nodiscard]] bool ExitsToDispatcher() const {
        return exits_to_dispatcher;
    }

private:
    void AnalyzeLabel(FunctionId function_id, Label& label);

    /// Inspect already visited blocks.
    /// Return true when the block has already been visited
    bool InspectVisitedBlocks(FunctionId function_id, const Label& label);

    AnalysisState AnalyzeInst(Block* block, FunctionId function_id, Location pc);

    void AnalyzeCondInst(Block* block, FunctionId function_id, Location pc, EndClass insn_end_class,
                         IR::Condition cond);

    /// Return true when the branch instruction is confirmed to be a branch
    bool AnalyzeBranch(Block* block, FunctionId function_id, Location pc, Instruction inst,
                       Opcode opcode);

    void AnalyzeBRA(Block* block, FunctionId function_id, Location pc, Instruction inst,
                    bool is_absolute);
    AnalysisState AnalyzeBRX(Block* block, Location pc, Instruction inst, bool is_absolute,
                             FunctionId function_id);
    AnalysisState AnalyzeEXIT(Block* block, FunctionId function_id, Location pc, Instruction inst);

    /// Return the branch target block id
    Block* AddLabel(Block* block, Stack stack, Location pc, FunctionId function_id);

    Environment& env;
    ObjectPool<Block>& block_pool;
    boost::container::small_vector<Function, 1> functions;
    Location program_start;
    bool exits_to_dispatcher{};
    Block* dispatch_block{};
};

} // namespace Shader::Maxwell::Flow
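
A small sketch of the Stack's value semantics, which is how the analysis pairs an SSY push with the SYNC that consumes it. The address 0x48 is made up for illustration; Pop throws LogicError if the requested token is not on the stack.

#include "shader_recompiler/frontend/maxwell/control_flow.h"

void StackExample() {
    using namespace Shader::Maxwell;
    Flow::Stack stack;
    stack.Push(Flow::Token::SSY, Location{0x48});          // SSY records the reconvergence point
    const auto [target, popped]{stack.Pop(Flow::Token::SSY)}; // what SYNC later resolves to
    // target == Location{0x48}; popped is a copy without the SSY entry,
    // while `stack` itself is untouched because Pop is const and returns a new Stack
}
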
diff --git a/src/shader_recompiler/frontend/maxwell/decode.cpp b/src/shader_recompiler/frontend/maxwell/decode.cpp
new file mode 100644
index 000000000..972f677dc
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.cpp
@@ -0,0 +1,149 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <algorithm>
#include <array>
#include <bit>
#include <memory>
#include <string_view>

#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"

namespace Shader::Maxwell {
namespace {
struct MaskValue {
    u64 mask;
    u64 value;
};

constexpr MaskValue MaskValueFromEncoding(const char* encoding) {
    u64 mask{};
    u64 value{};
    u64 bit{u64(1) << 63};
    while (*encoding) {
        switch (*encoding) {
        case '0':
            mask |= bit;
            break;
        case '1':
            mask |= bit;
            value |= bit;
            break;
        case '-':
            break;
        case ' ':
            break;
        default:
            throw LogicError("Invalid encoding character '{}'", *encoding);
        }
        ++encoding;
        if (*encoding != ' ') {
            bit >>= 1;
        }
    }
    return MaskValue{.mask = mask, .value = value};
}

struct InstEncoding {
    MaskValue mask_value;
    Opcode opcode;
};
constexpr std::array UNORDERED_ENCODINGS{
#define INST(name, cute, encode)                                                                  \
    InstEncoding{                                                                                  \
        .mask_value{MaskValueFromEncoding(encode)},                                                \
        .opcode = Opcode::name,                                                                    \
    },
#include "maxwell.inc"
#undef INST
};

constexpr auto SortedEncodings() {
    std::array encodings{UNORDERED_ENCODINGS};
    std::ranges::sort(encodings, [](const InstEncoding& lhs, const InstEncoding& rhs) {
        return std::popcount(lhs.mask_value.mask) > std::popcount(rhs.mask_value.mask);
    });
    return encodings;
}
constexpr auto ENCODINGS{SortedEncodings()};

constexpr int WidestLeftBits() {
    int bits{64};
    for (const InstEncoding& encoding : ENCODINGS) {
        bits = std::min(bits, std::countr_zero(encoding.mask_value.mask));
    }
    return 64 - bits;
}
constexpr int WIDEST_LEFT_BITS{WidestLeftBits()};
constexpr int MASK_SHIFT{64 - WIDEST_LEFT_BITS};

constexpr size_t ToFastLookupIndex(u64 value) {
    return static_cast<size_t>(value >> MASK_SHIFT);
}

constexpr size_t FastLookupSize() {
    size_t max_width{};
    for (const InstEncoding& encoding : ENCODINGS) {
        max_width = std::max(max_width, ToFastLookupIndex(encoding.mask_value.mask));
    }
    return max_width + 1;
}
constexpr size_t FAST_LOOKUP_SIZE{FastLookupSize()};

struct InstInfo {
    [[nodiscard]] u64 Mask() const noexcept {
        return static_cast<u64>(high_mask) << MASK_SHIFT;
    }

    [[nodiscard]] u64 Value() const noexcept {
        return static_cast<u64>(high_value) << MASK_SHIFT;
    }

    u16 high_mask;
    u16 high_value;
    Opcode opcode;
};

constexpr auto MakeFastLookupTableIndex(size_t index) {
    std::array<InstInfo, 2> encodings{};
    size_t element{};
    for (const auto& encoding : ENCODINGS) {
        const size_t mask{ToFastLookupIndex(encoding.mask_value.mask)};
        const size_t value{ToFastLookupIndex(encoding.mask_value.value)};
        if ((index & mask) == value) {
            encodings.at(element) = InstInfo{
                .high_mask = static_cast<u16>(encoding.mask_value.mask >> MASK_SHIFT),
                .high_value = static_cast<u16>(encoding.mask_value.value >> MASK_SHIFT),
                .opcode = encoding.opcode,
            };
            ++element;
        }
    }
    return encodings;
}

/*constexpr*/ auto MakeFastLookupTable() {
    auto encodings{std::make_unique<std::array<std::array<InstInfo, 2>, FAST_LOOKUP_SIZE>>()};
    for (size_t index = 0; index < FAST_LOOKUP_SIZE; ++index) {
        (*encodings)[index] = MakeFastLookupTableIndex(index);
    }
    return encodings;
}
const auto FAST_LOOKUP_TABLE{MakeFastLookupTable()};
} // Anonymous namespace

Opcode Decode(u64 insn) {
    const auto& table{(*FAST_LOOKUP_TABLE)[ToFastLookupIndex(insn)]};
    const auto it{std::ranges::find_if(
        table, [insn](const InstInfo& info) { return (insn & info.Mask()) == info.Value(); })};
    if (it == table.end()) {
        throw NotImplementedException("Instruction 0x{:016x} is unknown / unimplemented", insn);
    }
    return it->opcode;
}

} // namespace Shader::Maxwell
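
To illustrate the encoding-string convention, here is what the BRA entry from maxwell.inc in this same change ("1110 0010 0100 ----") compiles down to. Spaces are ignored, '1'/'0' contribute to both mask and value, and '-' bits are left out of the mask, so only bits 63..52 are constrained. The helper lives in an anonymous namespace, so these checks would only compile inside decode.cpp; they are shown purely as a sketch.

// Illustrative only: the mask/value pair produced for BRA's encoding string.
constexpr auto bra{MaskValueFromEncoding("1110 0010 0100 ----")};
static_assert(bra.mask == 0xFFF0'0000'0000'0000);
static_assert(bra.value == 0xE240'0000'0000'0000);
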
diff --git a/src/shader_recompiler/frontend/maxwell/decode.h b/src/shader_recompiler/frontend/maxwell/decode.h
new file mode 100644
index 000000000..b4f080fd7
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/decode.h
@@ -0,0 +1,14 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"

namespace Shader::Maxwell {

[[nodiscard]] Opcode Decode(u64 insn);

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
new file mode 100644
index 000000000..008625cb3
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.cpp
@@ -0,0 +1,108 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <optional>

#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"
#include "shader_recompiler/frontend/maxwell/opcodes.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h"

namespace Shader::Maxwell {
namespace {
union Encoding {
    u64 raw;
    BitField<0, 8, IR::Reg> dest_reg;
    BitField<8, 8, IR::Reg> src_reg;
    BitField<20, 19, u64> immediate;
    BitField<56, 1, u64> is_negative;
    BitField<20, 24, s64> brx_offset;
};

template <typename Callable>
std::optional<u64> Track(Environment& env, Location block_begin, Location& pos, Callable&& func) {
    while (pos >= block_begin) {
        const u64 insn{env.ReadInstruction(pos.Offset())};
        --pos;
        if (func(insn, Decode(insn))) {
            return insn;
        }
    }
    return std::nullopt;
}

std::optional<u64> TrackLDC(Environment& env, Location block_begin, Location& pos,
                            IR::Reg brx_reg) {
    return Track(env, block_begin, pos, [brx_reg](u64 insn, Opcode opcode) {
        const LDC::Encoding ldc{insn};
        return opcode == Opcode::LDC && ldc.dest_reg == brx_reg && ldc.size == LDC::Size::B32 &&
               ldc.mode == LDC::Mode::Default;
    });
}

std::optional<u64> TrackSHL(Environment& env, Location block_begin, Location& pos,
                            IR::Reg ldc_reg) {
    return Track(env, block_begin, pos, [ldc_reg](u64 insn, Opcode opcode) {
        const Encoding shl{insn};
        return opcode == Opcode::SHL_imm && shl.dest_reg == ldc_reg;
    });
}

std::optional<u64> TrackIMNMX(Environment& env, Location block_begin, Location& pos,
                              IR::Reg shl_reg) {
    return Track(env, block_begin, pos, [shl_reg](u64 insn, Opcode opcode) {
        const Encoding imnmx{insn};
        return opcode == Opcode::IMNMX_imm && imnmx.dest_reg == shl_reg;
    });
}
} // Anonymous namespace

std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
                                                                Location block_begin) {
    const u64 brx_insn{env.ReadInstruction(brx_pos.Offset())};
    const Opcode brx_opcode{Decode(brx_insn)};
    if (brx_opcode != Opcode::BRX && brx_opcode != Opcode::JMX) {
        throw LogicError("Tracked instruction is not BRX or JMX");
    }
    const IR::Reg brx_reg{Encoding{brx_insn}.src_reg};
    const s32 brx_offset{static_cast<s32>(Encoding{brx_insn}.brx_offset)};

    Location pos{brx_pos};
    const std::optional<u64> ldc_insn{TrackLDC(env, block_begin, pos, brx_reg)};
    if (!ldc_insn) {
        return std::nullopt;
    }
    const LDC::Encoding ldc{*ldc_insn};
    const u32 cbuf_index{static_cast<u32>(ldc.index)};
    const u32 cbuf_offset{static_cast<u32>(static_cast<s32>(ldc.offset.Value()))};
    const IR::Reg ldc_reg{ldc.src_reg};

    const std::optional<u64> shl_insn{TrackSHL(env, block_begin, pos, ldc_reg)};
    if (!shl_insn) {
        return std::nullopt;
    }
    const Encoding shl{*shl_insn};
    const IR::Reg shl_reg{shl.src_reg};

    const std::optional<u64> imnmx_insn{TrackIMNMX(env, block_begin, pos, shl_reg)};
    if (!imnmx_insn) {
        return std::nullopt;
    }
    const Encoding imnmx{*imnmx_insn};
    if (imnmx.is_negative != 0) {
        return std::nullopt;
    }
    const u32 imnmx_immediate{static_cast<u32>(imnmx.immediate.Value())};
    return IndirectBranchTableInfo{
        .cbuf_index = cbuf_index,
        .cbuf_offset = cbuf_offset,
        .num_entries = imnmx_immediate + 1,
        .branch_offset = brx_offset,
        .branch_reg = brx_reg,
    };
}

} // namespace Shader::Maxwell
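
A minimal sketch of querying this tracker from a BRX site, as AnalyzeBRX does in control_flow.cpp. The function name InspectBrx, the addresses and the availability of a concrete Environment are assumptions made for illustration.

#include "shader_recompiler/frontend/maxwell/indirect_branch_table_track.h"

void InspectBrx(Shader::Environment& env) {
    using namespace Shader::Maxwell;
    const Location block_begin{0x0008}; // hypothetical program start
    const Location brx_pos{0x0088};     // hypothetical BRX/JMX address
    // Walks backwards from the BRX looking for the IMNMX -> SHL -> LDC chain that
    // loads the branch target from a constant buffer table.
    if (const auto table{TrackIndirectBranchTable(env, brx_pos, block_begin)}) {
        // table->num_entries targets live at c[table->cbuf_index][table->cbuf_offset + 4 * i]
    }
}
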
diff --git a/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
new file mode 100644
index 000000000..eee5102fa
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/indirect_branch_table_track.h
@@ -0,0 +1,28 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <optional>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/environment.h"
#include "shader_recompiler/frontend/ir/reg.h"
#include "shader_recompiler/frontend/maxwell/location.h"

namespace Shader::Maxwell {

struct IndirectBranchTableInfo {
    u32 cbuf_index{};
    u32 cbuf_offset{};
    u32 num_entries{};
    s32 branch_offset{};
    IR::Reg branch_reg{};
};

std::optional<IndirectBranchTableInfo> TrackIndirectBranchTable(Environment& env, Location brx_pos,
                                                                Location block_begin);

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/instruction.h b/src/shader_recompiler/frontend/maxwell/instruction.h
new file mode 100644
index 000000000..743d68d61
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/instruction.h
@@ -0,0 +1,63 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/flow_test.h"
#include "shader_recompiler/frontend/ir/reg.h"

namespace Shader::Maxwell {

struct Predicate {
    Predicate() = default;
    Predicate(unsigned index_, bool negated_ = false) : index{index_}, negated{negated_} {}
    Predicate(bool value) : index{7}, negated{!value} {}
    Predicate(u64 raw) : index{static_cast<unsigned>(raw & 7)}, negated{(raw & 8) != 0} {}

    unsigned index;
    bool negated;
};

inline bool operator==(const Predicate& lhs, const Predicate& rhs) noexcept {
    return lhs.index == rhs.index && lhs.negated == rhs.negated;
}

inline bool operator!=(const Predicate& lhs, const Predicate& rhs) noexcept {
    return !(lhs == rhs);
}

union Instruction {
    Instruction(u64 raw_) : raw{raw_} {}

    u64 raw;

    union {
        BitField<5, 1, u64> is_cbuf;
        BitField<0, 5, IR::FlowTest> flow_test;

        [[nodiscard]] u32 Absolute() const noexcept {
            return static_cast<u32>(absolute);
        }

        [[nodiscard]] s32 Offset() const noexcept {
            return static_cast<s32>(offset);
        }

    private:
        BitField<20, 24, s64> offset;
        BitField<20, 32, u64> absolute;
    } branch;

    [[nodiscard]] Predicate Pred() const noexcept {
        return Predicate{pred};
    }

private:
    BitField<16, 4, u64> pred;
};
static_assert(std::is_trivially_copyable_v<Instruction>);

} // namespace Shader::Maxwell
diff --git a/src/shader_recompiler/frontend/maxwell/location.h b/src/shader_recompiler/frontend/maxwell/location.h
new file mode 100644
index 000000000..26d29eae2
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/location.h
@@ -0,0 +1,112 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <compare>
#include <iterator>

#include <fmt/format.h>

#include "common/common_types.h"
#include "shader_recompiler/exception.h"

namespace Shader::Maxwell {

class Location {
    static constexpr u32 VIRTUAL_BIAS{4};

public:
    constexpr Location() = default;

    constexpr Location(u32 initial_offset) : offset{initial_offset} {
        if (initial_offset % 8 != 0) {
            throw InvalidArgument("initial_offset={} is not a multiple of 8", initial_offset);
        }
        Align();
    }

    constexpr Location Virtual() const noexcept {
        Location virtual_location;
        virtual_location.offset = offset - VIRTUAL_BIAS;
        return virtual_location;
    }

    [[nodiscard]] constexpr u32 Offset() const noexcept {
        return offset;
    }

    [[nodiscard]] constexpr bool IsVirtual() const {
        return offset % 8 == VIRTUAL_BIAS;
    }

    constexpr auto operator<=>(const Location&) const noexcept = default;

    constexpr Location operator++() noexcept {
        const Location copy{*this};
        Step();
        return copy;
    }

    constexpr Location operator++(int) noexcept {
        Step();
        return *this;
    }

    constexpr Location operator--() noexcept {
        const Location copy{*this};
        Back();
        return copy;
    }

    constexpr Location operator--(int) noexcept {
        Back();
        return *this;
    }

    constexpr Location operator+(int number) const {
        Location new_pc{*this};
        while (number > 0) {
            --number;
            ++new_pc;
        }
        while (number < 0) {
            ++number;
            --new_pc;
        }
        return new_pc;
    }

    constexpr Location operator-(int number) const {
        return operator+(-number);
    }

private:
    constexpr void Align() {
        offset += offset % 32 == 0 ? 8 : 0;
    }

    constexpr void Step() {
        offset += 8 + (offset % 32 == 24 ? 8 : 0);
    }

    constexpr void Back() {
        offset -= 8 + (offset % 32 == 8 ? 8 : 0);
    }

    u32 offset{0xcccccccc};
};

} // namespace Shader::Maxwell

template <>
struct fmt::formatter<Shader::Maxwell::Location> {
    constexpr auto parse(format_parse_context& ctx) {
        return ctx.begin();
    }
    template <typename FormatContext>
    auto format(const Shader::Maxwell::Location& location, FormatContext& ctx) {
        return fmt::format_to(ctx.out(), "{:04x}", location.Offset());
    }
};
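
A short sketch of the Align/Step behaviour above: every fourth 64-bit word of Maxwell code (offset % 32 == 0) carries scheduling control information rather than an instruction, so Location skips over those slots. The function name and the concrete offsets are illustrative only.

#include "shader_recompiler/frontend/maxwell/location.h"

void LocationExample() {
    using Shader::Maxwell::Location;
    Location pc{0x18};
    ++pc; // 0x18 -> 0x28: Step() adds 8 and skips the control word at 0x20
    ++pc; // 0x28 -> 0x30: plain +8, no control word in the way
    [[maybe_unused]] const Location start{0x00}; // aligned up to 0x08, past the first control word
}
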
diff --git a/src/shader_recompiler/frontend/maxwell/maxwell.inc b/src/shader_recompiler/frontend/maxwell/maxwell.inc
new file mode 100644
index 000000000..2fee591bb
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/maxwell.inc
@@ -0,0 +1,286 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

INST(AL2P, "AL2P", "1110 1111 1010 0---")
INST(ALD, "ALD", "1110 1111 1101 1---")
INST(AST, "AST", "1110 1111 1111 0---")
INST(ATOM_cas, "ATOM (cas)", "1110 1110 1111 ----")
INST(ATOM, "ATOM", "1110 1101 ---- ----")
INST(ATOMS_cas, "ATOMS (cas)", "1110 1110 ---- ----")
INST(ATOMS, "ATOMS", "1110 1100 ---- ----")
INST(B2R, "B2R", "1111 0000 1011 1---")
INST(BAR, "BAR", "1111 0000 1010 1---")
INST(BFE_reg, "BFE (reg)", "0101 1100 0000 0---")
INST(BFE_cbuf, "BFE (cbuf)", "0100 1100 0000 0---")
INST(BFE_imm, "BFE (imm)", "0011 100- 0000 0---")
INST(BFI_reg, "BFI (reg)", "0101 1011 1111 0---")
INST(BFI_rc, "BFI (rc)", "0101 0011 1111 0---")
INST(BFI_cr, "BFI (cr)", "0100 1011 1111 0---")
INST(BFI_imm, "BFI (imm)", "0011 011- 1111 0---")
INST(BPT, "BPT", "1110 0011 1010 ----")
INST(BRA, "BRA", "1110 0010 0100 ----")
INST(BRK, "BRK", "1110 0011 0100 ----")
INST(BRX, "BRX", "1110 0010 0101 ----")
INST(CAL, "CAL", "1110 0010 0110 ----")
INST(CCTL, "CCTL", "1110 1111 011- ----")
INST(CCTLL, "CCTLL", "1110 1111 100- ----")
INST(CONT, "CONT", "1110 0011 0101 ----")
INST(CS2R, "CS2R", "0101 0000 1100 1---")
INST(CSET, "CSET", "0101 0000 1001 1---")
INST(CSETP, "CSETP", "0101 0000 1010 0---")
INST(DADD_reg, "DADD (reg)", "0101 1100 0111 0---")
INST(DADD_cbuf, "DADD (cbuf)", "0100 1100 0111 0---")
INST(DADD_imm, "DADD (imm)", "0011 100- 0111 0---")
INST(DEPBAR, "DEPBAR", "1111 0000 1111 0---")
INST(DFMA_reg, "DFMA (reg)", "0101 1011 0111 ----")
INST(DFMA_rc, "DFMA (rc)", "0101 0011 0111 ----")
INST(DFMA_cr, "DFMA (cr)", "0100 1011 0111 ----")
INST(DFMA_imm, "DFMA (imm)", "0011 011- 0111 ----")
INST(DMNMX_reg, "DMNMX (reg)", "0101 1100 0101 0---")
INST(DMNMX_cbuf, "DMNMX (cbuf)", "0100 1100 0101 0---")
INST(DMNMX_imm, "DMNMX (imm)", "0011 100- 0101 0---")
INST(DMUL_reg, "DMUL (reg)", "0101 1100 1000 0---")
INST(DMUL_cbuf, "DMUL (cbuf)", "0100 1100 1000 0---")
INST(DMUL_imm, "DMUL (imm)", "0011 100- 1000 0---")
INST(DSET_reg, "DSET (reg)", "0101 1001 0--- ----")
INST(DSET_cbuf, "DSET (cbuf)", "0100 1001 0--- ----")
INST(DSET_imm, "DSET (imm)", "0011 001- 0--- ----")
INST(DSETP_reg, "DSETP (reg)", "0101 1011 1000 ----")
INST(DSETP_cbuf, "DSETP (cbuf)", "0100 1011 1000 ----")
INST(DSETP_imm, "DSETP (imm)", "0011 011- 1000 ----")
INST(EXIT, "EXIT", "1110 0011 0000 ----")
INST(F2F_reg, "F2F (reg)", "0101 1100 1010 1---")
INST(F2F_cbuf, "F2F (cbuf)", "0100 1100 1010 1---")
INST(F2F_imm, "F2F (imm)", "0011 100- 1010 1---")
INST(F2I_reg, "F2I (reg)", "0101 1100 1011 0---")
INST(F2I_cbuf, "F2I (cbuf)", "0100 1100 1011 0---")
INST(F2I_imm, "F2I (imm)", "0011 100- 1011 0---")
INST(FADD_reg, "FADD (reg)", "0101 1100 0101 1---")
INST(FADD_cbuf, "FADD (cbuf)", "0100 1100 0101 1---")
INST(FADD_imm, "FADD (imm)", "0011 100- 0101 1---")
INST(FADD32I, "FADD32I", "0000 10-- ---- ----")
INST(FCHK_reg, "FCHK (reg)", "0101 1100 1000 1---")
INST(FCHK_cbuf, "FCHK (cbuf)", "0100 1100 1000 1---")
INST(FCHK_imm, "FCHK (imm)", "0011 100- 1000 1---")
INST(FCMP_reg, "FCMP (reg)", "0101 1011 1010 ----")
INST(FCMP_rc, "FCMP (rc)", "0101 0011 1010 ----")
INST(FCMP_cr, "FCMP (cr)", "0100 1011 1010 ----")
INST(FCMP_imm, "FCMP (imm)", "0011 011- 1010 ----")
INST(FFMA_reg, "FFMA (reg)", "0101 1001 1--- ----")
INST(FFMA_rc, "FFMA (rc)", "0101 0001 1--- ----")
INST(FFMA_cr, "FFMA (cr)", "0100 1001 1--- ----")
INST(FFMA_imm, "FFMA (imm)", "0011 001- 1--- ----")
INST(FFMA32I, "FFMA32I", "0000 11-- ---- ----")
INST(FLO_reg, "FLO (reg)", "0101 1100 0011 0---")
INST(FLO_cbuf, "FLO (cbuf)", "0100 1100 0011 0---")
INST(FLO_imm, "FLO (imm)", "0011 100- 0011 0---")
INST(FMNMX_reg, "FMNMX (reg)", "0101 1100 0110 0---")
INST(FMNMX_cbuf, "FMNMX (cbuf)", "0100 1100 0110 0---")
INST(FMNMX_imm, "FMNMX (imm)", "0011 100- 0110 0---")
INST(FMUL_reg, "FMUL (reg)", "0101 1100 0110 1---")
INST(FMUL_cbuf, "FMUL (cbuf)", "0100 1100 0110 1---")
INST(FMUL_imm, "FMUL (imm)", "0011 100- 0110 1---")
INST(FMUL32I, "FMUL32I", "0001 1110 ---- ----")
INST(FSET_reg, "FSET (reg)", "0101 1000 ---- ----")
INST(FSET_cbuf, "FSET (cbuf)", "0100 1000 ---- ----")
INST(FSET_imm, "FSET (imm)", "0011 000- ---- ----")
INST(FSETP_reg, "FSETP (reg)", "0101 1011 1011 ----")
INST(FSETP_cbuf, "FSETP (cbuf)", "0100 1011 1011 ----")
INST(FSETP_imm, "FSETP (imm)", "0011 011- 1011 ----")
INST(FSWZADD, "FSWZADD", "0101 0000 1111 1---")
INST(GETCRSPTR, "GETCRSPTR", "1110 0010 1100 ----")
INST(GETLMEMBASE, "GETLMEMBASE", "1110 0010 1101 ----")
INST(HADD2_reg, "HADD2 (reg)", "0101 1101 0001 0---")
INST(HADD2_cbuf, "HADD2 (cbuf)", "0111 101- 1--- ----")
INST(HADD2_imm, "HADD2 (imm)", "0111 101- 0--- ----")
INST(HADD2_32I, "HADD2_32I", "0010 110- ---- ----")
INST(HFMA2_reg, "HFMA2 (reg)", "0101 1101 0000 0---")
| 99 | INST(HFMA2_rc, "HFMA2 (rc)", "0110 0--- 1--- ----") | ||
| 100 | INST(HFMA2_cr, "HFMA2 (cr)", "0111 0--- 1--- ----") | ||
| 101 | INST(HFMA2_imm, "HFMA2 (imm)", "0111 0--- 0--- ----") | ||
| 102 | INST(HFMA2_32I, "HFMA2_32I", "0010 100- ---- ----") | ||
| 103 | INST(HMUL2_reg, "HMUL2 (reg)", "0101 1101 0000 1---") | ||
| 104 | INST(HMUL2_cbuf, "HMUL2 (cbuf)", "0111 100- 1--- ----") | ||
| 105 | INST(HMUL2_imm, "HMUL2 (imm)", "0111 100- 0--- ----") | ||
| 106 | INST(HMUL2_32I, "HMUL2_32I", "0010 101- ---- ----") | ||
| 107 | INST(HSET2_reg, "HSET2 (reg)", "0101 1101 0001 1---") | ||
| 108 | INST(HSET2_cbuf, "HSET2 (cbuf)", "0111 110- 1--- ----") | ||
| 109 | INST(HSET2_imm, "HSET2 (imm)", "0111 110- 0--- ----") | ||
| 110 | INST(HSETP2_reg, "HSETP2 (reg)", "0101 1101 0010 0---") | ||
| 111 | INST(HSETP2_cbuf, "HSETP2 (cbuf)", "0111 111- 1--- ----") | ||
| 112 | INST(HSETP2_imm, "HSETP2 (imm)", "0111 111- 0--- ----") | ||
| 113 | INST(I2F_reg, "I2F (reg)", "0101 1100 1011 1---") | ||
| 114 | INST(I2F_cbuf, "I2F (cbuf)", "0100 1100 1011 1---") | ||
| 115 | INST(I2F_imm, "I2F (imm)", "0011 100- 1011 1---") | ||
| 116 | INST(I2I_reg, "I2I (reg)", "0101 1100 1110 0---") | ||
| 117 | INST(I2I_cbuf, "I2I (cbuf)", "0100 1100 1110 0---") | ||
| 118 | INST(I2I_imm, "I2I (imm)", "0011 100- 1110 0---") | ||
| 119 | INST(IADD_reg, "IADD (reg)", "0101 1100 0001 0---") | ||
| 120 | INST(IADD_cbuf, "IADD (cbuf)", "0100 1100 0001 0---") | ||
| 121 | INST(IADD_imm, "IADD (imm)", "0011 100- 0001 0---") | ||
| 122 | INST(IADD3_reg, "IADD3 (reg)", "0101 1100 1100 ----") | ||
| 123 | INST(IADD3_cbuf, "IADD3 (cbuf)", "0100 1100 1100 ----") | ||
| 124 | INST(IADD3_imm, "IADD3 (imm)", "0011 100- 1100 ----") | ||
| 125 | INST(IADD32I, "IADD32I", "0001 110- ---- ----") | ||
| 126 | INST(ICMP_reg, "ICMP (reg)", "0101 1011 0100 ----") | ||
| 127 | INST(ICMP_rc, "ICMP (rc)", "0101 0011 0100 ----") | ||
| 128 | INST(ICMP_cr, "ICMP (cr)", "0100 1011 0100 ----") | ||
| 129 | INST(ICMP_imm, "ICMP (imm)", "0011 011- 0100 ----") | ||
| 130 | INST(IDE, "IDE", "1110 0011 1001 ----") | ||
| 131 | INST(IDP_reg, "IDP (reg)", "0101 0011 1111 1---") | ||
| 132 | INST(IDP_imm, "IDP (imm)", "0101 0011 1101 1---") | ||
| 133 | INST(IMAD_reg, "IMAD (reg)", "0101 1010 0--- ----") | ||
| 134 | INST(IMAD_rc, "IMAD (rc)", "0101 0010 0--- ----") | ||
| 135 | INST(IMAD_cr, "IMAD (cr)", "0100 1010 0--- ----") | ||
| 136 | INST(IMAD_imm, "IMAD (imm)", "0011 010- 0--- ----") | ||
| 137 | INST(IMAD32I, "IMAD32I", "1000 00-- ---- ----") | ||
| 138 | INST(IMADSP_reg, "IMADSP (reg)", "0101 1010 1--- ----") | ||
| 139 | INST(IMADSP_rc, "IMADSP (rc)", "0101 0010 1--- ----") | ||
| 140 | INST(IMADSP_cr, "IMADSP (cr)", "0100 1010 1--- ----") | ||
| 141 | INST(IMADSP_imm, "IMADSP (imm)", "0011 010- 1--- ----") | ||
| 142 | INST(IMNMX_reg, "IMNMX (reg)", "0101 1100 0010 0---") | ||
| 143 | INST(IMNMX_cbuf, "IMNMX (cbuf)", "0100 1100 0010 0---") | ||
| 144 | INST(IMNMX_imm, "IMNMX (imm)", "0011 100- 0010 0---") | ||
| 145 | INST(IMUL_reg, "IMUL (reg)", "0101 1100 0011 1---") | ||
| 146 | INST(IMUL_cbuf, "IMUL (cbuf)", "0100 1100 0011 1---") | ||
| 147 | INST(IMUL_imm, "IMUL (imm)", "0011 100- 0011 1---") | ||
| 148 | INST(IMUL32I, "IMUL32I", "0001 1111 ---- ----") | ||
| 149 | INST(IPA, "IPA", "1110 0000 ---- ----") | ||
| 150 | INST(ISBERD, "ISBERD", "1110 1111 1101 0---") | ||
| 151 | INST(ISCADD_reg, "ISCADD (reg)", "0101 1100 0001 1---") | ||
| 152 | INST(ISCADD_cbuf, "ISCADD (cbuf)", "0100 1100 0001 1---") | ||
| 153 | INST(ISCADD_imm, "ISCADD (imm)", "0011 100- 0001 1---") | ||
| 154 | INST(ISCADD32I, "ISCADD32I", "0001 01-- ---- ----") | ||
| 155 | INST(ISET_reg, "ISET (reg)", "0101 1011 0101 ----") | ||
| 156 | INST(ISET_cbuf, "ISET (cbuf)", "0100 1011 0101 ----") | ||
| 157 | INST(ISET_imm, "ISET (imm)", "0011 011- 0101 ----") | ||
| 158 | INST(ISETP_reg, "ISETP (reg)", "0101 1011 0110 ----") | ||
| 159 | INST(ISETP_cbuf, "ISETP (cbuf)", "0100 1011 0110 ----") | ||
| 160 | INST(ISETP_imm, "ISETP (imm)", "0011 011- 0110 ----") | ||
| 161 | INST(JCAL, "JCAL", "1110 0010 0010 ----") | ||
| 162 | INST(JMP, "JMP", "1110 0010 0001 ----") | ||
| 163 | INST(JMX, "JMX", "1110 0010 0000 ----") | ||
| 164 | INST(KIL, "KIL", "1110 0011 0011 ----") | ||
| 165 | INST(LD, "LD", "100- ---- ---- ----") | ||
| 166 | INST(LDC, "LDC", "1110 1111 1001 0---") | ||
| 167 | INST(LDG, "LDG", "1110 1110 1101 0---") | ||
| 168 | INST(LDL, "LDL", "1110 1111 0100 0---") | ||
| 169 | INST(LDS, "LDS", "1110 1111 0100 1---") | ||
| 170 | INST(LEA_hi_reg, "LEA (hi reg)", "0101 1011 1101 1---") | ||
| 171 | INST(LEA_hi_cbuf, "LEA (hi cbuf)", "0001 10-- ---- ----") | ||
| 172 | INST(LEA_lo_reg, "LEA (lo reg)", "0101 1011 1101 0---") | ||
| 173 | INST(LEA_lo_cbuf, "LEA (lo cbuf)", "0100 1011 1101 ----") | ||
| 174 | INST(LEA_lo_imm, "LEA (lo imm)", "0011 011- 1101 0---") | ||
| 175 | INST(LEPC, "LEPC", "0101 0000 1101 0---") | ||
| 176 | INST(LONGJMP, "LONGJMP", "1110 0011 0001 ----") | ||
| 177 | INST(LOP_reg, "LOP (reg)", "0101 1100 0100 0---") | ||
| 178 | INST(LOP_cbuf, "LOP (cbuf)", "0100 1100 0100 0---") | ||
| 179 | INST(LOP_imm, "LOP (imm)", "0011 100- 0100 0---") | ||
| 180 | INST(LOP3_reg, "LOP3 (reg)", "0101 1011 1110 0---") | ||
| 181 | INST(LOP3_cbuf, "LOP3 (cbuf)", "0000 001- ---- ----") | ||
| 182 | INST(LOP3_imm, "LOP3 (imm)", "0011 11-- ---- ----") | ||
| 183 | INST(LOP32I, "LOP32I", "0000 01-- ---- ----") | ||
| 184 | INST(MEMBAR, "MEMBAR", "1110 1111 1001 1---") | ||
| 185 | INST(MOV_reg, "MOV (reg)", "0101 1100 1001 1---") | ||
| 186 | INST(MOV_cbuf, "MOV (cbuf)", "0100 1100 1001 1---") | ||
| 187 | INST(MOV_imm, "MOV (imm)", "0011 100- 1001 1---") | ||
| 188 | INST(MOV32I, "MOV32I", "0000 0001 0000 ----") | ||
| 189 | INST(MUFU, "MUFU", "0101 0000 1000 0---") | ||
| 190 | INST(NOP, "NOP", "0101 0000 1011 0---") | ||
| 191 | INST(OUT_reg, "OUT (reg)", "1111 1011 1110 0---") | ||
| 192 | INST(OUT_cbuf, "OUT (cbuf)", "1110 1011 1110 0---") | ||
| 193 | INST(OUT_imm, "OUT (imm)", "1111 011- 1110 0---") | ||
| 194 | INST(P2R_reg, "P2R (reg)", "0101 1100 1110 1---") | ||
| 195 | INST(P2R_cbuf, "P2R (cbuf)", "0100 1100 1110 1---") | ||
| 196 | INST(P2R_imm, "P2R (imm)", "0011 1000 1110 1---") | ||
| 197 | INST(PBK, "PBK", "1110 0010 1010 ----") | ||
| 198 | INST(PCNT, "PCNT", "1110 0010 1011 ----") | ||
| 199 | INST(PEXIT, "PEXIT", "1110 0010 0011 ----") | ||
| 200 | INST(PIXLD, "PIXLD", "1110 1111 1110 1---") | ||
| 201 | INST(PLONGJMP, "PLONGJMP", "1110 0010 1000 ----") | ||
| 202 | INST(POPC_reg, "POPC (reg)", "0101 1100 0000 1---") | ||
| 203 | INST(POPC_cbuf, "POPC (cbuf)", "0100 1100 0000 1---") | ||
| 204 | INST(POPC_imm, "POPC (imm)", "0011 100- 0000 1---") | ||
| 205 | INST(PRET, "PRET", "1110 0010 0111 ----") | ||
| 206 | INST(PRMT_reg, "PRMT (reg)", "0101 1011 1100 ----") | ||
| 207 | INST(PRMT_rc, "PRMT (rc)", "0101 0011 1100 ----") | ||
| 208 | INST(PRMT_cr, "PRMT (cr)", "0100 1011 1100 ----") | ||
| 209 | INST(PRMT_imm, "PRMT (imm)", "0011 011- 1100 ----") | ||
| 210 | INST(PSET, "PSET", "0101 0000 1000 1---") | ||
| 211 | INST(PSETP, "PSETP", "0101 0000 1001 0---") | ||
| 212 | INST(R2B, "R2B", "1111 0000 1100 0---") | ||
| 213 | INST(R2P_reg, "R2P (reg)", "0101 1100 1111 0---") | ||
| 214 | INST(R2P_cbuf, "R2P (cbuf)", "0100 1100 1111 0---") | ||
| 215 | INST(R2P_imm, "R2P (imm)", "0011 100- 1111 0---") | ||
| 216 | INST(RAM, "RAM", "1110 0011 1000 ----") | ||
| 217 | INST(RED, "RED", "1110 1011 1111 1---") | ||
| 218 | INST(RET, "RET", "1110 0011 0010 ----") | ||
| 219 | INST(RRO_reg, "RRO (reg)", "0101 1100 1001 0---") | ||
| 220 | INST(RRO_cbuf, "RRO (cbuf)", "0100 1100 1001 0---") | ||
| 221 | INST(RRO_imm, "RRO (imm)", "0011 100- 1001 0---") | ||
| 222 | INST(RTT, "RTT", "1110 0011 0110 ----") | ||
| 223 | INST(S2R, "S2R", "1111 0000 1100 1---") | ||
| 224 | INST(SAM, "SAM", "1110 0011 0111 ----") | ||
| 225 | INST(SEL_reg, "SEL (reg)", "0101 1100 1010 0---") | ||
| 226 | INST(SEL_cbuf, "SEL (cbuf)", "0100 1100 1010 0---") | ||
| 227 | INST(SEL_imm, "SEL (imm)", "0011 100- 1010 0---") | ||
| 228 | INST(SETCRSPTR, "SETCRSPTR", "1110 0010 1110 ----") | ||
| 229 | INST(SETLMEMBASE, "SETLMEMBASE", "1110 0010 1111 ----") | ||
| 230 | INST(SHF_l_reg, "SHF (l reg)", "0101 1011 1111 1---") | ||
| 231 | INST(SHF_l_imm, "SHF (l imm)", "0011 011- 1111 1---") | ||
| 232 | INST(SHF_r_reg, "SHF (r reg)", "0101 1100 1111 1---") | ||
| 233 | INST(SHF_r_imm, "SHF (r imm)", "0011 100- 1111 1---") | ||
| 234 | INST(SHFL, "SHFL", "1110 1111 0001 0---") | ||
| 235 | INST(SHL_reg, "SHL (reg)", "0101 1100 0100 1---") | ||
| 236 | INST(SHL_cbuf, "SHL (cbuf)", "0100 1100 0100 1---") | ||
| 237 | INST(SHL_imm, "SHL (imm)", "0011 100- 0100 1---") | ||
| 238 | INST(SHR_reg, "SHR (reg)", "0101 1100 0010 1---") | ||
| 239 | INST(SHR_cbuf, "SHR (cbuf)", "0100 1100 0010 1---") | ||
| 240 | INST(SHR_imm, "SHR (imm)", "0011 100- 0010 1---") | ||
| 241 | INST(SSY, "SSY", "1110 0010 1001 ----") | ||
| 242 | INST(ST, "ST", "101- ---- ---- ----") | ||
| 243 | INST(STG, "STG", "1110 1110 1101 1---") | ||
| 244 | INST(STL, "STL", "1110 1111 0101 0---") | ||
| 245 | INST(STP, "STP", "1110 1110 1010 0---") | ||
| 246 | INST(STS, "STS", "1110 1111 0101 1---") | ||
| 247 | INST(SUATOM, "SUATOM", "1110 1010 0--- ----") | ||
| 248 | INST(SUATOM_cas, "SUATOM_cas", "1110 1010 1--- ----") | ||
| 249 | INST(SULD, "SULD", "1110 1011 000- ----") | ||
| 250 | INST(SURED, "SURED", "1110 1011 010- ----") | ||
| 251 | INST(SUST, "SUST", "1110 1011 001- ----") | ||
| 252 | INST(SYNC, "SYNC", "1111 0000 1111 1---") | ||
| 253 | INST(TEX, "TEX", "1100 0--- ---- ----") | ||
| 254 | INST(TEX_b, "TEX (b)", "1101 1110 10-- ----") | ||
| 255 | INST(TEXS, "TEXS", "1101 -00- ---- ----") | ||
| 256 | INST(TLD, "TLD", "1101 1100 ---- ----") | ||
| 257 | INST(TLD_b, "TLD (b)", "1101 1101 ---- ----") | ||
| 258 | INST(TLD4, "TLD4", "1100 10-- ---- ----") | ||
| 259 | INST(TLD4_b, "TLD4 (b)", "1101 1110 11-- ----") | ||
| 260 | INST(TLD4S, "TLD4S", "1101 1111 -0-- ----") | ||
| 261 | INST(TLDS, "TLDS", "1101 -01- ---- ----") | ||
| 262 | INST(TMML, "TMML", "1101 1111 0101 1---") | ||
| 263 | INST(TMML_b, "TMML (b)", "1101 1111 0110 0---") | ||
| 264 | INST(TXA, "TXA", "1101 1111 0100 0---") | ||
| 265 | INST(TXD, "TXD", "1101 1110 00-- ----") | ||
| 266 | INST(TXD_b, "TXD (b)", "1101 1110 01-- ----") | ||
| 267 | INST(TXQ, "TXQ", "1101 1111 0100 1---") | ||
| 268 | INST(TXQ_b, "TXQ (b)", "1101 1111 0101 0---") | ||
| 269 | INST(VABSDIFF, "VABSDIFF", "0101 0100 ---- ----") | ||
| 270 | INST(VABSDIFF4, "VABSDIFF4", "0101 0000 0--- ----") | ||
| 271 | INST(VADD, "VADD", "0010 00-- ---- ----") | ||
| 272 | INST(VMAD, "VMAD", "0101 1111 ---- ----") | ||
| 273 | INST(VMNMX, "VMNMX", "0011 101- ---- ----") | ||
| 274 | INST(VOTE, "VOTE", "0101 0000 1101 1---") | ||
| 275 | INST(VOTE_vtg, "VOTE (vtg)", "0101 0000 1110 0---") | ||
| 276 | INST(VSET, "VSET", "0100 000- ---- ----") | ||
| 277 | INST(VSETP, "VSETP", "0101 0000 1111 0---") | ||
| 278 | INST(VSHL, "VSHL", "0101 0111 ---- ----") | ||
| 279 | INST(VSHR, "VSHR", "0101 0110 ---- ----") | ||
| 280 | INST(XMAD_reg, "XMAD (reg)", "0101 1011 00-- ----") | ||
| 281 | INST(XMAD_rc, "XMAD (rc)", "0101 0001 0--- ----") | ||
| 282 | INST(XMAD_cr, "XMAD (cr)", "0100 111- ---- ----") | ||
| 283 | INST(XMAD_imm, "XMAD (imm)", "0011 011- 00-- ----") | ||
| 284 | |||
| 285 | // Removed because its irregular encoding makes the fast decode tables larger | ||
| 286 | // INST(CCTLT, "CCTLT", "1110 1011 1111 0--0") | ||
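
Each INST row pairs an opcode with a 16-bit pattern over the top bits of the instruction word, where '-' marks a don't-care bit and spaces are only separators. A decoder can fold such a string into a mask/value pair; a minimal sketch of that idea (EncodingOf is an illustrative helper, not the decoder used in this diff):

    #include <cstdint>
    #include <string_view>

    struct MaskValue {
        std::uint64_t mask;
        std::uint64_t value;
    };

    // Fold a pattern such as "1110 0010 0100 ----" into a mask/value pair over the top
    // 16 bits of the 64-bit instruction word; '-' contributes nothing to the mask.
    constexpr MaskValue EncodingOf(std::string_view pattern) {
        MaskValue result{};
        std::uint64_t bit{std::uint64_t{1} << 63};
        for (const char c : pattern) {
            if (c == ' ') {
                continue;
            }
            if (c != '-') {
                result.mask |= bit;
                if (c == '1') {
                    result.value |= bit;
                }
            }
            bit >>= 1;
        }
        return result;
    }

    // Usage: a 64-bit instruction `insn` matches BRA when
    //   (insn & EncodingOf("1110 0010 0100 ----").mask) == EncodingOf("1110 0010 0100 ----").value
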
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.cpp b/src/shader_recompiler/frontend/maxwell/opcodes.cpp new file mode 100644 index 000000000..ccc40c20c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.cpp | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | constexpr std::array NAME_TABLE{ | ||
| 13 | #define INST(name, cute, encode) cute, | ||
| 14 | #include "maxwell.inc" | ||
| 15 | #undef INST | ||
| 16 | }; | ||
| 17 | } // Anonymous namespace | ||
| 18 | |||
| 19 | const char* NameOf(Opcode opcode) { | ||
| 20 | if (static_cast<size_t>(opcode) >= NAME_TABLE.size()) { | ||
| 21 | throw InvalidArgument("Invalid opcode with raw value {}", static_cast<int>(opcode)); | ||
| 22 | } | ||
| 23 | return NAME_TABLE[static_cast<size_t>(opcode)]; | ||
| 24 | } | ||
| 25 | |||
| 26 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/opcodes.h b/src/shader_recompiler/frontend/maxwell/opcodes.h new file mode 100644 index 000000000..cd574f29d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/opcodes.h | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | |||
| 11 | enum class Opcode { | ||
| 12 | #define INST(name, cute, encode) name, | ||
| 13 | #include "maxwell.inc" | ||
| 14 | #undef INST | ||
| 15 | }; | ||
| 16 | |||
| 17 | const char* NameOf(Opcode opcode); | ||
| 18 | |||
| 19 | } // namespace Shader::Maxwell | ||
| 20 | |||
| 21 | template <> | ||
| 22 | struct fmt::formatter<Shader::Maxwell::Opcode> { | ||
| 23 | constexpr auto parse(format_parse_context& ctx) { | ||
| 24 | return ctx.begin(); | ||
| 25 | } | ||
| 26 | template <typename FormatContext> | ||
| 27 | auto format(const Shader::Maxwell::Opcode& opcode, FormatContext& ctx) { | ||
| 28 | return format_to(ctx.out(), "{}", NameOf(opcode)); | ||
| 29 | } | ||
| 30 | }; | ||
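
Because the formatter above forwards to NameOf, opcodes can be interpolated straight into log and exception messages, printing the human-readable name from maxwell.inc. A small usage sketch (Describe is an illustrative function, not part of this diff):

    #include <string>

    #include <fmt/format.h>

    #include "shader_recompiler/frontend/maxwell/opcodes.h"

    // Formats the readable name from NAME_TABLE, e.g. "MOV (reg)" for Opcode::MOV_reg.
    std::string Describe(Shader::Maxwell::Opcode opcode) {
        return fmt::format("unknown instruction {}", opcode);
    }
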
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp new file mode 100644 index 000000000..8b3e0a15c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | |||
| @@ -0,0 +1,883 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <string> | ||
| 8 | #include <unordered_map> | ||
| 9 | #include <utility> | ||
| 10 | #include <vector> | ||
| 11 | #include <version> | ||
| 12 | |||
| 13 | #include <fmt/format.h> | ||
| 14 | |||
| 15 | #include <boost/intrusive/list.hpp> | ||
| 16 | |||
| 17 | #include "shader_recompiler/environment.h" | ||
| 18 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 19 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 20 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 21 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||
| 22 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 23 | #include "shader_recompiler/object_pool.h" | ||
| 24 | |||
| 25 | namespace Shader::Maxwell { | ||
| 26 | namespace { | ||
| 27 | struct Statement; | ||
| 28 | |||
| 29 | // Use normal_link because we are not guaranteed to destroy the tree in order | ||
| 30 | using ListBaseHook = | ||
| 31 | boost::intrusive::list_base_hook<boost::intrusive::link_mode<boost::intrusive::normal_link>>; | ||
| 32 | |||
| 33 | using Tree = boost::intrusive::list<Statement, | ||
| 34 | // Allow using Statement without a definition | ||
| 35 | boost::intrusive::base_hook<ListBaseHook>, | ||
| 36 | // Avoid linear complexity on splice, size is never called | ||
| 37 | boost::intrusive::constant_time_size<false>>; | ||
| 38 | using Node = Tree::iterator; | ||
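
The Tree configuration above trades safety checks and O(1) size() for cheap splicing and out-of-order destruction. A minimal standalone sketch of the same boost::intrusive setup (Item and Example are illustrative names, not part of this diff):

    #include <boost/intrusive/list.hpp>

    // Elements derive from a base hook with normal_link: no unlink bookkeeping or
    // assertions on destruction, so list and elements may be destroyed in any order
    // (as long as the list is not used afterwards). constant_time_size<false> makes
    // size() linear but keeps splice() O(1).
    struct Item : boost::intrusive::list_base_hook<
                      boost::intrusive::link_mode<boost::intrusive::normal_link>> {
        int value{};
    };

    using ItemList = boost::intrusive::list<Item, boost::intrusive::constant_time_size<false>>;

    void Example() {
        Item a{};
        Item b{};
        ItemList list;
        list.push_back(a); // intrusive: the list links the elements, it does not own them
        list.push_back(b);
    }
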
| 39 | |||
| 40 | enum class StatementType { | ||
| 41 | Code, | ||
| 42 | Goto, | ||
| 43 | Label, | ||
| 44 | If, | ||
| 45 | Loop, | ||
| 46 | Break, | ||
| 47 | Return, | ||
| 48 | Kill, | ||
| 49 | Unreachable, | ||
| 50 | Function, | ||
| 51 | Identity, | ||
| 52 | Not, | ||
| 53 | Or, | ||
| 54 | SetVariable, | ||
| 55 | SetIndirectBranchVariable, | ||
| 56 | Variable, | ||
| 57 | IndirectBranchCond, | ||
| 58 | }; | ||
| 59 | |||
| 60 | bool HasChildren(StatementType type) { | ||
| 61 | switch (type) { | ||
| 62 | case StatementType::If: | ||
| 63 | case StatementType::Loop: | ||
| 64 | case StatementType::Function: | ||
| 65 | return true; | ||
| 66 | default: | ||
| 67 | return false; | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | struct Goto {}; | ||
| 72 | struct Label {}; | ||
| 73 | struct If {}; | ||
| 74 | struct Loop {}; | ||
| 75 | struct Break {}; | ||
| 76 | struct Return {}; | ||
| 77 | struct Kill {}; | ||
| 78 | struct Unreachable {}; | ||
| 79 | struct FunctionTag {}; | ||
| 80 | struct Identity {}; | ||
| 81 | struct Not {}; | ||
| 82 | struct Or {}; | ||
| 83 | struct SetVariable {}; | ||
| 84 | struct SetIndirectBranchVariable {}; | ||
| 85 | struct Variable {}; | ||
| 86 | struct IndirectBranchCond {}; | ||
| 87 | |||
| 88 | #ifdef _MSC_VER | ||
| 89 | #pragma warning(push) | ||
| 90 | #pragma warning(disable : 26495) // Always initialize a member variable, expected in Statement | ||
| 91 | #endif | ||
| 92 | struct Statement : ListBaseHook { | ||
| 93 | Statement(const Flow::Block* block_, Statement* up_) | ||
| 94 | : block{block_}, up{up_}, type{StatementType::Code} {} | ||
| 95 | Statement(Goto, Statement* cond_, Node label_, Statement* up_) | ||
| 96 | : label{label_}, cond{cond_}, up{up_}, type{StatementType::Goto} {} | ||
| 97 | Statement(Label, u32 id_, Statement* up_) : id{id_}, up{up_}, type{StatementType::Label} {} | ||
| 98 | Statement(If, Statement* cond_, Tree&& children_, Statement* up_) | ||
| 99 | : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::If} {} | ||
| 100 | Statement(Loop, Statement* cond_, Tree&& children_, Statement* up_) | ||
| 101 | : children{std::move(children_)}, cond{cond_}, up{up_}, type{StatementType::Loop} {} | ||
| 102 | Statement(Break, Statement* cond_, Statement* up_) | ||
| 103 | : cond{cond_}, up{up_}, type{StatementType::Break} {} | ||
| 104 | Statement(Return, Statement* up_) : up{up_}, type{StatementType::Return} {} | ||
| 105 | Statement(Kill, Statement* up_) : up{up_}, type{StatementType::Kill} {} | ||
| 106 | Statement(Unreachable, Statement* up_) : up{up_}, type{StatementType::Unreachable} {} | ||
| 107 | Statement(FunctionTag) : children{}, type{StatementType::Function} {} | ||
| 108 | Statement(Identity, IR::Condition cond_, Statement* up_) | ||
| 109 | : guest_cond{cond_}, up{up_}, type{StatementType::Identity} {} | ||
| 110 | Statement(Not, Statement* op_, Statement* up_) : op{op_}, up{up_}, type{StatementType::Not} {} | ||
| 111 | Statement(Or, Statement* op_a_, Statement* op_b_, Statement* up_) | ||
| 112 | : op_a{op_a_}, op_b{op_b_}, up{up_}, type{StatementType::Or} {} | ||
| 113 | Statement(SetVariable, u32 id_, Statement* op_, Statement* up_) | ||
| 114 | : op{op_}, id{id_}, up{up_}, type{StatementType::SetVariable} {} | ||
| 115 | Statement(SetIndirectBranchVariable, IR::Reg branch_reg_, s32 branch_offset_, Statement* up_) | ||
| 116 | : branch_offset{branch_offset_}, | ||
| 117 | branch_reg{branch_reg_}, up{up_}, type{StatementType::SetIndirectBranchVariable} {} | ||
| 118 | Statement(Variable, u32 id_, Statement* up_) | ||
| 119 | : id{id_}, up{up_}, type{StatementType::Variable} {} | ||
| 120 | Statement(IndirectBranchCond, u32 location_, Statement* up_) | ||
| 121 | : location{location_}, up{up_}, type{StatementType::IndirectBranchCond} {} | ||
| 122 | |||
| 123 | ~Statement() { | ||
| 124 | if (HasChildren(type)) { | ||
| 125 | std::destroy_at(&children); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | union { | ||
| 130 | const Flow::Block* block; | ||
| 131 | Node label; | ||
| 132 | Tree children; | ||
| 133 | IR::Condition guest_cond; | ||
| 134 | Statement* op; | ||
| 135 | Statement* op_a; | ||
| 136 | u32 location; | ||
| 137 | s32 branch_offset; | ||
| 138 | }; | ||
| 139 | union { | ||
| 140 | Statement* cond; | ||
| 141 | Statement* op_b; | ||
| 142 | u32 id; | ||
| 143 | IR::Reg branch_reg; | ||
| 144 | }; | ||
| 145 | Statement* up{}; | ||
| 146 | StatementType type; | ||
| 147 | }; | ||
| 148 | #ifdef _MSC_VER | ||
| 149 | #pragma warning(pop) | ||
| 150 | #endif | ||
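
Statement keeps its payload in anonymous unions, so non-trivial members such as the child Tree are never constructed or destroyed implicitly; the destructor above calls std::destroy_at only when the active member is a child list. A minimal sketch of the same pattern with illustrative types (not part of this diff):

    #include <memory>
    #include <string>

    struct Tagged {
        enum class Type { Number, Text } type;
        union {
            int number;
            std::string text; // non-trivial union member: constructed and destroyed by hand
        };

        explicit Tagged(int value) : type{Type::Number}, number{value} {}
        explicit Tagged(std::string value) : type{Type::Text}, text{std::move(value)} {}

        Tagged(const Tagged&) = delete;
        Tagged& operator=(const Tagged&) = delete;

        ~Tagged() {
            if (type == Type::Text) {
                std::destroy_at(&text); // mirrors std::destroy_at(&children) in Statement
            }
        }
    };
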
| 151 | |||
| 152 | std::string DumpExpr(const Statement* stmt) { | ||
| 153 | switch (stmt->type) { | ||
| 154 | case StatementType::Identity: | ||
| 155 | return fmt::format("{}", stmt->guest_cond); | ||
| 156 | case StatementType::Not: | ||
| 157 | return fmt::format("!{}", DumpExpr(stmt->op)); | ||
| 158 | case StatementType::Or: | ||
| 159 | return fmt::format("{} || {}", DumpExpr(stmt->op_a), DumpExpr(stmt->op_b)); | ||
| 160 | case StatementType::Variable: | ||
| 161 | return fmt::format("goto_L{}", stmt->id); | ||
| 162 | case StatementType::IndirectBranchCond: | ||
| 163 | return fmt::format("(indirect_branch == {:x})", stmt->location); | ||
| 164 | default: | ||
| 165 | return "<invalid type>"; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 169 | [[maybe_unused]] std::string DumpTree(const Tree& tree, u32 indentation = 0) { | ||
| 170 | std::string ret; | ||
| 171 | std::string indent(indentation, ' '); | ||
| 172 | for (auto stmt = tree.begin(); stmt != tree.end(); ++stmt) { | ||
| 173 | switch (stmt->type) { | ||
| 174 | case StatementType::Code: | ||
| 175 | ret += fmt::format("{} Block {:04x} -> {:04x} (0x{:016x});\n", indent, | ||
| 176 | stmt->block->begin.Offset(), stmt->block->end.Offset(), | ||
| 177 | reinterpret_cast<uintptr_t>(stmt->block)); | ||
| 178 | break; | ||
| 179 | case StatementType::Goto: | ||
| 180 | ret += fmt::format("{} if ({}) goto L{};\n", indent, DumpExpr(stmt->cond), | ||
| 181 | stmt->label->id); | ||
| 182 | break; | ||
| 183 | case StatementType::Label: | ||
| 184 | ret += fmt::format("{}L{}:\n", indent, stmt->id); | ||
| 185 | break; | ||
| 186 | case StatementType::If: | ||
| 187 | ret += fmt::format("{} if ({}) {{\n", indent, DumpExpr(stmt->cond)); | ||
| 188 | ret += DumpTree(stmt->children, indentation + 4); | ||
| 189 | ret += fmt::format("{} }}\n", indent); | ||
| 190 | break; | ||
| 191 | case StatementType::Loop: | ||
| 192 | ret += fmt::format("{} do {{\n", indent); | ||
| 193 | ret += DumpTree(stmt->children, indentation + 4); | ||
| 194 | ret += fmt::format("{} }} while ({});\n", indent, DumpExpr(stmt->cond)); | ||
| 195 | break; | ||
| 196 | case StatementType::Break: | ||
| 197 | ret += fmt::format("{} if ({}) break;\n", indent, DumpExpr(stmt->cond)); | ||
| 198 | break; | ||
| 199 | case StatementType::Return: | ||
| 200 | ret += fmt::format("{} return;\n", indent); | ||
| 201 | break; | ||
| 202 | case StatementType::Kill: | ||
| 203 | ret += fmt::format("{} kill;\n", indent); | ||
| 204 | break; | ||
| 205 | case StatementType::Unreachable: | ||
| 206 | ret += fmt::format("{} unreachable;\n", indent); | ||
| 207 | break; | ||
| 208 | case StatementType::SetVariable: | ||
| 209 | ret += fmt::format("{} goto_L{} = {};\n", indent, stmt->id, DumpExpr(stmt->op)); | ||
| 210 | break; | ||
| 211 | case StatementType::SetIndirectBranchVariable: | ||
| 212 | ret += fmt::format("{} indirect_branch = {} + {};\n", indent, stmt->branch_reg, | ||
| 213 | stmt->branch_offset); | ||
| 214 | break; | ||
| 215 | case StatementType::Function: | ||
| 216 | case StatementType::Identity: | ||
| 217 | case StatementType::Not: | ||
| 218 | case StatementType::Or: | ||
| 219 | case StatementType::Variable: | ||
| 220 | case StatementType::IndirectBranchCond: | ||
| 221 | throw LogicError("Statement can't be printed"); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | return ret; | ||
| 225 | } | ||
| 226 | |||
| 227 | void SanitizeNoBreaks(const Tree& tree) { | ||
| 228 | if (std::ranges::find(tree, StatementType::Break, &Statement::type) != tree.end()) { | ||
| 229 | throw NotImplementedException("Capturing statement with break nodes"); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | size_t Level(Node stmt) { | ||
| 234 | size_t level{0}; | ||
| 235 | Statement* node{stmt->up}; | ||
| 236 | while (node) { | ||
| 237 | ++level; | ||
| 238 | node = node->up; | ||
| 239 | } | ||
| 240 | return level; | ||
| 241 | } | ||
| 242 | |||
| 243 | bool IsDirectlyRelated(Node goto_stmt, Node label_stmt) { | ||
| 244 | const size_t goto_level{Level(goto_stmt)}; | ||
| 245 | const size_t label_level{Level(label_stmt)}; | ||
| 246 | size_t min_level; | ||
| 247 | size_t max_level; | ||
| 248 | Node min; | ||
| 249 | Node max; | ||
| 250 | if (label_level < goto_level) { | ||
| 251 | min_level = label_level; | ||
| 252 | max_level = goto_level; | ||
| 253 | min = label_stmt; | ||
| 254 | max = goto_stmt; | ||
| 255 | } else { // goto_level < label_level | ||
| 256 | min_level = goto_level; | ||
| 257 | max_level = label_level; | ||
| 258 | min = goto_stmt; | ||
| 259 | max = label_stmt; | ||
| 260 | } | ||
| 261 | while (max_level > min_level) { | ||
| 262 | --max_level; | ||
| 263 | max = max->up; | ||
| 264 | } | ||
| 265 | return min->up == max->up; | ||
| 266 | } | ||
| 267 | |||
| 268 | bool IsIndirectlyRelated(Node goto_stmt, Node label_stmt) { | ||
| 269 | return goto_stmt->up != label_stmt->up && !IsDirectlyRelated(goto_stmt, label_stmt); | ||
| 270 | } | ||
| 271 | |||
| 272 | [[maybe_unused]] bool AreSiblings(Node goto_stmt, Node label_stmt) noexcept { | ||
| 273 | Node it{goto_stmt}; | ||
| 274 | do { | ||
| 275 | if (it == label_stmt) { | ||
| 276 | return true; | ||
| 277 | } | ||
| 278 | --it; | ||
| 279 | } while (it != goto_stmt->up->children.begin()); | ||
| 280 | while (it != goto_stmt->up->children.end()) { | ||
| 281 | if (it == label_stmt) { | ||
| 282 | return true; | ||
| 283 | } | ||
| 284 | ++it; | ||
| 285 | } | ||
| 286 | return false; | ||
| 287 | } | ||
| 288 | |||
| 289 | Node SiblingFromNephew(Node uncle, Node nephew) noexcept { | ||
| 290 | Statement* const parent{uncle->up}; | ||
| 291 | Statement* it{&*nephew}; | ||
| 292 | while (it->up != parent) { | ||
| 293 | it = it->up; | ||
| 294 | } | ||
| 295 | return Tree::s_iterator_to(*it); | ||
| 296 | } | ||
| 297 | |||
| 298 | bool AreOrdered(Node left_sibling, Node right_sibling) noexcept { | ||
| 299 | const Node end{right_sibling->up->children.end()}; | ||
| 300 | for (auto it = right_sibling; it != end; ++it) { | ||
| 301 | if (it == left_sibling) { | ||
| 302 | return false; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | return true; | ||
| 306 | } | ||
| 307 | |||
| 308 | bool NeedsLift(Node goto_stmt, Node label_stmt) noexcept { | ||
| 309 | const Node sibling{SiblingFromNephew(goto_stmt, label_stmt)}; | ||
| 310 | return AreOrdered(sibling, goto_stmt); | ||
| 311 | } | ||
| 312 | |||
| 313 | class GotoPass { | ||
| 314 | public: | ||
| 315 | explicit GotoPass(Flow::CFG& cfg, ObjectPool<Statement>& stmt_pool) : pool{stmt_pool} { | ||
| 316 | std::vector gotos{BuildTree(cfg)}; | ||
| 317 | const auto end{gotos.rend()}; | ||
| 318 | for (auto goto_stmt = gotos.rbegin(); goto_stmt != end; ++goto_stmt) { | ||
| 319 | RemoveGoto(*goto_stmt); | ||
| 320 | } | ||
| 321 | } | ||
| 322 | |||
| 323 | Statement& RootStatement() noexcept { | ||
| 324 | return root_stmt; | ||
| 325 | } | ||
| 326 | |||
| 327 | private: | ||
| 328 | void RemoveGoto(Node goto_stmt) { | ||
| 329 | // Force goto_stmt and label_stmt to be directly related | ||
| 330 | const Node label_stmt{goto_stmt->label}; | ||
| 331 | if (IsIndirectlyRelated(goto_stmt, label_stmt)) { | ||
| 332 | // Move goto_stmt out using outward-movement transformation until it becomes | ||
| 333 | // directly related to label_stmt | ||
| 334 | while (!IsDirectlyRelated(goto_stmt, label_stmt)) { | ||
| 335 | goto_stmt = MoveOutward(goto_stmt); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | // Force goto_stmt and label_stmt to be siblings | ||
| 339 | if (IsDirectlyRelated(goto_stmt, label_stmt)) { | ||
| 340 | const size_t label_level{Level(label_stmt)}; | ||
| 341 | size_t goto_level{Level(goto_stmt)}; | ||
| 342 | if (goto_level > label_level) { | ||
| 343 | // Move goto_stmt out of its level using outward-movement transformations | ||
| 344 | while (goto_level > label_level) { | ||
| 345 | goto_stmt = MoveOutward(goto_stmt); | ||
| 346 | --goto_level; | ||
| 347 | } | ||
| 348 | } else { // Level(goto_stmt) < Level(label_stmt) | ||
| 349 | if (NeedsLift(goto_stmt, label_stmt)) { | ||
| 350 | // Lift goto_stmt to above stmt containing label_stmt using goto-lifting | ||
| 351 | // transformations | ||
| 352 | goto_stmt = Lift(goto_stmt); | ||
| 353 | } | ||
| 354 | // Move goto_stmt into label_stmt's level using inward-movement transformation | ||
| 355 | while (goto_level < label_level) { | ||
| 356 | goto_stmt = MoveInward(goto_stmt); | ||
| 357 | ++goto_level; | ||
| 358 | } | ||
| 359 | } | ||
| 360 | } | ||
| 361 | // Expensive operation: | ||
| 362 | // if (!AreSiblings(goto_stmt, label_stmt)) { | ||
| 363 | // throw LogicError("Goto is not a sibling with the label"); | ||
| 364 | // } | ||
| 365 | // goto_stmt and label_stmt are guaranteed to be siblings, eliminate | ||
| 366 | if (std::next(goto_stmt) == label_stmt) { | ||
| 367 | // Simply eliminate the goto if the label is next to it | ||
| 368 | goto_stmt->up->children.erase(goto_stmt); | ||
| 369 | } else if (AreOrdered(goto_stmt, label_stmt)) { | ||
| 370 | // Eliminate goto_stmt with a conditional | ||
| 371 | EliminateAsConditional(goto_stmt, label_stmt); | ||
| 372 | } else { | ||
| 373 | // Eliminate goto_stmt with a loop | ||
| 374 | EliminateAsLoop(goto_stmt, label_stmt); | ||
| 375 | } | ||
| 376 | } | ||
| 377 | |||
| 378 | std::vector<Node> BuildTree(Flow::CFG& cfg) { | ||
| 379 | u32 label_id{0}; | ||
| 380 | std::vector<Node> gotos; | ||
| 381 | Flow::Function& first_function{cfg.Functions().front()}; | ||
| 382 | BuildTree(cfg, first_function, label_id, gotos, root_stmt.children.end(), std::nullopt); | ||
| 383 | return gotos; | ||
| 384 | } | ||
| 385 | |||
| 386 | void BuildTree(Flow::CFG& cfg, Flow::Function& function, u32& label_id, | ||
| 387 | std::vector<Node>& gotos, Node function_insert_point, | ||
| 388 | std::optional<Node> return_label) { | ||
| 389 | Statement* const false_stmt{pool.Create(Identity{}, IR::Condition{false}, &root_stmt)}; | ||
| 390 | Tree& root{root_stmt.children}; | ||
| 391 | std::unordered_map<Flow::Block*, Node> local_labels; | ||
| 392 | local_labels.reserve(function.blocks.size()); | ||
| 393 | |||
| 394 | for (Flow::Block& block : function.blocks) { | ||
| 395 | Statement* const label{pool.Create(Label{}, label_id, &root_stmt)}; | ||
| 396 | const Node label_it{root.insert(function_insert_point, *label)}; | ||
| 397 | local_labels.emplace(&block, label_it); | ||
| 398 | ++label_id; | ||
| 399 | } | ||
| 400 | for (Flow::Block& block : function.blocks) { | ||
| 401 | const Node label{local_labels.at(&block)}; | ||
| 402 | // Insertion point | ||
| 403 | const Node ip{std::next(label)}; | ||
| 404 | |||
| 405 | // Reset goto variables before the first block and after its respective label | ||
| 406 | const auto make_reset_variable{[&]() -> Statement& { | ||
| 407 | return *pool.Create(SetVariable{}, label->id, false_stmt, &root_stmt); | ||
| 408 | }}; | ||
| 409 | root.push_front(make_reset_variable()); | ||
| 410 | root.insert(ip, make_reset_variable()); | ||
| 411 | root.insert(ip, *pool.Create(&block, &root_stmt)); | ||
| 412 | |||
| 413 | switch (block.end_class) { | ||
| 414 | case Flow::EndClass::Branch: { | ||
| 415 | Statement* const always_cond{ | ||
| 416 | pool.Create(Identity{}, IR::Condition{true}, &root_stmt)}; | ||
| 417 | if (block.cond == IR::Condition{true}) { | ||
| 418 | const Node true_label{local_labels.at(block.branch_true)}; | ||
| 419 | gotos.push_back( | ||
| 420 | root.insert(ip, *pool.Create(Goto{}, always_cond, true_label, &root_stmt))); | ||
| 421 | } else if (block.cond == IR::Condition{false}) { | ||
| 422 | const Node false_label{local_labels.at(block.branch_false)}; | ||
| 423 | gotos.push_back(root.insert( | ||
| 424 | ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); | ||
| 425 | } else { | ||
| 426 | const Node true_label{local_labels.at(block.branch_true)}; | ||
| 427 | const Node false_label{local_labels.at(block.branch_false)}; | ||
| 428 | Statement* const true_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; | ||
| 429 | gotos.push_back( | ||
| 430 | root.insert(ip, *pool.Create(Goto{}, true_cond, true_label, &root_stmt))); | ||
| 431 | gotos.push_back(root.insert( | ||
| 432 | ip, *pool.Create(Goto{}, always_cond, false_label, &root_stmt))); | ||
| 433 | } | ||
| 434 | break; | ||
| 435 | } | ||
| 436 | case Flow::EndClass::IndirectBranch: | ||
| 437 | root.insert(ip, *pool.Create(SetIndirectBranchVariable{}, block.branch_reg, | ||
| 438 | block.branch_offset, &root_stmt)); | ||
| 439 | for (const Flow::IndirectBranch& indirect : block.indirect_branches) { | ||
| 440 | const Node indirect_label{local_labels.at(indirect.block)}; | ||
| 441 | Statement* cond{ | ||
| 442 | pool.Create(IndirectBranchCond{}, indirect.address, &root_stmt)}; | ||
| 443 | Statement* goto_stmt{pool.Create(Goto{}, cond, indirect_label, &root_stmt)}; | ||
| 444 | gotos.push_back(root.insert(ip, *goto_stmt)); | ||
| 445 | } | ||
| 446 | root.insert(ip, *pool.Create(Unreachable{}, &root_stmt)); | ||
| 447 | break; | ||
| 448 | case Flow::EndClass::Call: { | ||
| 449 | Flow::Function& call{cfg.Functions()[block.function_call]}; | ||
| 450 | const Node call_return_label{local_labels.at(block.return_block)}; | ||
| 451 | BuildTree(cfg, call, label_id, gotos, ip, call_return_label); | ||
| 452 | break; | ||
| 453 | } | ||
| 454 | case Flow::EndClass::Exit: | ||
| 455 | root.insert(ip, *pool.Create(Return{}, &root_stmt)); | ||
| 456 | break; | ||
| 457 | case Flow::EndClass::Return: { | ||
| 458 | Statement* const always_cond{pool.Create(Identity{}, block.cond, &root_stmt)}; | ||
| 459 | auto goto_stmt{pool.Create(Goto{}, always_cond, return_label.value(), &root_stmt)}; | ||
| 460 | gotos.push_back(root.insert(ip, *goto_stmt)); | ||
| 461 | break; | ||
| 462 | } | ||
| 463 | case Flow::EndClass::Kill: | ||
| 464 | root.insert(ip, *pool.Create(Kill{}, &root_stmt)); | ||
| 465 | break; | ||
| 466 | } | ||
| 467 | } | ||
| 468 | } | ||
| 469 | |||
| 470 | void UpdateTreeUp(Statement* tree) { | ||
| 471 | for (Statement& stmt : tree->children) { | ||
| 472 | stmt.up = tree; | ||
| 473 | } | ||
| 474 | } | ||
| 475 | |||
| 476 | void EliminateAsConditional(Node goto_stmt, Node label_stmt) { | ||
| 477 | Tree& body{goto_stmt->up->children}; | ||
| 478 | Tree if_body; | ||
| 479 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_stmt); | ||
| 480 | Statement* const cond{pool.Create(Not{}, goto_stmt->cond, &root_stmt)}; | ||
| 481 | Statement* const if_stmt{pool.Create(If{}, cond, std::move(if_body), goto_stmt->up)}; | ||
| 482 | UpdateTreeUp(if_stmt); | ||
| 483 | body.insert(goto_stmt, *if_stmt); | ||
| 484 | body.erase(goto_stmt); | ||
| 485 | } | ||
| 486 | |||
| 487 | void EliminateAsLoop(Node goto_stmt, Node label_stmt) { | ||
| 488 | Tree& body{goto_stmt->up->children}; | ||
| 489 | Tree loop_body; | ||
| 490 | loop_body.splice(loop_body.begin(), body, label_stmt, goto_stmt); | ||
| 491 | Statement* const cond{goto_stmt->cond}; | ||
| 492 | Statement* const loop{pool.Create(Loop{}, cond, std::move(loop_body), goto_stmt->up)}; | ||
| 493 | UpdateTreeUp(loop); | ||
| 494 | body.insert(goto_stmt, *loop); | ||
| 495 | body.erase(goto_stmt); | ||
| 496 | } | ||
| 497 | |||
| 498 | [[nodiscard]] Node MoveOutward(Node goto_stmt) { | ||
| 499 | switch (goto_stmt->up->type) { | ||
| 500 | case StatementType::If: | ||
| 501 | return MoveOutwardIf(goto_stmt); | ||
| 502 | case StatementType::Loop: | ||
| 503 | return MoveOutwardLoop(goto_stmt); | ||
| 504 | default: | ||
| 505 | throw LogicError("Invalid outward movement"); | ||
| 506 | } | ||
| 507 | } | ||
| 508 | |||
| 509 | [[nodiscard]] Node MoveInward(Node goto_stmt) { | ||
| 510 | Statement* const parent{goto_stmt->up}; | ||
| 511 | Tree& body{parent->children}; | ||
| 512 | const Node label{goto_stmt->label}; | ||
| 513 | const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; | ||
| 514 | const u32 label_id{label->id}; | ||
| 515 | |||
| 516 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 517 | Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; | ||
| 518 | body.insert(goto_stmt, *set_var); | ||
| 519 | |||
| 520 | Tree if_body; | ||
| 521 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), label_nested_stmt); | ||
| 522 | Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 523 | Statement* const neg_var{pool.Create(Not{}, variable, &root_stmt)}; | ||
| 524 | if (!if_body.empty()) { | ||
| 525 | Statement* const if_stmt{pool.Create(If{}, neg_var, std::move(if_body), parent)}; | ||
| 526 | UpdateTreeUp(if_stmt); | ||
| 527 | body.insert(goto_stmt, *if_stmt); | ||
| 528 | } | ||
| 529 | body.erase(goto_stmt); | ||
| 530 | |||
| 531 | switch (label_nested_stmt->type) { | ||
| 532 | case StatementType::If: | ||
| 533 | // Update nested if condition | ||
| 534 | label_nested_stmt->cond = | ||
| 535 | pool.Create(Or{}, variable, label_nested_stmt->cond, &root_stmt); | ||
| 536 | break; | ||
| 537 | case StatementType::Loop: | ||
| 538 | break; | ||
| 539 | default: | ||
| 540 | throw LogicError("Invalid inward movement"); | ||
| 541 | } | ||
| 542 | Tree& nested_tree{label_nested_stmt->children}; | ||
| 543 | Statement* const new_goto{pool.Create(Goto{}, variable, label, &*label_nested_stmt)}; | ||
| 544 | return nested_tree.insert(nested_tree.begin(), *new_goto); | ||
| 545 | } | ||
| 546 | |||
| 547 | [[nodiscard]] Node Lift(Node goto_stmt) { | ||
| 548 | Statement* const parent{goto_stmt->up}; | ||
| 549 | Tree& body{parent->children}; | ||
| 550 | const Node label{goto_stmt->label}; | ||
| 551 | const u32 label_id{label->id}; | ||
| 552 | const Node label_nested_stmt{SiblingFromNephew(goto_stmt, label)}; | ||
| 553 | |||
| 554 | Tree loop_body; | ||
| 555 | loop_body.splice(loop_body.begin(), body, label_nested_stmt, goto_stmt); | ||
| 556 | SanitizeNoBreaks(loop_body); | ||
| 557 | Statement* const variable{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 558 | Statement* const loop_stmt{pool.Create(Loop{}, variable, std::move(loop_body), parent)}; | ||
| 559 | UpdateTreeUp(loop_stmt); | ||
| 560 | body.insert(goto_stmt, *loop_stmt); | ||
| 561 | |||
| 562 | Statement* const new_goto{pool.Create(Goto{}, variable, label, loop_stmt)}; | ||
| 563 | loop_stmt->children.push_front(*new_goto); | ||
| 564 | const Node new_goto_node{loop_stmt->children.begin()}; | ||
| 565 | |||
| 566 | Statement* const set_var{pool.Create(SetVariable{}, label_id, goto_stmt->cond, loop_stmt)}; | ||
| 567 | loop_stmt->children.push_back(*set_var); | ||
| 568 | |||
| 569 | body.erase(goto_stmt); | ||
| 570 | return new_goto_node; | ||
| 571 | } | ||
| 572 | |||
| 573 | Node MoveOutwardIf(Node goto_stmt) { | ||
| 574 | const Node parent{Tree::s_iterator_to(*goto_stmt->up)}; | ||
| 575 | Tree& body{parent->children}; | ||
| 576 | const u32 label_id{goto_stmt->label->id}; | ||
| 577 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 578 | Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, &*parent)}; | ||
| 579 | body.insert(goto_stmt, *set_goto_var); | ||
| 580 | |||
| 581 | Tree if_body; | ||
| 582 | if_body.splice(if_body.begin(), body, std::next(goto_stmt), body.end()); | ||
| 583 | if_body.pop_front(); | ||
| 584 | Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 585 | Statement* const neg_cond{pool.Create(Not{}, cond, &root_stmt)}; | ||
| 586 | Statement* const if_stmt{pool.Create(If{}, neg_cond, std::move(if_body), &*parent)}; | ||
| 587 | UpdateTreeUp(if_stmt); | ||
| 588 | body.insert(goto_stmt, *if_stmt); | ||
| 589 | |||
| 590 | body.erase(goto_stmt); | ||
| 591 | |||
| 592 | Statement* const new_cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 593 | Statement* const new_goto{pool.Create(Goto{}, new_cond, goto_stmt->label, parent->up)}; | ||
| 594 | Tree& parent_tree{parent->up->children}; | ||
| 595 | return parent_tree.insert(std::next(parent), *new_goto); | ||
| 596 | } | ||
| 597 | |||
| 598 | Node MoveOutwardLoop(Node goto_stmt) { | ||
| 599 | Statement* const parent{goto_stmt->up}; | ||
| 600 | Tree& body{parent->children}; | ||
| 601 | const u32 label_id{goto_stmt->label->id}; | ||
| 602 | Statement* const goto_cond{goto_stmt->cond}; | ||
| 603 | Statement* const set_goto_var{pool.Create(SetVariable{}, label_id, goto_cond, parent)}; | ||
| 604 | Statement* const cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 605 | Statement* const break_stmt{pool.Create(Break{}, cond, parent)}; | ||
| 606 | body.insert(goto_stmt, *set_goto_var); | ||
| 607 | body.insert(goto_stmt, *break_stmt); | ||
| 608 | body.erase(goto_stmt); | ||
| 609 | |||
| 610 | const Node loop{Tree::s_iterator_to(*goto_stmt->up)}; | ||
| 611 | Statement* const new_goto_cond{pool.Create(Variable{}, label_id, &root_stmt)}; | ||
| 612 | Statement* const new_goto{pool.Create(Goto{}, new_goto_cond, goto_stmt->label, loop->up)}; | ||
| 613 | Tree& parent_tree{loop->up->children}; | ||
| 614 | return parent_tree.insert(std::next(loop), *new_goto); | ||
| 615 | } | ||
| 616 | |||
| 617 | ObjectPool<Statement>& pool; | ||
| 618 | Statement root_stmt{FunctionTag{}}; | ||
| 619 | }; | ||
| 620 | |||
| 621 | [[nodiscard]] Statement* TryFindForwardBlock(Statement& stmt) { | ||
| 622 | Tree& tree{stmt.up->children}; | ||
| 623 | const Node end{tree.end()}; | ||
| 624 | Node forward_node{std::next(Tree::s_iterator_to(stmt))}; | ||
| 625 | while (forward_node != end && !HasChildren(forward_node->type)) { | ||
| 626 | if (forward_node->type == StatementType::Code) { | ||
| 627 | return &*forward_node; | ||
| 628 | } | ||
| 629 | ++forward_node; | ||
| 630 | } | ||
| 631 | return nullptr; | ||
| 632 | } | ||
| 633 | |||
| 634 | [[nodiscard]] IR::U1 VisitExpr(IR::IREmitter& ir, const Statement& stmt) { | ||
| 635 | switch (stmt.type) { | ||
| 636 | case StatementType::Identity: | ||
| 637 | return ir.Condition(stmt.guest_cond); | ||
| 638 | case StatementType::Not: | ||
| 639 | return ir.LogicalNot(IR::U1{VisitExpr(ir, *stmt.op)}); | ||
| 640 | case StatementType::Or: | ||
| 641 | return ir.LogicalOr(VisitExpr(ir, *stmt.op_a), VisitExpr(ir, *stmt.op_b)); | ||
| 642 | case StatementType::Variable: | ||
| 643 | return ir.GetGotoVariable(stmt.id); | ||
| 644 | case StatementType::IndirectBranchCond: | ||
| 645 | return ir.IEqual(ir.GetIndirectBranchVariable(), ir.Imm32(stmt.location)); | ||
| 646 | default: | ||
| 647 | throw NotImplementedException("Statement type {}", stmt.type); | ||
| 648 | } | ||
| 649 | } | ||
| 650 | |||
| 651 | class TranslatePass { | ||
| 652 | public: | ||
| 653 | TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, | ||
| 654 | ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, | ||
| 655 | IR::AbstractSyntaxList& syntax_list_) | ||
| 656 | : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, | ||
| 657 | syntax_list{syntax_list_} { | ||
| 658 | Visit(root_stmt, nullptr, nullptr); | ||
| 659 | |||
| 660 | IR::Block& first_block{*syntax_list.front().data.block}; | ||
| 661 | IR::IREmitter ir(first_block, first_block.begin()); | ||
| 662 | ir.Prologue(); | ||
| 663 | } | ||
| 664 | |||
| 665 | private: | ||
| 666 | void Visit(Statement& parent, IR::Block* break_block, IR::Block* fallthrough_block) { | ||
| 667 | IR::Block* current_block{}; | ||
| 668 | const auto ensure_block{[&] { | ||
| 669 | if (current_block) { | ||
| 670 | return; | ||
| 671 | } | ||
| 672 | current_block = block_pool.Create(inst_pool); | ||
| 673 | auto& node{syntax_list.emplace_back()}; | ||
| 674 | node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 675 | node.data.block = current_block; | ||
| 676 | }}; | ||
| 677 | Tree& tree{parent.children}; | ||
| 678 | for (auto it = tree.begin(); it != tree.end(); ++it) { | ||
| 679 | Statement& stmt{*it}; | ||
| 680 | switch (stmt.type) { | ||
| 681 | case StatementType::Label: | ||
| 682 | // Labels can be ignored | ||
| 683 | break; | ||
| 684 | case StatementType::Code: { | ||
| 685 | ensure_block(); | ||
| 686 | Translate(env, current_block, stmt.block->begin.Offset(), stmt.block->end.Offset()); | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | case StatementType::SetVariable: { | ||
| 690 | ensure_block(); | ||
| 691 | IR::IREmitter ir{*current_block}; | ||
| 692 | ir.SetGotoVariable(stmt.id, VisitExpr(ir, *stmt.op)); | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | case StatementType::SetIndirectBranchVariable: { | ||
| 696 | ensure_block(); | ||
| 697 | IR::IREmitter ir{*current_block}; | ||
| 698 | IR::U32 address{ir.IAdd(ir.GetReg(stmt.branch_reg), ir.Imm32(stmt.branch_offset))}; | ||
| 699 | ir.SetIndirectBranchVariable(address); | ||
| 700 | break; | ||
| 701 | } | ||
| 702 | case StatementType::If: { | ||
| 703 | ensure_block(); | ||
| 704 | IR::Block* const merge_block{MergeBlock(parent, stmt)}; | ||
| 705 | |||
| 706 | // Implement if header block | ||
| 707 | IR::IREmitter ir{*current_block}; | ||
| 708 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 709 | |||
| 710 | const size_t if_node_index{syntax_list.size()}; | ||
| 711 | syntax_list.emplace_back(); | ||
| 712 | |||
| 713 | // Visit children | ||
| 714 | const size_t then_block_index{syntax_list.size()}; | ||
| 715 | Visit(stmt, break_block, merge_block); | ||
| 716 | |||
| 717 | IR::Block* const then_block{syntax_list.at(then_block_index).data.block}; | ||
| 718 | current_block->AddBranch(then_block); | ||
| 719 | current_block->AddBranch(merge_block); | ||
| 720 | current_block = merge_block; | ||
| 721 | |||
| 722 | auto& if_node{syntax_list[if_node_index]}; | ||
| 723 | if_node.type = IR::AbstractSyntaxNode::Type::If; | ||
| 724 | if_node.data.if_node.cond = cond; | ||
| 725 | if_node.data.if_node.body = then_block; | ||
| 726 | if_node.data.if_node.merge = merge_block; | ||
| 727 | |||
| 728 | auto& endif_node{syntax_list.emplace_back()}; | ||
| 729 | endif_node.type = IR::AbstractSyntaxNode::Type::EndIf; | ||
| 730 | endif_node.data.end_if.merge = merge_block; | ||
| 731 | |||
| 732 | auto& merge{syntax_list.emplace_back()}; | ||
| 733 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 734 | merge.data.block = merge_block; | ||
| 735 | break; | ||
| 736 | } | ||
| 737 | case StatementType::Loop: { | ||
| 738 | IR::Block* const loop_header_block{block_pool.Create(inst_pool)}; | ||
| 739 | if (current_block) { | ||
| 740 | current_block->AddBranch(loop_header_block); | ||
| 741 | } | ||
| 742 | auto& header_node{syntax_list.emplace_back()}; | ||
| 743 | header_node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 744 | header_node.data.block = loop_header_block; | ||
| 745 | |||
| 746 | IR::Block* const continue_block{block_pool.Create(inst_pool)}; | ||
| 747 | IR::Block* const merge_block{MergeBlock(parent, stmt)}; | ||
| 748 | |||
| 749 | const size_t loop_node_index{syntax_list.size()}; | ||
| 750 | syntax_list.emplace_back(); | ||
| 751 | |||
| 752 | // Visit children | ||
| 753 | const size_t body_block_index{syntax_list.size()}; | ||
| 754 | Visit(stmt, merge_block, continue_block); | ||
| 755 | |||
| 756 | // The continue block is located at the end of the loop | ||
| 757 | IR::IREmitter ir{*continue_block}; | ||
| 758 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 759 | |||
| 760 | IR::Block* const body_block{syntax_list.at(body_block_index).data.block}; | ||
| 761 | loop_header_block->AddBranch(body_block); | ||
| 762 | |||
| 763 | continue_block->AddBranch(loop_header_block); | ||
| 764 | continue_block->AddBranch(merge_block); | ||
| 765 | |||
| 766 | current_block = merge_block; | ||
| 767 | |||
| 768 | auto& loop{syntax_list[loop_node_index]}; | ||
| 769 | loop.type = IR::AbstractSyntaxNode::Type::Loop; | ||
| 770 | loop.data.loop.body = body_block; | ||
| 771 | loop.data.loop.continue_block = continue_block; | ||
| 772 | loop.data.loop.merge = merge_block; | ||
| 773 | |||
| 774 | auto& continue_block_node{syntax_list.emplace_back()}; | ||
| 775 | continue_block_node.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 776 | continue_block_node.data.block = continue_block; | ||
| 777 | |||
| 778 | auto& repeat{syntax_list.emplace_back()}; | ||
| 779 | repeat.type = IR::AbstractSyntaxNode::Type::Repeat; | ||
| 780 | repeat.data.repeat.cond = cond; | ||
| 781 | repeat.data.repeat.loop_header = loop_header_block; | ||
| 782 | repeat.data.repeat.merge = merge_block; | ||
| 783 | |||
| 784 | auto& merge{syntax_list.emplace_back()}; | ||
| 785 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 786 | merge.data.block = merge_block; | ||
| 787 | break; | ||
| 788 | } | ||
| 789 | case StatementType::Break: { | ||
| 790 | ensure_block(); | ||
| 791 | IR::Block* const skip_block{MergeBlock(parent, stmt)}; | ||
| 792 | |||
| 793 | IR::IREmitter ir{*current_block}; | ||
| 794 | const IR::U1 cond{ir.ConditionRef(VisitExpr(ir, *stmt.cond))}; | ||
| 795 | current_block->AddBranch(break_block); | ||
| 796 | current_block->AddBranch(skip_block); | ||
| 797 | current_block = skip_block; | ||
| 798 | |||
| 799 | auto& break_node{syntax_list.emplace_back()}; | ||
| 800 | break_node.type = IR::AbstractSyntaxNode::Type::Break; | ||
| 801 | break_node.data.break_node.cond = cond; | ||
| 802 | break_node.data.break_node.merge = break_block; | ||
| 803 | break_node.data.break_node.skip = skip_block; | ||
| 804 | |||
| 805 | auto& merge{syntax_list.emplace_back()}; | ||
| 806 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 807 | merge.data.block = skip_block; | ||
| 808 | break; | ||
| 809 | } | ||
| 810 | case StatementType::Return: { | ||
| 811 | ensure_block(); | ||
| 812 | IR::IREmitter{*current_block}.Epilogue(); | ||
| 813 | current_block = nullptr; | ||
| 814 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; | ||
| 815 | break; | ||
| 816 | } | ||
| 817 | case StatementType::Kill: { | ||
| 818 | ensure_block(); | ||
| 819 | IR::Block* demote_block{MergeBlock(parent, stmt)}; | ||
| 820 | IR::IREmitter{*current_block}.DemoteToHelperInvocation(); | ||
| 821 | current_block->AddBranch(demote_block); | ||
| 822 | current_block = demote_block; | ||
| 823 | |||
| 824 | auto& merge{syntax_list.emplace_back()}; | ||
| 825 | merge.type = IR::AbstractSyntaxNode::Type::Block; | ||
| 826 | merge.data.block = demote_block; | ||
| 827 | break; | ||
| 828 | } | ||
| 829 | case StatementType::Unreachable: { | ||
| 830 | ensure_block(); | ||
| 831 | current_block = nullptr; | ||
| 832 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; | ||
| 833 | break; | ||
| 834 | } | ||
| 835 | default: | ||
| 836 | throw NotImplementedException("Statement type {}", stmt.type); | ||
| 837 | } | ||
| 838 | } | ||
| 839 | if (current_block) { | ||
| 840 | if (fallthrough_block) { | ||
| 841 | current_block->AddBranch(fallthrough_block); | ||
| 842 | } else { | ||
| 843 | syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Unreachable; | ||
| 844 | } | ||
| 845 | } | ||
| 846 | } | ||
| 847 | |||
| 848 | IR::Block* MergeBlock(Statement& parent, Statement& stmt) { | ||
| 849 | Statement* merge_stmt{TryFindForwardBlock(stmt)}; | ||
| 850 | if (!merge_stmt) { | ||
| 851 | // Create a merge block we can visit later | ||
| 852 | merge_stmt = stmt_pool.Create(&dummy_flow_block, &parent); | ||
| 853 | parent.children.insert(std::next(Tree::s_iterator_to(stmt)), *merge_stmt); | ||
| 854 | } | ||
| 855 | return block_pool.Create(inst_pool); | ||
| 856 | } | ||
| 857 | |||
| 858 | ObjectPool<Statement>& stmt_pool; | ||
| 859 | ObjectPool<IR::Inst>& inst_pool; | ||
| 860 | ObjectPool<IR::Block>& block_pool; | ||
| 861 | Environment& env; | ||
| 862 | IR::AbstractSyntaxList& syntax_list; | ||
| 863 | |||
| 864 | // TODO: Remove this when all compilers support C++20 constexpr std::vector | ||
| 865 | #if __cpp_lib_constexpr_vector >= 201907 | ||
| 866 | static constexpr Flow::Block dummy_flow_block; | ||
| 867 | #else | ||
| 868 | const Flow::Block dummy_flow_block; | ||
| 869 | #endif | ||
| 870 | }; | ||
| 871 | } // Anonymous namespace | ||
| 872 | |||
| 873 | IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||
| 874 | Environment& env, Flow::CFG& cfg) { | ||
| 875 | ObjectPool<Statement> stmt_pool{64}; | ||
| 876 | GotoPass goto_pass{cfg, stmt_pool}; | ||
| 877 | Statement& root{goto_pass.RootStatement()}; | ||
| 878 | IR::AbstractSyntaxList syntax_list; | ||
| 879 | TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; | ||
| 880 | return syntax_list; | ||
| 881 | } | ||
| 882 | |||
| 883 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h new file mode 100644 index 000000000..88b083649 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h | |||
| @@ -0,0 +1,20 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/abstract_syntax_list.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/value.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | |||
| 16 | [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, | ||
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | ||
| 18 | Flow::CFG& cfg); | ||
| 19 | |||
| 20 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp new file mode 100644 index 000000000..d9f999e05 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | SAFEADD, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class AtomSize : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | F32, | ||
| 29 | F16x2, | ||
| 30 | S64, | ||
| 31 | }; | ||
| 32 | |||
| 33 | IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||
| 34 | AtomOp op, bool is_signed) { | ||
| 35 | switch (op) { | ||
| 36 | case AtomOp::ADD: | ||
| 37 | return ir.GlobalAtomicIAdd(offset, op_b); | ||
| 38 | case AtomOp::MIN: | ||
| 39 | return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||
| 40 | case AtomOp::MAX: | ||
| 41 | return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||
| 42 | case AtomOp::INC: | ||
| 43 | return ir.GlobalAtomicInc(offset, op_b); | ||
| 44 | case AtomOp::DEC: | ||
| 45 | return ir.GlobalAtomicDec(offset, op_b); | ||
| 46 | case AtomOp::AND: | ||
| 47 | return ir.GlobalAtomicAnd(offset, op_b); | ||
| 48 | case AtomOp::OR: | ||
| 49 | return ir.GlobalAtomicOr(offset, op_b); | ||
| 50 | case AtomOp::XOR: | ||
| 51 | return ir.GlobalAtomicXor(offset, op_b); | ||
| 52 | case AtomOp::EXCH: | ||
| 53 | return ir.GlobalAtomicExchange(offset, op_b); | ||
| 54 | default: | ||
| 55 | throw NotImplementedException("Integer Atom Operation {}", op); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||
| 60 | AtomSize size) { | ||
| 61 | static constexpr IR::FpControl f16_control{ | ||
| 62 | .no_contraction = false, | ||
| 63 | .rounding = IR::FpRounding::RN, | ||
| 64 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 65 | }; | ||
| 66 | static constexpr IR::FpControl f32_control{ | ||
| 67 | .no_contraction = false, | ||
| 68 | .rounding = IR::FpRounding::RN, | ||
| 69 | .fmz_mode = IR::FmzMode::FTZ, | ||
| 70 | }; | ||
| 71 | switch (op) { | ||
| 72 | case AtomOp::ADD: | ||
| 73 | return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||
| 74 | : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||
| 75 | case AtomOp::MIN: | ||
| 76 | return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||
| 77 | case AtomOp::MAX: | ||
| 78 | return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||
| 79 | default: | ||
| 80 | throw NotImplementedException("FP Atom Operation {}", op); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 88 | BitField<28, 20, s64> addr_offset; | ||
| 89 | BitField<28, 20, u64> rz_addr_offset; | ||
| 90 | BitField<48, 1, u64> e; | ||
| 91 | } const mem{insn}; | ||
| 92 | |||
| 93 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 94 | if (mem.e == 0) { | ||
| 95 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 96 | } | ||
| 97 | return v.L(mem.addr_reg); | ||
| 98 | }()}; | ||
| 99 | const u64 addr_offset{[&]() -> u64 { | ||
| 100 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 101 | // When RZ is used, the offset field holds an absolute address | ||
| 102 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 103 | } else { | ||
| 104 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 105 | } | ||
| 106 | }()}; | ||
| 107 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 108 | } | ||
| 109 | |||
| 110 | bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||
| 111 | // TODO: SAFEADD | ||
| 112 | switch (size) { | ||
| 113 | case AtomSize::S32: | ||
| 114 | case AtomSize::U64: | ||
| 115 | return (op == AtomOp::INC || op == AtomOp::DEC); | ||
| 116 | case AtomSize::S64: | ||
| 117 | return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 118 | case AtomSize::F32: | ||
| 119 | return op != AtomOp::ADD; | ||
| 120 | case AtomSize::F16x2: | ||
| 121 | return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||
| 122 | default: | ||
| 123 | return false; | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||
| 128 | switch (size) { | ||
| 129 | case AtomSize::U32: | ||
| 130 | case AtomSize::S32: | ||
| 131 | case AtomSize::F32: | ||
| 132 | case AtomSize::F16x2: | ||
| 133 | return ir.LoadGlobal32(offset); | ||
| 134 | case AtomSize::U64: | ||
| 135 | case AtomSize::S64: | ||
| 136 | return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||
| 137 | default: | ||
| 138 | throw NotImplementedException("Atom Size {}", size); | ||
| 139 | } | ||
| 140 | } | ||
| 141 | |||
| 142 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||
| 143 | switch (size) { | ||
| 144 | case AtomSize::U32: | ||
| 145 | case AtomSize::S32: | ||
| 146 | case AtomSize::F16x2: | ||
| 147 | return v.X(dest_reg, IR::U32{result}); | ||
| 148 | case AtomSize::U64: | ||
| 149 | case AtomSize::S64: | ||
| 150 | return v.L(dest_reg, IR::U64{result}); | ||
| 151 | case AtomSize::F32: | ||
| 152 | return v.F(dest_reg, IR::F32{result}); | ||
| 153 | default: | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | IR::Value ApplyAtomOp(TranslatorVisitor& v, IR::Reg operand_reg, const IR::U64& offset, | ||
| 159 | AtomSize size, AtomOp op) { | ||
| 160 | switch (size) { | ||
| 161 | case AtomSize::U32: | ||
| 162 | case AtomSize::S32: | ||
| 163 | return ApplyIntegerAtomOp(v.ir, offset, v.X(operand_reg), op, size == AtomSize::S32); | ||
| 164 | case AtomSize::U64: | ||
| 165 | case AtomSize::S64: | ||
| 166 | return ApplyIntegerAtomOp(v.ir, offset, v.L(operand_reg), op, size == AtomSize::S64); | ||
| 167 | case AtomSize::F32: | ||
| 168 | return ApplyFpAtomOp(v.ir, offset, v.F(operand_reg), op, size); | ||
| 169 | case AtomSize::F16x2: { | ||
| 170 | return ApplyFpAtomOp(v.ir, offset, v.ir.UnpackFloat2x16(v.X(operand_reg)), op, size); | ||
| 171 | } | ||
| 172 | default: | ||
| 173 | throw NotImplementedException("Atom Size {}", size); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | void GlobalAtomic(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, | ||
| 178 | const IR::U64& offset, AtomSize size, AtomOp op, bool write_dest) { | ||
| 179 | IR::Value result; | ||
| 180 | if (AtomOpNotApplicable(size, op)) { | ||
| 181 | result = LoadGlobal(v.ir, offset, size); | ||
| 182 | } else { | ||
| 183 | result = ApplyAtomOp(v, operand_reg, offset, size, op); | ||
| 184 | } | ||
| 185 | if (write_dest) { | ||
| 186 | StoreResult(v, dest_reg, result, size); | ||
| 187 | } | ||
| 188 | } | ||
| 189 | } // Anonymous namespace | ||
| 190 | |||
| 191 | void TranslatorVisitor::ATOM(u64 insn) { | ||
| 192 | union { | ||
| 193 | u64 raw; | ||
| 194 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 195 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<49, 3, AtomSize> size; | ||
| 197 | BitField<52, 4, AtomOp> op; | ||
| 198 | } const atom{insn}; | ||
| 199 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 200 | GlobalAtomic(*this, atom.dest_reg, atom.operand_reg, offset, atom.size, atom.op, true); | ||
| 201 | } | ||
| 202 | |||
| 203 | void TranslatorVisitor::RED(u64 insn) { | ||
| 204 | union { | ||
| 205 | u64 raw; | ||
| 206 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 207 | BitField<20, 3, AtomSize> size; | ||
| 208 | BitField<23, 3, AtomOp> op; | ||
| 209 | } const red{insn}; | ||
| 210 | const IR::U64 offset{AtomOffset(*this, insn)}; | ||
| 211 | GlobalAtomic(*this, IR::Reg::RZ, red.operand_reg, offset, red.size, red.op, true); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp new file mode 100644 index 000000000..8b974621e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | |||
| @@ -0,0 +1,110 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class AtomOp : u64 { | ||
| 12 | ADD, | ||
| 13 | MIN, | ||
| 14 | MAX, | ||
| 15 | INC, | ||
| 16 | DEC, | ||
| 17 | AND, | ||
| 18 | OR, | ||
| 19 | XOR, | ||
| 20 | EXCH, | ||
| 21 | }; | ||
| 22 | |||
| 23 | enum class AtomsSize : u64 { | ||
| 24 | U32, | ||
| 25 | S32, | ||
| 26 | U64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||
| 30 | bool is_signed) { | ||
| 31 | switch (op) { | ||
| 32 | case AtomOp::ADD: | ||
| 33 | return ir.SharedAtomicIAdd(offset, op_b); | ||
| 34 | case AtomOp::MIN: | ||
| 35 | return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||
| 36 | case AtomOp::MAX: | ||
| 37 | return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||
| 38 | case AtomOp::INC: | ||
| 39 | return ir.SharedAtomicInc(offset, op_b); | ||
| 40 | case AtomOp::DEC: | ||
| 41 | return ir.SharedAtomicDec(offset, op_b); | ||
| 42 | case AtomOp::AND: | ||
| 43 | return ir.SharedAtomicAnd(offset, op_b); | ||
| 44 | case AtomOp::OR: | ||
| 45 | return ir.SharedAtomicOr(offset, op_b); | ||
| 46 | case AtomOp::XOR: | ||
| 47 | return ir.SharedAtomicXor(offset, op_b); | ||
| 48 | case AtomOp::EXCH: | ||
| 49 | return ir.SharedAtomicExchange(offset, op_b); | ||
| 50 | default: | ||
| 51 | throw NotImplementedException("Integer Atoms Operation {}", op); | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 59 | BitField<30, 22, u64> absolute_offset; | ||
| 60 | BitField<30, 22, s64> relative_offset; | ||
| 61 | } const encoding{insn}; | ||
| 62 | |||
| 63 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 64 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||
| 65 | } else { | ||
| 66 | const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||
| 67 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 68 | } | ||
| 69 | } | ||
| 70 | |||
| 71 | void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||
| 72 | switch (size) { | ||
| 73 | case AtomsSize::U32: | ||
| 74 | case AtomsSize::S32: | ||
| 75 | return v.X(dest_reg, IR::U32{result}); | ||
| 76 | case AtomsSize::U64: | ||
| 77 | return v.L(dest_reg, IR::U64{result}); | ||
| 78 | default: | ||
| 79 | break; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } // Anonymous namespace | ||
| 83 | |||
| 84 | void TranslatorVisitor::ATOMS(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 89 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 90 | BitField<28, 2, AtomsSize> size; | ||
| 91 | BitField<52, 4, AtomOp> op; | ||
| 92 | } const atoms{insn}; | ||
| 93 | |||
| 94 | const bool size_64{atoms.size == AtomsSize::U64}; | ||
| 95 | if (size_64 && atoms.op != AtomOp::EXCH) { | ||
| 96 | throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||
| 97 | } | ||
| 98 | const bool is_signed{atoms.size == AtomsSize::S32}; | ||
| 99 | const IR::U32 offset{AtomsOffset(*this, insn)}; | ||
| 100 | |||
| 101 | IR::Value result; | ||
| 102 | if (size_64) { | ||
| 103 | result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||
| 104 | } else { | ||
| 105 | result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||
| 106 | } | ||
| 107 | StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||
| 108 | } | ||
| 109 | |||
| 110 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp new file mode 100644 index 000000000..fb3f00d3f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | enum class BitSize : u64 { | ||
| 13 | B32, | ||
| 14 | B64, | ||
| 15 | B96, | ||
| 16 | B128, | ||
| 17 | }; | ||
| 18 | |||
| 19 | void TranslatorVisitor::AL2P(u64 insn) { | ||
| 20 | union { | ||
| 21 | u64 raw; | ||
| 22 | BitField<0, 8, IR::Reg> result_register; | ||
| 23 | BitField<8, 8, IR::Reg> indexing_register; | ||
| 24 | BitField<20, 11, s64> offset; | ||
| 25 | BitField<47, 2, BitSize> bitsize; | ||
| 26 | } const al2p{insn}; | ||
| 27 | if (al2p.bitsize != BitSize::B32) { | ||
| 28 | throw NotImplementedException("BitSize {}", al2p.bitsize.Value()); | ||
| 29 | } | ||
| 30 | const IR::U32 converted_offset{ir.Imm32(static_cast<u32>(al2p.offset.Value()))}; | ||
| 31 | const IR::U32 result{ir.IAdd(X(al2p.indexing_register), converted_offset)}; | ||
| 32 | X(al2p.result_register, result); | ||
| 33 | } | ||
| 34 | |||
| 35 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp new file mode 100644 index 000000000..86e433e41 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/barrier_operations.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | // Scope names appear to follow CUDA terminology. | ||
| 14 | enum class LocalScope : u64 { | ||
| 15 | CTA, | ||
| 16 | GL, | ||
| 17 | SYS, | ||
| 18 | VC, | ||
| 19 | }; | ||
| 20 | } // Anonymous namespace | ||
| 21 | |||
| 22 | void TranslatorVisitor::MEMBAR(u64 inst) { | ||
| 23 | union { | ||
| 24 | u64 raw; | ||
| 25 | BitField<8, 2, LocalScope> scope; | ||
| 26 | } const membar{inst}; | ||
| 27 | |||
| 28 | if (membar.scope == LocalScope::CTA) { | ||
| 29 | ir.WorkgroupMemoryBarrier(); | ||
| 30 | } else { | ||
| 31 | ir.DeviceMemoryBarrier(); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void TranslatorVisitor::DEPBAR() { | ||
| 36 | // DEPBAR is a no-op | ||
| 37 | } | ||
| 38 | |||
| 39 | void TranslatorVisitor::BAR(u64 insn) { | ||
| 40 | enum class Mode { | ||
| 41 | RedPopc, | ||
| 42 | Scan, | ||
| 43 | RedAnd, | ||
| 44 | RedOr, | ||
| 45 | Sync, | ||
| 46 | Arrive, | ||
| 47 | }; | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<43, 1, u64> is_a_imm; | ||
| 51 | BitField<44, 1, u64> is_b_imm; | ||
| 52 | BitField<8, 8, u64> imm_a; | ||
| 53 | BitField<20, 12, u64> imm_b; | ||
| 54 | BitField<42, 1, u64> neg_pred; | ||
| 55 | BitField<39, 3, IR::Pred> pred; | ||
| 56 | } const bar{insn}; | ||
| 57 | |||
| 58 | const Mode mode{[insn] { | ||
| 59 | switch (insn & 0x0000009B00000000ULL) { // Mode bits: 32-33, 35-36, 39 | ||
| 60 | case 0x0000000200000000ULL: | ||
| 61 | return Mode::RedPopc; | ||
| 62 | case 0x0000000300000000ULL: | ||
| 63 | return Mode::Scan; | ||
| 64 | case 0x0000000A00000000ULL: | ||
| 65 | return Mode::RedAnd; | ||
| 66 | case 0x0000001200000000ULL: | ||
| 67 | return Mode::RedOr; | ||
| 68 | case 0x0000008000000000ULL: | ||
| 69 | return Mode::Sync; | ||
| 70 | case 0x0000008100000000ULL: | ||
| 71 | return Mode::Arrive; | ||
| 72 | } | ||
| 73 | throw NotImplementedException("Invalid encoding"); | ||
| 74 | }()}; | ||
| 75 | if (mode != Mode::Sync) { | ||
| 76 | throw NotImplementedException("BAR mode {}", mode); | ||
| 77 | } | ||
| 78 | if (bar.is_a_imm == 0) { | ||
| 79 | throw NotImplementedException("Non-immediate input A"); | ||
| 80 | } | ||
| 81 | if (bar.imm_a != 0) { | ||
| 82 | throw NotImplementedException("Non-zero input A"); | ||
| 83 | } | ||
| 84 | if (bar.is_b_imm == 0) { | ||
| 85 | throw NotImplementedException("Non-immediate input B"); | ||
| 86 | } | ||
| 87 | if (bar.imm_b != 0) { | ||
| 88 | throw NotImplementedException("Non-zero input B"); | ||
| 89 | } | ||
| 90 | if (bar.pred != IR::Pred::PT && bar.neg_pred != 0) { | ||
| 91 | throw NotImplementedException("Non-true input predicate"); | ||
| 92 | } | ||
| 93 | ir.Barrier(); | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp new file mode 100644 index 000000000..9d5a87e52 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_extract.cpp | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFE(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 16 | BitField<40, 1, u64> brev; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const bfe{insn}; | ||
| 20 | |||
| 21 | const IR::U32 offset{v.ir.BitFieldExtract(src, v.ir.Imm32(0), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 count{v.ir.BitFieldExtract(src, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 23 | |||
| 24 | // Common constants | ||
| 25 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 26 | const IR::U32 one{v.ir.Imm32(1)}; | ||
| 27 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 28 | // Edge case conditions | ||
| 29 | const IR::U1 zero_count{v.ir.IEqual(count, zero)}; | ||
| 30 | const IR::U1 exceed_count{v.ir.IGreaterThanEqual(v.ir.IAdd(offset, count), max_size, false)}; | ||
| 31 | const IR::U1 replicate{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 32 | |||
| 33 | IR::U32 base{v.X(bfe.offset_reg)}; | ||
| 34 | if (bfe.brev != 0) { | ||
| 35 | base = v.ir.BitReverse(base); | ||
| 36 | } | ||
| 37 | IR::U32 result{v.ir.BitFieldExtract(base, offset, count, bfe.is_signed != 0)}; | ||
| 38 | if (bfe.is_signed != 0) { | ||
| 39 | const IR::U1 is_negative{v.ir.ILessThan(base, zero, true)}; | ||
| 40 | const IR::U32 replicated_bit{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 41 | const IR::U32 exceed_bit{v.ir.BitFieldExtract(base, v.ir.Imm32(31), one, false)}; | ||
| 42 | // Replicate condition | ||
| 43 | result = IR::U32{v.ir.Select(replicate, replicated_bit, result)}; | ||
| 44 | // Exceeding condition | ||
| 45 | const IR::U32 exceed_result{v.ir.BitFieldInsert(result, exceed_bit, v.ir.Imm32(31), one)}; | ||
| 46 | result = IR::U32{v.ir.Select(exceed_count, exceed_result, result)}; | ||
| 47 | } | ||
| 48 | // Zero count condition | ||
| 49 | result = IR::U32{v.ir.Select(zero_count, zero, result)}; | ||
| 50 | |||
| 51 | v.X(bfe.dest_reg, result); | ||
| 52 | |||
| 53 | if (bfe.cc != 0) { | ||
| 54 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 55 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 56 | v.ResetCFlag(); | ||
| 57 | v.ResetOFlag(); | ||
| 58 | } | ||
| 59 | } | ||
| 60 | } // Anonymous namespace | ||
| 61 | |||
| 62 | void TranslatorVisitor::BFE_reg(u64 insn) { | ||
| 63 | BFE(*this, insn, GetReg20(insn)); | ||
| 64 | } | ||
| 65 | |||
| 66 | void TranslatorVisitor::BFE_cbuf(u64 insn) { | ||
| 67 | BFE(*this, insn, GetCbuf(insn)); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::BFE_imm(u64 insn) { | ||
| 71 | BFE(*this, insn, GetImm20(insn)); | ||
| 72 | } | ||
| 73 | |||
| 74 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp new file mode 100644 index 000000000..1e1ec2119 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/bitfield_insert.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void BFI(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& base) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> insert_reg; | ||
| 16 | BitField<47, 1, u64> cc; | ||
| 17 | } const bfi{insn}; | ||
| 18 | |||
| 19 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 20 | const IR::U32 offset{v.ir.BitFieldExtract(src_a, zero, v.ir.Imm32(8), false)}; | ||
| 21 | const IR::U32 unsafe_count{v.ir.BitFieldExtract(src_a, v.ir.Imm32(8), v.ir.Imm32(8), false)}; | ||
| 22 | const IR::U32 max_size{v.ir.Imm32(32)}; | ||
| 23 | |||
| 24 | // Edge case conditions | ||
| 25 | const IR::U1 exceed_offset{v.ir.IGreaterThanEqual(offset, max_size, false)}; | ||
| 26 | const IR::U1 exceed_count{v.ir.IGreaterThan(unsafe_count, max_size, false)}; | ||
| 27 | |||
| 28 | const IR::U32 remaining_size{v.ir.ISub(max_size, offset)}; | ||
| 29 | const IR::U32 safe_count{v.ir.Select(exceed_count, remaining_size, unsafe_count)}; | ||
| 30 | |||
| 31 | const IR::U32 insert{v.X(bfi.insert_reg)}; | ||
| 32 | IR::U32 result{v.ir.BitFieldInsert(base, insert, offset, safe_count)}; | ||
| 33 | |||
| 34 | result = IR::U32{v.ir.Select(exceed_offset, base, result)}; | ||
| 35 | |||
| 36 | v.X(bfi.dest_reg, result); | ||
| 37 | if (bfi.cc != 0) { | ||
| 38 | v.SetZFlag(v.ir.IEqual(result, zero)); | ||
| 39 | v.SetSFlag(v.ir.ILessThan(result, zero, true)); | ||
| 40 | v.ResetCFlag(); | ||
| 41 | v.ResetOFlag(); | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::BFI_reg(u64 insn) { | ||
| 47 | BFI(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::BFI_rc(u64 insn) { | ||
| 51 | BFI(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::BFI_cr(u64 insn) { | ||
| 55 | BFI(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::BFI_imm(u64 insn) { | ||
| 59 | BFI(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp new file mode 100644 index 000000000..371c0e0f7 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/branch_indirect.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void Check(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<5, 1, u64> cbuf_mode; | ||
| 16 | BitField<6, 1, u64> lmt; | ||
| 17 | } const encoding{insn}; | ||
| 18 | |||
| 19 | if (encoding.cbuf_mode != 0) { | ||
| 20 | throw NotImplementedException("Constant buffer mode"); | ||
| 21 | } | ||
| 22 | if (encoding.lmt != 0) { | ||
| 23 | throw NotImplementedException("LMT"); | ||
| 24 | } | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::BRX(u64 insn) { | ||
| 29 | Check(insn); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::JMX(u64 insn) { | ||
| 33 | Check(insn); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h new file mode 100644 index 000000000..fd73f656c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h | |||
| @@ -0,0 +1,57 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class FpRounding : u64 { | ||
| 15 | RN, | ||
| 16 | RM, | ||
| 17 | RP, | ||
| 18 | RZ, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class FmzMode : u64 { | ||
| 22 | None, | ||
| 23 | FTZ, | ||
| 24 | FMZ, | ||
| 25 | INVALIDFMZ3, | ||
| 26 | }; | ||
| 27 | |||
| 28 | inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||
| 29 | switch (fp_rounding) { | ||
| 30 | case FpRounding::RN: | ||
| 31 | return IR::FpRounding::RN; | ||
| 32 | case FpRounding::RM: | ||
| 33 | return IR::FpRounding::RM; | ||
| 34 | case FpRounding::RP: | ||
| 35 | return IR::FpRounding::RP; | ||
| 36 | case FpRounding::RZ: | ||
| 37 | return IR::FpRounding::RZ; | ||
| 38 | } | ||
| 39 | throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||
| 40 | } | ||
| 41 | |||
| 42 | inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||
| 43 | switch (fmz_mode) { | ||
| 44 | case FmzMode::None: | ||
| 45 | return IR::FmzMode::None; | ||
| 46 | case FmzMode::FTZ: | ||
| 47 | return IR::FmzMode::FTZ; | ||
| 48 | case FmzMode::FMZ: | ||
| 49 | // FMZ is manually handled in the instruction | ||
| 50 | return IR::FmzMode::FTZ; | ||
| 51 | case FmzMode::INVALIDFMZ3: | ||
| 52 | break; | ||
| 53 | } | ||
| 54 | throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp new file mode 100644 index 000000000..20458d2ad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp | |||
| @@ -0,0 +1,153 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 9 | CompareOp compare_op, bool is_signed) { | ||
| 10 | switch (compare_op) { | ||
| 11 | case CompareOp::False: | ||
| 12 | return ir.Imm1(false); | ||
| 13 | case CompareOp::LessThan: | ||
| 14 | return ir.ILessThan(operand_1, operand_2, is_signed); | ||
| 15 | case CompareOp::Equal: | ||
| 16 | return ir.IEqual(operand_1, operand_2); | ||
| 17 | case CompareOp::LessThanEqual: | ||
| 18 | return ir.ILessThanEqual(operand_1, operand_2, is_signed); | ||
| 19 | case CompareOp::GreaterThan: | ||
| 20 | return ir.IGreaterThan(operand_1, operand_2, is_signed); | ||
| 21 | case CompareOp::NotEqual: | ||
| 22 | return ir.INotEqual(operand_1, operand_2); | ||
| 23 | case CompareOp::GreaterThanEqual: | ||
| 24 | return ir.IGreaterThanEqual(operand_1, operand_2, is_signed); | ||
| 25 | case CompareOp::True: | ||
| 26 | return ir.Imm1(true); | ||
| 27 | default: | ||
| 28 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | |||
| 32 | IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 33 | CompareOp compare_op, bool is_signed) { | ||
| 34 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 35 | const IR::U32 carry{ir.Select(ir.GetCFlag(), ir.Imm32(1), zero)}; | ||
| 36 | const IR::U1 z_flag{ir.GetZFlag()}; | ||
| 37 | const IR::U32 intermediate{ir.IAdd(ir.IAdd(operand_1, ir.BitwiseNot(operand_2)), carry)}; | ||
| 38 | const IR::U1 flip_logic{is_signed ? ir.Imm1(false) | ||
| 39 | : ir.LogicalXor(ir.ILessThan(operand_1, zero, true), | ||
| 40 | ir.ILessThan(operand_2, zero, true))}; | ||
| 41 | switch (compare_op) { | ||
| 42 | case CompareOp::False: | ||
| 43 | return ir.Imm1(false); | ||
| 44 | case CompareOp::LessThan: | ||
| 45 | return IR::U1{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), | ||
| 46 | ir.ILessThan(intermediate, zero, true))}; | ||
| 47 | case CompareOp::Equal: | ||
| 48 | return ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag); | ||
| 49 | case CompareOp::LessThanEqual: { | ||
| 50 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.IGreaterThanEqual(intermediate, zero, true), | ||
| 51 | ir.ILessThan(intermediate, zero, true))}; | ||
| 52 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); | ||
| 53 | } | ||
| 54 | case CompareOp::GreaterThan: { | ||
| 55 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThanEqual(intermediate, zero, true), | ||
| 56 | ir.IGreaterThan(intermediate, zero, true))}; | ||
| 57 | const IR::U1 not_z{ir.LogicalNot(z_flag)}; | ||
| 58 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), not_z)); | ||
| 59 | } | ||
| 60 | case CompareOp::NotEqual: | ||
| 61 | return ir.LogicalOr(ir.INotEqual(intermediate, zero), | ||
| 62 | ir.LogicalAnd(ir.IEqual(intermediate, zero), ir.LogicalNot(z_flag))); | ||
| 63 | case CompareOp::GreaterThanEqual: { | ||
| 64 | const IR::U1 base_cmp{ir.Select(flip_logic, ir.ILessThan(intermediate, zero, true), | ||
| 65 | ir.IGreaterThanEqual(intermediate, zero, true))}; | ||
| 66 | return ir.LogicalOr(base_cmp, ir.LogicalAnd(ir.IEqual(intermediate, zero), z_flag)); | ||
| 67 | } | ||
| 68 | case CompareOp::True: | ||
| 69 | return ir.Imm1(true); | ||
| 70 | default: | ||
| 71 | throw NotImplementedException("Invalid compare op {}", compare_op); | ||
| 72 | } | ||
| 73 | } | ||
| 74 | |||
| 75 | IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, const IR::U1& predicate_2, | ||
| 76 | BooleanOp bop) { | ||
| 77 | switch (bop) { | ||
| 78 | case BooleanOp::AND: | ||
| 79 | return ir.LogicalAnd(predicate_1, predicate_2); | ||
| 80 | case BooleanOp::OR: | ||
| 81 | return ir.LogicalOr(predicate_1, predicate_2); | ||
| 82 | case BooleanOp::XOR: | ||
| 83 | return ir.LogicalXor(predicate_1, predicate_2); | ||
| 84 | default: | ||
| 85 | throw NotImplementedException("Invalid bop {}", bop); | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op) { | ||
| 90 | switch (op) { | ||
| 91 | case PredicateOp::False: | ||
| 92 | return ir.Imm1(false); | ||
| 93 | case PredicateOp::True: | ||
| 94 | return ir.Imm1(true); | ||
| 95 | case PredicateOp::Zero: | ||
| 96 | return ir.IEqual(result, ir.Imm32(0)); | ||
| 97 | case PredicateOp::NonZero: | ||
| 98 | return ir.INotEqual(result, ir.Imm32(0)); | ||
| 99 | default: | ||
| 100 | throw NotImplementedException("Invalid Predicate operation {}", op); | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | bool IsCompareOpOrdered(FPCompareOp op) { | ||
| 105 | switch (op) { | ||
| 106 | case FPCompareOp::LTU: | ||
| 107 | case FPCompareOp::EQU: | ||
| 108 | case FPCompareOp::LEU: | ||
| 109 | case FPCompareOp::GTU: | ||
| 110 | case FPCompareOp::NEU: | ||
| 111 | case FPCompareOp::GEU: | ||
| 112 | return false; | ||
| 113 | default: | ||
| 114 | return true; | ||
| 115 | } | ||
| 116 | } | ||
| 117 | |||
| 118 | IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 119 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 120 | IR::FpControl control) { | ||
| 121 | const bool ordered{IsCompareOpOrdered(compare_op)}; | ||
| 122 | switch (compare_op) { | ||
| 123 | case FPCompareOp::F: | ||
| 124 | return ir.Imm1(false); | ||
| 125 | case FPCompareOp::LT: | ||
| 126 | case FPCompareOp::LTU: | ||
| 127 | return ir.FPLessThan(operand_1, operand_2, control, ordered); | ||
| 128 | case FPCompareOp::EQ: | ||
| 129 | case FPCompareOp::EQU: | ||
| 130 | return ir.FPEqual(operand_1, operand_2, control, ordered); | ||
| 131 | case FPCompareOp::LE: | ||
| 132 | case FPCompareOp::LEU: | ||
| 133 | return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); | ||
| 134 | case FPCompareOp::GT: | ||
| 135 | case FPCompareOp::GTU: | ||
| 136 | return ir.FPGreaterThan(operand_1, operand_2, control, ordered); | ||
| 137 | case FPCompareOp::NE: | ||
| 138 | case FPCompareOp::NEU: | ||
| 139 | return ir.FPNotEqual(operand_1, operand_2, control, ordered); | ||
| 140 | case FPCompareOp::GE: | ||
| 141 | case FPCompareOp::GEU: | ||
| 142 | return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); | ||
| 143 | case FPCompareOp::NUM: | ||
| 144 | return ir.FPOrdered(operand_1, operand_2); | ||
| 145 | case FPCompareOp::Nan: | ||
| 146 | return ir.FPUnordered(operand_1, operand_2); | ||
| 147 | case FPCompareOp::T: | ||
| 148 | return ir.Imm1(true); | ||
| 149 | default: | ||
| 150 | throw NotImplementedException("Invalid FP compare op {}", compare_op); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h new file mode 100644 index 000000000..214d0af3c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h | |||
| @@ -0,0 +1,28 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | [[nodiscard]] IR::U1 IntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 12 | const IR::U32& operand_2, CompareOp compare_op, bool is_signed); | ||
| 13 | |||
| 14 | [[nodiscard]] IR::U1 ExtendedIntegerCompare(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 15 | const IR::U32& operand_2, CompareOp compare_op, | ||
| 16 | bool is_signed); | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U1 PredicateCombine(IR::IREmitter& ir, const IR::U1& predicate_1, | ||
| 19 | const IR::U1& predicate_2, BooleanOp bop); | ||
| 20 | |||
| 21 | [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); | ||
| 22 | |||
| 23 | [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); | ||
| 24 | |||
| 25 | [[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, | ||
| 26 | const IR::F16F32F64& operand_2, FPCompareOp compare_op, | ||
| 27 | IR::FpControl control = {}); | ||
| 28 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp new file mode 100644 index 000000000..420f2fb94 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/condition_code_set.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void TranslatorVisitor::CSET(u64 insn) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 17 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 18 | BitField<42, 1, u64> neg_bop_pred; | ||
| 19 | BitField<44, 1, u64> bf; | ||
| 20 | BitField<45, 2, BooleanOp> bop; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | } const cset{insn}; | ||
| 23 | |||
| 24 | const IR::U32 one_mask{ir.Imm32(-1)}; | ||
| 25 | const IR::U32 fp_one{ir.Imm32(0x3f800000)}; | ||
| 26 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 27 | const IR::U32 pass_result{cset.bf == 0 ? one_mask : fp_one}; | ||
| 28 | const IR::U1 cc_test_result{ir.GetFlowTestResult(cset.cc_test)}; | ||
| 29 | const IR::U1 bop_pred{ir.GetPred(cset.bop_pred, cset.neg_bop_pred != 0)}; | ||
| 30 | const IR::U1 pred_result{PredicateCombine(ir, cc_test_result, bop_pred, cset.bop)}; | ||
| 31 | const IR::U32 result{ir.Select(pred_result, pass_result, zero)}; | ||
| 32 | X(cset.dest_reg, result); | ||
| 33 | if (cset.cc != 0) { | ||
| 34 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 35 | SetZFlag(is_zero); | ||
| 36 | if (cset.bf != 0) { | ||
| 37 | ResetSFlag(); | ||
| 38 | } else { | ||
| 39 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 40 | } | ||
| 41 | ResetOFlag(); | ||
| 42 | ResetCFlag(); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::CSETP(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 50 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 51 | BitField<8, 5, IR::FlowTest> cc_test; | ||
| 52 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 53 | BitField<42, 1, u64> neg_bop_pred; | ||
| 54 | BitField<45, 2, BooleanOp> bop; | ||
| 55 | } const csetp{insn}; | ||
| 56 | |||
| 57 | const BooleanOp bop{csetp.bop}; | ||
| 58 | const IR::U1 bop_pred{ir.GetPred(csetp.bop_pred, csetp.neg_bop_pred != 0)}; | ||
| 59 | const IR::U1 cc_test_result{ir.GetFlowTestResult(csetp.cc_test)}; | ||
| 60 | const IR::U1 result_a{PredicateCombine(ir, cc_test_result, bop_pred, bop)}; | ||
| 61 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(cc_test_result), bop_pred, bop)}; | ||
| 62 | ir.SetPred(csetp.dest_pred_a, result_a); | ||
| 63 | ir.SetPred(csetp.dest_pred_b, result_b); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp new file mode 100644 index 000000000..5a1b3a8fc --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_add.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DADD(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<45, 1, u64> neg_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> neg_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const dadd{insn}; | ||
| 25 | if (dadd.cc != 0) { | ||
| 26 | throw NotImplementedException("DADD CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::F64 src_a{v.D(dadd.src_a_reg)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(src_a, dadd.abs_a != 0, dadd.neg_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dadd.abs_b != 0, dadd.neg_b != 0)}; | ||
| 32 | |||
| 33 | const IR::FpControl control{ | ||
| 34 | .no_contraction = true, | ||
| 35 | .rounding = CastFpRounding(dadd.fp_rounding), | ||
| 36 | .fmz_mode = IR::FmzMode::None, | ||
| 37 | }; | ||
| 38 | |||
| 39 | v.D(dadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DADD_reg(u64 insn) { | ||
| 44 | DADD(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DADD_cbuf(u64 insn) { | ||
| 48 | DADD(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DADD_imm(u64 insn) { | ||
| 52 | DADD(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp new file mode 100644 index 000000000..1173192e4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_compare_and_set.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSET(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | } const dset{insn}; | ||
| 28 | |||
| 29 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dset.src_a_reg), dset.abs_a != 0, dset.negate_a != 0)}; | ||
| 30 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dset.abs_b != 0, dset.negate_b != 0)}; | ||
| 31 | |||
| 32 | IR::U1 pred{v.ir.GetPred(dset.pred)}; | ||
| 33 | if (dset.neg_pred != 0) { | ||
| 34 | pred = v.ir.LogicalNot(pred); | ||
| 35 | } | ||
| 36 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, dset.compare_op)}; | ||
| 37 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, dset.bop)}; | ||
| 38 | |||
| 39 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 40 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 41 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 42 | const IR::U32 pass_result{dset.bf == 0 ? one_mask : fp_one}; | ||
| 43 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 44 | |||
| 45 | v.X(dset.dest_reg, result); | ||
| 46 | if (dset.cc != 0) { | ||
| 47 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 48 | v.SetZFlag(is_zero); | ||
| 49 | if (dset.bf != 0) { | ||
| 50 | v.ResetSFlag(); | ||
| 51 | } else { | ||
| 52 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 53 | } | ||
| 54 | v.ResetCFlag(); | ||
| 55 | v.ResetOFlag(); | ||
| 56 | } | ||
| 57 | } | ||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::DSET_reg(u64 insn) { | ||
| 61 | DSET(*this, insn, GetDoubleReg20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::DSET_cbuf(u64 insn) { | ||
| 65 | DSET(*this, insn, GetDoubleCbuf(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::DSET_imm(u64 insn) { | ||
| 69 | DSET(*this, insn, GetDoubleImm20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp new file mode 100644 index 000000000..f66097014 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_fused_multiply_add.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DFMA(TranslatorVisitor& v, u64 insn, const IR::F64& src_b, const IR::F64& src_c) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<50, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg_b; | ||
| 21 | BitField<49, 1, u64> neg_c; | ||
| 22 | } const dfma{insn}; | ||
| 23 | |||
| 24 | if (dfma.cc != 0) { | ||
| 25 | throw NotImplementedException("DFMA CC"); | ||
| 26 | } | ||
| 27 | |||
| 28 | const IR::F64 src_a{v.D(dfma.src_a_reg)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, false, dfma.neg_b != 0)}; | ||
| 30 | const IR::F64 op_c{v.ir.FPAbsNeg(src_c, false, dfma.neg_c != 0)}; | ||
| 31 | |||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = true, | ||
| 34 | .rounding = CastFpRounding(dfma.fp_rounding), | ||
| 35 | .fmz_mode = IR::FmzMode::None, | ||
| 36 | }; | ||
| 37 | |||
| 38 | v.D(dfma.dest_reg, v.ir.FPFma(src_a, op_b, op_c, control)); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DFMA_reg(u64 insn) { | ||
| 43 | DFMA(*this, insn, GetDoubleReg20(insn), GetDoubleReg39(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DFMA_cr(u64 insn) { | ||
| 47 | DFMA(*this, insn, GetDoubleCbuf(insn), GetDoubleReg39(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DFMA_rc(u64 insn) { | ||
| 51 | DFMA(*this, insn, GetDoubleReg39(insn), GetDoubleCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::DFMA_imm(u64 insn) { | ||
| 55 | DFMA(*this, insn, GetDoubleImm20(insn), GetDoubleReg39(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp new file mode 100644 index 000000000..6b551847c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_min_max.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void DMNMX(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<45, 1, u64> negate_b; | ||
| 19 | BitField<46, 1, u64> abs_a; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> negate_a; | ||
| 22 | BitField<49, 1, u64> abs_b; | ||
| 23 | } const dmnmx{insn}; | ||
| 24 | |||
| 25 | if (dmnmx.cc != 0) { | ||
| 26 | throw NotImplementedException("DMNMX CC"); | ||
| 27 | } | ||
| 28 | |||
| 29 | const IR::U1 pred{v.ir.GetPred(dmnmx.pred)}; | ||
| 30 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dmnmx.src_a_reg), dmnmx.abs_a != 0, dmnmx.negate_a != 0)}; | ||
| 31 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dmnmx.abs_b != 0, dmnmx.negate_b != 0)}; | ||
| 32 | |||
| 33 | IR::F64 max{v.ir.FPMax(op_a, op_b)}; | ||
| 34 | IR::F64 min{v.ir.FPMin(op_a, op_b)}; | ||
| 35 | |||
| 36 | if (dmnmx.neg_pred != 0) { | ||
| 37 | std::swap(min, max); | ||
| 38 | } | ||
| 39 | v.D(dmnmx.dest_reg, IR::F64{v.ir.Select(pred, min, max)}); | ||
| 40 | } | ||
| 41 | } // Anonymous namespace | ||
| 42 | |||
| 43 | void TranslatorVisitor::DMNMX_reg(u64 insn) { | ||
| 44 | DMNMX(*this, insn, GetDoubleReg20(insn)); | ||
| 45 | } | ||
| 46 | |||
| 47 | void TranslatorVisitor::DMNMX_cbuf(u64 insn) { | ||
| 48 | DMNMX(*this, insn, GetDoubleCbuf(insn)); | ||
| 49 | } | ||
| 50 | |||
| 51 | void TranslatorVisitor::DMNMX_imm(u64 insn) { | ||
| 52 | DMNMX(*this, insn, GetDoubleImm20(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp new file mode 100644 index 000000000..c0159fb65 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_multiply.cpp | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | |||
| 13 | void DMUL(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 18 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> neg; | ||
| 21 | } const dmul{insn}; | ||
| 22 | |||
| 23 | if (dmul.cc != 0) { | ||
| 24 | throw NotImplementedException("DMUL CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | const IR::F64 src_a{v.ir.FPAbsNeg(v.D(dmul.src_a_reg), false, dmul.neg != 0)}; | ||
| 28 | const IR::FpControl control{ | ||
| 29 | .no_contraction = true, | ||
| 30 | .rounding = CastFpRounding(dmul.fp_rounding), | ||
| 31 | .fmz_mode = IR::FmzMode::None, | ||
| 32 | }; | ||
| 33 | |||
| 34 | v.D(dmul.dest_reg, v.ir.FPMul(src_a, src_b, control)); | ||
| 35 | } | ||
| 36 | } // Anonymous namespace | ||
| 37 | |||
| 38 | void TranslatorVisitor::DMUL_reg(u64 insn) { | ||
| 39 | DMUL(*this, insn, GetDoubleReg20(insn)); | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::DMUL_cbuf(u64 insn) { | ||
| 43 | DMUL(*this, insn, GetDoubleCbuf(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DMUL_imm(u64 insn) { | ||
| 47 | DMUL(*this, insn, GetDoubleImm20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp new file mode 100644 index 000000000..b8e74ee44 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/double_set_predicate.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void DSETP(TranslatorVisitor& v, u64 insn, const IR::F64& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 26 | } const dsetp{insn}; | ||
| 27 | |||
| 28 | const IR::F64 op_a{v.ir.FPAbsNeg(v.D(dsetp.src_a_reg), dsetp.abs_a != 0, dsetp.negate_a != 0)}; | ||
| 29 | const IR::F64 op_b{v.ir.FPAbsNeg(src_b, dsetp.abs_b != 0, dsetp.negate_b != 0)}; | ||
| 30 | |||
| 31 | const BooleanOp bop{dsetp.bop}; | ||
| 32 | const FPCompareOp compare_op{dsetp.compare_op}; | ||
| 33 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op)}; | ||
| 34 | const IR::U1 bop_pred{v.ir.GetPred(dsetp.bop_pred, dsetp.neg_bop_pred != 0)}; | ||
| 35 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 36 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 37 | v.ir.SetPred(dsetp.dest_pred_a, result_a); | ||
| 38 | v.ir.SetPred(dsetp.dest_pred_b, result_b); | ||
| 39 | } | ||
| 40 | } // Anonymous namespace | ||
| 41 | |||
| 42 | void TranslatorVisitor::DSETP_reg(u64 insn) { | ||
| 43 | DSETP(*this, insn, GetDoubleReg20(insn)); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::DSETP_cbuf(u64 insn) { | ||
| 47 | DSETP(*this, insn, GetDoubleCbuf(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::DSETP_imm(u64 insn) { | ||
| 51 | DSETP(*this, insn, GetDoubleImm20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp new file mode 100644 index 000000000..c2443c886 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/exit_program.cpp | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ExitFragment(TranslatorVisitor& v) { | ||
| 12 | const ProgramHeader sph{v.env.SPH()}; | ||
| 13 | IR::Reg src_reg{IR::Reg::R0}; | ||
| 14 | for (u32 render_target = 0; render_target < 8; ++render_target) { | ||
| 15 | const std::array<bool, 4> mask{sph.ps.EnabledOutputComponents(render_target)}; | ||
| 16 | for (u32 component = 0; component < 4; ++component) { | ||
| 17 | if (!mask[component]) { | ||
| 18 | continue; | ||
| 19 | } | ||
| 20 | v.ir.SetFragColor(render_target, component, v.F(src_reg)); | ||
| 21 | ++src_reg; | ||
| 22 | } | ||
| 23 | } | ||
| 24 | if (sph.ps.omap.sample_mask != 0) { | ||
| 25 | v.ir.SetSampleMask(v.X(src_reg)); | ||
| 26 | } | ||
| 27 | if (sph.ps.omap.depth != 0) { | ||
| 28 | v.ir.SetFragDepth(v.F(src_reg + 1)); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::EXIT() { | ||
| 34 | switch (env.ShaderStage()) { | ||
| 35 | case Stage::Fragment: | ||
| 36 | ExitFragment(*this); | ||
| 37 | break; | ||
| 38 | default: | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | } | ||
| 42 | |||
| 43 | } // namespace Shader::Maxwell | ||
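ExitFragment walks the render targets in order and assigns consecutive registers, starting at R0, to each enabled output component; sample mask and depth, when present, use the registers that follow. A standalone sketch (not part of the diff) of that packing with hypothetical enable masks; the real masks come from the SPH:

    #include <array>
    #include <cstddef>
    #include <cstdio>

    int main() {
        // Hypothetical example: RT0 writes RGBA, RT1 writes only R.
        const std::array<std::array<bool, 4>, 2> masks{{
            {true, true, true, true},
            {true, false, false, false},
        }};
        int reg = 0; // R0
        for (std::size_t rt = 0; rt < masks.size(); ++rt) {
            for (int comp = 0; comp < 4; ++comp) {
                if (!masks[rt][comp]) {
                    continue;
                }
                std::printf("RT%zu.%c <- R%d\n", rt, "RGBA"[comp], reg++);
            }
        }
        // In the code above, an enabled sample mask then reads R<reg> and an
        // enabled depth output reads R<reg + 1>.
    }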
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp new file mode 100644 index 000000000..f0cb25d61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/find_leading_one.cpp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FLO(TranslatorVisitor& v, u64 insn, IR::U32 src) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | BitField<41, 1, u64> shift; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 1, u64> is_signed; | ||
| 19 | } const flo{insn}; | ||
| 20 | |||
| 21 | if (flo.cc != 0) { | ||
| 22 | throw NotImplementedException("CC"); | ||
| 23 | } | ||
| 24 | if (flo.tilde != 0) { | ||
| 25 | src = v.ir.BitwiseNot(src); | ||
| 26 | } | ||
| 27 | IR::U32 result{flo.is_signed != 0 ? v.ir.FindSMsb(src) : v.ir.FindUMsb(src)}; | ||
| 28 | if (flo.shift != 0) { | ||
| 29 | const IR::U1 not_found{v.ir.IEqual(result, v.ir.Imm32(-1))}; | ||
| 30 | result = IR::U32{v.ir.Select(not_found, result, v.ir.BitwiseXor(result, v.ir.Imm32(31)))}; | ||
| 31 | } | ||
| 32 | v.X(flo.dest_reg, result); | ||
| 33 | } | ||
| 34 | } // Anonymous namespace | ||
| 35 | |||
| 36 | void TranslatorVisitor::FLO_reg(u64 insn) { | ||
| 37 | FLO(*this, insn, GetReg20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::FLO_cbuf(u64 insn) { | ||
| 41 | FLO(*this, insn, GetCbuf(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::FLO_imm(u64 insn) { | ||
| 45 | FLO(*this, insn, GetImm20(insn)); | ||
| 46 | } | ||
| 47 | } // namespace Shader::Maxwell | ||
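The FLO.SH path above turns the index of the most significant set bit into a shift amount. XORing an in-range index with 31 is the same as computing 31 minus the index, and the surrounding Select keeps the 0xFFFFFFFF "not found" value untouched. A standalone sketch (not part of the diff) checking that equivalence:

    #include <cassert>
    #include <cstdint>

    int main() {
        // For any bit index in [0, 31], flipping the low five bits is the same
        // as subtracting the index from 31, which is what FLO.SH wants.
        for (std::uint32_t msb = 0; msb < 32; ++msb) {
            assert((msb ^ 31u) == 31u - msb);
        }
    }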
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp new file mode 100644 index 000000000..b8c89810c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_add.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, | ||
| 13 | const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const fadd{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FADD CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; | ||
| 25 | IR::FpControl control{ | ||
| 26 | .no_contraction = true, | ||
| 27 | .rounding = CastFpRounding(fp_rounding), | ||
| 28 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 29 | }; | ||
| 30 | IR::F32 value{v.ir.FPAdd(op_a, op_b, control)}; | ||
| 31 | if (sat) { | ||
| 32 | value = v.ir.FPSaturate(value); | ||
| 33 | } | ||
| 34 | v.F(fadd.dest_reg, value); | ||
| 35 | } | ||
| 36 | |||
| 37 | void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 38 | union { | ||
| 39 | u64 raw; | ||
| 40 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> neg_b; | ||
| 43 | BitField<46, 1, u64> abs_a; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> neg_a; | ||
| 46 | BitField<49, 1, u64> abs_b; | ||
| 47 | BitField<50, 1, u64> sat; | ||
| 48 | } const fadd{insn}; | ||
| 49 | |||
| 50 | FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b, | ||
| 51 | fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::FADD_reg(u64 insn) { | ||
| 56 | FADD(*this, insn, GetFloatReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::FADD_cbuf(u64 insn) { | ||
| 60 | FADD(*this, insn, GetFloatCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::FADD_imm(u64 insn) { | ||
| 64 | FADD(*this, insn, GetFloatImm20(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FADD32I(u64 insn) { | ||
| 68 | union { | ||
| 69 | u64 raw; | ||
| 70 | BitField<55, 1, u64> ftz; | ||
| 71 | BitField<56, 1, u64> neg_a; | ||
| 72 | BitField<54, 1, u64> abs_a; | ||
| 73 | BitField<52, 1, u64> cc; | ||
| 74 | BitField<53, 1, u64> neg_b; | ||
| 75 | BitField<57, 1, u64> abs_b; | ||
| 76 | } const fadd32i{insn}; | ||
| 77 | |||
| 78 | FADD(*this, insn, false, fadd32i.cc != 0, fadd32i.ftz != 0, FpRounding::RN, GetFloatImm32(insn), | ||
| 79 | fadd32i.abs_a != 0, fadd32i.neg_a != 0, fadd32i.abs_b != 0, fadd32i.neg_b != 0); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp new file mode 100644 index 000000000..7127ebf54 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<47, 1, u64> ftz; | ||
| 18 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 19 | } const fcmp{insn}; | ||
| 20 | |||
| 21 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 22 | const IR::FpControl control{.fmz_mode = (fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None)}; | ||
| 23 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)}; | ||
| 24 | const IR::U32 src_reg{v.X(fcmp.src_reg)}; | ||
| 25 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 26 | |||
| 27 | v.X(fcmp.dest_reg, result); | ||
| 28 | } | ||
| 29 | } // Anonymous namespace | ||
| 30 | |||
| 31 | void TranslatorVisitor::FCMP_reg(u64 insn) { | ||
| 32 | FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn)); | ||
| 33 | } | ||
| 34 | |||
| 35 | void TranslatorVisitor::FCMP_rc(u64 insn) { | ||
| 36 | FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn)); | ||
| 37 | } | ||
| 38 | |||
| 39 | void TranslatorVisitor::FCMP_cr(u64 insn) { | ||
| 40 | FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn)); | ||
| 41 | } | ||
| 42 | |||
| 43 | void TranslatorVisitor::FCMP_imm(u64 insn) { | ||
| 44 | union { | ||
| 45 | u64 raw; | ||
| 46 | BitField<20, 19, u64> value; | ||
| 47 | BitField<56, 1, u64> is_negative; | ||
| 48 | } const fcmp{insn}; | ||
| 49 | const u32 sign_bit{fcmp.is_negative != 0 ? (1U << 31) : 0}; | ||
| 50 | const u32 value{static_cast<u32>(fcmp.value) << 12}; | ||
| 51 | |||
| 52 | FCMP(*this, insn, ir.Imm32(value | sign_bit), GetFloatReg39(insn)); | ||
| 53 | } | ||
| 54 | |||
| 55 | } // namespace Shader::Maxwell | ||
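FCMP_imm rebuilds a 32-bit float from the 19-bit immediate: the bits land in the upper exponent and mantissa positions (shifted left by 12) and bit 56 supplies the sign. A standalone sketch (not part of the diff) of that expansion; the sample immediate is made up:

    #include <bit>     // std::bit_cast, C++20
    #include <cstdint>
    #include <cstdio>

    // Mirror the `value << 12` plus sign-bit logic in FCMP_imm.
    float ExpandImm19(std::uint32_t imm19, bool is_negative) {
        const std::uint32_t sign_bit = is_negative ? (1u << 31) : 0u;
        return std::bit_cast<float>((imm19 << 12) | sign_bit);
    }

    int main() {
        // 0x3f800 << 12 == 0x3f800000, the IEEE-754 pattern for 1.0f; with the
        // sign bit set it becomes -1.0f.
        std::printf("%f %f\n", ExpandImm19(0x3f800, false), ExpandImm19(0x3f800, true));
    }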
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..eece4f28f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | |||
| @@ -0,0 +1,78 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | BitField<43, 1, u64> negate_a; | ||
| 20 | BitField<44, 1, u64> abs_b; | ||
| 21 | BitField<45, 2, BooleanOp> bop; | ||
| 22 | BitField<47, 1, u64> cc; | ||
| 23 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 24 | BitField<52, 1, u64> bf; | ||
| 25 | BitField<53, 1, u64> negate_b; | ||
| 26 | BitField<54, 1, u64> abs_a; | ||
| 27 | BitField<55, 1, u64> ftz; | ||
| 28 | } const fset{insn}; | ||
| 29 | |||
| 30 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; | ||
| 31 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)}; | ||
| 32 | const IR::FpControl control{ | ||
| 33 | .no_contraction = false, | ||
| 34 | .rounding = IR::FpRounding::DontCare, | ||
| 35 | .fmz_mode = (fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 36 | }; | ||
| 37 | |||
| 38 | IR::U1 pred{v.ir.GetPred(fset.pred)}; | ||
| 39 | if (fset.neg_pred != 0) { | ||
| 40 | pred = v.ir.LogicalNot(pred); | ||
| 41 | } | ||
| 42 | const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 48 | const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; | ||
| 49 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 50 | |||
| 51 | v.X(fset.dest_reg, result); | ||
| 52 | if (fset.cc != 0) { | ||
| 53 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 54 | v.SetZFlag(is_zero); | ||
| 55 | if (fset.bf != 0) { | ||
| 56 | v.ResetSFlag(); | ||
| 57 | } else { | ||
| 58 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 59 | } | ||
| 60 | v.ResetCFlag(); | ||
| 61 | v.ResetOFlag(); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::FSET_reg(u64 insn) { | ||
| 67 | FSET(*this, insn, GetFloatReg20(insn)); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::FSET_cbuf(u64 insn) { | ||
| 71 | FSET(*this, insn, GetFloatCbuf(insn)); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::FSET_imm(u64 insn) { | ||
| 75 | FSET(*this, insn, GetFloatImm20(insn)); | ||
| 76 | } | ||
| 77 | |||
| 78 | } // namespace Shader::Maxwell | ||
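FSET writes one of two "true" encodings depending on the BF bit: an all-ones integer mask when BF is clear, or the IEEE-754 pattern for 1.0f when BF is set; a false comparison always writes zero. A standalone sketch (not part of the diff) of that selection:

    #include <cstdint>
    #include <cstdio>

    std::uint32_t FsetResult(bool comparison_true, bool boolean_float) {
        if (!comparison_true) {
            return 0u;
        }
        return boolean_float ? 0x3f800000u  // 1.0f, BF set
                             : 0xffffffffu; // all-ones mask, BF clear
    }

    int main() {
        std::printf("%08x %08x %08x\n", FsetResult(true, true), FsetResult(true, false),
                    FsetResult(false, true));
    }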
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp new file mode 100644 index 000000000..02ab023c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp | |||
| @@ -0,0 +1,214 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | namespace { | ||
| 10 | enum class FloatFormat : u64 { | ||
| 11 | F16 = 1, | ||
| 12 | F32 = 2, | ||
| 13 | F64 = 3, | ||
| 14 | }; | ||
| 15 | |||
| 16 | enum class RoundingOp : u64 { | ||
| 17 | None = 0, | ||
| 18 | Pass = 3, | ||
| 19 | Round = 8, | ||
| 20 | Floor = 9, | ||
| 21 | Ceil = 10, | ||
| 22 | Trunc = 11, | ||
| 23 | }; | ||
| 24 | |||
| 25 | [[nodiscard]] u32 WidthSize(FloatFormat width) { | ||
| 26 | switch (width) { | ||
| 27 | case FloatFormat::F16: | ||
| 28 | return 16; | ||
| 29 | case FloatFormat::F32: | ||
| 30 | return 32; | ||
| 31 | case FloatFormat::F64: | ||
| 32 | return 64; | ||
| 33 | default: | ||
| 34 | throw NotImplementedException("Invalid width {}", width); | ||
| 35 | } | ||
| 36 | } | ||
| 37 | |||
| 38 | void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) { | ||
| 39 | union { | ||
| 40 | u64 insn; | ||
| 41 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 42 | BitField<44, 1, u64> ftz; | ||
| 43 | BitField<45, 1, u64> neg; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<50, 1, u64> sat; | ||
| 46 | BitField<39, 4, u64> rounding_op; | ||
| 47 | BitField<39, 2, FpRounding> rounding; | ||
| 48 | BitField<10, 2, FloatFormat> src_size; | ||
| 49 | BitField<8, 2, FloatFormat> dst_size; | ||
| 50 | |||
| 51 | [[nodiscard]] RoundingOp RoundingOperation() const { | ||
| 52 | constexpr u64 rounding_mask = 0x0B; | ||
| 53 | return static_cast<RoundingOp>(rounding_op.Value() & rounding_mask); | ||
| 54 | } | ||
| 55 | } const f2f{insn}; | ||
| 56 | |||
| 57 | if (f2f.cc != 0) { | ||
| 58 | throw NotImplementedException("F2F CC"); | ||
| 59 | } | ||
| 60 | |||
| 61 | IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)}; | ||
| 62 | |||
| 63 | const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64}; | ||
| 64 | IR::FpControl fp_control{ | ||
| 65 | .no_contraction = false, | ||
| 66 | .rounding = IR::FpRounding::DontCare, | ||
| 67 | .fmz_mode = (f2f.ftz != 0 && !any_fp64 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 68 | }; | ||
| 69 | if (f2f.src_size != f2f.dst_size) { | ||
| 70 | fp_control.rounding = CastFpRounding(f2f.rounding); | ||
| 71 | input = v.ir.FPConvert(WidthSize(f2f.dst_size), input, fp_control); | ||
| 72 | } else { | ||
| 73 | switch (f2f.RoundingOperation()) { | ||
| 74 | case RoundingOp::None: | ||
| 75 | case RoundingOp::Pass: | ||
| 76 | // Make sure NaNs are handled properly | ||
| 77 | switch (f2f.src_size) { | ||
| 78 | case FloatFormat::F16: | ||
| 79 | input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control); | ||
| 80 | break; | ||
| 81 | case FloatFormat::F32: | ||
| 82 | input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control); | ||
| 83 | break; | ||
| 84 | case FloatFormat::F64: | ||
| 85 | input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control); | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | break; | ||
| 89 | case RoundingOp::Round: | ||
| 90 | input = v.ir.FPRoundEven(input, fp_control); | ||
| 91 | break; | ||
| 92 | case RoundingOp::Floor: | ||
| 93 | input = v.ir.FPFloor(input, fp_control); | ||
| 94 | break; | ||
| 95 | case RoundingOp::Ceil: | ||
| 96 | input = v.ir.FPCeil(input, fp_control); | ||
| 97 | break; | ||
| 98 | case RoundingOp::Trunc: | ||
| 99 | input = v.ir.FPTrunc(input, fp_control); | ||
| 100 | break; | ||
| 101 | default: | ||
| 102 | throw NotImplementedException("Unimplemented rounding mode {}", f2f.rounding.Value()); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | if (f2f.sat != 0 && !any_fp64) { | ||
| 106 | input = v.ir.FPSaturate(input); | ||
| 107 | } | ||
| 108 | |||
| 109 | switch (f2f.dst_size) { | ||
| 110 | case FloatFormat::F16: { | ||
| 111 | const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 112 | v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm))); | ||
| 113 | break; | ||
| 114 | } | ||
| 115 | case FloatFormat::F32: | ||
| 116 | v.F(f2f.dest_reg, input); | ||
| 117 | break; | ||
| 118 | case FloatFormat::F64: | ||
| 119 | v.D(f2f.dest_reg, input); | ||
| 120 | break; | ||
| 121 | default: | ||
| 122 | throw NotImplementedException("Invalid dest format {}", f2f.dst_size.Value()); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | } // Anonymous namespace | ||
| 126 | |||
| 127 | void TranslatorVisitor::F2F_reg(u64 insn) { | ||
| 128 | union { | ||
| 129 | u64 insn; | ||
| 130 | BitField<49, 1, u64> abs; | ||
| 131 | BitField<10, 2, FloatFormat> src_size; | ||
| 132 | BitField<41, 1, u64> selector; | ||
| 133 | } const f2f{insn}; | ||
| 134 | |||
| 135 | IR::F16F32F64 src_a; | ||
| 136 | switch (f2f.src_size) { | ||
| 137 | case FloatFormat::F16: { | ||
| 138 | auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)}; | ||
| 139 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 140 | break; | ||
| 141 | } | ||
| 142 | case FloatFormat::F32: | ||
| 143 | src_a = GetFloatReg20(insn); | ||
| 144 | break; | ||
| 145 | case FloatFormat::F64: | ||
| 146 | src_a = GetDoubleReg20(insn); | ||
| 147 | break; | ||
| 148 | default: | ||
| 149 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 150 | } | ||
| 151 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::F2F_cbuf(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 insn; | ||
| 157 | BitField<49, 1, u64> abs; | ||
| 158 | BitField<10, 2, FloatFormat> src_size; | ||
| 159 | BitField<41, 1, u64> selector; | ||
| 160 | } const f2f{insn}; | ||
| 161 | |||
| 162 | IR::F16F32F64 src_a; | ||
| 163 | switch (f2f.src_size) { | ||
| 164 | case FloatFormat::F16: { | ||
| 165 | auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)}; | ||
| 166 | src_a = f2f.selector != 0 ? rhs_a : lhs_a; | ||
| 167 | break; | ||
| 168 | } | ||
| 169 | case FloatFormat::F32: | ||
| 170 | src_a = GetFloatCbuf(insn); | ||
| 171 | break; | ||
| 172 | case FloatFormat::F64: | ||
| 173 | src_a = GetDoubleCbuf(insn); | ||
| 174 | break; | ||
| 175 | default: | ||
| 176 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 177 | } | ||
| 178 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 179 | } | ||
| 180 | |||
| 181 | void TranslatorVisitor::F2F_imm(u64 insn) { | ||
| 182 | union { | ||
| 183 | u64 insn; | ||
| 184 | BitField<49, 1, u64> abs; | ||
| 185 | BitField<10, 2, FloatFormat> src_size; | ||
| 186 | BitField<41, 1, u64> selector; | ||
| 187 | BitField<20, 19, u64> imm; | ||
| 188 | BitField<56, 1, u64> imm_neg; | ||
| 189 | } const f2f{insn}; | ||
| 190 | |||
| 191 | IR::F16F32F64 src_a; | ||
| 192 | switch (f2f.src_size) { | ||
| 193 | case FloatFormat::F16: { | ||
| 194 | const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)}; | ||
| 195 | const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))}; | ||
| 196 | src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)}; | ||
| 197 | if (f2f.imm_neg != 0) { | ||
| 198 | throw NotImplementedException("Neg bit on F16"); | ||
| 199 | } | ||
| 200 | break; | ||
| 201 | } | ||
| 202 | case FloatFormat::F32: | ||
| 203 | src_a = GetFloatImm20(insn); | ||
| 204 | break; | ||
| 205 | case FloatFormat::F64: | ||
| 206 | src_a = GetDoubleImm20(insn); | ||
| 207 | break; | ||
| 208 | default: | ||
| 209 | throw NotImplementedException("Invalid source format {}", f2f.src_size.Value()); | ||
| 210 | } | ||
| 211 | F2F(*this, insn, src_a, f2f.abs != 0); | ||
| 212 | } | ||
| 213 | |||
| 214 | } // namespace Shader::Maxwell | ||
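When the source and destination sizes match, F2F still routes the value through an FPAdd with +0.0 so that NaN inputs pass through a real arithmetic op (letting the backend quiet or flush them) instead of being copied verbatim. A standalone sketch (not part of the diff) of the host-side intuition: adding +0.0f leaves ordinary values alone and still yields NaN for NaN:

    #include <cmath>
    #include <cstdio>
    #include <limits>

    int main() {
        const float qnan = std::numeric_limits<float>::quiet_NaN();
        const float x = 1.5f;
        std::printf("%d %d\n", std::isnan(qnan + 0.0f), (x + 0.0f) == x);
    }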
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp new file mode 100644 index 000000000..92b1ce015 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | |||
| @@ -0,0 +1,253 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class DestFormat : u64 { | ||
| 15 | Invalid, | ||
| 16 | I16, | ||
| 17 | I32, | ||
| 18 | I64, | ||
| 19 | }; | ||
| 20 | enum class SrcFormat : u64 { | ||
| 21 | Invalid, | ||
| 22 | F16, | ||
| 23 | F32, | ||
| 24 | F64, | ||
| 25 | }; | ||
| 26 | enum class Rounding : u64 { | ||
| 27 | Round, | ||
| 28 | Floor, | ||
| 29 | Ceil, | ||
| 30 | Trunc, | ||
| 31 | }; | ||
| 32 | |||
| 33 | union F2I { | ||
| 34 | u64 raw; | ||
| 35 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 36 | BitField<8, 2, DestFormat> dest_format; | ||
| 37 | BitField<10, 2, SrcFormat> src_format; | ||
| 38 | BitField<12, 1, u64> is_signed; | ||
| 39 | BitField<39, 2, Rounding> rounding; | ||
| 40 | BitField<41, 1, u64> half; | ||
| 41 | BitField<44, 1, u64> ftz; | ||
| 42 | BitField<45, 1, u64> abs; | ||
| 43 | BitField<47, 1, u64> cc; | ||
| 44 | BitField<49, 1, u64> neg; | ||
| 45 | }; | ||
| 46 | |||
| 47 | size_t BitSize(DestFormat dest_format) { | ||
| 48 | switch (dest_format) { | ||
| 49 | case DestFormat::I16: | ||
| 50 | return 16; | ||
| 51 | case DestFormat::I32: | ||
| 52 | return 32; | ||
| 53 | case DestFormat::I64: | ||
| 54 | return 64; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid destination format {}", dest_format); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | std::pair<f64, f64> ClampBounds(DestFormat format, bool is_signed) { | ||
| 61 | if (is_signed) { | ||
| 62 | switch (format) { | ||
| 63 | case DestFormat::I16: | ||
| 64 | return {static_cast<f64>(std::numeric_limits<s16>::max()), | ||
| 65 | static_cast<f64>(std::numeric_limits<s16>::min())}; | ||
| 66 | case DestFormat::I32: | ||
| 67 | return {static_cast<f64>(std::numeric_limits<s32>::max()), | ||
| 68 | static_cast<f64>(std::numeric_limits<s32>::min())}; | ||
| 69 | case DestFormat::I64: | ||
| 70 | return {static_cast<f64>(std::numeric_limits<s64>::max()), | ||
| 71 | static_cast<f64>(std::numeric_limits<s64>::min())}; | ||
| 72 | default: | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } else { | ||
| 76 | switch (format) { | ||
| 77 | case DestFormat::I16: | ||
| 78 | return {static_cast<f64>(std::numeric_limits<u16>::max()), | ||
| 79 | static_cast<f64>(std::numeric_limits<u16>::min())}; | ||
| 80 | case DestFormat::I32: | ||
| 81 | return {static_cast<f64>(std::numeric_limits<u32>::max()), | ||
| 82 | static_cast<f64>(std::numeric_limits<u32>::min())}; | ||
| 83 | case DestFormat::I64: | ||
| 84 | return {static_cast<f64>(std::numeric_limits<u64>::max()), | ||
| 85 | static_cast<f64>(std::numeric_limits<u64>::min())}; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | throw NotImplementedException("Invalid destination format {}", format); | ||
| 91 | } | ||
| 92 | |||
| 93 | IR::F64 UnpackCbuf(TranslatorVisitor& v, u64 insn) { | ||
| 94 | union { | ||
| 95 | u64 raw; | ||
| 96 | BitField<20, 14, s64> offset; | ||
| 97 | BitField<34, 5, u64> binding; | ||
| 98 | } const cbuf{insn}; | ||
| 99 | if (cbuf.binding >= 18) { | ||
| 100 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 101 | } | ||
| 102 | if (cbuf.offset >= 0x4'000 || cbuf.offset < 0) { | ||
| 103 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset * 4); | ||
| 104 | } | ||
| 105 | if (cbuf.offset % 2 != 0) { | ||
| 106 | throw NotImplementedException("Unaligned F64 constant buffer offset {}", cbuf.offset * 4); | ||
| 107 | } | ||
| 108 | const IR::U32 binding{v.ir.Imm32(static_cast<u32>(cbuf.binding))}; | ||
| 109 | const IR::U32 byte_offset{v.ir.Imm32(static_cast<u32>(cbuf.offset) * 4 + 4)}; | ||
| 110 | const IR::U32 cbuf_data{v.ir.GetCbuf(binding, byte_offset)}; | ||
| 111 | const IR::Value vector{v.ir.CompositeConstruct(v.ir.Imm32(0U), cbuf_data)}; | ||
| 112 | return v.ir.PackDouble2x32(vector); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | ||
| 116 | // F2I is used to convert from a floating point value to an integer | ||
| 117 | const F2I f2i{insn}; | ||
| 118 | |||
| 119 | const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && | ||
| 120 | f2i.dest_format != DestFormat::I64}; | ||
| 121 | IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; | ||
| 122 | if (denorm_cares) { | ||
| 123 | fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; | ||
| 124 | } | ||
| 125 | const IR::FpControl fp_control{ | ||
| 126 | .no_contraction = true, | ||
| 127 | .rounding = IR::FpRounding::DontCare, | ||
| 128 | .fmz_mode = fmz_mode, | ||
| 129 | }; | ||
| 130 | const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; | ||
| 131 | const IR::F16F32F64 rounded_value{[&] { | ||
| 132 | switch (f2i.rounding) { | ||
| 133 | case Rounding::Round: | ||
| 134 | return v.ir.FPRoundEven(op_a, fp_control); | ||
| 135 | case Rounding::Floor: | ||
| 136 | return v.ir.FPFloor(op_a, fp_control); | ||
| 137 | case Rounding::Ceil: | ||
| 138 | return v.ir.FPCeil(op_a, fp_control); | ||
| 139 | case Rounding::Trunc: | ||
| 140 | return v.ir.FPTrunc(op_a, fp_control); | ||
| 141 | default: | ||
| 142 | throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); | ||
| 143 | } | ||
| 144 | }()}; | ||
| 145 | const bool is_signed{f2i.is_signed != 0}; | ||
| 146 | const auto [max_bound, min_bound] = ClampBounds(f2i.dest_format, is_signed); | ||
| 147 | |||
| 148 | IR::F16F32F64 intermediate; | ||
| 149 | switch (f2i.src_format) { | ||
| 150 | case SrcFormat::F16: { | ||
| 151 | const IR::F16 max_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(max_bound)))}; | ||
| 152 | const IR::F16 min_val{v.ir.FPConvert(16, v.ir.Imm32(static_cast<f32>(min_bound)))}; | ||
| 153 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case SrcFormat::F32: { | ||
| 157 | const IR::F32 max_val{v.ir.Imm32(static_cast<f32>(max_bound))}; | ||
| 158 | const IR::F32 min_val{v.ir.Imm32(static_cast<f32>(min_bound))}; | ||
| 159 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case SrcFormat::F64: { | ||
| 163 | const IR::F64 max_val{v.ir.Imm64(max_bound)}; | ||
| 164 | const IR::F64 min_val{v.ir.Imm64(min_bound)}; | ||
| 165 | intermediate = v.ir.FPClamp(rounded_value, min_val, max_val); | ||
| 166 | break; | ||
| 167 | } | ||
| 168 | default: | ||
| 169 | throw NotImplementedException("Invalid source format {}", f2i.src_format.Value()); | ||
| 170 | } | ||
| 171 | |||
| 172 | const size_t bitsize{std::max<size_t>(32, BitSize(f2i.dest_format))}; | ||
| 173 | IR::U16U32U64 result{v.ir.ConvertFToI(bitsize, is_signed, intermediate)}; | ||
| 174 | |||
| 175 | bool handled_special_case = false; | ||
| 176 | const bool special_nan_cases = | ||
| 177 | (f2i.src_format == SrcFormat::F64) != (f2i.dest_format == DestFormat::I64); | ||
| 178 | if (special_nan_cases) { | ||
| 179 | if (f2i.dest_format == DestFormat::I32) { | ||
| 180 | handled_special_case = true; | ||
| 181 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0x8000'0000U), result)}; | ||
| 182 | } else if (f2i.dest_format == DestFormat::I64) { | ||
| 183 | handled_special_case = true; | ||
| 184 | result = IR::U64{ | ||
| 185 | v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(0x8000'0000'0000'0000UL), result)}; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | if (!handled_special_case && is_signed) { | ||
| 189 | if (bitsize != 64) { | ||
| 190 | result = IR::U32{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm32(0U), result)}; | ||
| 191 | } else { | ||
| 192 | result = IR::U64{v.ir.Select(v.ir.FPIsNan(op_a), v.ir.Imm64(u64{0}), result)}; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | |||
| 196 | if (bitsize == 64) { | ||
| 197 | v.L(f2i.dest_reg, result); | ||
| 198 | } else { | ||
| 199 | v.X(f2i.dest_reg, result); | ||
| 200 | } | ||
| 201 | |||
| 202 | if (f2i.cc != 0) { | ||
| 203 | throw NotImplementedException("F2I CC"); | ||
| 204 | } | ||
| 205 | } | ||
| 206 | } // Anonymous namespace | ||
| 207 | |||
| 208 | void TranslatorVisitor::F2I_reg(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | F2I base; | ||
| 212 | BitField<20, 8, IR::Reg> src_reg; | ||
| 213 | } const f2i{insn}; | ||
| 214 | |||
| 215 | const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { | ||
| 216 | switch (f2i.base.src_format) { | ||
| 217 | case SrcFormat::F16: | ||
| 218 | return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)}; | ||
| 219 | case SrcFormat::F32: | ||
| 220 | return F(f2i.src_reg); | ||
| 221 | case SrcFormat::F64: | ||
| 222 | return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); | ||
| 223 | default: | ||
| 224 | throw NotImplementedException("Invalid F2I source format {}", | ||
| 225 | f2i.base.src_format.Value()); | ||
| 226 | } | ||
| 227 | }()}; | ||
| 228 | TranslateF2I(*this, insn, op_a); | ||
| 229 | } | ||
| 230 | |||
| 231 | void TranslatorVisitor::F2I_cbuf(u64 insn) { | ||
| 232 | const F2I f2i{insn}; | ||
| 233 | const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 { | ||
| 234 | switch (f2i.src_format) { | ||
| 235 | case SrcFormat::F16: | ||
| 236 | return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(GetCbuf(insn)), f2i.half)}; | ||
| 237 | case SrcFormat::F32: | ||
| 238 | return GetFloatCbuf(insn); | ||
| 239 | case SrcFormat::F64: { | ||
| 240 | return UnpackCbuf(*this, insn); | ||
| 241 | } | ||
| 242 | default: | ||
| 243 | throw NotImplementedException("Invalid F2I source format {}", f2i.src_format.Value()); | ||
| 244 | } | ||
| 245 | }()}; | ||
| 246 | TranslateF2I(*this, insn, op_a); | ||
| 247 | } | ||
| 248 | |||
| 249 | void TranslatorVisitor::F2I_imm(u64) { | ||
| 250 | throw NotImplementedException("{}", Opcode::F2I_imm); | ||
| 251 | } | ||
| 252 | |||
| 253 | } // namespace Shader::Maxwell | ||
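TranslateF2I rounds, then clamps to the destination range from ClampBounds, then converts, with NaN results patched afterwards. A standalone sketch (not part of the diff) of the clamp-then-convert step for an F32 to signed 16-bit destination, using the same I16 bounds; rounding is simplified to the truncation a static_cast performs:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    std::int32_t ConvertF32ToS16(float value) {
        const float clamped = std::clamp(value, -32768.0f, 32767.0f);
        return static_cast<std::int32_t>(clamped); // out-of-range inputs were clamped first
    }

    int main() {
        std::printf("%d %d %d\n", ConvertF32ToS16(1e9f), ConvertF32ToS16(-1e9f),
                    ConvertF32ToS16(12.7f));
    }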
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fa2a7807b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,94 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a, | ||
| 13 | bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<8, 8, IR::Reg> src_a; | ||
| 18 | } const ffma{insn}; | ||
| 19 | |||
| 20 | if (cc) { | ||
| 21 | throw NotImplementedException("FFMA CC"); | ||
| 22 | } | ||
| 23 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)}; | ||
| 24 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 25 | const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; | ||
| 26 | const IR::FpControl fp_control{ | ||
| 27 | .no_contraction = true, | ||
| 28 | .rounding = CastFpRounding(fp_rounding), | ||
| 29 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 30 | }; | ||
| 31 | IR::F32 value{v.ir.FPFma(op_a, op_b, op_c, fp_control)}; | ||
| 32 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 33 | // Do not implement FMZ if SAT is enabled, as it does the logic for us. | ||
| 34 | // In D3D9 mode, anything * 0 is zero, even NaN and infinity | ||
| 35 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 36 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 37 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 38 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 39 | value = IR::F32{v.ir.Select(any_zero, op_c, value)}; | ||
| 40 | } | ||
| 41 | if (sat) { | ||
| 42 | value = v.ir.FPSaturate(value); | ||
| 43 | } | ||
| 44 | v.F(ffma.dest_reg, value); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) { | ||
| 48 | union { | ||
| 49 | u64 raw; | ||
| 50 | BitField<47, 1, u64> cc; | ||
| 51 | BitField<48, 1, u64> neg_b; | ||
| 52 | BitField<49, 1, u64> neg_c; | ||
| 53 | BitField<50, 1, u64> sat; | ||
| 54 | BitField<51, 2, FpRounding> fp_rounding; | ||
| 55 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 56 | } const ffma{insn}; | ||
| 57 | |||
| 58 | FFMA(v, insn, src_b, src_c, false, ffma.neg_b != 0, ffma.neg_c != 0, ffma.sat != 0, | ||
| 59 | ffma.cc != 0, ffma.fmz_mode, ffma.fp_rounding); | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::FFMA_reg(u64 insn) { | ||
| 64 | FFMA(*this, insn, GetFloatReg20(insn), GetFloatReg39(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::FFMA_rc(u64 insn) { | ||
| 68 | FFMA(*this, insn, GetFloatReg39(insn), GetFloatCbuf(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | void TranslatorVisitor::FFMA_cr(u64 insn) { | ||
| 72 | FFMA(*this, insn, GetFloatCbuf(insn), GetFloatReg39(insn)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void TranslatorVisitor::FFMA_imm(u64 insn) { | ||
| 76 | FFMA(*this, insn, GetFloatImm20(insn), GetFloatReg39(insn)); | ||
| 77 | } | ||
| 78 | |||
| 79 | void TranslatorVisitor::FFMA32I(u64 insn) { | ||
| 80 | union { | ||
| 81 | u64 raw; | ||
| 82 | BitField<0, 8, IR::Reg> src_c; // FFMA32I mirrors the destination and addition register | ||
| 83 | BitField<52, 1, u64> cc; | ||
| 84 | BitField<53, 2, FmzMode> fmz_mode; | ||
| 85 | BitField<55, 1, u64> sat; | ||
| 86 | BitField<56, 1, u64> neg_a; | ||
| 87 | BitField<57, 1, u64> neg_c; | ||
| 88 | } const ffma32i{insn}; | ||
| 89 | |||
| 90 | FFMA(*this, insn, GetFloatImm32(insn), F(ffma32i.src_c), ffma32i.neg_a != 0, false, | ||
| 91 | ffma32i.neg_c != 0, ffma32i.sat != 0, ffma32i.cc != 0, ffma32i.fmz_mode, FpRounding::RN); | ||
| 92 | } | ||
| 93 | |||
| 94 | } // namespace Shader::Maxwell | ||
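The FMZ branch above implements the D3D9 rule: when either multiplicand is zero, the product is zero regardless of NaN or infinity in the other operand, so the fused result collapses to the addend (and, per the comment, the branch is skipped when SAT is set because saturation covers it). A standalone sketch (not part of the diff) of that rule:

    #include <cmath>
    #include <cstdio>
    #include <limits>

    float FmzFma(float a, float b, float c) {
        if (a == 0.0f || b == 0.0f) {
            return c; // under FMZ, 0 * anything contributes nothing, even 0 * inf or 0 * NaN
        }
        return std::fma(a, b, c);
    }

    int main() {
        const float inf = std::numeric_limits<float>::infinity();
        std::printf("%f %f\n", FmzFma(0.0f, inf, 2.0f), FmzFma(3.0f, 2.0f, 1.0f));
    }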
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp new file mode 100644 index 000000000..c0d6ee5af --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_min_max.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void FMNMX(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<45, 1, u64> negate_b; | ||
| 20 | BitField<46, 1, u64> abs_a; | ||
| 21 | BitField<47, 1, u64> cc; | ||
| 22 | BitField<48, 1, u64> negate_a; | ||
| 23 | BitField<49, 1, u64> abs_b; | ||
| 24 | } const fmnmx{insn}; | ||
| 25 | |||
| 26 | if (fmnmx.cc != 0) { | ||
| 27 | throw NotImplementedException("FMNMX CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | const IR::U1 pred{v.ir.GetPred(fmnmx.pred)}; | ||
| 31 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fmnmx.src_a_reg), fmnmx.abs_a != 0, fmnmx.negate_a != 0)}; | ||
| 32 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fmnmx.abs_b != 0, fmnmx.negate_b != 0)}; | ||
| 33 | |||
| 34 | const IR::FpControl control{ | ||
| 35 | .no_contraction = false, | ||
| 36 | .rounding = IR::FpRounding::DontCare, | ||
| 37 | .fmz_mode = (fmnmx.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 38 | }; | ||
| 39 | IR::F32 max{v.ir.FPMax(op_a, op_b, control)}; | ||
| 40 | IR::F32 min{v.ir.FPMin(op_a, op_b, control)}; | ||
| 41 | |||
| 42 | if (fmnmx.neg_pred != 0) { | ||
| 43 | std::swap(min, max); | ||
| 44 | } | ||
| 45 | |||
| 46 | v.F(fmnmx.dest_reg, IR::F32{v.ir.Select(pred, min, max)}); | ||
| 47 | } | ||
| 48 | } // Anonymous namespace | ||
| 49 | |||
| 50 | void TranslatorVisitor::FMNMX_reg(u64 insn) { | ||
| 51 | FMNMX(*this, insn, GetFloatReg20(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::FMNMX_cbuf(u64 insn) { | ||
| 55 | FMNMX(*this, insn, GetFloatCbuf(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | void TranslatorVisitor::FMNMX_imm(u64 insn) { | ||
| 59 | FMNMX(*this, insn, GetFloatImm20(insn)); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp new file mode 100644 index 000000000..2f8605619 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multi_function.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class Operation : u64 { | ||
| 14 | Cos = 0, | ||
| 15 | Sin = 1, | ||
| 16 | Ex2 = 2, // Base 2 exponent | ||
| 17 | Lg2 = 3, // Base 2 logarithm | ||
| 18 | Rcp = 4, // Reciprocal | ||
| 19 | Rsq = 5, // Reciprocal square root | ||
| 20 | Rcp64H = 6, // 64-bit reciprocal | ||
| 21 | Rsq64H = 7, // 64-bit reciprocal square root | ||
| 22 | Sqrt = 8, | ||
| 23 | }; | ||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::MUFU(u64 insn) { | ||
| 27 | // MUFU is used to implement a bunch of special functions. See Operation. | ||
| 28 | union { | ||
| 29 | u64 raw; | ||
| 30 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 31 | BitField<8, 8, IR::Reg> src_reg; | ||
| 32 | BitField<20, 4, Operation> operation; | ||
| 33 | BitField<46, 1, u64> abs; | ||
| 34 | BitField<48, 1, u64> neg; | ||
| 35 | BitField<50, 1, u64> sat; | ||
| 36 | } const mufu{insn}; | ||
| 37 | |||
| 38 | const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; | ||
| 39 | IR::F32 value{[&]() -> IR::F32 { | ||
| 40 | switch (mufu.operation) { | ||
| 41 | case Operation::Cos: | ||
| 42 | return ir.FPCos(op_a); | ||
| 43 | case Operation::Sin: | ||
| 44 | return ir.FPSin(op_a); | ||
| 45 | case Operation::Ex2: | ||
| 46 | return ir.FPExp2(op_a); | ||
| 47 | case Operation::Lg2: | ||
| 48 | return ir.FPLog2(op_a); | ||
| 49 | case Operation::Rcp: | ||
| 50 | return ir.FPRecip(op_a); | ||
| 51 | case Operation::Rsq: | ||
| 52 | return ir.FPRecipSqrt(op_a); | ||
| 53 | case Operation::Rcp64H: | ||
| 54 | throw NotImplementedException("MUFU.RCP64H"); | ||
| 55 | case Operation::Rsq64H: | ||
| 56 | throw NotImplementedException("MUFU.RSQ64H"); | ||
| 57 | case Operation::Sqrt: | ||
| 58 | return ir.FPSqrt(op_a); | ||
| 59 | default: | ||
| 60 | throw NotImplementedException("Invalid MUFU operation {}", mufu.operation.Value()); | ||
| 61 | } | ||
| 62 | }()}; | ||
| 63 | |||
| 64 | if (mufu.sat != 0) { | ||
| 65 | value = ir.FPSaturate(value); | ||
| 66 | } | ||
| 67 | |||
| 68 | F(mufu.dest_reg, value); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp new file mode 100644 index 000000000..06226b7ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_multiply.cpp | |||
| @@ -0,0 +1,127 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Scale : u64 { | ||
| 15 | None, | ||
| 16 | D2, | ||
| 17 | D4, | ||
| 18 | D8, | ||
| 19 | M8, | ||
| 20 | M4, | ||
| 21 | M2, | ||
| 22 | INVALIDSCALE37, | ||
| 23 | }; | ||
| 24 | |||
| 25 | float ScaleFactor(Scale scale) { | ||
| 26 | switch (scale) { | ||
| 27 | case Scale::None: | ||
| 28 | return 1.0f; | ||
| 29 | case Scale::D2: | ||
| 30 | return 1.0f / 2.0f; | ||
| 31 | case Scale::D4: | ||
| 32 | return 1.0f / 4.0f; | ||
| 33 | case Scale::D8: | ||
| 34 | return 1.0f / 8.0f; | ||
| 35 | case Scale::M8: | ||
| 36 | return 8.0f; | ||
| 37 | case Scale::M4: | ||
| 38 | return 4.0f; | ||
| 39 | case Scale::M2: | ||
| 40 | return 2.0f; | ||
| 41 | case Scale::INVALIDSCALE37: | ||
| 42 | break; | ||
| 43 | } | ||
| 44 | throw NotImplementedException("Invalid FMUL scale {}", scale); | ||
| 45 | } | ||
| 46 | |||
| 47 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode, | ||
| 48 | FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 52 | BitField<8, 8, IR::Reg> src_a; | ||
| 53 | } const fmul{insn}; | ||
| 54 | |||
| 55 | if (cc) { | ||
| 56 | throw NotImplementedException("FMUL CC"); | ||
| 57 | } | ||
| 58 | IR::F32 op_a{v.F(fmul.src_a)}; | ||
| 59 | if (scale != Scale::None) { | ||
| 60 | if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { | ||
| 61 | throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); | ||
| 62 | } | ||
| 63 | op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); | ||
| 64 | } | ||
| 65 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||
| 66 | const IR::FpControl fp_control{ | ||
| 67 | .no_contraction = true, | ||
| 68 | .rounding = CastFpRounding(fp_rounding), | ||
| 69 | .fmz_mode = CastFmzMode(fmz_mode), | ||
| 70 | }; | ||
| 71 | IR::F32 value{v.ir.FPMul(op_a, op_b, fp_control)}; | ||
| 72 | if (fmz_mode == FmzMode::FMZ && !sat) { | ||
| 73 | // Do not implement FMZ if SAT is enabled, as it does the logic for us. | ||
| 74 | // In D3D9 mode, anything * 0 is zero, even NaN and infinity | ||
| 75 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 76 | const IR::U1 zero_a{v.ir.FPEqual(op_a, zero)}; | ||
| 77 | const IR::U1 zero_b{v.ir.FPEqual(op_b, zero)}; | ||
| 78 | const IR::U1 any_zero{v.ir.LogicalOr(zero_a, zero_b)}; | ||
| 79 | value = IR::F32{v.ir.Select(any_zero, zero, value)}; | ||
| 80 | } | ||
| 81 | if (sat) { | ||
| 82 | value = v.ir.FPSaturate(value); | ||
| 83 | } | ||
| 84 | v.F(fmul.dest_reg, value); | ||
| 85 | } | ||
| 86 | |||
| 87 | void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 88 | union { | ||
| 89 | u64 raw; | ||
| 90 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 91 | BitField<41, 3, Scale> scale; | ||
| 92 | BitField<44, 2, FmzMode> fmz; | ||
| 93 | BitField<47, 1, u64> cc; | ||
| 94 | BitField<48, 1, u64> neg_b; | ||
| 95 | BitField<50, 1, u64> sat; | ||
| 96 | } const fmul{insn}; | ||
| 97 | |||
| 98 | FMUL(v, insn, src_b, fmul.fmz, fmul.fp_rounding, fmul.scale, fmul.sat != 0, fmul.cc != 0, | ||
| 99 | fmul.neg_b != 0); | ||
| 100 | } | ||
| 101 | } // Anonymous namespace | ||
| 102 | |||
| 103 | void TranslatorVisitor::FMUL_reg(u64 insn) { | ||
| 104 | return FMUL(*this, insn, GetFloatReg20(insn)); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::FMUL_cbuf(u64 insn) { | ||
| 108 | return FMUL(*this, insn, GetFloatCbuf(insn)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::FMUL_imm(u64 insn) { | ||
| 112 | return FMUL(*this, insn, GetFloatImm20(insn)); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::FMUL32I(u64 insn) { | ||
| 116 | union { | ||
| 117 | u64 raw; | ||
| 118 | BitField<52, 1, u64> cc; | ||
| 119 | BitField<53, 2, FmzMode> fmz; | ||
| 120 | BitField<55, 1, u64> sat; | ||
| 121 | } const fmul32i{insn}; | ||
| 122 | |||
| 123 | FMUL(*this, insn, GetFloatImm32(insn), fmul32i.fmz, FpRounding::RN, Scale::None, | ||
| 124 | fmul32i.sat != 0, fmul32i.cc != 0, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | } // namespace Shader::Maxwell | ||
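The scale field pre-multiplies operand A by a power of two before the main multiply. Powers of two only adjust the exponent, so that extra multiply is exact and does not introduce an additional rounding step as long as the value stays in the normal range. A standalone sketch (not part of the diff) of that exactness:

    #include <cassert>
    #include <cstdio>

    int main() {
        const float a = 1.7f;
        // Scaling by 8 or 1/8 is exact for normal-range values, so undoing the
        // scale recovers the original bit pattern.
        assert(a * 8.0f / 8.0f == a);
        assert(a * 0.125f * 8.0f == a);
        std::printf("%f\n", a * 8.0f);
    }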
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp new file mode 100644 index 000000000..f91b93fad --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | SINCOS, | ||
| 13 | EX2, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void RRO(TranslatorVisitor& v, u64 insn, const IR::F32& src) { | ||
| 17 | union { | ||
| 18 | u64 raw; | ||
| 19 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 20 | BitField<39, 1, Mode> mode; | ||
| 21 | BitField<45, 1, u64> neg; | ||
| 22 | BitField<49, 1, u64> abs; | ||
| 23 | } const rro{insn}; | ||
| 24 | |||
| 25 | v.F(rro.dest_reg, v.ir.FPAbsNeg(src, rro.abs != 0, rro.neg != 0)); | ||
| 26 | } | ||
| 27 | } // Anonymous namespace | ||
| 28 | |||
| 29 | void TranslatorVisitor::RRO_reg(u64 insn) { | ||
| 30 | RRO(*this, insn, GetFloatReg20(insn)); | ||
| 31 | } | ||
| 32 | |||
| 33 | void TranslatorVisitor::RRO_cbuf(u64 insn) { | ||
| 34 | RRO(*this, insn, GetFloatCbuf(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::RRO_imm(u64) { | ||
| 38 | throw NotImplementedException("RRO (imm)"); | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..5f93a1513 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,60 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 17 | BitField<6, 1, u64> negate_b; | ||
| 18 | BitField<7, 1, u64> abs_a; | ||
| 19 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 20 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 21 | BitField<42, 1, u64> neg_bop_pred; | ||
| 22 | BitField<43, 1, u64> negate_a; | ||
| 23 | BitField<44, 1, u64> abs_b; | ||
| 24 | BitField<45, 2, BooleanOp> bop; | ||
| 25 | BitField<47, 1, u64> ftz; | ||
| 26 | BitField<48, 4, FPCompareOp> compare_op; | ||
| 27 | } const fsetp{insn}; | ||
| 28 | |||
| 29 | const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; | ||
| 30 | const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0)}; | ||
| 31 | const IR::FpControl control{ | ||
| 32 | .no_contraction = false, | ||
| 33 | .rounding = IR::FpRounding::DontCare, | ||
| 34 | .fmz_mode = (fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 35 | }; | ||
| 36 | |||
| 37 | const BooleanOp bop{fsetp.bop}; | ||
| 38 | const FPCompareOp compare_op{fsetp.compare_op}; | ||
| 39 | const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; | ||
| 40 | const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; | ||
| 41 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 42 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 43 | v.ir.SetPred(fsetp.dest_pred_a, result_a); | ||
| 44 | v.ir.SetPred(fsetp.dest_pred_b, result_b); | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::FSETP_reg(u64 insn) { | ||
| 49 | FSETP(*this, insn, GetFloatReg20(insn)); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::FSETP_cbuf(u64 insn) { | ||
| 53 | FSETP(*this, insn, GetFloatCbuf(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::FSETP_imm(u64 insn) { | ||
| 57 | FSETP(*this, insn, GetFloatImm20(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | } // namespace Shader::Maxwell | ||
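DSETP and FSETP both write a predicate pair: dest_pred_a gets the comparison combined with the extra predicate through the boolean op, and dest_pred_b gets the negated comparison combined the same way. A standalone sketch (not part of the diff); it assumes PredicateCombine (defined in common_funcs.h, not shown in this hunk) applies the selected AND/OR/XOR:

    #include <cstdio>

    enum class BooleanOp { AND, OR, XOR };

    bool Combine(bool comparison, bool bop_pred, BooleanOp bop) {
        switch (bop) {
        case BooleanOp::AND:
            return comparison && bop_pred;
        case BooleanOp::OR:
            return comparison || bop_pred;
        case BooleanOp::XOR:
            return comparison != bop_pred;
        }
        return false;
    }

    int main() {
        const bool comparison = true; // result of the floating-point compare
        const bool bop_pred = false;  // the extra predicate operand
        // dest_pred_a <- (comparison BOP p), dest_pred_b <- (!comparison BOP p)
        std::printf("P_a=%d P_b=%d\n", Combine(comparison, bop_pred, BooleanOp::OR),
                    Combine(!comparison, bop_pred, BooleanOp::OR));
    }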
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp new file mode 100644 index 000000000..7550a8d4c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_swizzled_add.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::FSWZADD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<28, 8, u64> swizzle; | ||
| 16 | BitField<38, 1, u64> ndv; | ||
| 17 | BitField<39, 2, FpRounding> round; | ||
| 18 | BitField<44, 1, u64> ftz; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | } const fswzadd{insn}; | ||
| 21 | |||
| 22 | if (fswzadd.ndv != 0) { | ||
| 23 | throw NotImplementedException("FSWZADD NDV"); | ||
| 24 | } | ||
| 25 | |||
| 26 | const IR::F32 src_a{GetFloatReg8(insn)}; | ||
| 27 | const IR::F32 src_b{GetFloatReg20(insn)}; | ||
| 28 | const IR::U32 swizzle{ir.Imm32(static_cast<u32>(fswzadd.swizzle))}; | ||
| 29 | |||
| 30 | const IR::FpControl fp_control{ | ||
| 31 | .no_contraction = false, | ||
| 32 | .rounding = CastFpRounding(fswzadd.round), | ||
| 33 | .fmz_mode = (fswzadd.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 34 | }; | ||
| 35 | |||
| 36 | const IR::F32 result{ir.FSwizzleAdd(src_a, src_b, swizzle, fp_control)}; | ||
| 37 | F(fswzadd.dest_reg, result); | ||
| 38 | |||
| 39 | if (fswzadd.cc != 0) { | ||
| 40 | throw NotImplementedException("FSWZADD CC"); | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp new file mode 100644 index 000000000..f2738a93b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_add.cpp | |||
| @@ -0,0 +1,125 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 11 | union { | ||
| 12 | u64 raw; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a; | ||
| 15 | } const hadd2{insn}; | ||
| 16 | |||
| 17 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)}; | ||
| 18 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 19 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 20 | if (promotion) { | ||
| 21 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 22 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 23 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 24 | } | ||
| 25 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 26 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 27 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 31 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 32 | |||
| 33 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 34 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 35 | |||
| 36 | const IR::FpControl fp_control{ | ||
| 37 | .no_contraction = true, | ||
| 38 | .rounding = IR::FpRounding::DontCare, | ||
| 39 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 40 | }; | ||
| 41 | IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)}; | ||
| 42 | IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)}; | ||
| 43 | if (sat) { | ||
| 44 | lhs = v.ir.FPSaturate(lhs); | ||
| 45 | rhs = v.ir.FPSaturate(rhs); | ||
| 46 | } | ||
| 47 | if (promotion) { | ||
| 48 | lhs = v.ir.FPConvert(16, lhs); | ||
| 49 | rhs = v.ir.FPConvert(16, rhs); | ||
| 50 | } | ||
| 51 | v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void HADD2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_b, bool neg_b, Swizzle swizzle_b, | ||
| 55 | const IR::U32& src_b) { | ||
| 56 | union { | ||
| 57 | u64 raw; | ||
| 58 | BitField<49, 2, Merge> merge; | ||
| 59 | BitField<39, 1, u64> ftz; | ||
| 60 | BitField<43, 1, u64> neg_a; | ||
| 61 | BitField<44, 1, u64> abs_a; | ||
| 62 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 63 | } const hadd2{insn}; | ||
| 64 | |||
| 65 | HADD2(v, insn, hadd2.merge, hadd2.ftz != 0, sat, hadd2.abs_a != 0, hadd2.neg_a != 0, | ||
| 66 | hadd2.swizzle_a, abs_b, neg_b, swizzle_b, src_b); | ||
| 67 | } | ||
| 68 | } // Anonymous namespace | ||
| 69 | |||
| 70 | void TranslatorVisitor::HADD2_reg(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<32, 1, u64> sat; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<30, 1, u64> abs_b; | ||
| 76 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 77 | } const hadd2{insn}; | ||
| 78 | |||
| 79 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b, | ||
| 80 | GetReg20(insn)); | ||
| 81 | } | ||
| 82 | |||
| 83 | void TranslatorVisitor::HADD2_cbuf(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<52, 1, u64> sat; | ||
| 87 | BitField<56, 1, u64> neg_b; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | } const hadd2{insn}; | ||
| 90 | |||
| 91 | HADD2(*this, insn, hadd2.sat != 0, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32, | ||
| 92 | GetCbuf(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::HADD2_imm(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<52, 1, u64> sat; | ||
| 99 | BitField<56, 1, u64> neg_high; | ||
| 100 | BitField<30, 9, u64> high; | ||
| 101 | BitField<29, 1, u64> neg_low; | ||
| 102 | BitField<20, 9, u64> low; | ||
| 103 | } const hadd2{insn}; | ||
| 104 | |||
| 105 | const u32 imm{ | ||
| 106 | static_cast<u32>(hadd2.low << 6) | static_cast<u32>((hadd2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 107 | static_cast<u32>(hadd2.high << 22) | static_cast<u32>((hadd2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 108 | HADD2(*this, insn, hadd2.sat != 0, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 109 | } | ||
| 110 | |||
| 111 | void TranslatorVisitor::HADD2_32I(u64 insn) { | ||
| 112 | union { | ||
| 113 | u64 raw; | ||
| 114 | BitField<55, 1, u64> ftz; | ||
| 115 | BitField<52, 1, u64> sat; | ||
| 116 | BitField<56, 1, u64> neg_a; | ||
| 117 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 118 | BitField<20, 32, u64> imm32; | ||
| 119 | } const hadd2{insn}; | ||
| 120 | |||
| 121 | const u32 imm{static_cast<u32>(hadd2.imm32)}; | ||
| 122 | HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0, | ||
| 123 | hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 124 | } | ||
| 125 | } // namespace Shader::Maxwell | ||
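HADD2_imm above rebuilds a packed pair of half-precision immediates from two 9-bit fields plus separate sign bits; the HMUL2, HSET2 and HSETP2 immediate forms later in this change reuse the same layout. A standalone sketch of that bit arithmetic (the helper name is illustrative, not from the source):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch (not part of the diff): the packed word built by HADD2_imm.
//   bits [15:0]  = low half:  low  << 6, sign at bit 15
//   bits [31:16] = high half: high << 22 (i.e. << 6 within the half), sign at bit 31
// Each 9-bit field covers the fp16 exponent plus the top four mantissa bits,
// so the lowest six mantissa bits of every immediate half are zero.
std::uint32_t PackHalfImmediates(std::uint32_t low, bool neg_low, std::uint32_t high,
                                 bool neg_high) {
    return (low << 6) | ((neg_low ? 1u : 0u) << 15) | (high << 22) |
           ((neg_high ? 1u : 0u) << 31);
}

int main() {
    // 0x0f0 << 6 == 0x3c00, which is 1.0 in IEEE half precision.
    const std::uint32_t packed{PackHalfImmediates(0x0f0, false, 0x0f0, true)};
    std::printf("0x%08x\n", packed); // 0xbc003c00: low half = +1.0, high half = -1.0
}
```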
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp new file mode 100644 index 000000000..fd7986701 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_fused_multiply_add.cpp | |||
| @@ -0,0 +1,169 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HFMA2(TranslatorVisitor& v, u64 insn, Merge merge, Swizzle swizzle_a, bool neg_b, bool neg_c, | ||
| 10 | Swizzle swizzle_b, Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, | ||
| 11 | bool sat, HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hfma2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hfma2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | auto [lhs_c, rhs_c]{Extract(v.ir, src_c, swizzle_c)}; | ||
| 21 | const bool promotion{lhs_a.Type() != lhs_b.Type() || lhs_a.Type() != lhs_c.Type()}; | ||
| 22 | if (promotion) { | ||
| 23 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 24 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 25 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 26 | } | ||
| 27 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 28 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 29 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 30 | } | ||
| 31 | if (lhs_c.Type() == IR::Type::F16) { | ||
| 32 | lhs_c = v.ir.FPConvert(32, lhs_c); | ||
| 33 | rhs_c = v.ir.FPConvert(32, rhs_c); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_b = v.ir.FPAbsNeg(lhs_b, false, neg_b); | ||
| 38 | rhs_b = v.ir.FPAbsNeg(rhs_b, false, neg_b); | ||
| 39 | |||
| 40 | lhs_c = v.ir.FPAbsNeg(lhs_c, false, neg_c); | ||
| 41 | rhs_c = v.ir.FPAbsNeg(rhs_c, false, neg_c); | ||
| 42 | |||
| 43 | const IR::FpControl fp_control{ | ||
| 44 | .no_contraction = true, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 47 | }; | ||
| 48 | IR::F16F32F64 lhs{v.ir.FPFma(lhs_a, lhs_b, lhs_c, fp_control)}; | ||
| 49 | IR::F16F32F64 rhs{v.ir.FPFma(rhs_a, rhs_b, rhs_c, fp_control)}; | ||
| 50 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 51 |         // Do not emulate FMZ when SAT is enabled, as saturation already does this for us. | ||
| 52 |         // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity | ||
| 53 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 54 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 55 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 56 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 57 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, lhs_c, lhs)}; | ||
| 58 | |||
| 59 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 60 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 61 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 62 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, rhs_c, rhs)}; | ||
| 63 | } | ||
| 64 | if (sat) { | ||
| 65 | lhs = v.ir.FPSaturate(lhs); | ||
| 66 | rhs = v.ir.FPSaturate(rhs); | ||
| 67 | } | ||
| 68 | if (promotion) { | ||
| 69 | lhs = v.ir.FPConvert(16, lhs); | ||
| 70 | rhs = v.ir.FPConvert(16, rhs); | ||
| 71 | } | ||
| 72 | v.X(hfma2.dest_reg, MergeResult(v.ir, hfma2.dest_reg, lhs, rhs, merge)); | ||
| 73 | } | ||
| 74 | |||
| 75 | void HFMA2(TranslatorVisitor& v, u64 insn, bool neg_b, bool neg_c, Swizzle swizzle_b, | ||
| 76 | Swizzle swizzle_c, const IR::U32& src_b, const IR::U32& src_c, bool sat, | ||
| 77 | HalfPrecision precision) { | ||
| 78 | union { | ||
| 79 | u64 raw; | ||
| 80 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 81 | BitField<49, 2, Merge> merge; | ||
| 82 | } const hfma2{insn}; | ||
| 83 | |||
| 84 | HFMA2(v, insn, hfma2.merge, hfma2.swizzle_a, neg_b, neg_c, swizzle_b, swizzle_c, src_b, src_c, | ||
| 85 | sat, precision); | ||
| 86 | } | ||
| 87 | } // Anonymous namespace | ||
| 88 | |||
| 89 | void TranslatorVisitor::HFMA2_reg(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 93 | BitField<32, 1, u64> saturate; | ||
| 94 | BitField<31, 1, u64> neg_b; | ||
| 95 | BitField<30, 1, u64> neg_c; | ||
| 96 | BitField<35, 2, Swizzle> swizzle_c; | ||
| 97 | BitField<37, 2, HalfPrecision> precision; | ||
| 98 | } const hfma2{insn}; | ||
| 99 | |||
| 100 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, hfma2.swizzle_c, | ||
| 101 | GetReg20(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::HFMA2_rc(u64 insn) { | ||
| 105 | union { | ||
| 106 | u64 raw; | ||
| 107 | BitField<51, 1, u64> neg_c; | ||
| 108 | BitField<52, 1, u64> saturate; | ||
| 109 | BitField<53, 2, Swizzle> swizzle_b; | ||
| 110 | BitField<56, 1, u64> neg_b; | ||
| 111 | BitField<57, 2, HalfPrecision> precision; | ||
| 112 | } const hfma2{insn}; | ||
| 113 | |||
| 114 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, hfma2.swizzle_b, Swizzle::F32, | ||
| 115 | GetReg39(insn), GetCbuf(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::HFMA2_cr(u64 insn) { | ||
| 119 | union { | ||
| 120 | u64 raw; | ||
| 121 | BitField<51, 1, u64> neg_c; | ||
| 122 | BitField<52, 1, u64> saturate; | ||
| 123 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 124 | BitField<56, 1, u64> neg_b; | ||
| 125 | BitField<57, 2, HalfPrecision> precision; | ||
| 126 | } const hfma2{insn}; | ||
| 127 | |||
| 128 | HFMA2(*this, insn, hfma2.neg_b != 0, hfma2.neg_c != 0, Swizzle::F32, hfma2.swizzle_c, | ||
| 129 | GetCbuf(insn), GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::HFMA2_imm(u64 insn) { | ||
| 133 | union { | ||
| 134 | u64 raw; | ||
| 135 | BitField<51, 1, u64> neg_c; | ||
| 136 | BitField<52, 1, u64> saturate; | ||
| 137 | BitField<53, 2, Swizzle> swizzle_c; | ||
| 138 | |||
| 139 | BitField<56, 1, u64> neg_high; | ||
| 140 | BitField<30, 9, u64> high; | ||
| 141 | BitField<29, 1, u64> neg_low; | ||
| 142 | BitField<20, 9, u64> low; | ||
| 143 | BitField<57, 2, HalfPrecision> precision; | ||
| 144 | } const hfma2{insn}; | ||
| 145 | |||
| 146 | const u32 imm{ | ||
| 147 | static_cast<u32>(hfma2.low << 6) | static_cast<u32>((hfma2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 148 | static_cast<u32>(hfma2.high << 22) | static_cast<u32>((hfma2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 149 | |||
| 150 | HFMA2(*this, insn, false, hfma2.neg_c != 0, Swizzle::H1_H0, hfma2.swizzle_c, ir.Imm32(imm), | ||
| 151 | GetReg39(insn), hfma2.saturate != 0, hfma2.precision); | ||
| 152 | } | ||
| 153 | |||
| 154 | void TranslatorVisitor::HFMA2_32I(u64 insn) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<0, 8, IR::Reg> src_c; | ||
| 158 | BitField<20, 32, u64> imm32; | ||
| 159 | BitField<52, 1, u64> neg_c; | ||
| 160 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 161 | BitField<55, 2, HalfPrecision> precision; | ||
| 162 | } const hfma2{insn}; | ||
| 163 | |||
| 164 | const u32 imm{static_cast<u32>(hfma2.imm32)}; | ||
| 165 | HFMA2(*this, insn, Merge::H1_H0, hfma2.swizzle_a, false, hfma2.neg_c != 0, Swizzle::H1_H0, | ||
| 166 | Swizzle::H1_H0, ir.Imm32(imm), X(hfma2.src_c), false, hfma2.precision); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp new file mode 100644 index 000000000..0dbeb7f56 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | |||
| 9 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision) { | ||
| 10 | switch (precision) { | ||
| 11 | case HalfPrecision::None: | ||
| 12 | return IR::FmzMode::None; | ||
| 13 | case HalfPrecision::FTZ: | ||
| 14 | return IR::FmzMode::FTZ; | ||
| 15 | case HalfPrecision::FMZ: | ||
| 16 | return IR::FmzMode::FMZ; | ||
| 17 | default: | ||
| 18 | return IR::FmzMode::DontCare; | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) { | ||
| 23 | switch (swizzle) { | ||
| 24 | case Swizzle::H1_H0: { | ||
| 25 | const IR::Value vector{ir.UnpackFloat2x16(value)}; | ||
| 26 | return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}}; | ||
| 27 | } | ||
| 28 | case Swizzle::H0_H0: { | ||
| 29 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)}; | ||
| 30 | return {scalar, scalar}; | ||
| 31 | } | ||
| 32 | case Swizzle::H1_H1: { | ||
| 33 | const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)}; | ||
| 34 | return {scalar, scalar}; | ||
| 35 | } | ||
| 36 | case Swizzle::F32: { | ||
| 37 | const IR::F32 scalar{ir.BitCast<IR::F32>(value)}; | ||
| 38 | return {scalar, scalar}; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | throw InvalidArgument("Invalid swizzle {}", swizzle); | ||
| 42 | } | ||
| 43 | |||
| 44 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 45 | Merge merge) { | ||
| 46 | switch (merge) { | ||
| 47 | case Merge::H1_H0: | ||
| 48 | return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs)); | ||
| 49 | case Merge::F32: | ||
| 50 | return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs)); | ||
| 51 | case Merge::MRG_H0: | ||
| 52 | case Merge::MRG_H1: { | ||
| 53 | const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))}; | ||
| 54 | const bool is_h0{merge == Merge::MRG_H0}; | ||
| 55 | const IR::F16 insert{ir.FPConvert(16, is_h0 ? lhs : rhs)}; | ||
| 56 | return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, is_h0 ? 0 : 1)); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | throw InvalidArgument("Invalid merge {}", merge); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h new file mode 100644 index 000000000..59da56a7e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h | |||
| @@ -0,0 +1,42 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/exception.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | |||
| 15 | enum class Merge : u64 { | ||
| 16 | H1_H0, | ||
| 17 | F32, | ||
| 18 | MRG_H0, | ||
| 19 | MRG_H1, | ||
| 20 | }; | ||
| 21 | |||
| 22 | enum class Swizzle : u64 { | ||
| 23 | H1_H0, | ||
| 24 | F32, | ||
| 25 | H0_H0, | ||
| 26 | H1_H1, | ||
| 27 | }; | ||
| 28 | |||
| 29 | enum class HalfPrecision : u64 { | ||
| 30 | None = 0, | ||
| 31 | FTZ = 1, | ||
| 32 | FMZ = 2, | ||
| 33 | }; | ||
| 34 | |||
| 35 | IR::FmzMode HalfPrecision2FmzMode(HalfPrecision precision); | ||
| 36 | |||
| 37 | std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle); | ||
| 38 | |||
| 39 | IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs, | ||
| 40 | Merge merge); | ||
| 41 | |||
| 42 | } // namespace Shader::Maxwell | ||
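The header above defines the small API shared by all packed-half translators: Extract splits each operand per its swizzle, the per-half operation runs twice, and MergeResult packs the pair back according to the merge mode. A condensed sketch of that flow, using only the declarations shown here; the function name is hypothetical, and a real translator additionally handles F16/F32 promotion, abs/neg modifiers and saturation:

```cpp
#include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h"

namespace Shader::Maxwell {
// Sketch (not part of the diff): the common shape of a packed-half translator.
// FPAdd stands in for whichever per-half operation an instruction needs.
void TranslateHalfAddSketch(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg src_a,
                            const IR::U32& src_b, Swizzle swizzle_a, Swizzle swizzle_b,
                            Merge merge) {
    // Split each packed operand into two halves (or a broadcast F32 scalar).
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    const IR::FpControl fp_control{
        .no_contraction = true,
        .rounding = IR::FpRounding::DontCare,
        .fmz_mode = IR::FmzMode::None,
    };
    // Operate on each half independently.
    const IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
    const IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};

    // Pack the two results back into the destination register.
    v.X(dest_reg, MergeResult(v.ir, dest_reg, lhs, rhs, merge));
}
} // namespace Shader::Maxwell
```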
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp new file mode 100644 index 000000000..3f548ce76 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_multiply.cpp | |||
| @@ -0,0 +1,143 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HMUL2(TranslatorVisitor& v, u64 insn, Merge merge, bool sat, bool abs_a, bool neg_a, | ||
| 10 | Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b, | ||
| 11 | HalfPrecision precision) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_a; | ||
| 16 | } const hmul2{insn}; | ||
| 17 | |||
| 18 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hmul2.src_a), swizzle_a)}; | ||
| 19 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 20 | const bool promotion{lhs_a.Type() != lhs_b.Type()}; | ||
| 21 | if (promotion) { | ||
| 22 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 23 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 24 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 25 | } | ||
| 26 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 27 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 28 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 29 | } | ||
| 30 | } | ||
| 31 | lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a); | ||
| 32 | rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a); | ||
| 33 | |||
| 34 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 35 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 36 | |||
| 37 | const IR::FpControl fp_control{ | ||
| 38 | .no_contraction = true, | ||
| 39 | .rounding = IR::FpRounding::DontCare, | ||
| 40 | .fmz_mode = HalfPrecision2FmzMode(precision), | ||
| 41 | }; | ||
| 42 | IR::F16F32F64 lhs{v.ir.FPMul(lhs_a, lhs_b, fp_control)}; | ||
| 43 | IR::F16F32F64 rhs{v.ir.FPMul(rhs_a, rhs_b, fp_control)}; | ||
| 44 | if (precision == HalfPrecision::FMZ && !sat) { | ||
| 45 |         // Do not emulate FMZ when SAT is enabled, as saturation already does this for us. | ||
| 46 |         // In D3D9 mode, anything multiplied by zero is zero, even NaN and infinity | ||
| 47 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 48 | const IR::U1 lhs_zero_a{v.ir.FPEqual(lhs_a, zero)}; | ||
| 49 | const IR::U1 lhs_zero_b{v.ir.FPEqual(lhs_b, zero)}; | ||
| 50 | const IR::U1 lhs_any_zero{v.ir.LogicalOr(lhs_zero_a, lhs_zero_b)}; | ||
| 51 | lhs = IR::F16F32F64{v.ir.Select(lhs_any_zero, zero, lhs)}; | ||
| 52 | |||
| 53 | const IR::U1 rhs_zero_a{v.ir.FPEqual(rhs_a, zero)}; | ||
| 54 | const IR::U1 rhs_zero_b{v.ir.FPEqual(rhs_b, zero)}; | ||
| 55 | const IR::U1 rhs_any_zero{v.ir.LogicalOr(rhs_zero_a, rhs_zero_b)}; | ||
| 56 | rhs = IR::F16F32F64{v.ir.Select(rhs_any_zero, zero, rhs)}; | ||
| 57 | } | ||
| 58 | if (sat) { | ||
| 59 | lhs = v.ir.FPSaturate(lhs); | ||
| 60 | rhs = v.ir.FPSaturate(rhs); | ||
| 61 | } | ||
| 62 | if (promotion) { | ||
| 63 | lhs = v.ir.FPConvert(16, lhs); | ||
| 64 | rhs = v.ir.FPConvert(16, rhs); | ||
| 65 | } | ||
| 66 | v.X(hmul2.dest_reg, MergeResult(v.ir, hmul2.dest_reg, lhs, rhs, merge)); | ||
| 67 | } | ||
| 68 | |||
| 69 | void HMUL2(TranslatorVisitor& v, u64 insn, bool sat, bool abs_a, bool neg_a, bool abs_b, bool neg_b, | ||
| 70 | Swizzle swizzle_b, const IR::U32& src_b) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<49, 2, Merge> merge; | ||
| 74 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 75 | BitField<39, 2, HalfPrecision> precision; | ||
| 76 | } const hmul2{insn}; | ||
| 77 | |||
| 78 | HMUL2(v, insn, hmul2.merge, sat, abs_a, neg_a, hmul2.swizzle_a, abs_b, neg_b, swizzle_b, src_b, | ||
| 79 | hmul2.precision); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::HMUL2_reg(u64 insn) { | ||
| 84 | union { | ||
| 85 | u64 raw; | ||
| 86 | BitField<32, 1, u64> sat; | ||
| 87 | BitField<31, 1, u64> neg_b; | ||
| 88 | BitField<30, 1, u64> abs_b; | ||
| 89 | BitField<44, 1, u64> abs_a; | ||
| 90 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 91 | } const hmul2{insn}; | ||
| 92 | |||
| 93 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, false, hmul2.abs_b != 0, hmul2.neg_b != 0, | ||
| 94 | hmul2.swizzle_b, GetReg20(insn)); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HMUL2_cbuf(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 raw; | ||
| 100 | BitField<52, 1, u64> sat; | ||
| 101 | BitField<54, 1, u64> abs_b; | ||
| 102 | BitField<43, 1, u64> neg_a; | ||
| 103 | BitField<44, 1, u64> abs_a; | ||
| 104 | } const hmul2{insn}; | ||
| 105 | |||
| 106 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, hmul2.abs_b != 0, false, | ||
| 107 | Swizzle::F32, GetCbuf(insn)); | ||
| 108 | } | ||
| 109 | |||
| 110 | void TranslatorVisitor::HMUL2_imm(u64 insn) { | ||
| 111 | union { | ||
| 112 | u64 raw; | ||
| 113 | BitField<52, 1, u64> sat; | ||
| 114 | BitField<56, 1, u64> neg_high; | ||
| 115 | BitField<30, 9, u64> high; | ||
| 116 | BitField<29, 1, u64> neg_low; | ||
| 117 | BitField<20, 9, u64> low; | ||
| 118 | BitField<43, 1, u64> neg_a; | ||
| 119 | BitField<44, 1, u64> abs_a; | ||
| 120 | } const hmul2{insn}; | ||
| 121 | |||
| 122 | const u32 imm{ | ||
| 123 | static_cast<u32>(hmul2.low << 6) | static_cast<u32>((hmul2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 124 | static_cast<u32>(hmul2.high << 22) | static_cast<u32>((hmul2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 125 | HMUL2(*this, insn, hmul2.sat != 0, hmul2.abs_a != 0, hmul2.neg_a != 0, false, false, | ||
| 126 | Swizzle::H1_H0, ir.Imm32(imm)); | ||
| 127 | } | ||
| 128 | |||
| 129 | void TranslatorVisitor::HMUL2_32I(u64 insn) { | ||
| 130 | union { | ||
| 131 | u64 raw; | ||
| 132 | BitField<55, 2, HalfPrecision> precision; | ||
| 133 | BitField<52, 1, u64> sat; | ||
| 134 | BitField<53, 2, Swizzle> swizzle_a; | ||
| 135 | BitField<20, 32, u64> imm32; | ||
| 136 | } const hmul2{insn}; | ||
| 137 | |||
| 138 | const u32 imm{static_cast<u32>(hmul2.imm32)}; | ||
| 139 | HMUL2(*this, insn, Merge::H1_H0, hmul2.sat != 0, false, false, hmul2.swizzle_a, false, false, | ||
| 140 | Swizzle::H1_H0, ir.Imm32(imm), hmul2.precision); | ||
| 141 | } | ||
| 142 | |||
| 143 | } // namespace Shader::Maxwell | ||
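The FMZ branch in HMUL2 above emulates D3D9-style multiplication, where a zero factor forces a zero product even when the other factor is NaN or infinity. A plain scalar sketch of the behaviour being emulated, independent of the IR:

```cpp
#include <cmath>
#include <cstdio>

// Sketch (not part of the diff): the scalar rule the HMUL2 FMZ path selects.
// Under IEEE rules NaN * 0.0f and Inf * 0.0f are NaN; the FMZ emulation
// instead returns zero whenever either factor compares equal to zero.
float FmzMul(float a, float b) {
    if (a == 0.0f || b == 0.0f) {
        return 0.0f; // anything * 0 == 0, even NaN and infinity
    }
    return a * b;
}

int main() {
    std::printf("%f\n", FmzMul(NAN, 0.0f));      // prints 0.000000
    std::printf("%f\n", FmzMul(INFINITY, 0.0f)); // prints 0.000000
    std::printf("%f\n", FmzMul(2.0f, 3.0f));     // prints 6.000000
}
```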
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp new file mode 100644 index 000000000..cca5b831f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp | |||
| @@ -0,0 +1,117 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool ftz, bool neg_b, | ||
| 10 | bool abs_b, FPCompareOp compare_op, Swizzle swizzle_b) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 14 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 15 | BitField<39, 3, IR::Pred> pred; | ||
| 16 | BitField<42, 1, u64> neg_pred; | ||
| 17 | BitField<43, 1, u64> neg_a; | ||
| 18 | BitField<45, 2, BooleanOp> bop; | ||
| 19 | BitField<44, 1, u64> abs_a; | ||
| 20 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 21 | } const hset2{insn}; | ||
| 22 | |||
| 23 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; | ||
| 24 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 25 | |||
| 26 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 27 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 28 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 29 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 30 | } | ||
| 31 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 32 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 33 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 38 | rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); | ||
| 39 | |||
| 40 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 41 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 42 | |||
| 43 | const IR::FpControl control{ | ||
| 44 | .no_contraction = false, | ||
| 45 | .rounding = IR::FpRounding::DontCare, | ||
| 46 | .fmz_mode = (ftz ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 47 | }; | ||
| 48 | |||
| 49 | IR::U1 pred{v.ir.GetPred(hset2.pred)}; | ||
| 50 | if (hset2.neg_pred != 0) { | ||
| 51 | pred = v.ir.LogicalNot(pred); | ||
| 52 | } | ||
| 53 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 54 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 55 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hset2.bop)}; | ||
| 56 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hset2.bop)}; | ||
| 57 | |||
| 58 | const u32 true_value = bf ? 0x3c00 : 0xffff; | ||
| 59 | const IR::U32 true_val_lhs{v.ir.Imm32(true_value)}; | ||
| 60 | const IR::U32 true_val_rhs{v.ir.Imm32(true_value << 16)}; | ||
| 61 | const IR::U32 fail_result{v.ir.Imm32(0)}; | ||
| 62 | const IR::U32 result_lhs{v.ir.Select(bop_result_lhs, true_val_lhs, fail_result)}; | ||
| 63 | const IR::U32 result_rhs{v.ir.Select(bop_result_rhs, true_val_rhs, fail_result)}; | ||
| 64 | |||
| 65 | v.X(hset2.dest_reg, IR::U32{v.ir.BitwiseOr(result_lhs, result_rhs)}); | ||
| 66 | } | ||
| 67 | } // Anonymous namespace | ||
| 68 | |||
| 69 | void TranslatorVisitor::HSET2_reg(u64 insn) { | ||
| 70 | union { | ||
| 71 | u64 insn; | ||
| 72 | BitField<30, 1, u64> abs_b; | ||
| 73 | BitField<49, 1, u64> bf; | ||
| 74 | BitField<31, 1, u64> neg_b; | ||
| 75 | BitField<50, 1, u64> ftz; | ||
| 76 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 77 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 78 | } const hset2{insn}; | ||
| 79 | |||
| 80 | HSET2(*this, insn, GetReg20(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, | ||
| 81 | hset2.abs_b != 0, hset2.compare_op, hset2.swizzle_b); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSET2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> bf; | ||
| 88 | BitField<56, 1, u64> neg_b; | ||
| 89 | BitField<54, 1, u64> ftz; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hset2{insn}; | ||
| 92 | |||
| 93 | HSET2(*this, insn, GetCbuf(insn), hset2.bf != 0, hset2.ftz != 0, hset2.neg_b != 0, false, | ||
| 94 | hset2.compare_op, Swizzle::F32); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSET2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> bf; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hset2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{ | ||
| 110 | static_cast<u32>(hset2.low << 6) | static_cast<u32>((hset2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hset2.high << 22) | static_cast<u32>((hset2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 112 | |||
| 113 | HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, | ||
| 114 | Swizzle::H1_H0); | ||
| 115 | } | ||
| 116 | |||
| 117 | } // namespace Shader::Maxwell | ||
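HSET2 above encodes each passing half as a 16-bit pattern: 0x3c00 (half-precision 1.0) when the BF bit is set, 0xffff otherwise, with the second half shifted into the upper 16 bits before the two selections are ORed together. A small worked example of that packing (the helper name is illustrative):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch (not part of the diff): the result word HSET2 builds from the two
// per-half comparison outcomes.
std::uint32_t PackHset2Result(bool pass_low, bool pass_high, bool bf) {
    const std::uint32_t true_value{bf ? 0x3c00u : 0xffffu};
    const std::uint32_t low{pass_low ? true_value : 0u};
    const std::uint32_t high{pass_high ? true_value << 16 : 0u};
    return low | high;
}

int main() {
    std::printf("0x%08x\n", PackHset2Result(true, true, true));   // 0x3c003c00 -> {1.0h, 1.0h}
    std::printf("0x%08x\n", PackHset2Result(true, false, false)); // 0x0000ffff
}
```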
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp new file mode 100644 index 000000000..b3931dae3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_helper.h" | ||
| 6 | |||
| 7 | namespace Shader::Maxwell { | ||
| 8 | namespace { | ||
| 9 | void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bool abs_b, | ||
| 10 | Swizzle swizzle_b, FPCompareOp compare_op, bool h_and) { | ||
| 11 | union { | ||
| 12 | u64 insn; | ||
| 13 | BitField<8, 8, IR::Reg> src_a_reg; | ||
| 14 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 15 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 1, u64> neg_a; | ||
| 19 | BitField<45, 2, BooleanOp> bop; | ||
| 20 | BitField<44, 1, u64> abs_a; | ||
| 21 | BitField<6, 1, u64> ftz; | ||
| 22 | BitField<47, 2, Swizzle> swizzle_a; | ||
| 23 | } const hsetp2{insn}; | ||
| 24 | |||
| 25 | auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; | ||
| 26 | auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; | ||
| 27 | |||
| 28 | if (lhs_a.Type() != lhs_b.Type()) { | ||
| 29 | if (lhs_a.Type() == IR::Type::F16) { | ||
| 30 | lhs_a = v.ir.FPConvert(32, lhs_a); | ||
| 31 | rhs_a = v.ir.FPConvert(32, rhs_a); | ||
| 32 | } | ||
| 33 | if (lhs_b.Type() == IR::Type::F16) { | ||
| 34 | lhs_b = v.ir.FPConvert(32, lhs_b); | ||
| 35 | rhs_b = v.ir.FPConvert(32, rhs_b); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 40 | rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); | ||
| 41 | |||
| 42 | lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b); | ||
| 43 | rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b); | ||
| 44 | |||
| 45 | const IR::FpControl control{ | ||
| 46 | .no_contraction = false, | ||
| 47 | .rounding = IR::FpRounding::DontCare, | ||
| 48 | .fmz_mode = (hsetp2.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None), | ||
| 49 | }; | ||
| 50 | |||
| 51 | IR::U1 pred{v.ir.GetPred(hsetp2.pred)}; | ||
| 52 | if (hsetp2.neg_pred != 0) { | ||
| 53 | pred = v.ir.LogicalNot(pred); | ||
| 54 | } | ||
| 55 | const IR::U1 cmp_result_lhs{FloatingPointCompare(v.ir, lhs_a, lhs_b, compare_op, control)}; | ||
| 56 | const IR::U1 cmp_result_rhs{FloatingPointCompare(v.ir, rhs_a, rhs_b, compare_op, control)}; | ||
| 57 | const IR::U1 bop_result_lhs{PredicateCombine(v.ir, cmp_result_lhs, pred, hsetp2.bop)}; | ||
| 58 | const IR::U1 bop_result_rhs{PredicateCombine(v.ir, cmp_result_rhs, pred, hsetp2.bop)}; | ||
| 59 | |||
| 60 | if (h_and) { | ||
| 61 | auto result = v.ir.LogicalAnd(bop_result_lhs, bop_result_rhs); | ||
| 62 | v.ir.SetPred(hsetp2.dest_pred_a, result); | ||
| 63 | v.ir.SetPred(hsetp2.dest_pred_b, v.ir.LogicalNot(result)); | ||
| 64 | } else { | ||
| 65 | v.ir.SetPred(hsetp2.dest_pred_a, bop_result_lhs); | ||
| 66 | v.ir.SetPred(hsetp2.dest_pred_b, bop_result_rhs); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | } // Anonymous namespace | ||
| 70 | |||
| 71 | void TranslatorVisitor::HSETP2_reg(u64 insn) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<30, 1, u64> abs_b; | ||
| 75 | BitField<49, 1, u64> h_and; | ||
| 76 | BitField<31, 1, u64> neg_b; | ||
| 77 | BitField<35, 4, FPCompareOp> compare_op; | ||
| 78 | BitField<28, 2, Swizzle> swizzle_b; | ||
| 79 | } const hsetp2{insn}; | ||
| 80 | HSETP2(*this, insn, GetReg20(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, hsetp2.swizzle_b, | ||
| 81 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::HSETP2_cbuf(u64 insn) { | ||
| 85 | union { | ||
| 86 | u64 insn; | ||
| 87 | BitField<53, 1, u64> h_and; | ||
| 88 | BitField<54, 1, u64> abs_b; | ||
| 89 | BitField<56, 1, u64> neg_b; | ||
| 90 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 91 | } const hsetp2{insn}; | ||
| 92 | |||
| 93 | HSETP2(*this, insn, GetCbuf(insn), hsetp2.neg_b != 0, hsetp2.abs_b != 0, Swizzle::F32, | ||
| 94 | hsetp2.compare_op, hsetp2.h_and != 0); | ||
| 95 | } | ||
| 96 | |||
| 97 | void TranslatorVisitor::HSETP2_imm(u64 insn) { | ||
| 98 | union { | ||
| 99 | u64 insn; | ||
| 100 | BitField<53, 1, u64> h_and; | ||
| 101 | BitField<54, 1, u64> ftz; | ||
| 102 | BitField<49, 4, FPCompareOp> compare_op; | ||
| 103 | BitField<56, 1, u64> neg_high; | ||
| 104 | BitField<30, 9, u64> high; | ||
| 105 | BitField<29, 1, u64> neg_low; | ||
| 106 | BitField<20, 9, u64> low; | ||
| 107 | } const hsetp2{insn}; | ||
| 108 | |||
| 109 | const u32 imm{static_cast<u32>(hsetp2.low << 6) | | ||
| 110 | static_cast<u32>((hsetp2.neg_low != 0 ? 1 : 0) << 15) | | ||
| 111 | static_cast<u32>(hsetp2.high << 22) | | ||
| 112 | static_cast<u32>((hsetp2.neg_high != 0 ? 1 : 0) << 31)}; | ||
| 113 | |||
| 114 | HSETP2(*this, insn, ir.Imm32(imm), false, false, Swizzle::H1_H0, hsetp2.compare_op, | ||
| 115 | hsetp2.h_and != 0); | ||
| 116 | } | ||
| 117 | |||
| 118 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp new file mode 100644 index 000000000..b446aae0e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp | |||
| @@ -0,0 +1,272 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | [[nodiscard]] IR::U32 CbufLowerBits(IR::IREmitter& ir, bool unaligned, const IR::U32& binding, | ||
| 12 | u32 offset) { | ||
| 13 | if (unaligned) { | ||
| 14 | return ir.Imm32(0); | ||
| 15 | } | ||
| 16 | return ir.GetCbuf(binding, IR::U32{IR::Value{offset}}); | ||
| 17 | } | ||
| 18 | } // Anonymous namespace | ||
| 19 | |||
| 20 | IR::U32 TranslatorVisitor::X(IR::Reg reg) { | ||
| 21 | return ir.GetReg(reg); | ||
| 22 | } | ||
| 23 | |||
| 24 | IR::U64 TranslatorVisitor::L(IR::Reg reg) { | ||
| 25 | if (!IR::IsAligned(reg, 2)) { | ||
| 26 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 27 | } | ||
| 28 | return IR::U64{ir.PackUint2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 29 | } | ||
| 30 | |||
| 31 | IR::F32 TranslatorVisitor::F(IR::Reg reg) { | ||
| 32 | return ir.BitCast<IR::F32>(X(reg)); | ||
| 33 | } | ||
| 34 | |||
| 35 | IR::F64 TranslatorVisitor::D(IR::Reg reg) { | ||
| 36 | if (!IR::IsAligned(reg, 2)) { | ||
| 37 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 38 | } | ||
| 39 | return IR::F64{ir.PackDouble2x32(ir.CompositeConstruct(X(reg), X(reg + 1)))}; | ||
| 40 | } | ||
| 41 | |||
| 42 | void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | ||
| 43 | ir.SetReg(dest_reg, value); | ||
| 44 | } | ||
| 45 | |||
| 46 | void TranslatorVisitor::L(IR::Reg dest_reg, const IR::U64& value) { | ||
| 47 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 48 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 49 | } | ||
| 50 | const IR::Value result{ir.UnpackUint2x32(value)}; | ||
| 51 | for (int i = 0; i < 2; i++) { | ||
| 52 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 53 | } | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) { | ||
| 57 | X(dest_reg, ir.BitCast<IR::U32>(value)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::D(IR::Reg dest_reg, const IR::F64& value) { | ||
| 61 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 62 | throw NotImplementedException("Unaligned destination register {}", dest_reg); | ||
| 63 | } | ||
| 64 | const IR::Value result{ir.UnpackDouble2x32(value)}; | ||
| 65 | for (int i = 0; i < 2; i++) { | ||
| 66 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | IR::U32 TranslatorVisitor::GetReg8(u64 insn) { | ||
| 71 | union { | ||
| 72 | u64 raw; | ||
| 73 | BitField<8, 8, IR::Reg> index; | ||
| 74 | } const reg{insn}; | ||
| 75 | return X(reg.index); | ||
| 76 | } | ||
| 77 | |||
| 78 | IR::U32 TranslatorVisitor::GetReg20(u64 insn) { | ||
| 79 | union { | ||
| 80 | u64 raw; | ||
| 81 | BitField<20, 8, IR::Reg> index; | ||
| 82 | } const reg{insn}; | ||
| 83 | return X(reg.index); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||
| 87 | union { | ||
| 88 | u64 raw; | ||
| 89 | BitField<39, 8, IR::Reg> index; | ||
| 90 | } const reg{insn}; | ||
| 91 | return X(reg.index); | ||
| 92 | } | ||
| 93 | |||
| 94 | IR::F32 TranslatorVisitor::GetFloatReg8(u64 insn) { | ||
| 95 | return ir.BitCast<IR::F32>(GetReg8(insn)); | ||
| 96 | } | ||
| 97 | |||
| 98 | IR::F32 TranslatorVisitor::GetFloatReg20(u64 insn) { | ||
| 99 | return ir.BitCast<IR::F32>(GetReg20(insn)); | ||
| 100 | } | ||
| 101 | |||
| 102 | IR::F32 TranslatorVisitor::GetFloatReg39(u64 insn) { | ||
| 103 | return ir.BitCast<IR::F32>(GetReg39(insn)); | ||
| 104 | } | ||
| 105 | |||
| 106 | IR::F64 TranslatorVisitor::GetDoubleReg20(u64 insn) { | ||
| 107 | union { | ||
| 108 | u64 raw; | ||
| 109 | BitField<20, 8, IR::Reg> index; | ||
| 110 | } const reg{insn}; | ||
| 111 | return D(reg.index); | ||
| 112 | } | ||
| 113 | |||
| 114 | IR::F64 TranslatorVisitor::GetDoubleReg39(u64 insn) { | ||
| 115 | union { | ||
| 116 | u64 raw; | ||
| 117 | BitField<39, 8, IR::Reg> index; | ||
| 118 | } const reg{insn}; | ||
| 119 | return D(reg.index); | ||
| 120 | } | ||
| 121 | |||
| 122 | static std::pair<IR::U32, IR::U32> CbufAddr(u64 insn) { | ||
| 123 | union { | ||
| 124 | u64 raw; | ||
| 125 | BitField<20, 14, u64> offset; | ||
| 126 | BitField<34, 5, u64> binding; | ||
| 127 | } const cbuf{insn}; | ||
| 128 | |||
| 129 | if (cbuf.binding >= 18) { | ||
| 130 | throw NotImplementedException("Out of bounds constant buffer binding {}", cbuf.binding); | ||
| 131 | } | ||
| 132 | if (cbuf.offset >= 0x10'000) { | ||
| 133 | throw NotImplementedException("Out of bounds constant buffer offset {}", cbuf.offset); | ||
| 134 | } | ||
| 135 | const IR::Value binding{static_cast<u32>(cbuf.binding)}; | ||
| 136 | const IR::Value byte_offset{static_cast<u32>(cbuf.offset) * 4}; | ||
| 137 | return {IR::U32{binding}, IR::U32{byte_offset}}; | ||
| 138 | } | ||
| 139 | |||
| 140 | IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | ||
| 141 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 142 | return ir.GetCbuf(binding, byte_offset); | ||
| 143 | } | ||
| 144 | |||
| 145 | IR::F32 TranslatorVisitor::GetFloatCbuf(u64 insn) { | ||
| 146 | const auto [binding, byte_offset]{CbufAddr(insn)}; | ||
| 147 | return ir.GetFloatCbuf(binding, byte_offset); | ||
| 148 | } | ||
| 149 | |||
| 150 | IR::F64 TranslatorVisitor::GetDoubleCbuf(u64 insn) { | ||
| 151 | union { | ||
| 152 | u64 raw; | ||
| 153 | BitField<20, 1, u64> unaligned; | ||
| 154 | } const cbuf{insn}; | ||
| 155 | |||
| 156 | const auto [binding, offset_value]{CbufAddr(insn)}; | ||
| 157 | const bool unaligned{cbuf.unaligned != 0}; | ||
| 158 | const u32 offset{offset_value.U32()}; | ||
| 159 | const IR::Value addr{unaligned ? offset | 4u : (offset & ~7u) | 4u}; | ||
| 160 | |||
| 161 | const IR::U32 value{ir.GetCbuf(binding, IR::U32{addr})}; | ||
| 162 | const IR::U32 lower_bits{CbufLowerBits(ir, unaligned, binding, offset)}; | ||
| 163 | return ir.PackDouble2x32(ir.CompositeConstruct(lower_bits, value)); | ||
| 164 | } | ||
| 165 | |||
| 166 | IR::U64 TranslatorVisitor::GetPackedCbuf(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<20, 1, u64> unaligned; | ||
| 170 | } const cbuf{insn}; | ||
| 171 | |||
| 172 | if (cbuf.unaligned != 0) { | ||
| 173 | throw NotImplementedException("Unaligned packed constant buffer read"); | ||
| 174 | } | ||
| 175 | const auto [binding, lower_offset]{CbufAddr(insn)}; | ||
| 176 | const IR::U32 upper_offset{ir.Imm32(lower_offset.U32() + 4)}; | ||
| 177 | const IR::U32 lower_value{ir.GetCbuf(binding, lower_offset)}; | ||
| 178 | const IR::U32 upper_value{ir.GetCbuf(binding, upper_offset)}; | ||
| 179 | return ir.PackUint2x32(ir.CompositeConstruct(lower_value, upper_value)); | ||
| 180 | } | ||
| 181 | |||
| 182 | IR::U32 TranslatorVisitor::GetImm20(u64 insn) { | ||
| 183 | union { | ||
| 184 | u64 raw; | ||
| 185 | BitField<20, 19, u64> value; | ||
| 186 | BitField<56, 1, u64> is_negative; | ||
| 187 | } const imm{insn}; | ||
| 188 | |||
| 189 | if (imm.is_negative != 0) { | ||
| 190 | const s64 raw{static_cast<s64>(imm.value)}; | ||
| 191 | return ir.Imm32(static_cast<s32>(-(1LL << 19) + raw)); | ||
| 192 | } else { | ||
| 193 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | IR::F32 TranslatorVisitor::GetFloatImm20(u64 insn) { | ||
| 198 | union { | ||
| 199 | u64 raw; | ||
| 200 | BitField<20, 19, u64> value; | ||
| 201 | BitField<56, 1, u64> is_negative; | ||
| 202 | } const imm{insn}; | ||
| 203 | const u32 sign_bit{static_cast<u32>(imm.is_negative != 0 ? (1ULL << 31) : 0)}; | ||
| 204 | const u32 value{static_cast<u32>(imm.value) << 12}; | ||
| 205 | return ir.Imm32(Common::BitCast<f32>(value | sign_bit)); | ||
| 206 | } | ||
| 207 | |||
| 208 | IR::F64 TranslatorVisitor::GetDoubleImm20(u64 insn) { | ||
| 209 | union { | ||
| 210 | u64 raw; | ||
| 211 | BitField<20, 19, u64> value; | ||
| 212 | BitField<56, 1, u64> is_negative; | ||
| 213 | } const imm{insn}; | ||
| 214 | const u64 sign_bit{imm.is_negative != 0 ? (1ULL << 63) : 0}; | ||
| 215 | const u64 value{imm.value << 44}; | ||
| 216 | return ir.Imm64(Common::BitCast<f64>(value | sign_bit)); | ||
| 217 | } | ||
| 218 | |||
| 219 | IR::U64 TranslatorVisitor::GetPackedImm20(u64 insn) { | ||
| 220 | const s64 value{GetImm20(insn).U32()}; | ||
| 221 | return ir.Imm64(static_cast<u64>(static_cast<s64>(value) << 32)); | ||
| 222 | } | ||
| 223 | |||
| 224 | IR::U32 TranslatorVisitor::GetImm32(u64 insn) { | ||
| 225 | union { | ||
| 226 | u64 raw; | ||
| 227 | BitField<20, 32, u64> value; | ||
| 228 | } const imm{insn}; | ||
| 229 | return ir.Imm32(static_cast<u32>(imm.value)); | ||
| 230 | } | ||
| 231 | |||
| 232 | IR::F32 TranslatorVisitor::GetFloatImm32(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<20, 32, u64> value; | ||
| 236 | } const imm{insn}; | ||
| 237 | return ir.Imm32(Common::BitCast<f32>(static_cast<u32>(imm.value))); | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::SetZFlag(const IR::U1& value) { | ||
| 241 | ir.SetZFlag(value); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::SetSFlag(const IR::U1& value) { | ||
| 245 | ir.SetSFlag(value); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SetCFlag(const IR::U1& value) { | ||
| 249 | ir.SetCFlag(value); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SetOFlag(const IR::U1& value) { | ||
| 253 | ir.SetOFlag(value); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::ResetZero() { | ||
| 257 | SetZFlag(ir.Imm1(false)); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::ResetSFlag() { | ||
| 261 | SetSFlag(ir.Imm1(false)); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::ResetCFlag() { | ||
| 265 | SetCFlag(ir.Imm1(false)); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::ResetOFlag() { | ||
| 269 | SetOFlag(ir.Imm1(false)); | ||
| 270 | } | ||
| 271 | |||
| 272 | } // namespace Shader::Maxwell | ||
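GetImm20 above treats the encoding as 19 value bits at [38:20] plus a separate sign bit at bit 56, materialising negative immediates as value - 2^19, while GetFloatImm20 instead splices the same 19 bits into the top of an IEEE single. A standalone sketch of the integer decode (names are illustrative):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch (not part of the diff): scalar decode of the signed immediate
// produced by GetImm20.
std::int32_t DecodeImm20(std::uint32_t value_19_bits, bool is_negative) {
    if (is_negative) {
        // The 19-bit field is the low part of a 20-bit two's-complement value
        // whose top bit is set, i.e. the result is value - 2^19.
        return static_cast<std::int32_t>(value_19_bits) - (1 << 19);
    }
    return static_cast<std::int32_t>(value_19_bits);
}

int main() {
    std::printf("%d\n", DecodeImm20(0x7ffff, true)); // prints -1
    std::printf("%d\n", DecodeImm20(5, true));       // prints -524283
    std::printf("%d\n", DecodeImm20(5, false));      // prints 5
}
```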
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h new file mode 100644 index 000000000..335e4f24f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h | |||
| @@ -0,0 +1,387 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/instruction.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | enum class CompareOp : u64 { | ||
| 15 | False, | ||
| 16 | LessThan, | ||
| 17 | Equal, | ||
| 18 | LessThanEqual, | ||
| 19 | GreaterThan, | ||
| 20 | NotEqual, | ||
| 21 | GreaterThanEqual, | ||
| 22 | True, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class BooleanOp : u64 { | ||
| 26 | AND, | ||
| 27 | OR, | ||
| 28 | XOR, | ||
| 29 | }; | ||
| 30 | |||
| 31 | enum class PredicateOp : u64 { | ||
| 32 | False, | ||
| 33 | True, | ||
| 34 | Zero, | ||
| 35 | NonZero, | ||
| 36 | }; | ||
| 37 | |||
| 38 | enum class FPCompareOp : u64 { | ||
| 39 | F, | ||
| 40 | LT, | ||
| 41 | EQ, | ||
| 42 | LE, | ||
| 43 | GT, | ||
| 44 | NE, | ||
| 45 | GE, | ||
| 46 | NUM, | ||
| 47 | Nan, | ||
| 48 | LTU, | ||
| 49 | EQU, | ||
| 50 | LEU, | ||
| 51 | GTU, | ||
| 52 | NEU, | ||
| 53 | GEU, | ||
| 54 | T, | ||
| 55 | }; | ||
| 56 | |||
| 57 | class TranslatorVisitor { | ||
| 58 | public: | ||
| 59 | explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} | ||
| 60 | |||
| 61 | Environment& env; | ||
| 62 | IR::IREmitter ir; | ||
| 63 | |||
| 64 | void AL2P(u64 insn); | ||
| 65 | void ALD(u64 insn); | ||
| 66 | void AST(u64 insn); | ||
| 67 | void ATOM_cas(u64 insn); | ||
| 68 | void ATOM(u64 insn); | ||
| 69 | void ATOMS_cas(u64 insn); | ||
| 70 | void ATOMS(u64 insn); | ||
| 71 | void B2R(u64 insn); | ||
| 72 | void BAR(u64 insn); | ||
| 73 | void BFE_reg(u64 insn); | ||
| 74 | void BFE_cbuf(u64 insn); | ||
| 75 | void BFE_imm(u64 insn); | ||
| 76 | void BFI_reg(u64 insn); | ||
| 77 | void BFI_rc(u64 insn); | ||
| 78 | void BFI_cr(u64 insn); | ||
| 79 | void BFI_imm(u64 insn); | ||
| 80 | void BPT(u64 insn); | ||
| 81 | void BRA(u64 insn); | ||
| 82 | void BRK(u64 insn); | ||
| 83 | void BRX(u64 insn); | ||
| 84 | void CAL(); | ||
| 85 | void CCTL(u64 insn); | ||
| 86 | void CCTLL(u64 insn); | ||
| 87 | void CONT(u64 insn); | ||
| 88 | void CS2R(u64 insn); | ||
| 89 | void CSET(u64 insn); | ||
| 90 | void CSETP(u64 insn); | ||
| 91 | void DADD_reg(u64 insn); | ||
| 92 | void DADD_cbuf(u64 insn); | ||
| 93 | void DADD_imm(u64 insn); | ||
| 94 | void DEPBAR(); | ||
| 95 | void DFMA_reg(u64 insn); | ||
| 96 | void DFMA_rc(u64 insn); | ||
| 97 | void DFMA_cr(u64 insn); | ||
| 98 | void DFMA_imm(u64 insn); | ||
| 99 | void DMNMX_reg(u64 insn); | ||
| 100 | void DMNMX_cbuf(u64 insn); | ||
| 101 | void DMNMX_imm(u64 insn); | ||
| 102 | void DMUL_reg(u64 insn); | ||
| 103 | void DMUL_cbuf(u64 insn); | ||
| 104 | void DMUL_imm(u64 insn); | ||
| 105 | void DSET_reg(u64 insn); | ||
| 106 | void DSET_cbuf(u64 insn); | ||
| 107 | void DSET_imm(u64 insn); | ||
| 108 | void DSETP_reg(u64 insn); | ||
| 109 | void DSETP_cbuf(u64 insn); | ||
| 110 | void DSETP_imm(u64 insn); | ||
| 111 | void EXIT(); | ||
| 112 | void F2F_reg(u64 insn); | ||
| 113 | void F2F_cbuf(u64 insn); | ||
| 114 | void F2F_imm(u64 insn); | ||
| 115 | void F2I_reg(u64 insn); | ||
| 116 | void F2I_cbuf(u64 insn); | ||
| 117 | void F2I_imm(u64 insn); | ||
| 118 | void FADD_reg(u64 insn); | ||
| 119 | void FADD_cbuf(u64 insn); | ||
| 120 | void FADD_imm(u64 insn); | ||
| 121 | void FADD32I(u64 insn); | ||
| 122 | void FCHK_reg(u64 insn); | ||
| 123 | void FCHK_cbuf(u64 insn); | ||
| 124 | void FCHK_imm(u64 insn); | ||
| 125 | void FCMP_reg(u64 insn); | ||
| 126 | void FCMP_rc(u64 insn); | ||
| 127 | void FCMP_cr(u64 insn); | ||
| 128 | void FCMP_imm(u64 insn); | ||
| 129 | void FFMA_reg(u64 insn); | ||
| 130 | void FFMA_rc(u64 insn); | ||
| 131 | void FFMA_cr(u64 insn); | ||
| 132 | void FFMA_imm(u64 insn); | ||
| 133 | void FFMA32I(u64 insn); | ||
| 134 | void FLO_reg(u64 insn); | ||
| 135 | void FLO_cbuf(u64 insn); | ||
| 136 | void FLO_imm(u64 insn); | ||
| 137 | void FMNMX_reg(u64 insn); | ||
| 138 | void FMNMX_cbuf(u64 insn); | ||
| 139 | void FMNMX_imm(u64 insn); | ||
| 140 | void FMUL_reg(u64 insn); | ||
| 141 | void FMUL_cbuf(u64 insn); | ||
| 142 | void FMUL_imm(u64 insn); | ||
| 143 | void FMUL32I(u64 insn); | ||
| 144 | void FSET_reg(u64 insn); | ||
| 145 | void FSET_cbuf(u64 insn); | ||
| 146 | void FSET_imm(u64 insn); | ||
| 147 | void FSETP_reg(u64 insn); | ||
| 148 | void FSETP_cbuf(u64 insn); | ||
| 149 | void FSETP_imm(u64 insn); | ||
| 150 | void FSWZADD(u64 insn); | ||
| 151 | void GETCRSPTR(u64 insn); | ||
| 152 | void GETLMEMBASE(u64 insn); | ||
| 153 | void HADD2_reg(u64 insn); | ||
| 154 | void HADD2_cbuf(u64 insn); | ||
| 155 | void HADD2_imm(u64 insn); | ||
| 156 | void HADD2_32I(u64 insn); | ||
| 157 | void HFMA2_reg(u64 insn); | ||
| 158 | void HFMA2_rc(u64 insn); | ||
| 159 | void HFMA2_cr(u64 insn); | ||
| 160 | void HFMA2_imm(u64 insn); | ||
| 161 | void HFMA2_32I(u64 insn); | ||
| 162 | void HMUL2_reg(u64 insn); | ||
| 163 | void HMUL2_cbuf(u64 insn); | ||
| 164 | void HMUL2_imm(u64 insn); | ||
| 165 | void HMUL2_32I(u64 insn); | ||
| 166 | void HSET2_reg(u64 insn); | ||
| 167 | void HSET2_cbuf(u64 insn); | ||
| 168 | void HSET2_imm(u64 insn); | ||
| 169 | void HSETP2_reg(u64 insn); | ||
| 170 | void HSETP2_cbuf(u64 insn); | ||
| 171 | void HSETP2_imm(u64 insn); | ||
| 172 | void I2F_reg(u64 insn); | ||
| 173 | void I2F_cbuf(u64 insn); | ||
| 174 | void I2F_imm(u64 insn); | ||
| 175 | void I2I_reg(u64 insn); | ||
| 176 | void I2I_cbuf(u64 insn); | ||
| 177 | void I2I_imm(u64 insn); | ||
| 178 | void IADD_reg(u64 insn); | ||
| 179 | void IADD_cbuf(u64 insn); | ||
| 180 | void IADD_imm(u64 insn); | ||
| 181 | void IADD3_reg(u64 insn); | ||
| 182 | void IADD3_cbuf(u64 insn); | ||
| 183 | void IADD3_imm(u64 insn); | ||
| 184 | void IADD32I(u64 insn); | ||
| 185 | void ICMP_reg(u64 insn); | ||
| 186 | void ICMP_rc(u64 insn); | ||
| 187 | void ICMP_cr(u64 insn); | ||
| 188 | void ICMP_imm(u64 insn); | ||
| 189 | void IDE(u64 insn); | ||
| 190 | void IDP_reg(u64 insn); | ||
| 191 | void IDP_imm(u64 insn); | ||
| 192 | void IMAD_reg(u64 insn); | ||
| 193 | void IMAD_rc(u64 insn); | ||
| 194 | void IMAD_cr(u64 insn); | ||
| 195 | void IMAD_imm(u64 insn); | ||
| 196 | void IMAD32I(u64 insn); | ||
| 197 | void IMADSP_reg(u64 insn); | ||
| 198 | void IMADSP_rc(u64 insn); | ||
| 199 | void IMADSP_cr(u64 insn); | ||
| 200 | void IMADSP_imm(u64 insn); | ||
| 201 | void IMNMX_reg(u64 insn); | ||
| 202 | void IMNMX_cbuf(u64 insn); | ||
| 203 | void IMNMX_imm(u64 insn); | ||
| 204 | void IMUL_reg(u64 insn); | ||
| 205 | void IMUL_cbuf(u64 insn); | ||
| 206 | void IMUL_imm(u64 insn); | ||
| 207 | void IMUL32I(u64 insn); | ||
| 208 | void IPA(u64 insn); | ||
| 209 | void ISBERD(u64 insn); | ||
| 210 | void ISCADD_reg(u64 insn); | ||
| 211 | void ISCADD_cbuf(u64 insn); | ||
| 212 | void ISCADD_imm(u64 insn); | ||
| 213 | void ISCADD32I(u64 insn); | ||
| 214 | void ISET_reg(u64 insn); | ||
| 215 | void ISET_cbuf(u64 insn); | ||
| 216 | void ISET_imm(u64 insn); | ||
| 217 | void ISETP_reg(u64 insn); | ||
| 218 | void ISETP_cbuf(u64 insn); | ||
| 219 | void ISETP_imm(u64 insn); | ||
| 220 | void JCAL(u64 insn); | ||
| 221 | void JMP(u64 insn); | ||
| 222 | void JMX(u64 insn); | ||
| 223 | void KIL(); | ||
| 224 | void LD(u64 insn); | ||
| 225 | void LDC(u64 insn); | ||
| 226 | void LDG(u64 insn); | ||
| 227 | void LDL(u64 insn); | ||
| 228 | void LDS(u64 insn); | ||
| 229 | void LEA_hi_reg(u64 insn); | ||
| 230 | void LEA_hi_cbuf(u64 insn); | ||
| 231 | void LEA_lo_reg(u64 insn); | ||
| 232 | void LEA_lo_cbuf(u64 insn); | ||
| 233 | void LEA_lo_imm(u64 insn); | ||
| 234 | void LEPC(u64 insn); | ||
| 235 | void LONGJMP(u64 insn); | ||
| 236 | void LOP_reg(u64 insn); | ||
| 237 | void LOP_cbuf(u64 insn); | ||
| 238 | void LOP_imm(u64 insn); | ||
| 239 | void LOP3_reg(u64 insn); | ||
| 240 | void LOP3_cbuf(u64 insn); | ||
| 241 | void LOP3_imm(u64 insn); | ||
| 242 | void LOP32I(u64 insn); | ||
| 243 | void MEMBAR(u64 insn); | ||
| 244 | void MOV_reg(u64 insn); | ||
| 245 | void MOV_cbuf(u64 insn); | ||
| 246 | void MOV_imm(u64 insn); | ||
| 247 | void MOV32I(u64 insn); | ||
| 248 | void MUFU(u64 insn); | ||
| 249 | void NOP(u64 insn); | ||
| 250 | void OUT_reg(u64 insn); | ||
| 251 | void OUT_cbuf(u64 insn); | ||
| 252 | void OUT_imm(u64 insn); | ||
| 253 | void P2R_reg(u64 insn); | ||
| 254 | void P2R_cbuf(u64 insn); | ||
| 255 | void P2R_imm(u64 insn); | ||
| 256 | void PBK(); | ||
| 257 | void PCNT(); | ||
| 258 | void PEXIT(u64 insn); | ||
| 259 | void PIXLD(u64 insn); | ||
| 260 | void PLONGJMP(u64 insn); | ||
| 261 | void POPC_reg(u64 insn); | ||
| 262 | void POPC_cbuf(u64 insn); | ||
| 263 | void POPC_imm(u64 insn); | ||
| 264 | void PRET(u64 insn); | ||
| 265 | void PRMT_reg(u64 insn); | ||
| 266 | void PRMT_rc(u64 insn); | ||
| 267 | void PRMT_cr(u64 insn); | ||
| 268 | void PRMT_imm(u64 insn); | ||
| 269 | void PSET(u64 insn); | ||
| 270 | void PSETP(u64 insn); | ||
| 271 | void R2B(u64 insn); | ||
| 272 | void R2P_reg(u64 insn); | ||
| 273 | void R2P_cbuf(u64 insn); | ||
| 274 | void R2P_imm(u64 insn); | ||
| 275 | void RAM(u64 insn); | ||
| 276 | void RED(u64 insn); | ||
| 277 | void RET(u64 insn); | ||
| 278 | void RRO_reg(u64 insn); | ||
| 279 | void RRO_cbuf(u64 insn); | ||
| 280 | void RRO_imm(u64 insn); | ||
| 281 | void RTT(u64 insn); | ||
| 282 | void S2R(u64 insn); | ||
| 283 | void SAM(u64 insn); | ||
| 284 | void SEL_reg(u64 insn); | ||
| 285 | void SEL_cbuf(u64 insn); | ||
| 286 | void SEL_imm(u64 insn); | ||
| 287 | void SETCRSPTR(u64 insn); | ||
| 288 | void SETLMEMBASE(u64 insn); | ||
| 289 | void SHF_l_reg(u64 insn); | ||
| 290 | void SHF_l_imm(u64 insn); | ||
| 291 | void SHF_r_reg(u64 insn); | ||
| 292 | void SHF_r_imm(u64 insn); | ||
| 293 | void SHFL(u64 insn); | ||
| 294 | void SHL_reg(u64 insn); | ||
| 295 | void SHL_cbuf(u64 insn); | ||
| 296 | void SHL_imm(u64 insn); | ||
| 297 | void SHR_reg(u64 insn); | ||
| 298 | void SHR_cbuf(u64 insn); | ||
| 299 | void SHR_imm(u64 insn); | ||
| 300 | void SSY(); | ||
| 301 | void ST(u64 insn); | ||
| 302 | void STG(u64 insn); | ||
| 303 | void STL(u64 insn); | ||
| 304 | void STP(u64 insn); | ||
| 305 | void STS(u64 insn); | ||
| 306 | void SUATOM(u64 insn); | ||
| 307 | void SUATOM_cas(u64 insn); | ||
| 308 | void SULD(u64 insn); | ||
| 309 | void SURED(u64 insn); | ||
| 310 | void SUST(u64 insn); | ||
| 311 | void SYNC(u64 insn); | ||
| 312 | void TEX(u64 insn); | ||
| 313 | void TEX_b(u64 insn); | ||
| 314 | void TEXS(u64 insn); | ||
| 315 | void TLD(u64 insn); | ||
| 316 | void TLD_b(u64 insn); | ||
| 317 | void TLD4(u64 insn); | ||
| 318 | void TLD4_b(u64 insn); | ||
| 319 | void TLD4S(u64 insn); | ||
| 320 | void TLDS(u64 insn); | ||
| 321 | void TMML(u64 insn); | ||
| 322 | void TMML_b(u64 insn); | ||
| 323 | void TXA(u64 insn); | ||
| 324 | void TXD(u64 insn); | ||
| 325 | void TXD_b(u64 insn); | ||
| 326 | void TXQ(u64 insn); | ||
| 327 | void TXQ_b(u64 insn); | ||
| 328 | void VABSDIFF(u64 insn); | ||
| 329 | void VABSDIFF4(u64 insn); | ||
| 330 | void VADD(u64 insn); | ||
| 331 | void VMAD(u64 insn); | ||
| 332 | void VMNMX(u64 insn); | ||
| 333 | void VOTE(u64 insn); | ||
| 334 | void VOTE_vtg(u64 insn); | ||
| 335 | void VSET(u64 insn); | ||
| 336 | void VSETP(u64 insn); | ||
| 337 | void VSHL(u64 insn); | ||
| 338 | void VSHR(u64 insn); | ||
| 339 | void XMAD_reg(u64 insn); | ||
| 340 | void XMAD_rc(u64 insn); | ||
| 341 | void XMAD_cr(u64 insn); | ||
| 342 | void XMAD_imm(u64 insn); | ||
| 343 | |||
| 344 | [[nodiscard]] IR::U32 X(IR::Reg reg); | ||
| 345 | [[nodiscard]] IR::U64 L(IR::Reg reg); | ||
| 346 | [[nodiscard]] IR::F32 F(IR::Reg reg); | ||
| 347 | [[nodiscard]] IR::F64 D(IR::Reg reg); | ||
| 348 | |||
| 349 | void X(IR::Reg dest_reg, const IR::U32& value); | ||
| 350 | void L(IR::Reg dest_reg, const IR::U64& value); | ||
| 351 | void F(IR::Reg dest_reg, const IR::F32& value); | ||
| 352 | void D(IR::Reg dest_reg, const IR::F64& value); | ||
| 353 | |||
| 354 | [[nodiscard]] IR::U32 GetReg8(u64 insn); | ||
| 355 | [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||
| 356 | [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||
| 357 | [[nodiscard]] IR::F32 GetFloatReg8(u64 insn); | ||
| 358 | [[nodiscard]] IR::F32 GetFloatReg20(u64 insn); | ||
| 359 | [[nodiscard]] IR::F32 GetFloatReg39(u64 insn); | ||
| 360 | [[nodiscard]] IR::F64 GetDoubleReg20(u64 insn); | ||
| 361 | [[nodiscard]] IR::F64 GetDoubleReg39(u64 insn); | ||
| 362 | |||
| 363 | [[nodiscard]] IR::U32 GetCbuf(u64 insn); | ||
| 364 | [[nodiscard]] IR::F32 GetFloatCbuf(u64 insn); | ||
| 365 | [[nodiscard]] IR::F64 GetDoubleCbuf(u64 insn); | ||
| 366 | [[nodiscard]] IR::U64 GetPackedCbuf(u64 insn); | ||
| 367 | |||
| 368 | [[nodiscard]] IR::U32 GetImm20(u64 insn); | ||
| 369 | [[nodiscard]] IR::F32 GetFloatImm20(u64 insn); | ||
| 370 | [[nodiscard]] IR::F64 GetDoubleImm20(u64 insn); | ||
| 371 | [[nodiscard]] IR::U64 GetPackedImm20(u64 insn); | ||
| 372 | |||
| 373 | [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||
| 374 | [[nodiscard]] IR::F32 GetFloatImm32(u64 insn); | ||
| 375 | |||
| 376 | void SetZFlag(const IR::U1& value); | ||
| 377 | void SetSFlag(const IR::U1& value); | ||
| 378 | void SetCFlag(const IR::U1& value); | ||
| 379 | void SetOFlag(const IR::U1& value); | ||
| 380 | |||
| 381 | void ResetZero(); | ||
| 382 | void ResetSFlag(); | ||
| 383 | void ResetCFlag(); | ||
| 384 | void ResetOFlag(); | ||
| 385 | }; | ||
| 386 | |||
| 387 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp new file mode 100644 index 000000000..8ffd84867 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add.cpp | |||
| @@ -0,0 +1,105 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, | ||
| 12 | bool cc) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_a; | ||
| 17 | } const iadd{insn}; | ||
| 18 | |||
| 19 | if (sat) { | ||
| 20 | throw NotImplementedException("IADD SAT"); | ||
| 21 | } | ||
| 22 | if (x && po) { | ||
| 23 | throw NotImplementedException("IADD X+PO"); | ||
| 24 | } | ||
| 25 | // Operand A is always read from here, negated if needed | ||
| 26 | IR::U32 op_a{v.X(iadd.src_a)}; | ||
| 27 | if (neg_a) { | ||
| 28 | op_a = v.ir.INeg(op_a); | ||
| 29 | } | ||
| 30 | // Add both operands | ||
| 31 | IR::U32 result{v.ir.IAdd(op_a, op_b)}; | ||
| 32 | if (x) { | ||
| 33 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 34 | result = v.ir.IAdd(result, carry); | ||
| 35 | } | ||
| 36 | if (po) { | ||
| 37 | // .PO adds one to the result | ||
| 38 | result = v.ir.IAdd(result, v.ir.Imm32(1)); | ||
| 39 | } | ||
| 40 | if (cc) { | ||
| 41 | // Store flags | ||
| 42 | // TODO: Does this grab the result pre-PO or after? | ||
| 43 | if (po) { | ||
| 44 | throw NotImplementedException("IADD CC+PO"); | ||
| 45 | } | ||
| 46 | // TODO: How does CC behave when X is set? | ||
| 47 | if (x) { | ||
| 48 | throw NotImplementedException("IADD X+CC"); | ||
| 49 | } | ||
| 50 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 51 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 52 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 53 | v.SetOFlag(v.ir.GetOverflowFromOp(result)); | ||
| 54 | } | ||
| 55 | // Store result | ||
| 56 | v.X(iadd.dest_reg, result); | ||
| 57 | } | ||
| 58 | |||
| 59 | void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 60 | union { | ||
| 61 | u64 insn; | ||
| 62 | BitField<43, 1, u64> x; | ||
| 63 | BitField<47, 1, u64> cc; | ||
| 64 | BitField<48, 2, u64> three_for_po; | ||
| 65 | BitField<48, 1, u64> neg_b; | ||
| 66 | BitField<49, 1, u64> neg_a; | ||
| 67 | BitField<50, 1, u64> sat; | ||
| 68 | } const iadd{insn}; | ||
| 69 | |||
| 70 | const bool po{iadd.three_for_po == 3}; | ||
| 71 | if (!po && iadd.neg_b != 0) { | ||
| 72 | op_b = v.ir.INeg(op_b); | ||
| 73 | } | ||
| 74 | IADD(v, insn, op_b, iadd.neg_a != 0, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||
| 75 | } | ||
| 76 | } // Anonymous namespace | ||
| 77 | |||
| 78 | void TranslatorVisitor::IADD_reg(u64 insn) { | ||
| 79 | IADD(*this, insn, GetReg20(insn)); | ||
| 80 | } | ||
| 81 | |||
| 82 | void TranslatorVisitor::IADD_cbuf(u64 insn) { | ||
| 83 | IADD(*this, insn, GetCbuf(insn)); | ||
| 84 | } | ||
| 85 | |||
| 86 | void TranslatorVisitor::IADD_imm(u64 insn) { | ||
| 87 | IADD(*this, insn, GetImm20(insn)); | ||
| 88 | } | ||
| 89 | |||
| 90 | void TranslatorVisitor::IADD32I(u64 insn) { | ||
| 91 | union { | ||
| 92 | u64 raw; | ||
| 93 | BitField<52, 1, u64> cc; | ||
| 94 | BitField<53, 1, u64> x; | ||
| 95 | BitField<54, 1, u64> sat; | ||
| 96 | BitField<55, 2, u64> three_for_po; | ||
| 97 | BitField<56, 1, u64> neg_a; | ||
| 98 | } const iadd32i{insn}; | ||
| 99 | |||
| 100 | const bool po{iadd32i.three_for_po == 3}; | ||
| 101 | const bool neg_a{!po && iadd32i.neg_a != 0}; | ||
| 102 | IADD(*this, insn, GetImm32(insn), neg_a, po, iadd32i.sat != 0, iadd32i.x != 0, iadd32i.cc != 0); | ||
| 103 | } | ||
| 104 | |||
| 105 | } // namespace Shader::Maxwell | ||
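
Note: a minimal scalar sketch of the IADD data path lowered above, mirroring the order in the code (negate A, add, consume the carry flag for .X, then add one for .PO). op_b is assumed to be already negated by the caller when NEG_B is set without .PO, as in the wrapper above. The helper name and plain integer state are illustrative; the translator emits IR operations rather than computing values.

    #include <cstdint>

    uint32_t IaddModel(uint32_t op_a, uint32_t op_b, bool neg_a, bool po, bool x, bool carry_in) {
        if (neg_a) {
            op_a = 0u - op_a;              // INeg on operand A
        }
        uint32_t result = op_a + op_b;     // IAdd
        if (x) {
            result += carry_in ? 1u : 0u;  // extended add consumes the previous carry flag
        }
        if (po) {
            result += 1u;                  // .PO adds one to the result
        }
        return result;
    }
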
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp new file mode 100644 index 000000000..040cfc10f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Shift : u64 { | ||
| 12 | None, | ||
| 13 | Right, | ||
| 14 | Left, | ||
| 15 | }; | ||
| 16 | enum class Half : u64 { | ||
| 17 | All, | ||
| 18 | Lower, | ||
| 19 | Upper, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) { | ||
| 23 | constexpr bool is_signed{false}; | ||
| 24 | switch (half) { | ||
| 25 | case Half::All: | ||
| 26 | return value; | ||
| 27 | case Half::Lower: | ||
| 28 | return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed); | ||
| 29 | case Half::Upper: | ||
| 30 | return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed); | ||
| 31 | } | ||
| 32 | throw NotImplementedException("Invalid half"); | ||
| 33 | } | ||
| 34 | |||
| 35 | [[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) { | ||
| 36 | switch (shift) { | ||
| 37 | case Shift::None: | ||
| 38 | return value; | ||
| 39 | case Shift::Right: { | ||
| 40 | // 33-bit RS IADD3 edge case | ||
| 41 | const IR::U1 edge_case{ir.GetCarryFromOp(value)}; | ||
| 42 | const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))}; | ||
| 43 | return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)}; | ||
| 44 | } | ||
| 45 | case Shift::Left: | ||
| 46 | return ir.ShiftLeftLogical(value, ir.Imm32(16)); | ||
| 47 | } | ||
| 48 | throw NotImplementedException("Invalid shift"); | ||
| 49 | } | ||
| 50 | |||
| 51 | void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c, | ||
| 52 | Shift shift = Shift::None) { | ||
| 53 | union { | ||
| 54 | u64 insn; | ||
| 55 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 56 | BitField<47, 1, u64> cc; | ||
| 57 | BitField<48, 1, u64> x; | ||
| 58 | BitField<49, 1, u64> neg_c; | ||
| 59 | BitField<50, 1, u64> neg_b; | ||
| 60 | BitField<51, 1, u64> neg_a; | ||
| 61 | } iadd3{insn}; | ||
| 62 | |||
| 63 | if (iadd3.neg_a != 0) { | ||
| 64 | op_a = v.ir.INeg(op_a); | ||
| 65 | } | ||
| 66 | if (iadd3.neg_b != 0) { | ||
| 67 | op_b = v.ir.INeg(op_b); | ||
| 68 | } | ||
| 69 | if (iadd3.neg_c != 0) { | ||
| 70 | op_c = v.ir.INeg(op_c); | ||
| 71 | } | ||
| 72 | IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)}; | ||
| 73 | if (iadd3.x != 0) { | ||
| 74 | // TODO: How does RS behave when X is set? | ||
| 75 | if (shift == Shift::Right) { | ||
| 76 | throw NotImplementedException("IADD3 X+RS"); | ||
| 77 | } | ||
| 78 | const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||
| 79 | lhs_1 = v.ir.IAdd(lhs_1, carry); | ||
| 80 | } | ||
| 81 | const IR::U32 lhs_2{IntegerShift(v.ir, lhs_1, shift)}; | ||
| 82 | const IR::U32 result{v.ir.IAdd(lhs_2, op_c)}; | ||
| 83 | |||
| 84 | v.X(iadd3.dest_reg, result); | ||
| 85 | if (iadd3.cc != 0) { | ||
| 86 | // TODO: How does CC behave when X is set? | ||
| 87 | if (iadd3.x != 0) { | ||
| 88 | throw NotImplementedException("IADD3 X+CC"); | ||
| 89 | } | ||
| 90 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 91 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 92 | v.SetCFlag(v.ir.GetCarryFromOp(result)); | ||
| 93 | const IR::U1 of_1{v.ir.ILessThan(lhs_1, op_a, false)}; | ||
| 94 | v.SetOFlag(v.ir.LogicalOr(v.ir.GetOverflowFromOp(result), of_1)); | ||
| 95 | } | ||
| 96 | } | ||
| 97 | } // Anonymous namespace | ||
| 98 | |||
| 99 | void TranslatorVisitor::IADD3_reg(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 insn; | ||
| 102 | BitField<37, 2, Shift> shift; | ||
| 103 | BitField<35, 2, Half> half_a; | ||
| 104 | BitField<33, 2, Half> half_b; | ||
| 105 | BitField<31, 2, Half> half_c; | ||
| 106 | } const iadd3{insn}; | ||
| 107 | |||
| 108 | const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)}; | ||
| 109 | const auto op_b{IntegerHalf(ir, GetReg20(insn), iadd3.half_b)}; | ||
| 110 | const auto op_c{IntegerHalf(ir, GetReg39(insn), iadd3.half_c)}; | ||
| 111 | IADD3(*this, insn, op_a, op_b, op_c, iadd3.shift); | ||
| 112 | } | ||
| 113 | |||
| 114 | void TranslatorVisitor::IADD3_cbuf(u64 insn) { | ||
| 115 | IADD3(*this, insn, GetReg8(insn), GetCbuf(insn), GetReg39(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::IADD3_imm(u64 insn) { | ||
| 119 | IADD3(*this, insn, GetReg8(insn), GetImm20(insn), GetReg39(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | } // namespace Shader::Maxwell | ||
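
Note: a scalar sketch of the IADD3 evaluation order shown above: per-operand half selection, a first add, the optional .RS/.LS 16-bit shift between the two stages, then the add of C. Negation, the X carry-in and the 33-bit .RS carry fixup are omitted; names are illustrative.

    #include <cstdint>

    // 0 = all 32 bits, 1 = lower half, 2 = upper half (mirrors IntegerHalf above)
    uint32_t Half(uint32_t v, int half) {
        return half == 0 ? v : (half == 1 ? (v & 0xffffu) : (v >> 16));
    }

    // 0 = no shift, 1 = .RS, 2 = .LS (mirrors IntegerShift above)
    uint32_t Iadd3Model(uint32_t a, uint32_t b, uint32_t c, int shift) {
        uint32_t lhs = a + b;      // first stage
        if (shift == 1) {
            lhs >>= 16;            // .RS (33-bit carry fixup not modelled here)
        } else if (shift == 2) {
            lhs <<= 16;            // .LS
        }
        return lhs + c;            // second stage
    }
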
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp new file mode 100644 index 000000000..ba6e01926 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare.cpp | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | void ICMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::U32& operand) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<48, 1, u64> is_signed; | ||
| 18 | BitField<49, 3, CompareOp> compare_op; | ||
| 19 | } const icmp{insn}; | ||
| 20 | |||
| 21 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 22 | const bool is_signed{icmp.is_signed != 0}; | ||
| 23 | const IR::U1 cmp_result{IntegerCompare(v.ir, operand, zero, icmp.compare_op, is_signed)}; | ||
| 24 | |||
| 25 | const IR::U32 src_reg{v.X(icmp.src_reg)}; | ||
| 26 | const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)}; | ||
| 27 | |||
| 28 | v.X(icmp.dest_reg, result); | ||
| 29 | } | ||
| 30 | } // Anonymous namespace | ||
| 31 | |||
| 32 | void TranslatorVisitor::ICMP_reg(u64 insn) { | ||
| 33 | ICMP(*this, insn, GetReg20(insn), GetReg39(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::ICMP_rc(u64 insn) { | ||
| 37 | ICMP(*this, insn, GetReg39(insn), GetCbuf(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::ICMP_cr(u64 insn) { | ||
| 41 | ICMP(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::ICMP_imm(u64 insn) { | ||
| 45 | ICMP(*this, insn, GetImm20(insn), GetReg39(insn)); | ||
| 46 | } | ||
| 47 | |||
| 48 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp new file mode 100644 index 000000000..8ce1aee04 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_compare_and_set.cpp | |||
| @@ -0,0 +1,80 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISET(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||
| 19 | union { | ||
| 20 | u64 insn; | ||
| 21 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 22 | BitField<8, 8, IR::Reg> src_reg; | ||
| 23 | BitField<39, 3, IR::Pred> pred; | ||
| 24 | BitField<42, 1, u64> neg_pred; | ||
| 25 | BitField<43, 1, u64> x; | ||
| 26 | BitField<44, 1, u64> bf; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<47, 1, u64> cc; | ||
| 29 | BitField<48, 1, u64> is_signed; | ||
| 30 | BitField<49, 3, CompareOp> compare_op; | ||
| 31 | } const iset{insn}; | ||
| 32 | |||
| 33 | const IR::U32 src_a{v.X(iset.src_reg)}; | ||
| 34 | const bool is_signed{iset.is_signed != 0}; | ||
| 35 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 36 | const bool x{iset.x != 0}; | ||
| 37 | const IR::U1 cmp_result{IsetCompare(v.ir, src_a, src_b, iset.compare_op, is_signed, x)}; | ||
| 38 | |||
| 39 | IR::U1 pred{v.ir.GetPred(iset.pred)}; | ||
| 40 | if (iset.neg_pred != 0) { | ||
| 41 | pred = v.ir.LogicalNot(pred); | ||
| 42 | } | ||
| 43 | const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, iset.bop)}; | ||
| 44 | |||
| 45 | const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||
| 46 | const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||
| 47 | const IR::U32 pass_result{iset.bf == 0 ? one_mask : fp_one}; | ||
| 48 | const IR::U32 result{v.ir.Select(bop_result, pass_result, zero)}; | ||
| 49 | |||
| 50 | v.X(iset.dest_reg, result); | ||
| 51 | if (iset.cc != 0) { | ||
| 52 | if (x) { | ||
| 53 | throw NotImplementedException("ISET.CC + X"); | ||
| 54 | } | ||
| 55 | const IR::U1 is_zero{v.ir.IEqual(result, zero)}; | ||
| 56 | v.SetZFlag(is_zero); | ||
| 57 | if (iset.bf != 0) { | ||
| 58 | v.ResetSFlag(); | ||
| 59 | } else { | ||
| 60 | v.SetSFlag(v.ir.LogicalNot(is_zero)); | ||
| 61 | } | ||
| 62 | v.ResetCFlag(); | ||
| 63 | v.ResetOFlag(); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | } // Anonymous namespace | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISET_reg(u64 insn) { | ||
| 69 | ISET(*this, insn, GetReg20(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISET_cbuf(u64 insn) { | ||
| 73 | ISET(*this, insn, GetCbuf(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISET_imm(u64 insn) { | ||
| 77 | ISET(*this, insn, GetImm20(insn)); | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace Shader::Maxwell | ||
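
Note: the result selection in ISET above reduces to picking between zero and a pass value that is either an all-ones integer mask or the bit pattern of 1.0f, depending on .BF. A scalar sketch with an illustrative name:

    #include <cstdint>

    uint32_t IsetResult(bool comparison_and_pred, bool bf) {
        const uint32_t pass = bf ? 0x3f800000u /* 1.0f */ : 0xffffffffu;
        return comparison_and_pred ? pass : 0u;
    }
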
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp new file mode 100644 index 000000000..0b8119ddd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class FloatFormat : u64 { | ||
| 13 | F16 = 1, | ||
| 14 | F32 = 2, | ||
| 15 | F64 = 3, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class IntFormat : u64 { | ||
| 19 | U8 = 0, | ||
| 20 | U16 = 1, | ||
| 21 | U32 = 2, | ||
| 22 | U64 = 3, | ||
| 23 | }; | ||
| 24 | |||
| 25 | union Encoding { | ||
| 26 | u64 raw; | ||
| 27 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 28 | BitField<8, 2, FloatFormat> float_format; | ||
| 29 | BitField<10, 2, IntFormat> int_format; | ||
| 30 | BitField<13, 1, u64> is_signed; | ||
| 31 | BitField<39, 2, FpRounding> fp_rounding; | ||
| 32 | BitField<41, 2, u64> selector; | ||
| 33 | BitField<47, 1, u64> cc; | ||
| 34 | BitField<45, 1, u64> neg; | ||
| 35 | BitField<49, 1, u64> abs; | ||
| 36 | }; | ||
| 37 | |||
| 38 | bool Is64(u64 insn) { | ||
| 39 | return Encoding{insn}.int_format == IntFormat::U64; | ||
| 40 | } | ||
| 41 | |||
| 42 | int BitSize(FloatFormat format) { | ||
| 43 | switch (format) { | ||
| 44 | case FloatFormat::F16: | ||
| 45 | return 16; | ||
| 46 | case FloatFormat::F32: | ||
| 47 | return 32; | ||
| 48 | case FloatFormat::F64: | ||
| 49 | return 64; | ||
| 50 | } | ||
| 51 | throw NotImplementedException("Invalid float format {}", format); | ||
| 52 | } | ||
| 53 | |||
| 54 | IR::U32 SmallAbs(TranslatorVisitor& v, const IR::U32& value, int bitsize) { | ||
| 55 | const IR::U32 least_value{v.ir.Imm32(-(1 << (bitsize - 1)))}; | ||
| 56 | const IR::U32 mask{v.ir.ShiftRightArithmetic(value, v.ir.Imm32(bitsize - 1))}; | ||
| 57 | const IR::U32 absolute{v.ir.BitwiseXor(v.ir.IAdd(value, mask), mask)}; | ||
| 58 | const IR::U1 is_least{v.ir.IEqual(value, least_value)}; | ||
| 59 | return IR::U32{v.ir.Select(is_least, value, absolute)}; | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) { | ||
| 63 | const Encoding i2f{insn}; | ||
| 64 | if (i2f.cc != 0) { | ||
| 65 | throw NotImplementedException("I2F CC"); | ||
| 66 | } | ||
| 67 | const bool is_signed{i2f.is_signed != 0}; | ||
| 68 | int src_bitsize{}; | ||
| 69 | switch (i2f.int_format) { | ||
| 70 | case IntFormat::U8: | ||
| 71 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 72 | v.ir.Imm32(8), is_signed); | ||
| 73 | if (i2f.abs != 0) { | ||
| 74 | src = SmallAbs(v, src, 8); | ||
| 75 | } | ||
| 76 | src_bitsize = 8; | ||
| 77 | break; | ||
| 78 | case IntFormat::U16: | ||
| 79 | if (i2f.selector == 1 || i2f.selector == 3) { | ||
| 80 | throw NotImplementedException("Invalid U16 selector {}", i2f.selector.Value()); | ||
| 81 | } | ||
| 82 | src = v.ir.BitFieldExtract(src, v.ir.Imm32(static_cast<u32>(i2f.selector) * 8), | ||
| 83 | v.ir.Imm32(16), is_signed); | ||
| 84 | if (i2f.abs != 0) { | ||
| 85 | src = SmallAbs(v, src, 16); | ||
| 86 | } | ||
| 87 | src_bitsize = 16; | ||
| 88 | break; | ||
| 89 | case IntFormat::U32: | ||
| 90 | case IntFormat::U64: | ||
| 91 | if (i2f.selector != 0) { | ||
| 92 | throw NotImplementedException("Unexpected selector {}", i2f.selector.Value()); | ||
| 93 | } | ||
| 94 | if (i2f.abs != 0 && is_signed) { | ||
| 95 | src = v.ir.IAbs(src); | ||
| 96 | } | ||
| 97 | src_bitsize = i2f.int_format == IntFormat::U64 ? 64 : 32; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | const int conversion_src_bitsize{i2f.int_format == IntFormat::U64 ? 64 : 32}; | ||
| 101 | const int dst_bitsize{BitSize(i2f.float_format)}; | ||
| 102 | const IR::FpControl fp_control{ | ||
| 103 | .no_contraction = false, | ||
| 104 | .rounding = CastFpRounding(i2f.fp_rounding), | ||
| 105 | .fmz_mode = IR::FmzMode::DontCare, | ||
| 106 | }; | ||
| 107 | auto value{v.ir.ConvertIToF(static_cast<size_t>(dst_bitsize), | ||
| 108 | static_cast<size_t>(conversion_src_bitsize), is_signed, src, | ||
| 109 | fp_control)}; | ||
| 110 | if (i2f.neg != 0) { | ||
| 111 | if (i2f.abs != 0 || !is_signed) { | ||
| 112 | // We know the value is positive | ||
| 113 | value = v.ir.FPNeg(value); | ||
| 114 | } else { | ||
| 115 | // Only negate if the input isn't the lowest value | ||
| 116 | IR::U1 is_least; | ||
| 117 | if (src_bitsize == 64) { | ||
| 118 | is_least = v.ir.IEqual(src, v.ir.Imm64(std::numeric_limits<s64>::min())); | ||
| 119 | } else if (src_bitsize == 32) { | ||
| 120 | is_least = v.ir.IEqual(src, v.ir.Imm32(std::numeric_limits<s32>::min())); | ||
| 121 | } else { | ||
| 122 | const IR::U32 least_value{v.ir.Imm32(-(1 << (src_bitsize - 1)))}; | ||
| 123 | is_least = v.ir.IEqual(src, least_value); | ||
| 124 | } | ||
| 125 | value = IR::F16F32F64{v.ir.Select(is_least, value, v.ir.FPNeg(value))}; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | switch (i2f.float_format) { | ||
| 129 | case FloatFormat::F16: { | ||
| 130 | const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))}; | ||
| 131 | v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero))); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | case FloatFormat::F32: | ||
| 135 | v.F(i2f.dest_reg, value); | ||
| 136 | break; | ||
| 137 | case FloatFormat::F64: { | ||
| 138 | if (!IR::IsAligned(i2f.dest_reg, 2)) { | ||
| 139 | throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value()); | ||
| 140 | } | ||
| 141 | const IR::Value vector{v.ir.UnpackDouble2x32(value)}; | ||
| 142 | for (int i = 0; i < 2; ++i) { | ||
| 143 | v.X(i2f.dest_reg + i, IR::U32{v.ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 144 | } | ||
| 145 | break; | ||
| 146 | } | ||
| 147 | default: | ||
| 148 | throw NotImplementedException("Invalid float format {}", i2f.float_format.Value()); | ||
| 149 | } | ||
| 150 | } | ||
| 151 | } // Anonymous namespace | ||
| 152 | |||
| 153 | void TranslatorVisitor::I2F_reg(u64 insn) { | ||
| 154 | if (Is64(insn)) { | ||
| 155 | union { | ||
| 156 | u64 raw; | ||
| 157 | BitField<20, 8, IR::Reg> reg; | ||
| 158 | } const value{insn}; | ||
| 159 | const IR::Value regs{ir.CompositeConstruct(ir.GetReg(value.reg), ir.GetReg(value.reg + 1))}; | ||
| 160 | I2F(*this, insn, ir.PackUint2x32(regs)); | ||
| 161 | } else { | ||
| 162 | I2F(*this, insn, GetReg20(insn)); | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | void TranslatorVisitor::I2F_cbuf(u64 insn) { | ||
| 167 | if (Is64(insn)) { | ||
| 168 | I2F(*this, insn, GetPackedCbuf(insn)); | ||
| 169 | } else { | ||
| 170 | I2F(*this, insn, GetCbuf(insn)); | ||
| 171 | } | ||
| 172 | } | ||
| 173 | |||
| 174 | void TranslatorVisitor::I2F_imm(u64 insn) { | ||
| 175 | if (Is64(insn)) { | ||
| 176 | I2F(*this, insn, GetPackedImm20(insn)); | ||
| 177 | } else { | ||
| 178 | I2F(*this, insn, GetImm20(insn)); | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
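
Note: SmallAbs above is the usual mask trick for the absolute value of a narrow signed operand, with the most negative representable value of the narrow type passed through unchanged. A scalar sketch under the assumption that value is already sign-extended, as produced by the BitFieldExtract above; the name is illustrative.

    #include <cstdint>

    int32_t SmallAbsModel(int32_t value, int bitsize) {
        const int32_t least = -(1 << (bitsize - 1));      // e.g. -128 for 8 bits
        const int32_t mask = value >> (bitsize - 1);      // arithmetic shift: 0 or -1
        const int32_t absolute = (value + mask) ^ mask;   // abs(value) for everything else
        return value == least ? value : absolute;
    }

    // SmallAbsModel(-5, 8) == 5, SmallAbsModel(-128, 8) == -128
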
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp new file mode 100644 index 000000000..5feefc0ce --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_funnel_shift.cpp | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class MaxShift : u64 { | ||
| 12 | U32, | ||
| 13 | Undefined, | ||
| 14 | U64, | ||
| 15 | S64, | ||
| 16 | }; | ||
| 17 | |||
| 18 | IR::U64 PackedShift(IR::IREmitter& ir, const IR::U64& packed_int, const IR::U32& safe_shift, | ||
| 19 | bool right_shift, bool is_signed) { | ||
| 20 | if (!right_shift) { | ||
| 21 | return ir.ShiftLeftLogical(packed_int, safe_shift); | ||
| 22 | } | ||
| 23 | if (is_signed) { | ||
| 24 | return ir.ShiftRightArithmetic(packed_int, safe_shift); | ||
| 25 | } | ||
| 26 | return ir.ShiftRightLogical(packed_int, safe_shift); | ||
| 27 | } | ||
| 28 | |||
| 29 | void SHF(TranslatorVisitor& v, u64 insn, const IR::U32& shift, const IR::U32& high_bits, | ||
| 30 | bool right_shift) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<0, 8, IR::Reg> lo_bits_reg; | ||
| 35 | BitField<37, 2, MaxShift> max_shift; | ||
| 36 | BitField<47, 1, u64> cc; | ||
| 37 | BitField<48, 2, u64> x_mode; | ||
| 38 | BitField<50, 1, u64> wrap; | ||
| 39 | } const shf{insn}; | ||
| 40 | |||
| 41 | if (shf.cc != 0) { | ||
| 42 | throw NotImplementedException("SHF CC"); | ||
| 43 | } | ||
| 44 | if (shf.x_mode != 0) { | ||
| 45 | throw NotImplementedException("SHF X Mode"); | ||
| 46 | } | ||
| 47 | if (shf.max_shift == MaxShift::Undefined) { | ||
| 48 | throw NotImplementedException("SHF Use of undefined MaxShift value"); | ||
| 49 | } | ||
| 50 | const IR::U32 low_bits{v.X(shf.lo_bits_reg)}; | ||
| 51 | const IR::U64 packed_int{v.ir.PackUint2x32(v.ir.CompositeConstruct(low_bits, high_bits))}; | ||
| 52 | const IR::U32 max_shift{shf.max_shift == MaxShift::U32 ? v.ir.Imm32(32) : v.ir.Imm32(63)}; | ||
| 53 | const IR::U32 safe_shift{shf.wrap != 0 | ||
| 54 | ? v.ir.BitwiseAnd(shift, v.ir.ISub(max_shift, v.ir.Imm32(1))) | ||
| 55 | : v.ir.UMin(shift, max_shift)}; | ||
| 56 | |||
| 57 | const bool is_signed{shf.max_shift == MaxShift::S64}; | ||
| 58 | const IR::U64 shifted_value{PackedShift(v.ir, packed_int, safe_shift, right_shift, is_signed)}; | ||
| 59 | const IR::Value unpacked_value{v.ir.UnpackUint2x32(shifted_value)}; | ||
| 60 | |||
| 61 | const IR::U32 result{v.ir.CompositeExtract(unpacked_value, right_shift ? 0 : 1)}; | ||
| 62 | v.X(shf.dest_reg, result); | ||
| 63 | } | ||
| 64 | } // Anonymous namespace | ||
| 65 | |||
| 66 | void TranslatorVisitor::SHF_l_reg(u64 insn) { | ||
| 67 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), false); | ||
| 68 | } | ||
| 69 | |||
| 70 | void TranslatorVisitor::SHF_l_imm(u64 insn) { | ||
| 71 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), false); | ||
| 72 | } | ||
| 73 | |||
| 74 | void TranslatorVisitor::SHF_r_reg(u64 insn) { | ||
| 75 | SHF(*this, insn, GetReg20(insn), GetReg39(insn), true); | ||
| 76 | } | ||
| 77 | |||
| 78 | void TranslatorVisitor::SHF_r_imm(u64 insn) { | ||
| 79 | SHF(*this, insn, GetImm20(insn), GetReg39(insn), true); | ||
| 80 | } | ||
| 81 | |||
| 82 | } // namespace Shader::Maxwell | ||
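
Note: a scalar model of the funnel shift above: the low and high 32-bit sources are packed into one 64-bit value, shifted by the already-sanitised amount, and the half of interest is written back: the low word for right shifts, the high word for left shifts. Names are illustrative.

    #include <cstdint>

    uint32_t ShfModel(uint32_t low, uint32_t high, uint32_t safe_shift, bool right, bool arithmetic) {
        const uint64_t packed = (uint64_t{high} << 32) | low;
        uint64_t shifted;
        if (!right) {
            shifted = packed << safe_shift;
        } else if (arithmetic) {
            shifted = static_cast<uint64_t>(static_cast<int64_t>(packed) >> safe_shift);
        } else {
            shifted = packed >> safe_shift;
        }
        return right ? static_cast<uint32_t>(shifted)        // low 32 bits for right shifts
                     : static_cast<uint32_t>(shifted >> 32); // high 32 bits for left shifts
    }
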
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp new file mode 100644 index 000000000..1badbacc4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_minimum_maximum.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void IMNMX(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg; | ||
| 16 | BitField<39, 3, IR::Pred> pred; | ||
| 17 | BitField<42, 1, u64> neg_pred; | ||
| 18 | BitField<43, 2, u64> mode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const imnmx{insn}; | ||
| 22 | |||
| 23 | if (imnmx.cc != 0) { | ||
| 24 | throw NotImplementedException("IMNMX CC"); | ||
| 25 | } | ||
| 26 | |||
| 27 | if (imnmx.mode != 0) { | ||
| 28 | throw NotImplementedException("IMNMX.MODE"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U1 pred{v.ir.GetPred(imnmx.pred)}; | ||
| 32 | const IR::U32 op_a{v.X(imnmx.src_reg)}; | ||
| 33 | IR::U32 min; | ||
| 34 | IR::U32 max; | ||
| 35 | |||
| 36 | if (imnmx.is_signed != 0) { | ||
| 37 | min = IR::U32{v.ir.SMin(op_a, op_b)}; | ||
| 38 | max = IR::U32{v.ir.SMax(op_a, op_b)}; | ||
| 39 | } else { | ||
| 40 | min = IR::U32{v.ir.UMin(op_a, op_b)}; | ||
| 41 | max = IR::U32{v.ir.UMax(op_a, op_b)}; | ||
| 42 | } | ||
| 43 | if (imnmx.neg_pred != 0) { | ||
| 44 | std::swap(min, max); | ||
| 45 | } | ||
| 46 | |||
| 47 | const IR::U32 result{v.ir.Select(pred, min, max)}; | ||
| 48 | v.X(imnmx.dest_reg, result); | ||
| 49 | } | ||
| 50 | } // Anonymous namespace | ||
| 51 | |||
| 52 | void TranslatorVisitor::IMNMX_reg(u64 insn) { | ||
| 53 | IMNMX(*this, insn, GetReg20(insn)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::IMNMX_cbuf(u64 insn) { | ||
| 57 | IMNMX(*this, insn, GetCbuf(insn)); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::IMNMX_imm(u64 insn) { | ||
| 61 | IMNMX(*this, insn, GetImm20(insn)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp new file mode 100644 index 000000000..5ece7678d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_popcount.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void POPC(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<40, 1, u64> tilde; | ||
| 16 | } const popc{insn}; | ||
| 17 | |||
| 18 | const IR::U32 operand = popc.tilde == 0 ? src : v.ir.BitwiseNot(src); | ||
| 19 | const IR::U32 result = v.ir.BitCount(operand); | ||
| 20 | v.X(popc.dest_reg, result); | ||
| 21 | } | ||
| 22 | } // Anonymous namespace | ||
| 23 | |||
| 24 | void TranslatorVisitor::POPC_reg(u64 insn) { | ||
| 25 | POPC(*this, insn, GetReg20(insn)); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::POPC_cbuf(u64 insn) { | ||
| 29 | POPC(*this, insn, GetCbuf(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::POPC_imm(u64 insn) { | ||
| 33 | POPC(*this, insn, GetImm20(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp new file mode 100644 index 000000000..044671943 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_scaled_add.cpp | |||
| @@ -0,0 +1,86 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool cc, bool neg_a, bool neg_b, | ||
| 12 | u64 scale_imm) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> op_a; | ||
| 17 | } const iscadd{insn}; | ||
| 18 | |||
| 19 | const bool po{neg_a && neg_b}; | ||
| 20 | IR::U32 op_a{v.X(iscadd.op_a)}; | ||
| 21 | if (po) { | ||
| 22 | // When PO is present, add one | ||
| 23 | op_b = v.ir.IAdd(op_b, v.ir.Imm32(1)); | ||
| 24 | } else { | ||
| 25 | // When PO is not present, the bits are interpreted as negation | ||
| 26 | if (neg_a) { | ||
| 27 | op_a = v.ir.INeg(op_a); | ||
| 28 | } | ||
| 29 | if (neg_b) { | ||
| 30 | op_b = v.ir.INeg(op_b); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | // With the operands already processed, scale A | ||
| 34 | const IR::U32 scale{v.ir.Imm32(static_cast<u32>(scale_imm))}; | ||
| 35 | const IR::U32 scaled_a{v.ir.ShiftLeftLogical(op_a, scale)}; | ||
| 36 | |||
| 37 | const IR::U32 result{v.ir.IAdd(scaled_a, op_b)}; | ||
| 38 | v.X(iscadd.dest_reg, result); | ||
| 39 | |||
| 40 | if (cc) { | ||
| 41 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 42 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 43 | const IR::U1 carry{v.ir.GetCarryFromOp(result)}; | ||
| 44 | const IR::U1 overflow{v.ir.GetOverflowFromOp(result)}; | ||
| 45 | v.SetCFlag(po ? v.ir.LogicalOr(carry, v.ir.GetCarryFromOp(op_b)) : carry); | ||
| 46 | v.SetOFlag(po ? v.ir.LogicalOr(overflow, v.ir.GetOverflowFromOp(op_b)) : overflow); | ||
| 47 | } | ||
| 48 | } | ||
| 49 | |||
| 50 | void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<47, 1, u64> cc; | ||
| 54 | BitField<48, 1, u64> neg_b; | ||
| 55 | BitField<49, 1, u64> neg_a; | ||
| 56 | BitField<39, 5, u64> scale; | ||
| 57 | } const iscadd{insn}; | ||
| 58 | |||
| 59 | ISCADD(v, insn, op_b, iscadd.cc != 0, iscadd.neg_a != 0, iscadd.neg_b != 0, iscadd.scale); | ||
| 60 | } | ||
| 61 | |||
| 62 | } // Anonymous namespace | ||
| 63 | |||
| 64 | void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||
| 65 | ISCADD(*this, insn, GetReg20(insn)); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||
| 69 | ISCADD(*this, insn, GetCbuf(insn)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||
| 73 | ISCADD(*this, insn, GetImm20(insn)); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::ISCADD32I(u64 insn) { | ||
| 77 | union { | ||
| 78 | u64 raw; | ||
| 79 | BitField<52, 1, u64> cc; | ||
| 80 | BitField<53, 5, u64> scale; | ||
| 81 | } const iscadd{insn}; | ||
| 82 | |||
| 83 | return ISCADD(*this, insn, GetImm32(insn), iscadd.cc != 0, false, false, iscadd.scale); | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace Shader::Maxwell | ||
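
Note: a scalar sketch of the scaled add above; both negation bits set select .PO (add one) instead of negating, and A is shifted by the 5-bit immediate scale only after that handling. Flag updates are omitted; names are illustrative.

    #include <cstdint>

    uint32_t IscaddModel(uint32_t a, uint32_t b, bool neg_a, bool neg_b, uint32_t scale) {
        const bool po = neg_a && neg_b;   // both bits set means "plus one", not negation
        if (po) {
            b += 1u;
        } else {
            if (neg_a) {
                a = 0u - a;
            }
            if (neg_b) {
                b = 0u - b;
            }
        }
        return (a << scale) + b;          // scale A (scale is a 5-bit field), then add B
    }
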
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp new file mode 100644 index 000000000..bee10e5b9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_set_predicate.cpp | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | IR::U1 IsetpCompare(IR::IREmitter& ir, const IR::U32& operand_1, const IR::U32& operand_2, | ||
| 13 | CompareOp compare_op, bool is_signed, bool x) { | ||
| 14 | return x ? ExtendedIntegerCompare(ir, operand_1, operand_2, compare_op, is_signed) | ||
| 15 | : IntegerCompare(ir, operand_1, operand_2, compare_op, is_signed); | ||
| 16 | } | ||
| 17 | |||
| 18 | void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 19 | union { | ||
| 20 | u64 raw; | ||
| 21 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 22 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 23 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 24 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 25 | BitField<42, 1, u64> neg_bop_pred; | ||
| 26 | BitField<43, 1, u64> x; | ||
| 27 | BitField<45, 2, BooleanOp> bop; | ||
| 28 | BitField<48, 1, u64> is_signed; | ||
| 29 | BitField<49, 3, CompareOp> compare_op; | ||
| 30 | } const isetp{insn}; | ||
| 31 | |||
| 32 | const bool is_signed{isetp.is_signed != 0}; | ||
| 33 | const bool x{isetp.x != 0}; | ||
| 34 | const BooleanOp bop{isetp.bop}; | ||
| 35 | const CompareOp compare_op{isetp.compare_op}; | ||
| 36 | const IR::U32 op_a{v.X(isetp.src_reg_a)}; | ||
| 37 | const IR::U1 comparison{IsetpCompare(v.ir, op_a, op_b, compare_op, is_signed, x)}; | ||
| 38 | const IR::U1 bop_pred{v.ir.GetPred(isetp.bop_pred, isetp.neg_bop_pred != 0)}; | ||
| 39 | const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||
| 40 | const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||
| 41 | v.ir.SetPred(isetp.dest_pred_a, result_a); | ||
| 42 | v.ir.SetPred(isetp.dest_pred_b, result_b); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::ISETP_reg(u64 insn) { | ||
| 47 | ISETP(*this, insn, GetReg20(insn)); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||
| 51 | ISETP(*this, insn, GetCbuf(insn)); | ||
| 52 | } | ||
| 53 | |||
| 54 | void TranslatorVisitor::ISETP_imm(u64 insn) { | ||
| 55 | ISETP(*this, insn, GetImm20(insn)); | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp new file mode 100644 index 000000000..20af68852 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_left.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> w; | ||
| 17 | BitField<43, 1, u64> x; | ||
| 18 | BitField<47, 1, u64> cc; | ||
| 19 | } const shl{insn}; | ||
| 20 | |||
| 21 | if (shl.x != 0) { | ||
| 22 | throw NotImplementedException("SHL.X"); | ||
| 23 | } | ||
| 24 | if (shl.cc != 0) { | ||
| 25 | throw NotImplementedException("SHL.CC"); | ||
| 26 | } | ||
| 27 | const IR::U32 base{v.X(shl.src_reg_a)}; | ||
| 28 | IR::U32 result; | ||
| 29 | if (shl.w != 0) { | ||
| 30 | // When .W is set, the shift value is wrapped | ||
| 31 | // To emulate this we just have to wrap it ourselves. | ||
| 32 | const IR::U32 shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||
| 33 | result = v.ir.ShiftLeftLogical(base, shift); | ||
| 34 | } else { | ||
| 35 | // When .W is not set, the shift value is clamped between 0 and 32. | ||
| 36 | // To emulate this we need to account for the special case of a shift by 32, which evaluates to 0. | ||
| 37 | // We can safely evaluate an out of bounds shift according to the SPIR-V specification: | ||
| 38 | // | ||
| 39 | // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||
| 40 | // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||
| 41 | // or equal to the bit width of the components of Base." | ||
| 42 | // | ||
| 43 | // And on the GLASM specification it is also safe to evaluate out of bounds: | ||
| 44 | // | ||
| 45 | // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||
| 46 | // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||
| 47 | // is negative, or greater than or equal to the number of bits in the first operand." | ||
| 48 | // | ||
| 49 | // Emphasis on undefined results in contrast to undefined behavior. | ||
| 50 | // | ||
| 51 | const IR::U1 is_safe{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||
| 52 | const IR::U32 unsafe_result{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||
| 53 | result = IR::U32{v.ir.Select(is_safe, unsafe_result, v.ir.Imm32(0))}; | ||
| 54 | } | ||
| 55 | v.X(shl.dest_reg, result); | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHL_reg(u64 insn) { | ||
| 60 | SHL(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHL_cbuf(u64 insn) { | ||
| 64 | SHL(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::SHL_imm(u64 insn) { | ||
| 68 | SHL(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
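
Note: the two SHL behaviours above collapse to a simple scalar rule: with .W the shift amount wraps modulo 32, without it any shift of 32 or more yields zero. A sketch with an illustrative name:

    #include <cstdint>

    uint32_t ShlModel(uint32_t base, uint32_t shift, bool wrap) {
        if (wrap) {
            return base << (shift & 31u);              // .W wraps the shift amount
        }
        return shift < 32u ? (base << shift) : 0u;     // out-of-range shifts produce zero
    }
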
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp new file mode 100644 index 000000000..be00bb605 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_shift_right.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void SHR(TranslatorVisitor& v, u64 insn, const IR::U32& shift) { | ||
| 12 | union { | ||
| 13 | u64 insn; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 16 | BitField<39, 1, u64> is_wrapped; | ||
| 17 | BitField<40, 1, u64> brev; | ||
| 18 | BitField<43, 1, u64> xmode; | ||
| 19 | BitField<47, 1, u64> cc; | ||
| 20 | BitField<48, 1, u64> is_signed; | ||
| 21 | } const shr{insn}; | ||
| 22 | |||
| 23 | if (shr.xmode != 0) { | ||
| 24 | throw NotImplementedException("SHR.XMODE"); | ||
| 25 | } | ||
| 26 | if (shr.cc != 0) { | ||
| 27 | throw NotImplementedException("SHR.CC"); | ||
| 28 | } | ||
| 29 | |||
| 30 | IR::U32 base{v.X(shr.src_reg_a)}; | ||
| 31 | if (shr.brev == 1) { | ||
| 32 | base = v.ir.BitReverse(base); | ||
| 33 | } | ||
| 34 | IR::U32 result; | ||
| 35 | const IR::U32 safe_shift = shr.is_wrapped == 0 ? shift : v.ir.BitwiseAnd(shift, v.ir.Imm32(31)); | ||
| 36 | if (shr.is_signed == 1) { | ||
| 37 | result = IR::U32{v.ir.ShiftRightArithmetic(base, safe_shift)}; | ||
| 38 | } else { | ||
| 39 | result = IR::U32{v.ir.ShiftRightLogical(base, safe_shift)}; | ||
| 40 | } | ||
| 41 | |||
| 42 | if (shr.is_wrapped == 0) { | ||
| 43 | const IR::U32 zero{v.ir.Imm32(0)}; | ||
| 44 | const IR::U32 safe_bits{v.ir.Imm32(32)}; | ||
| 45 | |||
| 46 | const IR::U1 is_negative{v.ir.ILessThan(result, zero, true)}; | ||
| 47 | const IR::U1 is_safe{v.ir.ILessThan(shift, safe_bits, false)}; | ||
| 48 | const IR::U32 clamped_value{v.ir.Select(is_negative, v.ir.Imm32(-1), zero)}; | ||
| 49 | result = IR::U32{v.ir.Select(is_safe, result, clamped_value)}; | ||
| 50 | } | ||
| 51 | v.X(shr.dest_reg, result); | ||
| 52 | } | ||
| 53 | } // Anonymous namespace | ||
| 54 | |||
| 55 | void TranslatorVisitor::SHR_reg(u64 insn) { | ||
| 56 | SHR(*this, insn, GetReg20(insn)); | ||
| 57 | } | ||
| 58 | |||
| 59 | void TranslatorVisitor::SHR_cbuf(u64 insn) { | ||
| 60 | SHR(*this, insn, GetCbuf(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::SHR_imm(u64 insn) { | ||
| 64 | SHR(*this, insn, GetImm20(insn)); | ||
| 65 | } | ||
| 66 | } // namespace Shader::Maxwell | ||
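
Note: a scalar model of the intended SHR result above: the shift amount either wraps (.W) or saturates, and a saturated arithmetic shift of a negative value fills with ones. .BREV and flag handling are omitted; names are illustrative.

    #include <cstdint>

    uint32_t ShrModel(uint32_t base, uint32_t shift, bool wrap, bool is_signed) {
        if (wrap) {
            shift &= 31u;                              // .W wraps the shift amount
        } else if (shift >= 32u) {
            const bool negative = is_signed && static_cast<int32_t>(base) < 0;
            return negative ? 0xffffffffu : 0u;        // saturated shift: sign fill or zero
        }
        return is_signed ? static_cast<uint32_t>(static_cast<int32_t>(base) >> shift)
                         : base >> shift;
    }
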
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp new file mode 100644 index 000000000..2932cdc42 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SelectMode : u64 { | ||
| 12 | Default, | ||
| 13 | CLO, | ||
| 14 | CHI, | ||
| 15 | CSFU, | ||
| 16 | CBCC, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class Half : u64 { | ||
| 20 | H0, // Least-significant bits (15:0) | ||
| 21 | H1, // Most-significant bits (31:16) | ||
| 22 | }; | ||
| 23 | |||
| 24 | IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||
| 25 | const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)}; | ||
| 26 | return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed); | ||
| 27 | } | ||
| 28 | |||
| 29 | void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||
| 30 | SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||
| 31 | union { | ||
| 32 | u64 raw; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 35 | BitField<47, 1, u64> cc; | ||
| 36 | BitField<48, 1, u64> is_a_signed; | ||
| 37 | BitField<49, 1, u64> is_b_signed; | ||
| 38 | BitField<53, 1, Half> half_a; | ||
| 39 | } const xmad{insn}; | ||
| 40 | |||
| 41 | if (x) { | ||
| 42 | throw NotImplementedException("XMAD X"); | ||
| 43 | } | ||
| 44 | const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||
| 45 | const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||
| 46 | |||
| 47 | IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||
| 48 | if (psl) { | ||
| 49 | // .PSL shifts the product left by 16 bits | ||
| 50 | product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||
| 51 | } | ||
| 52 | const IR::U32 op_c{[&]() -> IR::U32 { | ||
| 53 | switch (select_mode) { | ||
| 54 | case SelectMode::Default: | ||
| 55 | return src_c; | ||
| 56 | case SelectMode::CLO: | ||
| 57 | return ExtractHalf(v, src_c, Half::H0, false); | ||
| 58 | case SelectMode::CHI: | ||
| 59 | return ExtractHalf(v, src_c, Half::H1, false); | ||
| 60 | case SelectMode::CBCC: | ||
| 61 | return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); | ||
| 62 | case SelectMode::CSFU: | ||
| 63 | throw NotImplementedException("XMAD CSFU"); | ||
| 64 | } | ||
| 65 | throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||
| 66 | }()}; | ||
| 67 | IR::U32 result{v.ir.IAdd(product, op_c)}; | ||
| 68 | if (mrg) { | ||
| 69 | // .MRG inserts src_b [15:0] into result's [31:16]. | ||
| 70 | const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||
| 71 | result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||
| 72 | } | ||
| 73 | if (xmad.cc) { | ||
| 74 | throw NotImplementedException("XMAD CC"); | ||
| 75 | } | ||
| 76 | // Store result | ||
| 77 | v.X(xmad.dest_reg, result); | ||
| 78 | } | ||
| 79 | } // Anonymous namespace | ||
| 80 | |||
| 81 | void TranslatorVisitor::XMAD_reg(u64 insn) { | ||
| 82 | union { | ||
| 83 | u64 raw; | ||
| 84 | BitField<35, 1, Half> half_b; | ||
| 85 | BitField<36, 1, u64> psl; | ||
| 86 | BitField<37, 1, u64> mrg; | ||
| 87 | BitField<38, 1, u64> x; | ||
| 88 | BitField<50, 3, SelectMode> select_mode; | ||
| 89 | } const xmad{insn}; | ||
| 90 | |||
| 91 | XMAD(*this, insn, GetReg20(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 92 | xmad.mrg != 0, xmad.x != 0); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::XMAD_rc(u64 insn) { | ||
| 96 | union { | ||
| 97 | u64 raw; | ||
| 98 | BitField<50, 2, SelectMode> select_mode; | ||
| 99 | BitField<52, 1, Half> half_b; | ||
| 100 | BitField<54, 1, u64> x; | ||
| 101 | } const xmad{insn}; | ||
| 102 | |||
| 103 | XMAD(*this, insn, GetReg39(insn), GetCbuf(insn), xmad.select_mode, xmad.half_b, false, false, | ||
| 104 | xmad.x != 0); | ||
| 105 | } | ||
| 106 | |||
| 107 | void TranslatorVisitor::XMAD_cr(u64 insn) { | ||
| 108 | union { | ||
| 109 | u64 raw; | ||
| 110 | BitField<50, 2, SelectMode> select_mode; | ||
| 111 | BitField<52, 1, Half> half_b; | ||
| 112 | BitField<54, 1, u64> x; | ||
| 113 | BitField<55, 1, u64> psl; | ||
| 114 | BitField<56, 1, u64> mrg; | ||
| 115 | } const xmad{insn}; | ||
| 116 | |||
| 117 | XMAD(*this, insn, GetCbuf(insn), GetReg39(insn), xmad.select_mode, xmad.half_b, xmad.psl != 0, | ||
| 118 | xmad.mrg != 0, xmad.x != 0); | ||
| 119 | } | ||
| 120 | |||
| 121 | void TranslatorVisitor::XMAD_imm(u64 insn) { | ||
| 122 | union { | ||
| 123 | u64 raw; | ||
| 124 | BitField<20, 16, u64> src_b; | ||
| 125 | BitField<36, 1, u64> psl; | ||
| 126 | BitField<37, 1, u64> mrg; | ||
| 127 | BitField<38, 1, u64> x; | ||
| 128 | BitField<50, 3, SelectMode> select_mode; | ||
| 129 | } const xmad{insn}; | ||
| 130 | |||
| 131 | XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode, | ||
| 132 | Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0); | ||
| 133 | } | ||
| 134 | |||
| 135 | } // namespace Shader::Maxwell | ||
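
Note: a scalar sketch of the XMAD data path above: a 16x16 multiply of the selected halves, the optional .PSL shift, the C operand per select mode (only Default and CBCC shown), and the optional .MRG merge of B's low half into the high half of the result. Names are illustrative.

    #include <cstdint>

    // Picks one 16-bit half and optionally sign-extends it (mirrors ExtractHalf above)
    uint32_t Half16(uint32_t v, bool upper, bool is_signed) {
        uint32_t h = upper ? (v >> 16) : (v & 0xffffu);
        if (is_signed && (h & 0x8000u) != 0) {
            h |= 0xffff0000u;
        }
        return h;
    }

    uint32_t XmadModel(uint32_t a, uint32_t b, uint32_t c, bool half_a, bool half_b,
                       bool a_signed, bool b_signed, bool psl, bool cbcc, bool mrg) {
        uint32_t product = Half16(a, half_a, a_signed) * Half16(b, half_b, b_signed);
        if (psl) {
            product <<= 16;                               // .PSL shifts the product left
        }
        const uint32_t op_c = cbcc ? (b << 16) + c : c;   // CBCC: (B << 16) + C
        uint32_t result = product + op_c;
        if (mrg) {
            result = (result & 0xffffu) | (b << 16);      // .MRG: B[15:0] -> result[31:16]
        }
        return result;
    }
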
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp new file mode 100644 index 000000000..53e8d8923 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/integer_to_integer_conversion.cpp | |||
| @@ -0,0 +1,126 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class IntegerWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Short, | ||
| 14 | Word, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U32 WidthSize(IR::IREmitter& ir, IntegerWidth width) { | ||
| 18 | switch (width) { | ||
| 19 | case IntegerWidth::Byte: | ||
| 20 | return ir.Imm32(8); | ||
| 21 | case IntegerWidth::Short: | ||
| 22 | return ir.Imm32(16); | ||
| 23 | case IntegerWidth::Word: | ||
| 24 | return ir.Imm32(32); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid width {}", width); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | [[nodiscard]] IR::U32 ConvertInteger(IR::IREmitter& ir, const IR::U32& src, | ||
| 31 | IntegerWidth dst_width) { | ||
| 32 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 33 | const IR::U32 count{WidthSize(ir, dst_width)}; | ||
| 34 | return ir.BitFieldExtract(src, zero, count, false); | ||
| 35 | } | ||
| 36 | |||
| 37 | [[nodiscard]] IR::U32 SaturateInteger(IR::IREmitter& ir, const IR::U32& src, IntegerWidth dst_width, | ||
| 38 | bool dst_signed, bool src_signed) { | ||
| 39 | IR::U32 min{}; | ||
| 40 | IR::U32 max{}; | ||
| 41 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 42 | switch (dst_width) { | ||
| 43 | case IntegerWidth::Byte: | ||
| 44 | min = dst_signed && src_signed ? ir.Imm32(0xffffff80) : zero; | ||
| 45 | max = dst_signed ? ir.Imm32(0x7f) : ir.Imm32(0xff); | ||
| 46 | break; | ||
| 47 | case IntegerWidth::Short: | ||
| 48 | min = dst_signed && src_signed ? ir.Imm32(0xffff8000) : zero; | ||
| 49 | max = dst_signed ? ir.Imm32(0x7fff) : ir.Imm32(0xffff); | ||
| 50 | break; | ||
| 51 | case IntegerWidth::Word: | ||
| 52 | min = dst_signed && src_signed ? ir.Imm32(0x80000000) : zero; | ||
| 53 | max = dst_signed ? ir.Imm32(0x7fffffff) : ir.Imm32(0xffffffff); | ||
| 54 | break; | ||
| 55 | default: | ||
| 56 | throw NotImplementedException("Invalid width {}", dst_width); | ||
| 57 | } | ||
| 58 | const IR::U32 value{!dst_signed && src_signed ? ir.SMax(zero, src) : src}; | ||
| 59 | return dst_signed && src_signed ? ir.SClamp(value, min, max) : ir.UClamp(value, min, max); | ||
| 60 | } | ||
| 61 | |||
| 62 | void I2I(TranslatorVisitor& v, u64 insn, const IR::U32& src_a) { | ||
| 63 | union { | ||
| 64 | u64 insn; | ||
| 65 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 66 | BitField<8, 2, IntegerWidth> dst_fmt; | ||
| 67 | BitField<12, 1, u64> dst_fmt_sign; | ||
| 68 | BitField<10, 2, IntegerWidth> src_fmt; | ||
| 69 | BitField<13, 1, u64> src_fmt_sign; | ||
| 70 | BitField<41, 3, u64> selector; | ||
| 71 | BitField<45, 1, u64> neg; | ||
| 72 | BitField<47, 1, u64> cc; | ||
| 73 | BitField<49, 1, u64> abs; | ||
| 74 | BitField<50, 1, u64> sat; | ||
| 75 | } const i2i{insn}; | ||
| 76 | |||
| 77 | if (i2i.src_fmt == IntegerWidth::Short && (i2i.selector == 1 || i2i.selector == 3)) { | ||
| 78 | throw NotImplementedException("16-bit source format incompatible with selector {}", | ||
| 79 | i2i.selector); | ||
| 80 | } | ||
| 81 | if (i2i.src_fmt == IntegerWidth::Word && i2i.selector != 0) { | ||
| 82 | throw NotImplementedException("32-bit source format incompatible with selector {}", | ||
| 83 | i2i.selector); | ||
| 84 | } | ||
| 85 | |||
| 86 | const s32 selector{static_cast<s32>(i2i.selector)}; | ||
| 87 | const IR::U32 offset{v.ir.Imm32(selector * 8)}; | ||
| 88 | const IR::U32 count{WidthSize(v.ir, i2i.src_fmt)}; | ||
| 89 | const bool src_signed{i2i.src_fmt_sign != 0}; | ||
| 90 | const bool dst_signed{i2i.dst_fmt_sign != 0}; | ||
| 91 | const bool sat{i2i.sat != 0}; | ||
| 92 | |||
| 93 | IR::U32 src_values{v.ir.BitFieldExtract(src_a, offset, count, src_signed)}; | ||
| 94 | if (i2i.abs != 0) { | ||
| 95 | src_values = v.ir.IAbs(src_values); | ||
| 96 | } | ||
| 97 | if (i2i.neg != 0) { | ||
| 98 | src_values = v.ir.INeg(src_values); | ||
| 99 | } | ||
| 100 | const IR::U32 result{ | ||
| 101 | sat ? SaturateInteger(v.ir, src_values, i2i.dst_fmt, dst_signed, src_signed) | ||
| 102 | : ConvertInteger(v.ir, src_values, i2i.dst_fmt)}; | ||
| 103 | |||
| 104 | v.X(i2i.dest_reg, result); | ||
| 105 | if (i2i.cc != 0) { | ||
| 106 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 107 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 108 | v.ResetCFlag(); | ||
| 109 | v.ResetOFlag(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | } // Anonymous namespace | ||
| 113 | |||
| 114 | void TranslatorVisitor::I2I_reg(u64 insn) { | ||
| 115 | I2I(*this, insn, GetReg20(insn)); | ||
| 116 | } | ||
| 117 | |||
| 118 | void TranslatorVisitor::I2I_cbuf(u64 insn) { | ||
| 119 | I2I(*this, insn, GetCbuf(insn)); | ||
| 120 | } | ||
| 121 | |||
| 122 | void TranslatorVisitor::I2I_imm(u64 insn) { | ||
| 123 | I2I(*this, insn, GetImm20(insn)); | ||
| 124 | } | ||
| 125 | |||
| 126 | } // namespace Shader::Maxwell | ||
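The I2I translation above either truncates the selected source bits to the destination width or clamps them to its numeric range. A minimal host-side sketch of that arithmetic on plain C++ integers (ConvertScalar and SaturateScalar are illustrative names, not translator API; the translator additionally distinguishes signed and unsigned sources, which this sketch omits):

#include <algorithm>
#include <cstdint>

// Truncation, mirroring ConvertInteger's BitFieldExtract(src, 0, width, false).
std::uint32_t ConvertScalar(std::uint32_t src, int dst_bits) {
    return dst_bits == 32 ? src : src & ((1u << dst_bits) - 1u);
}

// Saturation, mirroring SaturateInteger's SClamp/UClamp against the destination range.
std::uint32_t SaturateScalar(std::int64_t src, int dst_bits, bool dst_signed) {
    const std::int64_t max{dst_signed ? (std::int64_t{1} << (dst_bits - 1)) - 1
                                      : (std::int64_t{1} << dst_bits) - 1};
    const std::int64_t min{dst_signed ? -(std::int64_t{1} << (dst_bits - 1)) : std::int64_t{0}};
    return static_cast<std::uint32_t>(std::clamp(src, min, max));
}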
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp new file mode 100644 index 000000000..9b85f8059 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | Patch, | ||
| 14 | Prim, | ||
| 15 | Attr, | ||
| 16 | }; | ||
| 17 | |||
| 18 | enum class Shift : u64 { | ||
| 19 | Default, | ||
| 20 | U16, | ||
| 21 | B32, | ||
| 22 | }; | ||
| 23 | |||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | void TranslatorVisitor::ISBERD(u64 insn) { | ||
| 27 | union { | ||
| 28 | u64 raw; | ||
| 29 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 30 | BitField<8, 8, IR::Reg> src_reg; | ||
| 31 | BitField<31, 1, u64> skew; | ||
| 32 | BitField<32, 1, u64> o; | ||
| 33 | BitField<33, 2, Mode> mode; | ||
| 34 | BitField<47, 2, Shift> shift; | ||
| 35 | } const isberd{insn}; | ||
| 36 | |||
| 37 | if (isberd.skew != 0) { | ||
| 38 | throw NotImplementedException("SKEW"); | ||
| 39 | } | ||
| 40 | if (isberd.o != 0) { | ||
| 41 | throw NotImplementedException("O"); | ||
| 42 | } | ||
| 43 | if (isberd.mode != Mode::Default) { | ||
| 44 | throw NotImplementedException("Mode {}", isberd.mode.Value()); | ||
| 45 | } | ||
| 46 | if (isberd.shift != Shift::Default) { | ||
| 47 | throw NotImplementedException("Shift {}", isberd.shift.Value()); | ||
| 48 | } | ||
| 49 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 50 | X(isberd.dest_reg, X(isberd.src_reg)); | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp new file mode 100644 index 000000000..2300088e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.cpp | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/load_constant.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | using namespace LDC; | ||
| 12 | namespace { | ||
| 13 | std::pair<IR::U32, IR::U32> Slot(IR::IREmitter& ir, Mode mode, const IR::U32& imm_index, | ||
| 14 | const IR::U32& reg, const IR::U32& imm) { | ||
| 15 | switch (mode) { | ||
| 16 | case Mode::Default: | ||
| 17 | return {imm_index, ir.IAdd(reg, imm)}; | ||
| 18 | default: | ||
| 19 | break; | ||
| 20 | } | ||
| 21 | throw NotImplementedException("Mode {}", mode); | ||
| 22 | } | ||
| 23 | } // Anonymous namespace | ||
| 24 | |||
| 25 | void TranslatorVisitor::LDC(u64 insn) { | ||
| 26 | const Encoding ldc{insn}; | ||
| 27 | const IR::U32 imm_index{ir.Imm32(static_cast<u32>(ldc.index))}; | ||
| 28 | const IR::U32 reg{X(ldc.src_reg)}; | ||
| 29 | const IR::U32 imm{ir.Imm32(static_cast<s32>(ldc.offset))}; | ||
| 30 | const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)}; | ||
| 31 | switch (ldc.size) { | ||
| 32 | case Size::U8: | ||
| 33 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)}); | ||
| 34 | break; | ||
| 35 | case Size::S8: | ||
| 36 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)}); | ||
| 37 | break; | ||
| 38 | case Size::U16: | ||
| 39 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)}); | ||
| 40 | break; | ||
| 41 | case Size::S16: | ||
| 42 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)}); | ||
| 43 | break; | ||
| 44 | case Size::B32: | ||
| 45 | X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)}); | ||
| 46 | break; | ||
| 47 | case Size::B64: { | ||
| 48 | if (!IR::IsAligned(ldc.dest_reg, 2)) { | ||
| 49 | throw NotImplementedException("Unaligned destination register"); | ||
| 50 | } | ||
| 51 | const IR::Value vector{ir.GetCbuf(index, offset, 64, false)}; | ||
| 52 | for (int i = 0; i < 2; ++i) { | ||
| 53 | X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | } | ||
| 57 | default: | ||
| 58 | throw NotImplementedException("Invalid size {}", ldc.size.Value()); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h new file mode 100644 index 000000000..3074ea0e3 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_constant.h | |||
| @@ -0,0 +1,39 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/reg.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell::LDC { | ||
| 12 | |||
| 13 | enum class Mode : u64 { | ||
| 14 | Default, | ||
| 15 | IL, | ||
| 16 | IS, | ||
| 17 | ISL, | ||
| 18 | }; | ||
| 19 | |||
| 20 | enum class Size : u64 { | ||
| 21 | U8, | ||
| 22 | S8, | ||
| 23 | U16, | ||
| 24 | S16, | ||
| 25 | B32, | ||
| 26 | B64, | ||
| 27 | }; | ||
| 28 | |||
| 29 | union Encoding { | ||
| 30 | u64 raw; | ||
| 31 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 32 | BitField<8, 8, IR::Reg> src_reg; | ||
| 33 | BitField<20, 16, s64> offset; | ||
| 34 | BitField<36, 5, u64> index; | ||
| 35 | BitField<44, 2, Mode> mode; | ||
| 36 | BitField<48, 3, Size> size; | ||
| 37 | }; | ||
| 38 | |||
| 39 | } // namespace Shader::Maxwell::LDC | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp new file mode 100644 index 000000000..4a0f04e47 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_effective_address.cpp | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void LEA_hi(TranslatorVisitor& v, u64 insn, const IR::U32& base, IR::U32 offset_hi, u64 scale, | ||
| 12 | bool neg, bool x) { | ||
| 13 | union { | ||
| 14 | u64 insn; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 17 | BitField<47, 1, u64> cc; | ||
| 18 | BitField<48, 3, IR::Pred> pred; | ||
| 19 | } const lea{insn}; | ||
| 20 | |||
| 21 | if (x) { | ||
| 22 | throw NotImplementedException("LEA.HI X"); | ||
| 23 | } | ||
| 24 | if (lea.pred != IR::Pred::PT) { | ||
| 25 | throw NotImplementedException("LEA.HI Pred"); | ||
| 26 | } | ||
| 27 | if (lea.cc != 0) { | ||
| 28 | throw NotImplementedException("LEA.HI CC"); | ||
| 29 | } | ||
| 30 | |||
| 31 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 32 | const IR::U64 packed_offset{v.ir.PackUint2x32(v.ir.CompositeConstruct(offset_lo, offset_hi))}; | ||
| 33 | const IR::U64 offset{neg ? IR::U64{v.ir.INeg(packed_offset)} : packed_offset}; | ||
| 34 | |||
| 35 | const s32 hi_scale{32 - static_cast<s32>(scale)}; | ||
| 36 | const IR::U64 scaled_offset{v.ir.ShiftRightLogical(offset, v.ir.Imm32(hi_scale))}; | ||
| 37 | const IR::U32 scaled_offset_w0{v.ir.CompositeExtract(v.ir.UnpackUint2x32(scaled_offset), 0)}; | ||
| 38 | |||
| 39 | IR::U32 result{v.ir.IAdd(base, scaled_offset_w0)}; | ||
| 40 | v.X(lea.dest_reg, result); | ||
| 41 | } | ||
| 42 | |||
| 43 | void LEA_lo(TranslatorVisitor& v, u64 insn, const IR::U32& base) { | ||
| 44 | union { | ||
| 45 | u64 insn; | ||
| 46 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 47 | BitField<8, 8, IR::Reg> offset_lo_reg; | ||
| 48 | BitField<39, 5, u64> scale; | ||
| 49 | BitField<45, 1, u64> neg; | ||
| 50 | BitField<46, 1, u64> x; | ||
| 51 | BitField<47, 1, u64> cc; | ||
| 52 | BitField<48, 3, IR::Pred> pred; | ||
| 53 | } const lea{insn}; | ||
| 54 | if (lea.x != 0) { | ||
| 55 | throw NotImplementedException("LEA.LO X"); | ||
| 56 | } | ||
| 57 | if (lea.pred != IR::Pred::PT) { | ||
| 58 | throw NotImplementedException("LEA.LO Pred"); | ||
| 59 | } | ||
| 60 | if (lea.cc != 0) { | ||
| 61 | throw NotImplementedException("LEA.LO CC"); | ||
| 62 | } | ||
| 63 | |||
| 64 | const IR::U32 offset_lo{v.X(lea.offset_lo_reg)}; | ||
| 65 | const s32 scale{static_cast<s32>(lea.scale)}; | ||
| 66 | const IR::U32 offset{lea.neg != 0 ? IR::U32{v.ir.INeg(offset_lo)} : offset_lo}; | ||
| 67 | const IR::U32 scaled_offset{v.ir.ShiftLeftLogical(offset, v.ir.Imm32(scale))}; | ||
| 68 | |||
| 69 | IR::U32 result{v.ir.IAdd(base, scaled_offset)}; | ||
| 70 | v.X(lea.dest_reg, result); | ||
| 71 | } | ||
| 72 | } // Anonymous namespace | ||
| 73 | |||
| 74 | void TranslatorVisitor::LEA_hi_reg(u64 insn) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<28, 5, u64> scale; | ||
| 78 | BitField<37, 1, u64> neg; | ||
| 79 | BitField<38, 1, u64> x; | ||
| 80 | } const lea{insn}; | ||
| 81 | |||
| 82 | LEA_hi(*this, insn, GetReg20(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 83 | } | ||
| 84 | |||
| 85 | void TranslatorVisitor::LEA_hi_cbuf(u64 insn) { | ||
| 86 | union { | ||
| 87 | u64 insn; | ||
| 88 | BitField<51, 5, u64> scale; | ||
| 89 | BitField<56, 1, u64> neg; | ||
| 90 | BitField<57, 1, u64> x; | ||
| 91 | } const lea{insn}; | ||
| 92 | |||
| 93 | LEA_hi(*this, insn, GetCbuf(insn), GetReg39(insn), lea.scale, lea.neg != 0, lea.x != 0); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::LEA_lo_reg(u64 insn) { | ||
| 97 | LEA_lo(*this, insn, GetReg20(insn)); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::LEA_lo_cbuf(u64 insn) { | ||
| 101 | LEA_lo(*this, insn, GetCbuf(insn)); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::LEA_lo_imm(u64 insn) { | ||
| 105 | LEA_lo(*this, insn, GetImm20(insn)); | ||
| 106 | } | ||
| 107 | |||
| 108 | } // namespace Shader::Maxwell | ||
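Both LEA forms reduce to a shift-and-add on the effective offset: LEA.LO scales a 32-bit offset up, while LEA.HI takes the low word of a packed 64-bit offset shifted down by (32 - scale). A host-integer sketch of that address math (function names are illustrative, not translator API):

#include <cstdint>

// LEA.LO: base + (offset << scale), with optional two's-complement negation of the offset.
std::uint32_t LeaLo(std::uint32_t base, std::uint32_t offset, unsigned scale, bool neg) {
    const std::uint32_t off{neg ? 0u - offset : offset};
    return base + (off << scale);
}

// LEA.HI: base + low word of ((offset_hi:offset_lo) >> (32 - scale)).
std::uint32_t LeaHi(std::uint32_t base, std::uint32_t offset_lo, std::uint32_t offset_hi,
                    unsigned scale, bool neg) {
    std::uint64_t packed{(static_cast<std::uint64_t>(offset_hi) << 32) | offset_lo};
    if (neg) {
        packed = ~packed + 1u;  // negate the packed 64-bit offset
    }
    return base + static_cast<std::uint32_t>(packed >> (32u - scale));
}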
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp new file mode 100644 index 000000000..924fb7a40 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_attribute.cpp | |||
| @@ -0,0 +1,196 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Size : u64 { | ||
| 15 | B32, | ||
| 16 | B64, | ||
| 17 | B96, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | enum class InterpolationMode : u64 { | ||
| 22 | Pass, | ||
| 23 | Multiply, | ||
| 24 | Constant, | ||
| 25 | Sc, | ||
| 26 | }; | ||
| 27 | |||
| 28 | enum class SampleMode : u64 { | ||
| 29 | Default, | ||
| 30 | Centroid, | ||
| 31 | Offset, | ||
| 32 | }; | ||
| 33 | |||
| 34 | u32 NumElements(Size size) { | ||
| 35 | switch (size) { | ||
| 36 | case Size::B32: | ||
| 37 | return 1; | ||
| 38 | case Size::B64: | ||
| 39 | return 2; | ||
| 40 | case Size::B96: | ||
| 41 | return 3; | ||
| 42 | case Size::B128: | ||
| 43 | return 4; | ||
| 44 | } | ||
| 45 | throw InvalidArgument("Invalid size {}", size); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename F> | ||
| 49 | void HandleIndexed(TranslatorVisitor& v, IR::Reg index_reg, u32 num_elements, F&& f) { | ||
| 50 | const IR::U32 index_value{v.X(index_reg)}; | ||
| 51 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 52 | const IR::U32 final_offset{ | ||
| 53 | element == 0 ? index_value : IR::U32{v.ir.IAdd(index_value, v.ir.Imm32(element * 4U))}}; | ||
| 54 | f(element, final_offset); | ||
| 55 | } | ||
| 56 | } | ||
| 57 | |||
| 58 | } // Anonymous namespace | ||
| 59 | |||
| 60 | void TranslatorVisitor::ALD(u64 insn) { | ||
| 61 | union { | ||
| 62 | u64 raw; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> index_reg; | ||
| 65 | BitField<20, 10, u64> absolute_offset; | ||
| 66 | BitField<20, 11, s64> relative_offset; | ||
| 67 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 68 | BitField<32, 1, u64> o; | ||
| 69 | BitField<31, 1, u64> patch; | ||
| 70 | BitField<47, 2, Size> size; | ||
| 71 | } const ald{insn}; | ||
| 72 | |||
| 73 | const u64 offset{ald.absolute_offset.Value()}; | ||
| 74 | if (offset % 4 != 0) { | ||
| 75 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 76 | } | ||
| 77 | const IR::U32 vertex{X(ald.vertex_reg)}; | ||
| 78 | const u32 num_elements{NumElements(ald.size)}; | ||
| 79 | if (ald.index_reg == IR::Reg::RZ) { | ||
| 80 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 81 | if (ald.patch != 0) { | ||
| 82 | const IR::Patch patch{offset / 4 + element}; | ||
| 83 | F(ald.dest_reg + static_cast<int>(element), ir.GetPatch(patch)); | ||
| 84 | } else { | ||
| 85 | const IR::Attribute attr{offset / 4 + element}; | ||
| 86 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttribute(attr, vertex)); | ||
| 87 | } | ||
| 88 | } | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | if (ald.patch != 0) { | ||
| 92 | throw NotImplementedException("Indirect patch read"); | ||
| 93 | } | ||
| 94 | HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 95 | F(ald.dest_reg + static_cast<int>(element), ir.GetAttributeIndexed(final_offset, vertex)); | ||
| 96 | }); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::AST(u64 insn) { | ||
| 100 | union { | ||
| 101 | u64 raw; | ||
| 102 | BitField<0, 8, IR::Reg> src_reg; | ||
| 103 | BitField<8, 8, IR::Reg> index_reg; | ||
| 104 | BitField<20, 10, u64> absolute_offset; | ||
| 105 | BitField<20, 11, s64> relative_offset; | ||
| 106 | BitField<31, 1, u64> patch; | ||
| 107 | BitField<39, 8, IR::Reg> vertex_reg; | ||
| 108 | BitField<47, 2, Size> size; | ||
| 109 | } const ast{insn}; | ||
| 110 | |||
| 114 | const u64 offset{ast.absolute_offset.Value()}; | ||
| 115 | if (offset % 4 != 0) { | ||
| 116 | throw NotImplementedException("Unaligned absolute offset {}", offset); | ||
| 117 | } | ||
| 118 | const IR::U32 vertex{X(ast.vertex_reg)}; | ||
| 119 | const u32 num_elements{NumElements(ast.size)}; | ||
| 120 | if (ast.index_reg == IR::Reg::RZ) { | ||
| 121 | for (u32 element = 0; element < num_elements; ++element) { | ||
| 122 | if (ast.patch != 0) { | ||
| 123 | const IR::Patch patch{offset / 4 + element}; | ||
| 124 | ir.SetPatch(patch, F(ast.src_reg + static_cast<int>(element))); | ||
| 125 | } else { | ||
| 126 | const IR::Attribute attr{offset / 4 + element}; | ||
| 127 | ir.SetAttribute(attr, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 128 | } | ||
| 129 | } | ||
| 130 | return; | ||
| 131 | } | ||
| 132 | if (ast.patch != 0) { | ||
| 133 | throw NotImplementedException("Indexed tessellation patch store"); | ||
| 134 | } | ||
| 135 | HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||
| 136 | ir.SetAttributeIndexed(final_offset, F(ast.src_reg + static_cast<int>(element)), vertex); | ||
| 137 | }); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IPA(u64 insn) { | ||
| 141 |     // IPA is the instruction used to read varyings in a fragment shader. | ||
| 142 | // gl_FragCoord is mapped to the gl_Position attribute. | ||
| 143 | // It yields unknown results when used outside of the fragment shader stage. | ||
| 144 | union { | ||
| 145 | u64 raw; | ||
| 146 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 147 | BitField<8, 8, IR::Reg> index_reg; | ||
| 148 | BitField<20, 8, IR::Reg> multiplier; | ||
| 149 | BitField<30, 8, IR::Attribute> attribute; | ||
| 150 | BitField<38, 1, u64> idx; | ||
| 151 | BitField<51, 1, u64> sat; | ||
| 152 | BitField<52, 2, SampleMode> sample_mode; | ||
| 153 | BitField<54, 2, InterpolationMode> interpolation_mode; | ||
| 154 | } const ipa{insn}; | ||
| 155 | |||
| 156 |     // Indexed IPAs are used to read dynamically indexed varyings. | ||
| 157 | // For example: | ||
| 158 | // | ||
| 159 | // in vec4 colors[4]; | ||
| 160 | // uniform int idx; | ||
| 161 | // void main() { | ||
| 162 | // gl_FragColor = colors[idx]; | ||
| 163 | // } | ||
| 164 | const bool is_indexed{ipa.idx != 0 && ipa.index_reg != IR::Reg::RZ}; | ||
| 165 | const IR::Attribute attribute{ipa.attribute}; | ||
| 166 | IR::F32 value{is_indexed ? ir.GetAttributeIndexed(X(ipa.index_reg)) | ||
| 167 | : ir.GetAttribute(attribute)}; | ||
| 168 | if (IR::IsGeneric(attribute)) { | ||
| 169 | const ProgramHeader& sph{env.SPH()}; | ||
| 170 | const u32 attr_index{IR::GenericAttributeIndex(attribute)}; | ||
| 171 | const u32 element{static_cast<u32>(attribute) % 4}; | ||
| 172 | const std::array input_map{sph.ps.GenericInputMap(attr_index)}; | ||
| 173 | const bool is_perspective{input_map[element] == Shader::PixelImap::Perspective}; | ||
| 174 | if (is_perspective) { | ||
| 175 | const IR::F32 position_w{ir.GetAttribute(IR::Attribute::PositionW)}; | ||
| 176 | value = ir.FPMul(value, position_w); | ||
| 177 | } | ||
| 178 | } | ||
| 179 | if (ipa.interpolation_mode == InterpolationMode::Multiply) { | ||
| 180 | value = ir.FPMul(value, F(ipa.multiplier)); | ||
| 181 | } | ||
| 182 | |||
| 183 | // Saturated IPAs are generally generated out of clamped varyings. | ||
| 184 | // For example: clamp(some_varying, 0.0, 1.0) | ||
| 185 | const bool is_saturated{ipa.sat != 0}; | ||
| 186 | if (is_saturated) { | ||
| 187 | if (attribute == IR::Attribute::FrontFace) { | ||
| 188 | throw NotImplementedException("IPA.SAT on FrontFace"); | ||
| 189 | } | ||
| 190 | value = ir.FPSaturate(value); | ||
| 191 | } | ||
| 192 | |||
| 193 | F(ipa.dest_reg, value); | ||
| 194 | } | ||
| 195 | |||
| 196 | } // namespace Shader::Maxwell | ||
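After the attribute is read, IPA applies a handful of scalar adjustments in a fixed order: perspective correction, the optional multiplier, then saturation. A sketch of that ordering on host floats (names are illustrative; the perspective step simply mirrors the FPMul by PositionW above):

#include <algorithm>

float IpaAdjust(float value, bool perspective, float position_w, bool multiply, float multiplier,
                bool saturate) {
    if (perspective) {
        value *= position_w;  // generic perspective inputs are multiplied by PositionW
    }
    if (multiply) {
        value *= multiplier;  // InterpolationMode::Multiply
    }
    if (saturate) {
        value = std::clamp(value, 0.0f, 1.0f);  // IPA.SAT, i.e. clamp(varying, 0.0, 1.0)
    }
    return value;
}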
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp new file mode 100644 index 000000000..d2a1dbf61 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp | |||
| @@ -0,0 +1,218 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Size : u64 { | ||
| 12 | U8, | ||
| 13 | S8, | ||
| 14 | U16, | ||
| 15 | S16, | ||
| 16 | B32, | ||
| 17 | B64, | ||
| 18 | B128, | ||
| 19 | }; | ||
| 20 | |||
| 21 | IR::U32 Offset(TranslatorVisitor& v, u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<8, 8, IR::Reg> offset_reg; | ||
| 25 | BitField<20, 24, u64> absolute_offset; | ||
| 26 | BitField<20, 24, s64> relative_offset; | ||
| 27 | } const encoding{insn}; | ||
| 28 | |||
| 29 | if (encoding.offset_reg == IR::Reg::RZ) { | ||
| 30 | return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset)); | ||
| 31 | } else { | ||
| 32 | const s32 relative{static_cast<s32>(encoding.relative_offset.Value())}; | ||
| 33 | return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | std::pair<IR::U32, IR::U32> WordOffset(TranslatorVisitor& v, u64 insn) { | ||
| 38 | const IR::U32 offset{Offset(v, insn)}; | ||
| 39 | if (offset.IsImmediate()) { | ||
| 40 | return {v.ir.Imm32(offset.U32() / 4), offset}; | ||
| 41 | } else { | ||
| 42 | return {v.ir.ShiftRightArithmetic(offset, v.ir.Imm32(2)), offset}; | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::pair<int, bool> GetSize(u64 insn) { | ||
| 47 | union { | ||
| 48 | u64 raw; | ||
| 49 | BitField<48, 3, Size> size; | ||
| 50 | } const encoding{insn}; | ||
| 51 | |||
| 52 | switch (encoding.size) { | ||
| 53 | case Size::U8: | ||
| 54 | return {8, false}; | ||
| 55 | case Size::S8: | ||
| 56 | return {8, true}; | ||
| 57 | case Size::U16: | ||
| 58 | return {16, false}; | ||
| 59 | case Size::S16: | ||
| 60 | return {16, true}; | ||
| 61 | case Size::B32: | ||
| 62 | return {32, false}; | ||
| 63 | case Size::B64: | ||
| 64 | return {64, false}; | ||
| 65 | case Size::B128: | ||
| 66 | return {128, false}; | ||
| 67 | default: | ||
| 68 | throw NotImplementedException("Invalid size {}", encoding.size.Value()); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Reg Reg(u64 insn) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<0, 8, IR::Reg> reg; | ||
| 76 | } const encoding{insn}; | ||
| 77 | |||
| 78 | return encoding.reg; | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 82 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24)); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) { | ||
| 86 | return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16)); | ||
| 87 | } | ||
| 88 | |||
| 89 | IR::U32 LoadLocal(TranslatorVisitor& v, const IR::U32& word_offset, const IR::U32& offset) { | ||
| 90 | const IR::U32 local_memory_size{v.ir.Imm32(v.env.LocalMemorySize())}; | ||
| 91 | const IR::U1 in_bounds{v.ir.ILessThan(offset, local_memory_size, false)}; | ||
| 92 | return IR::U32{v.ir.Select(in_bounds, v.ir.LoadLocal(word_offset), v.ir.Imm32(0))}; | ||
| 93 | } | ||
| 94 | } // Anonymous namespace | ||
| 95 | |||
| 96 | void TranslatorVisitor::LDL(u64 insn) { | ||
| 97 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 98 | const IR::U32 word{LoadLocal(*this, word_offset, offset)}; | ||
| 99 | const IR::Reg dest{Reg(insn)}; | ||
| 100 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 101 | switch (bit_size) { | ||
| 102 | case 8: { | ||
| 103 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 104 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(8), is_signed)); | ||
| 105 | break; | ||
| 106 | } | ||
| 107 | case 16: { | ||
| 108 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 109 | X(dest, ir.BitFieldExtract(word, bit, ir.Imm32(16), is_signed)); | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case 32: | ||
| 113 | case 64: | ||
| 114 | case 128: | ||
| 115 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 116 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 117 | } | ||
| 118 | X(dest, word); | ||
| 119 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 120 | const IR::U32 sub_word_offset{ir.IAdd(word_offset, ir.Imm32(i))}; | ||
| 121 | const IR::U32 sub_offset{ir.IAdd(offset, ir.Imm32(i * 4))}; | ||
| 122 | X(dest + i, LoadLocal(*this, sub_word_offset, sub_offset)); | ||
| 123 | } | ||
| 124 | break; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::LDS(u64 insn) { | ||
| 129 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 130 | const IR::Reg dest{Reg(insn)}; | ||
| 131 | const auto [bit_size, is_signed]{GetSize(insn)}; | ||
| 132 | const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)}; | ||
| 133 | switch (bit_size) { | ||
| 134 | case 8: | ||
| 135 | case 16: | ||
| 136 | case 32: | ||
| 137 | X(dest, IR::U32{value}); | ||
| 138 | break; | ||
| 139 | case 64: | ||
| 140 | case 128: | ||
| 141 | if (!IR::IsAligned(dest, static_cast<size_t>(bit_size / 32))) { | ||
| 142 | throw NotImplementedException("Unaligned destination register {}", dest); | ||
| 143 | } | ||
| 144 | for (int element = 0; element < bit_size / 32; ++element) { | ||
| 145 | X(dest + element, IR::U32{ir.CompositeExtract(value, static_cast<size_t>(element))}); | ||
| 146 | } | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | void TranslatorVisitor::STL(u64 insn) { | ||
| 152 | const auto [word_offset, offset]{WordOffset(*this, insn)}; | ||
| 153 | if (offset.IsImmediate()) { | ||
| 154 | // TODO: Support storing out of bounds at runtime | ||
| 155 | if (offset.U32() >= env.LocalMemorySize()) { | ||
| 156 | LOG_WARNING(Shader, "Storing local memory at 0x{:x} with a size of 0x{:x}, dropping", | ||
| 157 | offset.U32(), env.LocalMemorySize()); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | const IR::Reg reg{Reg(insn)}; | ||
| 162 | const IR::U32 src{X(reg)}; | ||
| 163 | const int bit_size{GetSize(insn).first}; | ||
| 164 | switch (bit_size) { | ||
| 165 | case 8: { | ||
| 166 | const IR::U32 bit{ByteOffset(ir, offset)}; | ||
| 167 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))}; | ||
| 168 | ir.WriteLocal(word_offset, value); | ||
| 169 | break; | ||
| 170 | } | ||
| 171 | case 16: { | ||
| 172 | const IR::U32 bit{ShortOffset(ir, offset)}; | ||
| 173 | const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))}; | ||
| 174 | ir.WriteLocal(word_offset, value); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | case 32: | ||
| 178 | case 64: | ||
| 179 | case 128: | ||
| 180 | if (!IR::IsAligned(reg, static_cast<size_t>(bit_size / 32))) { | ||
| 181 | throw NotImplementedException("Unaligned source register"); | ||
| 182 | } | ||
| 183 | ir.WriteLocal(word_offset, src); | ||
| 184 | for (int i = 1; i < bit_size / 32; ++i) { | ||
| 185 | ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i)); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | void TranslatorVisitor::STS(u64 insn) { | ||
| 192 | const IR::U32 offset{Offset(*this, insn)}; | ||
| 193 | const IR::Reg reg{Reg(insn)}; | ||
| 194 | const int bit_size{GetSize(insn).first}; | ||
| 195 | switch (bit_size) { | ||
| 196 | case 8: | ||
| 197 | case 16: | ||
| 198 | case 32: | ||
| 199 | ir.WriteShared(bit_size, offset, X(reg)); | ||
| 200 | break; | ||
| 201 | case 64: | ||
| 202 | if (!IR::IsAligned(reg, 2)) { | ||
| 203 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 204 | } | ||
| 205 | ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1))); | ||
| 206 | break; | ||
| 207 | case 128: { | ||
| 208 | if (!IR::IsAligned(reg, 2)) { | ||
| 209 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 210 | } | ||
| 211 | const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))}; | ||
| 212 | ir.WriteShared(128, offset, vector); | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | } // namespace Shader::Maxwell | ||
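LDL/STL treat local memory as an array of 32-bit words, so 8-bit and 16-bit accesses become bitfield operations inside one word; ByteOffset/ShortOffset compute the bit position within that word. A host-side sketch of the byte case (hypothetical helpers, not the emitter API):

#include <cstdint>

// Mirrors ByteOffset(): bit position of the addressed byte, (offset * 8) & 24.
std::uint32_t ByteBit(std::uint32_t byte_offset) {
    return (byte_offset << 3) & 24u;
}

// LDL.U8: extract 8 bits from the containing word (BitFieldExtract in the IR).
std::uint32_t LoadLocalU8(const std::uint32_t* local, std::uint32_t byte_offset) {
    const std::uint32_t word{local[byte_offset / 4]};
    return (word >> ByteBit(byte_offset)) & 0xffu;
}

// STL.U8: read-modify-write of the containing word (BitFieldInsert in the IR).
void StoreLocalU8(std::uint32_t* local, std::uint32_t byte_offset, std::uint32_t value) {
    const std::uint32_t bit{ByteBit(byte_offset)};
    std::uint32_t& word{local[byte_offset / 4]};
    word = (word & ~(0xffu << bit)) | ((value & 0xffu) << bit);
}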
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp new file mode 100644 index 000000000..36c5cff2f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp | |||
| @@ -0,0 +1,184 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class LoadSize : u64 { | ||
| 14 | U8, // Zero-extend | ||
| 15 | S8, // Sign-extend | ||
| 16 | U16, // Zero-extend | ||
| 17 | S16, // Sign-extend | ||
| 18 | B32, | ||
| 19 | B64, | ||
| 20 | B128, | ||
| 21 | U128, // ??? | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class StoreSize : u64 { | ||
| 25 | U8, // Zero-extend | ||
| 26 | S8, // Sign-extend | ||
| 27 | U16, // Zero-extend | ||
| 28 | S16, // Sign-extend | ||
| 29 | B32, | ||
| 30 | B64, | ||
| 31 | B128, | ||
| 32 | }; | ||
| 33 | |||
| 34 | // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 35 | enum class LoadCache : u64 { | ||
| 36 | CA, // Cache at all levels, likely to be accessed again | ||
| 37 | CG, // Cache at global level (cache in L2 and below, not L1) | ||
| 38 | CI, // ??? | ||
| 39 | CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) | ||
| 40 | }; | ||
| 41 | |||
| 42 | // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||
| 43 | enum class StoreCache : u64 { | ||
| 44 | WB, // Cache write-back all coherent levels | ||
| 45 | CG, // Cache at global level | ||
| 46 | CS, // Cache streaming, likely to be accessed once | ||
| 47 | WT, // Cache write-through (to system memory) | ||
| 48 | }; | ||
| 49 | |||
| 50 | IR::U64 Address(TranslatorVisitor& v, u64 insn) { | ||
| 51 | union { | ||
| 52 | u64 raw; | ||
| 53 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 54 | BitField<20, 24, s64> addr_offset; | ||
| 55 | BitField<20, 24, u64> rz_addr_offset; | ||
| 56 | BitField<45, 1, u64> e; | ||
| 57 | } const mem{insn}; | ||
| 58 | |||
| 59 | const IR::U64 address{[&]() -> IR::U64 { | ||
| 60 | if (mem.e == 0) { | ||
| 61 | // LDG/STG without .E uses a 32-bit pointer, zero-extend it | ||
| 62 | return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||
| 63 | } | ||
| 64 | if (!IR::IsAligned(mem.addr_reg, 2)) { | ||
| 65 | throw NotImplementedException("Unaligned address register"); | ||
| 66 | } | ||
| 67 | // Pack two registers to build the 64-bit address | ||
| 68 | return v.ir.PackUint2x32(v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); | ||
| 69 | }()}; | ||
| 70 | const u64 addr_offset{[&]() -> u64 { | ||
| 71 | if (mem.addr_reg == IR::Reg::RZ) { | ||
| 72 |             // When RZ is used, the immediate offset is an absolute (unsigned) address | ||
| 73 | return static_cast<u64>(mem.rz_addr_offset.Value()); | ||
| 74 | } else { | ||
| 75 | return static_cast<u64>(mem.addr_offset.Value()); | ||
| 76 | } | ||
| 77 | }()}; | ||
| 78 | // Apply the offset | ||
| 79 | return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||
| 80 | } | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 83 | void TranslatorVisitor::LDG(u64 insn) { | ||
| 84 | // LDG loads global memory into registers | ||
| 85 | union { | ||
| 86 | u64 raw; | ||
| 87 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 88 | BitField<46, 2, LoadCache> cache; | ||
| 89 | BitField<48, 3, LoadSize> size; | ||
| 90 | } const ldg{insn}; | ||
| 91 | |||
| 92 | // Pointer to load data from | ||
| 93 | const IR::U64 address{Address(*this, insn)}; | ||
| 94 | const IR::Reg dest_reg{ldg.dest_reg}; | ||
| 95 | switch (ldg.size) { | ||
| 96 | case LoadSize::U8: | ||
| 97 | X(dest_reg, ir.LoadGlobalU8(address)); | ||
| 98 | break; | ||
| 99 | case LoadSize::S8: | ||
| 100 | X(dest_reg, ir.LoadGlobalS8(address)); | ||
| 101 | break; | ||
| 102 | case LoadSize::U16: | ||
| 103 | X(dest_reg, ir.LoadGlobalU16(address)); | ||
| 104 | break; | ||
| 105 | case LoadSize::S16: | ||
| 106 | X(dest_reg, ir.LoadGlobalS16(address)); | ||
| 107 | break; | ||
| 108 | case LoadSize::B32: | ||
| 109 | X(dest_reg, ir.LoadGlobal32(address)); | ||
| 110 | break; | ||
| 111 | case LoadSize::B64: { | ||
| 112 | if (!IR::IsAligned(dest_reg, 2)) { | ||
| 113 | throw NotImplementedException("Unaligned data registers"); | ||
| 114 | } | ||
| 115 | const IR::Value vector{ir.LoadGlobal64(address)}; | ||
| 116 | for (int i = 0; i < 2; ++i) { | ||
| 117 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 118 | } | ||
| 119 | break; | ||
| 120 | } | ||
| 121 | case LoadSize::B128: | ||
| 122 | case LoadSize::U128: { | ||
| 123 | if (!IR::IsAligned(dest_reg, 4)) { | ||
| 124 | throw NotImplementedException("Unaligned data registers"); | ||
| 125 | } | ||
| 126 | const IR::Value vector{ir.LoadGlobal128(address)}; | ||
| 127 | for (int i = 0; i < 4; ++i) { | ||
| 128 | X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, static_cast<size_t>(i))}); | ||
| 129 | } | ||
| 130 | break; | ||
| 131 | } | ||
| 132 | default: | ||
| 133 | throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 137 | void TranslatorVisitor::STG(u64 insn) { | ||
| 138 | // STG stores registers into global memory. | ||
| 139 | union { | ||
| 140 | u64 raw; | ||
| 141 | BitField<0, 8, IR::Reg> data_reg; | ||
| 142 | BitField<46, 2, StoreCache> cache; | ||
| 143 | BitField<48, 3, StoreSize> size; | ||
| 144 | } const stg{insn}; | ||
| 145 | |||
| 146 | // Pointer to store data into | ||
| 147 | const IR::U64 address{Address(*this, insn)}; | ||
| 148 | const IR::Reg data_reg{stg.data_reg}; | ||
| 149 | switch (stg.size) { | ||
| 150 | case StoreSize::U8: | ||
| 151 | ir.WriteGlobalU8(address, X(data_reg)); | ||
| 152 | break; | ||
| 153 | case StoreSize::S8: | ||
| 154 | ir.WriteGlobalS8(address, X(data_reg)); | ||
| 155 | break; | ||
| 156 | case StoreSize::U16: | ||
| 157 | ir.WriteGlobalU16(address, X(data_reg)); | ||
| 158 | break; | ||
| 159 | case StoreSize::S16: | ||
| 160 | ir.WriteGlobalS16(address, X(data_reg)); | ||
| 161 | break; | ||
| 162 | case StoreSize::B32: | ||
| 163 | ir.WriteGlobal32(address, X(data_reg)); | ||
| 164 | break; | ||
| 165 | case StoreSize::B64: { | ||
| 166 | if (!IR::IsAligned(data_reg, 2)) { | ||
| 167 | throw NotImplementedException("Unaligned data registers"); | ||
| 168 | } | ||
| 169 | const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; | ||
| 170 | ir.WriteGlobal64(address, vector); | ||
| 171 | break; | ||
| 172 | } | ||
| 173 | case StoreSize::B128: | ||
| 174 | if (!IR::IsAligned(data_reg, 4)) { | ||
| 175 | throw NotImplementedException("Unaligned data registers"); | ||
| 176 | } | ||
| 177 | const IR::Value vector{ | ||
| 178 | ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; | ||
| 179 | ir.WriteGlobal128(address, vector); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | } | ||
| 183 | |||
| 184 | } // namespace Shader::Maxwell | ||
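The Address() helper above builds one 64-bit pointer for every LDG/STG variant: either a zero-extended 32-bit register or an aligned register pair, plus the sign-extended 24-bit immediate. The same computation on host integers (illustrative, not the IR API; the RZ absolute-offset special case is omitted):

#include <cstdint>

std::uint64_t GlobalAddress(bool extended, std::uint32_t reg_lo, std::uint32_t reg_hi,
                            std::int32_t imm_offset /* 24-bit field, already sign-extended */) {
    // Without .E the pointer is the 32-bit register, zero-extended; with .E it is packed
    // from the register pair (reg_hi:reg_lo).
    const std::uint64_t base{extended ? (static_cast<std::uint64_t>(reg_hi) << 32) | reg_lo
                                      : static_cast<std::uint64_t>(reg_lo)};
    return base + static_cast<std::uint64_t>(static_cast<std::int64_t>(imm_offset));
}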
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp new file mode 100644 index 000000000..92cd27ed4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation.cpp | |||
| @@ -0,0 +1,116 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class LogicalOp : u64 { | ||
| 13 | AND, | ||
| 14 | OR, | ||
| 15 | XOR, | ||
| 16 | PASS_B, | ||
| 17 | }; | ||
| 18 | |||
| 19 | [[nodiscard]] IR::U32 LogicalOperation(IR::IREmitter& ir, const IR::U32& operand_1, | ||
| 20 | const IR::U32& operand_2, LogicalOp op) { | ||
| 21 | switch (op) { | ||
| 22 | case LogicalOp::AND: | ||
| 23 | return ir.BitwiseAnd(operand_1, operand_2); | ||
| 24 | case LogicalOp::OR: | ||
| 25 | return ir.BitwiseOr(operand_1, operand_2); | ||
| 26 | case LogicalOp::XOR: | ||
| 27 | return ir.BitwiseXor(operand_1, operand_2); | ||
| 28 | case LogicalOp::PASS_B: | ||
| 29 | return operand_2; | ||
| 30 | default: | ||
| 31 | throw NotImplementedException("Invalid Logical operation {}", op); | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | void LOP(TranslatorVisitor& v, u64 insn, IR::U32 op_b, bool x, bool cc, bool inv_a, bool inv_b, | ||
| 36 | LogicalOp bit_op, std::optional<PredicateOp> pred_op = std::nullopt, | ||
| 37 | IR::Pred dest_pred = IR::Pred::PT) { | ||
| 38 | union { | ||
| 39 | u64 insn; | ||
| 40 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 41 | BitField<8, 8, IR::Reg> src_reg; | ||
| 42 | } const lop{insn}; | ||
| 43 | |||
| 44 | if (x) { | ||
| 45 | throw NotImplementedException("X"); | ||
| 46 | } | ||
| 47 | IR::U32 op_a{v.X(lop.src_reg)}; | ||
| 48 | if (inv_a != 0) { | ||
| 49 | op_a = v.ir.BitwiseNot(op_a); | ||
| 50 | } | ||
| 51 | if (inv_b != 0) { | ||
| 52 | op_b = v.ir.BitwiseNot(op_b); | ||
| 53 | } | ||
| 54 | |||
| 55 | const IR::U32 result{LogicalOperation(v.ir, op_a, op_b, bit_op)}; | ||
| 56 | if (pred_op) { | ||
| 57 | const IR::U1 pred_result{PredicateOperation(v.ir, result, *pred_op)}; | ||
| 58 | v.ir.SetPred(dest_pred, pred_result); | ||
| 59 | } | ||
| 60 | if (cc) { | ||
| 61 | if (bit_op == LogicalOp::PASS_B) { | ||
| 62 | v.SetZFlag(v.ir.IEqual(result, v.ir.Imm32(0))); | ||
| 63 | v.SetSFlag(v.ir.ILessThan(result, v.ir.Imm32(0), true)); | ||
| 64 | } else { | ||
| 65 | v.SetZFlag(v.ir.GetZeroFromOp(result)); | ||
| 66 | v.SetSFlag(v.ir.GetSignFromOp(result)); | ||
| 67 | } | ||
| 68 | v.ResetCFlag(); | ||
| 69 | v.ResetOFlag(); | ||
| 70 | } | ||
| 71 | v.X(lop.dest_reg, result); | ||
| 72 | } | ||
| 73 | |||
| 74 | void LOP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||
| 75 | union { | ||
| 76 | u64 insn; | ||
| 77 | BitField<39, 1, u64> inv_a; | ||
| 78 | BitField<40, 1, u64> inv_b; | ||
| 79 | BitField<41, 2, LogicalOp> bit_op; | ||
| 80 | BitField<43, 1, u64> x; | ||
| 81 | BitField<44, 2, PredicateOp> pred_op; | ||
| 82 | BitField<47, 1, u64> cc; | ||
| 83 | BitField<48, 3, IR::Pred> dest_pred; | ||
| 84 | } const lop{insn}; | ||
| 85 | |||
| 86 | LOP(v, insn, op_b, lop.x != 0, lop.cc != 0, lop.inv_a != 0, lop.inv_b != 0, lop.bit_op, | ||
| 87 | lop.pred_op, lop.dest_pred); | ||
| 88 | } | ||
| 89 | } // Anonymous namespace | ||
| 90 | |||
| 91 | void TranslatorVisitor::LOP_reg(u64 insn) { | ||
| 92 | LOP(*this, insn, GetReg20(insn)); | ||
| 93 | } | ||
| 94 | |||
| 95 | void TranslatorVisitor::LOP_cbuf(u64 insn) { | ||
| 96 | LOP(*this, insn, GetCbuf(insn)); | ||
| 97 | } | ||
| 98 | |||
| 99 | void TranslatorVisitor::LOP_imm(u64 insn) { | ||
| 100 | LOP(*this, insn, GetImm20(insn)); | ||
| 101 | } | ||
| 102 | |||
| 103 | void TranslatorVisitor::LOP32I(u64 insn) { | ||
| 104 | union { | ||
| 105 | u64 raw; | ||
| 106 | BitField<53, 2, LogicalOp> bit_op; | ||
| 107 | BitField<57, 1, u64> x; | ||
| 108 | BitField<52, 1, u64> cc; | ||
| 109 | BitField<55, 1, u64> inv_a; | ||
| 110 | BitField<56, 1, u64> inv_b; | ||
| 111 | } const lop32i{insn}; | ||
| 112 | |||
| 113 | LOP(*this, insn, GetImm32(insn), lop32i.x != 0, lop32i.cc != 0, lop32i.inv_a != 0, | ||
| 114 | lop32i.inv_b != 0, lop32i.bit_op); | ||
| 115 | } | ||
| 116 | } // namespace Shader::Maxwell | ||
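LOP itself is a plain two-input bitwise operation with optional operand inversion; the CC path only derives the zero and sign flags from the result. A host-integer sketch of that core (enum and function names are illustrative):

#include <cstdint>

enum class Lop { AND, OR, XOR, PASS_B };

std::uint32_t LopScalar(std::uint32_t a, std::uint32_t b, bool inv_a, bool inv_b, Lop op,
                        bool& z_flag, bool& s_flag) {
    if (inv_a) {
        a = ~a;
    }
    if (inv_b) {
        b = ~b;
    }
    std::uint32_t r{};
    switch (op) {
    case Lop::AND:
        r = a & b;
        break;
    case Lop::OR:
        r = a | b;
        break;
    case Lop::XOR:
        r = a ^ b;
        break;
    case Lop::PASS_B:
        r = b;  // the result is the (possibly inverted) second operand
        break;
    }
    z_flag = r == 0;                            // Z flag
    s_flag = static_cast<std::int32_t>(r) < 0;  // S flag (sign bit)
    return r;
}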
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp new file mode 100644 index 000000000..e0fe47912 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/logic_operation_three_input.cpp | |||
| @@ -0,0 +1,122 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | // https://forums.developer.nvidia.com/t/reverse-lut-for-lop3-lut/110651 | ||
| 13 | // Emulate GPU's LOP3.LUT (three-input logic op with 8-bit truth table) | ||
| 14 | IR::U32 ApplyLUT(IR::IREmitter& ir, const IR::U32& a, const IR::U32& b, const IR::U32& c, | ||
| 15 | u64 ttbl) { | ||
| 16 | IR::U32 r{ir.Imm32(0)}; | ||
| 17 | const IR::U32 not_a{ir.BitwiseNot(a)}; | ||
| 18 | const IR::U32 not_b{ir.BitwiseNot(b)}; | ||
| 19 | const IR::U32 not_c{ir.BitwiseNot(c)}; | ||
| 20 | if (ttbl & 0x01) { | ||
| 21 | // r |= ~a & ~b & ~c; | ||
| 22 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 23 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 24 | r = ir.BitwiseOr(r, rhs); | ||
| 25 | } | ||
| 26 | if (ttbl & 0x02) { | ||
| 27 | // r |= ~a & ~b & c; | ||
| 28 | const auto lhs{ir.BitwiseAnd(not_a, not_b)}; | ||
| 29 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 30 | r = ir.BitwiseOr(r, rhs); | ||
| 31 | } | ||
| 32 | if (ttbl & 0x04) { | ||
| 33 | // r |= ~a & b & ~c; | ||
| 34 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 35 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 36 | r = ir.BitwiseOr(r, rhs); | ||
| 37 | } | ||
| 38 | if (ttbl & 0x08) { | ||
| 39 | // r |= ~a & b & c; | ||
| 40 | const auto lhs{ir.BitwiseAnd(not_a, b)}; | ||
| 41 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 42 | r = ir.BitwiseOr(r, rhs); | ||
| 43 | } | ||
| 44 | if (ttbl & 0x10) { | ||
| 45 | // r |= a & ~b & ~c; | ||
| 46 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 47 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 48 | r = ir.BitwiseOr(r, rhs); | ||
| 49 | } | ||
| 50 | if (ttbl & 0x20) { | ||
| 51 | // r |= a & ~b & c; | ||
| 52 | const auto lhs{ir.BitwiseAnd(a, not_b)}; | ||
| 53 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 54 | r = ir.BitwiseOr(r, rhs); | ||
| 55 | } | ||
| 56 | if (ttbl & 0x40) { | ||
| 57 | // r |= a & b & ~c; | ||
| 58 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 59 | const auto rhs{ir.BitwiseAnd(lhs, not_c)}; | ||
| 60 | r = ir.BitwiseOr(r, rhs); | ||
| 61 | } | ||
| 62 | if (ttbl & 0x80) { | ||
| 63 | // r |= a & b & c; | ||
| 64 | const auto lhs{ir.BitwiseAnd(a, b)}; | ||
| 65 | const auto rhs{ir.BitwiseAnd(lhs, c)}; | ||
| 66 | r = ir.BitwiseOr(r, rhs); | ||
| 67 | } | ||
| 68 | return r; | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::U32 LOP3(TranslatorVisitor& v, u64 insn, const IR::U32& op_b, const IR::U32& op_c, u64 lut) { | ||
| 72 | union { | ||
| 73 | u64 insn; | ||
| 74 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 75 | BitField<8, 8, IR::Reg> src_reg; | ||
| 76 | BitField<47, 1, u64> cc; | ||
| 77 | } const lop3{insn}; | ||
| 78 | |||
| 79 | if (lop3.cc != 0) { | ||
| 80 | throw NotImplementedException("LOP3 CC"); | ||
| 81 | } | ||
| 82 | |||
| 83 | const IR::U32 op_a{v.X(lop3.src_reg)}; | ||
| 84 | const IR::U32 result{ApplyLUT(v.ir, op_a, op_b, op_c, lut)}; | ||
| 85 | v.X(lop3.dest_reg, result); | ||
| 86 | return result; | ||
| 87 | } | ||
| 88 | |||
| 89 | u64 GetLut48(u64 insn) { | ||
| 90 | union { | ||
| 91 | u64 raw; | ||
| 92 | BitField<48, 8, u64> lut; | ||
| 93 | } const lut{insn}; | ||
| 94 | return lut.lut; | ||
| 95 | } | ||
| 96 | } // Anonymous namespace | ||
| 97 | |||
| 98 | void TranslatorVisitor::LOP3_reg(u64 insn) { | ||
| 99 | union { | ||
| 100 | u64 insn; | ||
| 101 | BitField<28, 8, u64> lut; | ||
| 102 | BitField<38, 1, u64> x; | ||
| 103 | BitField<36, 2, PredicateOp> pred_op; | ||
| 104 | BitField<48, 3, IR::Pred> pred; | ||
| 105 | } const lop3{insn}; | ||
| 106 | |||
| 107 | if (lop3.x != 0) { | ||
| 108 | throw NotImplementedException("LOP3 X"); | ||
| 109 | } | ||
| 110 | const IR::U32 result{LOP3(*this, insn, GetReg20(insn), GetReg39(insn), lop3.lut)}; | ||
| 111 | const IR::U1 pred_result{PredicateOperation(ir, result, lop3.pred_op)}; | ||
| 112 | ir.SetPred(lop3.pred, pred_result); | ||
| 113 | } | ||
| 114 | |||
| 115 | void TranslatorVisitor::LOP3_cbuf(u64 insn) { | ||
| 116 | LOP3(*this, insn, GetCbuf(insn), GetReg39(insn), GetLut48(insn)); | ||
| 117 | } | ||
| 118 | |||
| 119 | void TranslatorVisitor::LOP3_imm(u64 insn) { | ||
| 120 | LOP3(*this, insn, GetImm20(insn), GetReg39(insn), GetLut48(insn)); | ||
| 121 | } | ||
| 122 | } // namespace Shader::Maxwell | ||
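The unrolled ApplyLUT above ORs together one minterm per set truth-table bit. The same result can be written compactly per bit position: the three input bits form a 3-bit index into the 8-entry table. A host-integer sketch of that equivalence (illustrative function name, not part of the translator):

#include <cstdint>

std::uint32_t Lop3Scalar(std::uint32_t a, std::uint32_t b, std::uint32_t c, std::uint32_t ttbl) {
    std::uint32_t r{0};
    for (int bit = 0; bit < 32; ++bit) {
        // index = (a_bit << 2) | (b_bit << 1) | c_bit, matching the 0x01..0x80 minterms above
        const std::uint32_t index{(((a >> bit) & 1u) << 2) | (((b >> bit) & 1u) << 1) |
                                  ((c >> bit) & 1u)};
        r |= ((ttbl >> index) & 1u) << bit;
    }
    return r;
}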
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp new file mode 100644 index 000000000..4324fd443 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_predicate_to_register.cpp | |||
| @@ -0,0 +1,66 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | } // Anonymous namespace | ||
| 16 | |||
| 17 | void TranslatorVisitor::P2R_reg(u64) { | ||
| 18 | throw NotImplementedException("P2R (reg)"); | ||
| 19 | } | ||
| 20 | |||
| 21 | void TranslatorVisitor::P2R_cbuf(u64) { | ||
| 22 | throw NotImplementedException("P2R (cbuf)"); | ||
| 23 | } | ||
| 24 | |||
| 25 | void TranslatorVisitor::P2R_imm(u64 insn) { | ||
| 26 | union { | ||
| 27 | u64 raw; | ||
| 28 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 29 | BitField<8, 8, IR::Reg> src; | ||
| 30 | BitField<40, 1, Mode> mode; | ||
| 31 | BitField<41, 2, u64> byte_selector; | ||
| 32 | } const p2r{insn}; | ||
| 33 | |||
| 34 | const u32 mask{GetImm20(insn).U32()}; | ||
| 35 | const bool pr_mode{p2r.mode == Mode::PR}; | ||
| 36 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 37 | const u32 offset{static_cast<u32>(p2r.byte_selector) * 8}; | ||
| 38 | IR::U32 insert{ir.Imm32(0)}; | ||
| 39 | for (u32 index = 0; index < num_items; ++index) { | ||
| 40 | if (((mask >> index) & 1) == 0) { | ||
| 41 | continue; | ||
| 42 | } | ||
| 43 | const IR::U1 cond{[this, index, pr_mode] { | ||
| 44 | if (pr_mode) { | ||
| 45 | return ir.GetPred(IR::Pred{index}); | ||
| 46 | } | ||
| 47 | switch (index) { | ||
| 48 | case 0: | ||
| 49 | return ir.GetZFlag(); | ||
| 50 | case 1: | ||
| 51 | return ir.GetSFlag(); | ||
| 52 | case 2: | ||
| 53 | return ir.GetCFlag(); | ||
| 54 | case 3: | ||
| 55 | return ir.GetOFlag(); | ||
| 56 | } | ||
| 57 | throw LogicError("Unreachable P2R index"); | ||
| 58 | }()}; | ||
| 59 | const IR::U32 bit{ir.Select(cond, ir.Imm32(1U << (index + offset)), ir.Imm32(0))}; | ||
| 60 | insert = ir.BitwiseOr(insert, bit); | ||
| 61 | } | ||
| 62 | const IR::U32 masked_out{ir.BitwiseAnd(X(p2r.src), ir.Imm32(~(mask << offset)))}; | ||
| 63 | X(p2r.dest_reg, ir.BitwiseOr(masked_out, insert)); | ||
| 64 | } | ||
| 65 | |||
| 66 | } // namespace Shader::Maxwell | ||
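P2R_imm gathers up to seven predicates (or the four condition-code flags), shifts them into the byte chosen by byte_selector, and merges them into the source register after clearing the masked bits. A host-side sketch of the predicate-mode path (illustrative, not the translator API):

#include <cstdint>

std::uint32_t P2rScalar(std::uint32_t src_reg, std::uint32_t mask, unsigned byte_selector,
                        const bool preds[7]) {
    const unsigned offset{byte_selector * 8u};
    std::uint32_t insert{0};
    for (unsigned index = 0; index < 7; ++index) {
        if (((mask >> index) & 1u) != 0 && preds[index]) {
            insert |= 1u << (index + offset);
        }
    }
    // Clear the selected bits of the source, then OR in the gathered predicate bits.
    return (src_reg & ~(mask << offset)) | insert;
}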
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp new file mode 100644 index 000000000..6bb08db8a --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/exception.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = false) { | ||
| 14 | union { | ||
| 15 | u64 raw; | ||
| 16 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 17 | BitField<39, 4, u64> mask; | ||
| 18 | BitField<12, 4, u64> mov32i_mask; | ||
| 19 | } const mov{insn}; | ||
| 20 | |||
| 21 | if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) { | ||
| 22 | throw NotImplementedException("Non-full move mask"); | ||
| 23 | } | ||
| 24 | v.X(mov.dest_reg, src); | ||
| 25 | } | ||
| 26 | } // Anonymous namespace | ||
| 27 | |||
| 28 | void TranslatorVisitor::MOV_reg(u64 insn) { | ||
| 29 | MOV(*this, insn, GetReg20(insn)); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::MOV_cbuf(u64 insn) { | ||
| 33 | MOV(*this, insn, GetCbuf(insn)); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::MOV_imm(u64 insn) { | ||
| 37 | MOV(*this, insn, GetImm20(insn)); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::MOV32I(u64 insn) { | ||
| 41 | MOV(*this, insn, GetImm32(insn), true); | ||
| 42 | } | ||
| 43 | |||
| 44 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp new file mode 100644 index 000000000..eda5f177b --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_register_to_predicate.cpp | |||
| @@ -0,0 +1,71 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | PR, | ||
| 13 | CC, | ||
| 14 | }; | ||
| 15 | |||
| 16 | void SetFlag(IR::IREmitter& ir, const IR::U1& inv_mask_bit, const IR::U1& src_bit, u32 index) { | ||
| 17 | switch (index) { | ||
| 18 | case 0: | ||
| 19 | return ir.SetZFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetZFlag(), src_bit)}); | ||
| 20 | case 1: | ||
| 21 | return ir.SetSFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetSFlag(), src_bit)}); | ||
| 22 | case 2: | ||
| 23 | return ir.SetCFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetCFlag(), src_bit)}); | ||
| 24 | case 3: | ||
| 25 | return ir.SetOFlag(IR::U1{ir.Select(inv_mask_bit, ir.GetOFlag(), src_bit)}); | ||
| 26 | default: | ||
| 27 | throw LogicError("Unreachable R2P index"); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | void R2P(TranslatorVisitor& v, u64 insn, const IR::U32& mask) { | ||
| 32 | union { | ||
| 33 | u64 raw; | ||
| 34 | BitField<8, 8, IR::Reg> src_reg; | ||
| 35 | BitField<40, 1, Mode> mode; | ||
| 36 | BitField<41, 2, u64> byte_selector; | ||
| 37 | } const r2p{insn}; | ||
| 38 | const IR::U32 src{v.X(r2p.src_reg)}; | ||
| 39 | const IR::U32 count{v.ir.Imm32(1)}; | ||
| 40 | const bool pr_mode{r2p.mode == Mode::PR}; | ||
| 41 | const u32 num_items{pr_mode ? 7U : 4U}; | ||
| 42 | const u32 offset_base{static_cast<u32>(r2p.byte_selector) * 8}; | ||
| 43 | for (u32 index = 0; index < num_items; ++index) { | ||
| 44 | const IR::U32 offset{v.ir.Imm32(offset_base + index)}; | ||
| 45 | const IR::U1 src_zero{v.ir.GetZeroFromOp(v.ir.BitFieldExtract(src, offset, count, false))}; | ||
| 46 | const IR::U1 src_bit{v.ir.LogicalNot(src_zero)}; | ||
| 47 | const IR::U32 mask_bfe{v.ir.BitFieldExtract(mask, v.ir.Imm32(index), count, false)}; | ||
| 48 | const IR::U1 inv_mask_bit{v.ir.GetZeroFromOp(mask_bfe)}; | ||
| 49 | if (pr_mode) { | ||
| 50 | const IR::Pred pred{index}; | ||
| 51 | v.ir.SetPred(pred, IR::U1{v.ir.Select(inv_mask_bit, v.ir.GetPred(pred), src_bit)}); | ||
| 52 | } else { | ||
| 53 | SetFlag(v.ir, inv_mask_bit, src_bit, index); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | } | ||
| 57 | } // Anonymous namespace | ||
| 58 | |||
| 59 | void TranslatorVisitor::R2P_reg(u64 insn) { | ||
| 60 | R2P(*this, insn, GetReg20(insn)); | ||
| 61 | } | ||
| 62 | |||
| 63 | void TranslatorVisitor::R2P_cbuf(u64 insn) { | ||
| 64 | R2P(*this, insn, GetCbuf(insn)); | ||
| 65 | } | ||
| 66 | |||
| 67 | void TranslatorVisitor::R2P_imm(u64 insn) { | ||
| 68 | R2P(*this, insn, GetImm20(insn)); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp new file mode 100644 index 000000000..20cb2674e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp | |||
| @@ -0,0 +1,181 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class SpecialRegister : u64 { | ||
| 12 | SR_LANEID = 0, | ||
| 13 | SR_CLOCK = 1, | ||
| 14 | SR_VIRTCFG = 2, | ||
| 15 | SR_VIRTID = 3, | ||
| 16 | SR_PM0 = 4, | ||
| 17 | SR_PM1 = 5, | ||
| 18 | SR_PM2 = 6, | ||
| 19 | SR_PM3 = 7, | ||
| 20 | SR_PM4 = 8, | ||
| 21 | SR_PM5 = 9, | ||
| 22 | SR_PM6 = 10, | ||
| 23 | SR_PM7 = 11, | ||
| 24 | SR12 = 12, | ||
| 25 | SR13 = 13, | ||
| 26 | SR14 = 14, | ||
| 27 | SR_ORDERING_TICKET = 15, | ||
| 28 | SR_PRIM_TYPE = 16, | ||
| 29 | SR_INVOCATION_ID = 17, | ||
| 30 | SR_Y_DIRECTION = 18, | ||
| 31 | SR_THREAD_KILL = 19, | ||
| 32 | SM_SHADER_TYPE = 20, | ||
| 33 | SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||
| 34 | SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||
| 35 | SR_DIRECTCBEWRITEENABLE = 23, | ||
| 36 | SR_MACHINE_ID_0 = 24, | ||
| 37 | SR_MACHINE_ID_1 = 25, | ||
| 38 | SR_MACHINE_ID_2 = 26, | ||
| 39 | SR_MACHINE_ID_3 = 27, | ||
| 40 | SR_AFFINITY = 28, | ||
| 41 | SR_INVOCATION_INFO = 29, | ||
| 42 | SR_WSCALEFACTOR_XY = 30, | ||
| 43 | SR_WSCALEFACTOR_Z = 31, | ||
| 44 | SR_TID = 32, | ||
| 45 | SR_TID_X = 33, | ||
| 46 | SR_TID_Y = 34, | ||
| 47 | SR_TID_Z = 35, | ||
| 48 | SR_CTA_PARAM = 36, | ||
| 49 | SR_CTAID_X = 37, | ||
| 50 | SR_CTAID_Y = 38, | ||
| 51 | SR_CTAID_Z = 39, | ||
| 52 | SR_NTID = 40, | ||
| 53 | SR_CirQueueIncrMinusOne = 41, | ||
| 54 | SR_NLATC = 42, | ||
| 55 | SR43 = 43, | ||
| 56 | SR_SM_SPA_VERSION = 44, | ||
| 57 | SR_MULTIPASSSHADERINFO = 45, | ||
| 58 | SR_LWINHI = 46, | ||
| 59 | SR_SWINHI = 47, | ||
| 60 | SR_SWINLO = 48, | ||
| 61 | SR_SWINSZ = 49, | ||
| 62 | SR_SMEMSZ = 50, | ||
| 63 | SR_SMEMBANKS = 51, | ||
| 64 | SR_LWINLO = 52, | ||
| 65 | SR_LWINSZ = 53, | ||
| 66 | SR_LMEMLOSZ = 54, | ||
| 67 | SR_LMEMHIOFF = 55, | ||
| 68 | SR_EQMASK = 56, | ||
| 69 | SR_LTMASK = 57, | ||
| 70 | SR_LEMASK = 58, | ||
| 71 | SR_GTMASK = 59, | ||
| 72 | SR_GEMASK = 60, | ||
| 73 | SR_REGALLOC = 61, | ||
| 74 | SR_BARRIERALLOC = 62, | ||
| 75 | SR63 = 63, | ||
| 76 | SR_GLOBALERRORSTATUS = 64, | ||
| 77 | SR65 = 65, | ||
| 78 | SR_WARPERRORSTATUS = 66, | ||
| 79 | SR_WARPERRORSTATUSCLEAR = 67, | ||
| 80 | SR68 = 68, | ||
| 81 | SR69 = 69, | ||
| 82 | SR70 = 70, | ||
| 83 | SR71 = 71, | ||
| 84 | SR_PM_HI0 = 72, | ||
| 85 | SR_PM_HI1 = 73, | ||
| 86 | SR_PM_HI2 = 74, | ||
| 87 | SR_PM_HI3 = 75, | ||
| 88 | SR_PM_HI4 = 76, | ||
| 89 | SR_PM_HI5 = 77, | ||
| 90 | SR_PM_HI6 = 78, | ||
| 91 | SR_PM_HI7 = 79, | ||
| 92 | SR_CLOCKLO = 80, | ||
| 93 | SR_CLOCKHI = 81, | ||
| 94 | SR_GLOBALTIMERLO = 82, | ||
| 95 | SR_GLOBALTIMERHI = 83, | ||
| 96 | SR84 = 84, | ||
| 97 | SR85 = 85, | ||
| 98 | SR86 = 86, | ||
| 99 | SR87 = 87, | ||
| 100 | SR88 = 88, | ||
| 101 | SR89 = 89, | ||
| 102 | SR90 = 90, | ||
| 103 | SR91 = 91, | ||
| 104 | SR92 = 92, | ||
| 105 | SR93 = 93, | ||
| 106 | SR94 = 94, | ||
| 107 | SR95 = 95, | ||
| 108 | SR_HWTASKID = 96, | ||
| 109 | SR_CIRCULARQUEUEENTRYINDEX = 97, | ||
| 110 | SR_CIRCULARQUEUEENTRYADDRESSLOW = 98, | ||
| 111 | SR_CIRCULARQUEUEENTRYADDRESSHIGH = 99, | ||
| 112 | }; | ||
| 113 | |||
| 114 | [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||
| 115 | switch (special_register) { | ||
| 116 | case SpecialRegister::SR_INVOCATION_ID: | ||
| 117 | return ir.InvocationId(); | ||
| 118 | case SpecialRegister::SR_THREAD_KILL: | ||
| 119 | return IR::U32{ir.Select(ir.IsHelperInvocation(), ir.Imm32(-1), ir.Imm32(0))}; | ||
| 120 | case SpecialRegister::SR_INVOCATION_INFO: | ||
| 121 | LOG_WARNING(Shader, "(STUBBED) SR_INVOCATION_INFO"); | ||
| 122 | return ir.Imm32(0x00ff'0000); | ||
| 123 | case SpecialRegister::SR_TID: { | ||
| 124 | const IR::Value tid{ir.LocalInvocationId()}; | ||
| 125 | return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, | ||
| 126 | IR::U32{ir.CompositeExtract(tid, 1)}, | ||
| 127 | ir.Imm32(16), ir.Imm32(8)), | ||
| 128 | IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); | ||
| 129 | } | ||
| 130 | case SpecialRegister::SR_TID_X: | ||
| 131 | return ir.LocalInvocationIdX(); | ||
| 132 | case SpecialRegister::SR_TID_Y: | ||
| 133 | return ir.LocalInvocationIdY(); | ||
| 134 | case SpecialRegister::SR_TID_Z: | ||
| 135 | return ir.LocalInvocationIdZ(); | ||
| 136 | case SpecialRegister::SR_CTAID_X: | ||
| 137 | return ir.WorkgroupIdX(); | ||
| 138 | case SpecialRegister::SR_CTAID_Y: | ||
| 139 | return ir.WorkgroupIdY(); | ||
| 140 | case SpecialRegister::SR_CTAID_Z: | ||
| 141 | return ir.WorkgroupIdZ(); | ||
| 142 | case SpecialRegister::SR_WSCALEFACTOR_XY: | ||
| 143 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_XY"); | ||
| 144 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 145 | case SpecialRegister::SR_WSCALEFACTOR_Z: | ||
| 146 | LOG_WARNING(Shader, "(STUBBED) SR_WSCALEFACTOR_Z"); | ||
| 147 | return ir.Imm32(Common::BitCast<u32>(1.0f)); | ||
| 148 | case SpecialRegister::SR_LANEID: | ||
| 149 | return ir.LaneId(); | ||
| 150 | case SpecialRegister::SR_EQMASK: | ||
| 151 | return ir.SubgroupEqMask(); | ||
| 152 | case SpecialRegister::SR_LTMASK: | ||
| 153 | return ir.SubgroupLtMask(); | ||
| 154 | case SpecialRegister::SR_LEMASK: | ||
| 155 | return ir.SubgroupLeMask(); | ||
| 156 | case SpecialRegister::SR_GTMASK: | ||
| 157 | return ir.SubgroupGtMask(); | ||
| 158 | case SpecialRegister::SR_GEMASK: | ||
| 159 | return ir.SubgroupGeMask(); | ||
| 160 | case SpecialRegister::SR_Y_DIRECTION: | ||
| 161 | return ir.BitCast<IR::U32>(ir.YDirection()); | ||
| 162 | case SpecialRegister::SR_AFFINITY: | ||
| 163 | LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); | ||
| 164 | return ir.Imm32(0); // This is the default value hardware returns. | ||
| 165 | default: | ||
| 166 | throw NotImplementedException("S2R special register {}", special_register); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | } // Anonymous namespace | ||
| 170 | |||
| 171 | void TranslatorVisitor::S2R(u64 insn) { | ||
| 172 | union { | ||
| 173 | u64 raw; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<20, 8, SpecialRegister> src_reg; | ||
| 176 | } const s2r{insn}; | ||
| 177 | |||
| 178 | X(s2r.dest_reg, Read(ir, s2r.src_reg)); | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace Shader::Maxwell | ||
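Most of S2R is a direct mapping from a special register to an IR query, but SR_TID packs the three local invocation ID components into a single 32-bit value (x in the low bits, y at bit 16 with a width of 8, z at bit 26 with a width of 6). A plain-integer sketch of that packing, with a helper name that is ours rather than the project's:

    #include <cstdint>
    #include <cstdio>

    // Illustrative packing behind the SR_TID read above.
    uint32_t PackTid(uint32_t x, uint32_t y, uint32_t z) {
        uint32_t packed = x;                                      // x occupies the low bits
        packed = (packed & ~(0xffu << 16)) | ((y & 0xffu) << 16); // y -> bits [16, 24)
        packed = (packed & ~(0x3fu << 26)) | ((z & 0x3fu) << 26); // z -> bits [26, 32)
        return packed;
    }

    int main() {
        std::printf("0x%08x\n", PackTid(5, 3, 1)); // prints 0x04030005
    }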
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp new file mode 100644 index 000000000..7e26ab359 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp | |||
| @@ -0,0 +1,283 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/opcodes.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { | ||
| 13 | throw NotImplementedException("Instruction {} is not implemented", opcode); | ||
| 14 | } | ||
| 15 | |||
| 16 | void TranslatorVisitor::ATOM_cas(u64) { | ||
| 17 | ThrowNotImplemented(Opcode::ATOM_cas); | ||
| 18 | } | ||
| 19 | |||
| 20 | void TranslatorVisitor::ATOMS_cas(u64) { | ||
| 21 | ThrowNotImplemented(Opcode::ATOMS_cas); | ||
| 22 | } | ||
| 23 | |||
| 24 | void TranslatorVisitor::B2R(u64) { | ||
| 25 | ThrowNotImplemented(Opcode::B2R); | ||
| 26 | } | ||
| 27 | |||
| 28 | void TranslatorVisitor::BPT(u64) { | ||
| 29 | ThrowNotImplemented(Opcode::BPT); | ||
| 30 | } | ||
| 31 | |||
| 32 | void TranslatorVisitor::BRA(u64) { | ||
| 33 | ThrowNotImplemented(Opcode::BRA); | ||
| 34 | } | ||
| 35 | |||
| 36 | void TranslatorVisitor::BRK(u64) { | ||
| 37 | ThrowNotImplemented(Opcode::BRK); | ||
| 38 | } | ||
| 39 | |||
| 40 | void TranslatorVisitor::CAL() { | ||
| 41 | // CAL is a no-op | ||
| 42 | } | ||
| 43 | |||
| 44 | void TranslatorVisitor::CCTL(u64) { | ||
| 45 | ThrowNotImplemented(Opcode::CCTL); | ||
| 46 | } | ||
| 47 | |||
| 48 | void TranslatorVisitor::CCTLL(u64) { | ||
| 49 | ThrowNotImplemented(Opcode::CCTLL); | ||
| 50 | } | ||
| 51 | |||
| 52 | void TranslatorVisitor::CONT(u64) { | ||
| 53 | ThrowNotImplemented(Opcode::CONT); | ||
| 54 | } | ||
| 55 | |||
| 56 | void TranslatorVisitor::CS2R(u64) { | ||
| 57 | ThrowNotImplemented(Opcode::CS2R); | ||
| 58 | } | ||
| 59 | |||
| 60 | void TranslatorVisitor::FCHK_reg(u64) { | ||
| 61 | ThrowNotImplemented(Opcode::FCHK_reg); | ||
| 62 | } | ||
| 63 | |||
| 64 | void TranslatorVisitor::FCHK_cbuf(u64) { | ||
| 65 | ThrowNotImplemented(Opcode::FCHK_cbuf); | ||
| 66 | } | ||
| 67 | |||
| 68 | void TranslatorVisitor::FCHK_imm(u64) { | ||
| 69 | ThrowNotImplemented(Opcode::FCHK_imm); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::GETCRSPTR(u64) { | ||
| 73 | ThrowNotImplemented(Opcode::GETCRSPTR); | ||
| 74 | } | ||
| 75 | |||
| 76 | void TranslatorVisitor::GETLMEMBASE(u64) { | ||
| 77 | ThrowNotImplemented(Opcode::GETLMEMBASE); | ||
| 78 | } | ||
| 79 | |||
| 80 | void TranslatorVisitor::IDE(u64) { | ||
| 81 | ThrowNotImplemented(Opcode::IDE); | ||
| 82 | } | ||
| 83 | |||
| 84 | void TranslatorVisitor::IDP_reg(u64) { | ||
| 85 | ThrowNotImplemented(Opcode::IDP_reg); | ||
| 86 | } | ||
| 87 | |||
| 88 | void TranslatorVisitor::IDP_imm(u64) { | ||
| 89 | ThrowNotImplemented(Opcode::IDP_imm); | ||
| 90 | } | ||
| 91 | |||
| 92 | void TranslatorVisitor::IMAD_reg(u64) { | ||
| 93 | ThrowNotImplemented(Opcode::IMAD_reg); | ||
| 94 | } | ||
| 95 | |||
| 96 | void TranslatorVisitor::IMAD_rc(u64) { | ||
| 97 | ThrowNotImplemented(Opcode::IMAD_rc); | ||
| 98 | } | ||
| 99 | |||
| 100 | void TranslatorVisitor::IMAD_cr(u64) { | ||
| 101 | ThrowNotImplemented(Opcode::IMAD_cr); | ||
| 102 | } | ||
| 103 | |||
| 104 | void TranslatorVisitor::IMAD_imm(u64) { | ||
| 105 | ThrowNotImplemented(Opcode::IMAD_imm); | ||
| 106 | } | ||
| 107 | |||
| 108 | void TranslatorVisitor::IMAD32I(u64) { | ||
| 109 | ThrowNotImplemented(Opcode::IMAD32I); | ||
| 110 | } | ||
| 111 | |||
| 112 | void TranslatorVisitor::IMADSP_reg(u64) { | ||
| 113 | ThrowNotImplemented(Opcode::IMADSP_reg); | ||
| 114 | } | ||
| 115 | |||
| 116 | void TranslatorVisitor::IMADSP_rc(u64) { | ||
| 117 | ThrowNotImplemented(Opcode::IMADSP_rc); | ||
| 118 | } | ||
| 119 | |||
| 120 | void TranslatorVisitor::IMADSP_cr(u64) { | ||
| 121 | ThrowNotImplemented(Opcode::IMADSP_cr); | ||
| 122 | } | ||
| 123 | |||
| 124 | void TranslatorVisitor::IMADSP_imm(u64) { | ||
| 125 | ThrowNotImplemented(Opcode::IMADSP_imm); | ||
| 126 | } | ||
| 127 | |||
| 128 | void TranslatorVisitor::IMUL_reg(u64) { | ||
| 129 | ThrowNotImplemented(Opcode::IMUL_reg); | ||
| 130 | } | ||
| 131 | |||
| 132 | void TranslatorVisitor::IMUL_cbuf(u64) { | ||
| 133 | ThrowNotImplemented(Opcode::IMUL_cbuf); | ||
| 134 | } | ||
| 135 | |||
| 136 | void TranslatorVisitor::IMUL_imm(u64) { | ||
| 137 | ThrowNotImplemented(Opcode::IMUL_imm); | ||
| 138 | } | ||
| 139 | |||
| 140 | void TranslatorVisitor::IMUL32I(u64) { | ||
| 141 | ThrowNotImplemented(Opcode::IMUL32I); | ||
| 142 | } | ||
| 143 | |||
| 144 | void TranslatorVisitor::JCAL(u64) { | ||
| 145 | ThrowNotImplemented(Opcode::JCAL); | ||
| 146 | } | ||
| 147 | |||
| 148 | void TranslatorVisitor::JMP(u64) { | ||
| 149 | ThrowNotImplemented(Opcode::JMP); | ||
| 150 | } | ||
| 151 | |||
| 152 | void TranslatorVisitor::KIL() { | ||
| 153 | // KIL is a no-op | ||
| 154 | } | ||
| 155 | |||
| 156 | void TranslatorVisitor::LD(u64) { | ||
| 157 | ThrowNotImplemented(Opcode::LD); | ||
| 158 | } | ||
| 159 | |||
| 160 | void TranslatorVisitor::LEPC(u64) { | ||
| 161 | ThrowNotImplemented(Opcode::LEPC); | ||
| 162 | } | ||
| 163 | |||
| 164 | void TranslatorVisitor::LONGJMP(u64) { | ||
| 165 | ThrowNotImplemented(Opcode::LONGJMP); | ||
| 166 | } | ||
| 167 | |||
| 168 | void TranslatorVisitor::NOP(u64) { | ||
| 169 | // NOP is a no-op | ||
| 170 | } | ||
| 171 | |||
| 172 | void TranslatorVisitor::PBK() { | ||
| 173 | // PBK is a no-op | ||
| 174 | } | ||
| 175 | |||
| 176 | void TranslatorVisitor::PCNT() { | ||
| 177 | // PCNT is a no-op | ||
| 178 | } | ||
| 179 | |||
| 180 | void TranslatorVisitor::PEXIT(u64) { | ||
| 181 | ThrowNotImplemented(Opcode::PEXIT); | ||
| 182 | } | ||
| 183 | |||
| 184 | void TranslatorVisitor::PLONGJMP(u64) { | ||
| 185 | ThrowNotImplemented(Opcode::PLONGJMP); | ||
| 186 | } | ||
| 187 | |||
| 188 | void TranslatorVisitor::PRET(u64) { | ||
| 189 | ThrowNotImplemented(Opcode::PRET); | ||
| 190 | } | ||
| 191 | |||
| 192 | void TranslatorVisitor::PRMT_reg(u64) { | ||
| 193 | ThrowNotImplemented(Opcode::PRMT_reg); | ||
| 194 | } | ||
| 195 | |||
| 196 | void TranslatorVisitor::PRMT_rc(u64) { | ||
| 197 | ThrowNotImplemented(Opcode::PRMT_rc); | ||
| 198 | } | ||
| 199 | |||
| 200 | void TranslatorVisitor::PRMT_cr(u64) { | ||
| 201 | ThrowNotImplemented(Opcode::PRMT_cr); | ||
| 202 | } | ||
| 203 | |||
| 204 | void TranslatorVisitor::PRMT_imm(u64) { | ||
| 205 | ThrowNotImplemented(Opcode::PRMT_imm); | ||
| 206 | } | ||
| 207 | |||
| 208 | void TranslatorVisitor::R2B(u64) { | ||
| 209 | ThrowNotImplemented(Opcode::R2B); | ||
| 210 | } | ||
| 211 | |||
| 212 | void TranslatorVisitor::RAM(u64) { | ||
| 213 | ThrowNotImplemented(Opcode::RAM); | ||
| 214 | } | ||
| 215 | |||
| 216 | void TranslatorVisitor::RET(u64) { | ||
| 217 | ThrowNotImplemented(Opcode::RET); | ||
| 218 | } | ||
| 219 | |||
| 220 | void TranslatorVisitor::RTT(u64) { | ||
| 221 | ThrowNotImplemented(Opcode::RTT); | ||
| 222 | } | ||
| 223 | |||
| 224 | void TranslatorVisitor::SAM(u64) { | ||
| 225 | ThrowNotImplemented(Opcode::SAM); | ||
| 226 | } | ||
| 227 | |||
| 228 | void TranslatorVisitor::SETCRSPTR(u64) { | ||
| 229 | ThrowNotImplemented(Opcode::SETCRSPTR); | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SETLMEMBASE(u64) { | ||
| 233 | ThrowNotImplemented(Opcode::SETLMEMBASE); | ||
| 234 | } | ||
| 235 | |||
| 236 | void TranslatorVisitor::SSY() { | ||
| 237 | // SSY is a no-op | ||
| 238 | } | ||
| 239 | |||
| 240 | void TranslatorVisitor::ST(u64) { | ||
| 241 | ThrowNotImplemented(Opcode::ST); | ||
| 242 | } | ||
| 243 | |||
| 244 | void TranslatorVisitor::STP(u64) { | ||
| 245 | ThrowNotImplemented(Opcode::STP); | ||
| 246 | } | ||
| 247 | |||
| 248 | void TranslatorVisitor::SUATOM_cas(u64) { | ||
| 249 | ThrowNotImplemented(Opcode::SUATOM_cas); | ||
| 250 | } | ||
| 251 | |||
| 252 | void TranslatorVisitor::SYNC(u64) { | ||
| 253 | ThrowNotImplemented(Opcode::SYNC); | ||
| 254 | } | ||
| 255 | |||
| 256 | void TranslatorVisitor::TXA(u64) { | ||
| 257 | ThrowNotImplemented(Opcode::TXA); | ||
| 258 | } | ||
| 259 | |||
| 260 | void TranslatorVisitor::VABSDIFF(u64) { | ||
| 261 | ThrowNotImplemented(Opcode::VABSDIFF); | ||
| 262 | } | ||
| 263 | |||
| 264 | void TranslatorVisitor::VABSDIFF4(u64) { | ||
| 265 | ThrowNotImplemented(Opcode::VABSDIFF4); | ||
| 266 | } | ||
| 267 | |||
| 268 | void TranslatorVisitor::VADD(u64) { | ||
| 269 | ThrowNotImplemented(Opcode::VADD); | ||
| 270 | } | ||
| 271 | |||
| 272 | void TranslatorVisitor::VSET(u64) { | ||
| 273 | ThrowNotImplemented(Opcode::VSET); | ||
| 274 | } | ||
| 275 | void TranslatorVisitor::VSHL(u64) { | ||
| 276 | ThrowNotImplemented(Opcode::VSHL); | ||
| 277 | } | ||
| 278 | |||
| 279 | void TranslatorVisitor::VSHR(u64) { | ||
| 280 | ThrowNotImplemented(Opcode::VSHR); | ||
| 281 | } | ||
| 282 | |||
| 283 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp new file mode 100644 index 000000000..01cfad88d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/output_geometry.cpp | |||
| @@ -0,0 +1,45 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | void OUT(TranslatorVisitor& v, u64 insn, IR::U32 stream_index) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<8, 8, IR::Reg> output_reg; // Not needed on host | ||
| 16 | BitField<39, 1, u64> emit; | ||
| 17 | BitField<40, 1, u64> cut; | ||
| 18 | } const out{insn}; | ||
| 19 | |||
| 20 | stream_index = v.ir.BitwiseAnd(stream_index, v.ir.Imm32(0b11)); | ||
| 21 | |||
| 22 | if (out.emit != 0) { | ||
| 23 | v.ir.EmitVertex(stream_index); | ||
| 24 | } | ||
| 25 | if (out.cut != 0) { | ||
| 26 | v.ir.EndPrimitive(stream_index); | ||
| 27 | } | ||
| 28 | // Host doesn't need the output register, but we can write to it to avoid undefined reads | ||
| 29 | v.X(out.dest_reg, v.ir.Imm32(0)); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::OUT_reg(u64 insn) { | ||
| 34 | OUT(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::OUT_cbuf(u64 insn) { | ||
| 38 | OUT(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::OUT_imm(u64 insn) { | ||
| 42 | OUT(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace Shader::Maxwell | ||
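OUT drives geometry-shader output with two single-bit fields: bit 39 emits a vertex and bit 40 ends the current primitive, while the stream index operand is masked down to two bits. A small illustrative decode of those fields with ordinary shifts (field positions taken from the union above, helper names ours):

    #include <cstdint>
    #include <cstdio>

    // Illustrative decode of the OUT fields used above: bit 39 emits a vertex,
    // bit 40 cuts (ends) the primitive, and the stream index is masked to 2 bits.
    struct OutFields {
        bool emit;
        bool cut;
    };

    OutFields DecodeOut(uint64_t insn) {
        return OutFields{
            .emit = ((insn >> 39) & 1) != 0,
            .cut = ((insn >> 40) & 1) != 0,
        };
    }

    int main() {
        const uint64_t insn = (uint64_t{1} << 39) | (uint64_t{1} << 40); // emit and cut together
        const OutFields out = DecodeOut(insn);
        const uint32_t stream = 5u & 0b11u; // a raw stream index of 5 wraps to 1
        std::printf("emit=%d cut=%d stream=%u\n", out.emit, out.cut, stream);
    }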
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp new file mode 100644 index 000000000..b4767afb5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/pixel_load.cpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class Mode : u64 { | ||
| 12 | Default, | ||
| 13 | CovMask, | ||
| 14 | Covered, | ||
| 15 | Offset, | ||
| 16 | CentroidOffset, | ||
| 17 | MyIndex, | ||
| 18 | }; | ||
| 19 | } // Anonymous namespace | ||
| 20 | |||
| 21 | void TranslatorVisitor::PIXLD(u64 insn) { | ||
| 22 | union { | ||
| 23 | u64 raw; | ||
| 24 | BitField<31, 3, Mode> mode; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 26 | BitField<8, 8, IR::Reg> addr_reg; | ||
| 27 | BitField<20, 8, s64> addr_offset; | ||
| 28 | BitField<45, 3, IR::Pred> dest_pred; | ||
| 29 | } const pixld{insn}; | ||
| 30 | |||
| 31 | if (pixld.dest_pred != IR::Pred::PT) { | ||
| 32 | throw NotImplementedException("Destination predicate"); | ||
| 33 | } | ||
| 34 | if (pixld.addr_reg != IR::Reg::RZ || pixld.addr_offset != 0) { | ||
| 35 | throw NotImplementedException("Non-zero source register"); | ||
| 36 | } | ||
| 37 | switch (pixld.mode) { | ||
| 38 | case Mode::MyIndex: | ||
| 39 | X(pixld.dest_reg, ir.SampleId()); | ||
| 40 | break; | ||
| 41 | default: | ||
| 42 | throw NotImplementedException("Mode {}", pixld.mode.Value()); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace Shader::Maxwell | ||
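PIXLD selects its behaviour with a three-bit mode field starting at bit 31; of the six encodings only MyIndex, which maps to the sample index, is translated so far. An illustrative decode of that field outside the translator (names ours):

    #include <cstdint>
    #include <cstdio>

    // Illustrative decode of the PIXLD mode field (3 bits starting at bit 31).
    enum class PixldMode : uint64_t { Default, CovMask, Covered, Offset, CentroidOffset, MyIndex };

    PixldMode DecodePixldMode(uint64_t insn) {
        return static_cast<PixldMode>((insn >> 31) & 0b111u);
    }

    int main() {
        const uint64_t insn = uint64_t{5} << 31; // encodes Mode::MyIndex
        std::printf("my_index=%d\n", DecodePixldMode(insn) == PixldMode::MyIndex ? 1 : 0);
    }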
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp new file mode 100644 index 000000000..75d1fa8c1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_predicate.cpp | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSETP(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 15 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 16 | BitField<12, 3, IR::Pred> pred_a; | ||
| 17 | BitField<15, 1, u64> neg_pred_a; | ||
| 18 | BitField<24, 2, BooleanOp> bop_1; | ||
| 19 | BitField<29, 3, IR::Pred> pred_b; | ||
| 20 | BitField<32, 1, u64> neg_pred_b; | ||
| 21 | BitField<39, 3, IR::Pred> pred_c; | ||
| 22 | BitField<42, 1, u64> neg_pred_c; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | } const pset{insn}; | ||
| 25 | |||
| 26 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 27 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 28 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 29 | |||
| 30 | const IR::U1 lhs_a{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 31 | const IR::U1 lhs_b{PredicateCombine(ir, ir.LogicalNot(pred_a), pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 result_a{PredicateCombine(ir, lhs_a, pred_c, pset.bop_2)}; | ||
| 33 | const IR::U1 result_b{PredicateCombine(ir, lhs_b, pred_c, pset.bop_2)}; | ||
| 34 | |||
| 35 | ir.SetPred(pset.dest_pred_a, result_a); | ||
| 36 | ir.SetPred(pset.dest_pred_b, result_b); | ||
| 37 | } | ||
| 38 | } // namespace Shader::Maxwell | ||
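PSETP combines three predicates with two boolean operations and writes two results: (a op1 b) op2 c and (!a op1 b) op2 c. A plain-bool sketch of that combination; the AND/OR/XOR semantics are assumed from how BooleanOp is used here, not copied from the common_funcs helper:

    #include <cstdio>

    // Plain-bool sketch of the two results PSETP writes above.
    enum class BooleanOp { AND, OR, XOR };

    bool Combine(bool a, bool b, BooleanOp op) {
        switch (op) {
        case BooleanOp::AND:
            return a && b;
        case BooleanOp::OR:
            return a || b;
        case BooleanOp::XOR:
            return a != b;
        }
        return false;
    }

    int main() {
        const bool pred_a = true, pred_b = false, pred_c = true;
        const BooleanOp bop_1 = BooleanOp::AND, bop_2 = BooleanOp::OR;
        const bool result_a = Combine(Combine(pred_a, pred_b, bop_1), pred_c, bop_2);  // dest_pred_a
        const bool result_b = Combine(Combine(!pred_a, pred_b, bop_1), pred_c, bop_2); // dest_pred_b
        std::printf("a=%d b=%d\n", result_a, result_b); // a=1 b=1: pred_c dominates the OR
    }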
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp new file mode 100644 index 000000000..b02789874 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/predicate_set_register.cpp | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::PSET(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<12, 3, IR::Pred> pred_a; | ||
| 16 | BitField<15, 1, u64> neg_pred_a; | ||
| 17 | BitField<24, 2, BooleanOp> bop_1; | ||
| 18 | BitField<29, 3, IR::Pred> pred_b; | ||
| 19 | BitField<32, 1, u64> neg_pred_b; | ||
| 20 | BitField<39, 3, IR::Pred> pred_c; | ||
| 21 | BitField<42, 1, u64> neg_pred_c; | ||
| 22 | BitField<44, 1, u64> bf; | ||
| 23 | BitField<45, 2, BooleanOp> bop_2; | ||
| 24 | BitField<47, 1, u64> cc; | ||
| 25 | } const pset{insn}; | ||
| 26 | |||
| 27 | const IR::U1 pred_a{ir.GetPred(pset.pred_a, pset.neg_pred_a != 0)}; | ||
| 28 | const IR::U1 pred_b{ir.GetPred(pset.pred_b, pset.neg_pred_b != 0)}; | ||
| 29 | const IR::U1 pred_c{ir.GetPred(pset.pred_c, pset.neg_pred_c != 0)}; | ||
| 30 | |||
| 31 | const IR::U1 res_1{PredicateCombine(ir, pred_a, pred_b, pset.bop_1)}; | ||
| 32 | const IR::U1 res_2{PredicateCombine(ir, res_1, pred_c, pset.bop_2)}; | ||
| 33 | |||
| 34 | const IR::U32 true_result{pset.bf != 0 ? ir.Imm32(0x3f800000) : ir.Imm32(-1)}; | ||
| 35 | const IR::U32 zero{ir.Imm32(0)}; | ||
| 36 | |||
| 37 | const IR::U32 result{ir.Select(res_2, true_result, zero)}; | ||
| 38 | |||
| 39 | X(pset.dest_reg, result); | ||
| 40 | if (pset.cc != 0) { | ||
| 41 | const IR::U1 is_zero{ir.IEqual(result, zero)}; | ||
| 42 | SetZFlag(is_zero); | ||
| 43 | if (pset.bf != 0) { | ||
| 44 | ResetSFlag(); | ||
| 45 | } else { | ||
| 46 | SetSFlag(ir.LogicalNot(is_zero)); | ||
| 47 | } | ||
| 48 | ResetOFlag(); | ||
| 49 | ResetCFlag(); | ||
| 50 | } | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace Shader::Maxwell | ||
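PSET materializes the combined predicate as an integer: zero when false, and when true either all ones or, with the BF bit set, the IEEE-754 bit pattern of 1.0f (0x3f800000) so the result can be consumed as a float. A small sketch of that selection (helper name ours):

    #include <cstdint>
    #include <cstdio>

    // Illustrative selection of PSET's register result: BF substitutes the bit
    // pattern of 1.0f for the all-ones integer when the predicate is true.
    uint32_t PsetResult(bool predicate, bool bf) {
        const uint32_t true_result = bf ? 0x3f800000u : 0xffffffffu;
        return predicate ? true_result : 0u;
    }

    int main() {
        std::printf("0x%08x\n", PsetResult(true, true));  // 0x3f800000 (1.0f)
        std::printf("0x%08x\n", PsetResult(true, false)); // 0xffffffff (-1)
        std::printf("0x%08x\n", PsetResult(false, true)); // 0x00000000
    }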
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp new file mode 100644 index 000000000..93baa75a9 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/select_source_with_predicate.cpp | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | |||
| 12 | void SEL(TranslatorVisitor& v, u64 insn, const IR::U32& src) { | ||
| 13 | union { | ||
| 14 | u64 raw; | ||
| 15 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 16 | BitField<8, 8, IR::Reg> src_reg; | ||
| 17 | BitField<39, 3, IR::Pred> pred; | ||
| 18 | BitField<42, 1, u64> neg_pred; | ||
| 19 | } const sel{insn}; | ||
| 20 | |||
| 21 | const IR::U1 pred{v.ir.GetPred(sel.pred)}; | ||
| 22 | IR::U32 op_a{v.X(sel.src_reg)}; | ||
| 23 | IR::U32 op_b{src}; | ||
| 24 | if (sel.neg_pred != 0) { | ||
| 25 | std::swap(op_a, op_b); | ||
| 26 | } | ||
| 27 | const IR::U32 result{v.ir.Select(pred, op_a, op_b)}; | ||
| 28 | |||
| 29 | v.X(sel.dest_reg, result); | ||
| 30 | } | ||
| 31 | } // Anonymous namespace | ||
| 32 | |||
| 33 | void TranslatorVisitor::SEL_reg(u64 insn) { | ||
| 34 | SEL(*this, insn, GetReg20(insn)); | ||
| 35 | } | ||
| 36 | |||
| 37 | void TranslatorVisitor::SEL_cbuf(u64 insn) { | ||
| 38 | SEL(*this, insn, GetCbuf(insn)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void TranslatorVisitor::SEL_imm(u64 insn) { | ||
| 42 | SEL(*this, insn, GetImm20(insn)); | ||
| 43 | } | ||
| 44 | } // namespace Shader::Maxwell | ||
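SEL picks between the first source register and the second operand based on a predicate; instead of inverting a negated predicate, the translator above swaps the two operands and keeps the select unchanged. The same trick in standalone form (names ours):

    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Standalone version of the operand swap SEL uses for a negated predicate.
    uint32_t Sel(bool pred, bool neg_pred, uint32_t op_a, uint32_t op_b) {
        if (neg_pred) {
            std::swap(op_a, op_b);
        }
        return pred ? op_a : op_b;
    }

    int main() {
        std::printf("%u\n", Sel(true, false, 1, 2)); // 1
        std::printf("%u\n", Sel(true, true, 1, 2));  // 2, the same as selecting with !pred
    }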
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp new file mode 100644 index 000000000..63b588ad4 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_atomic_operations.cpp | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | enum class Size : u64 { | ||
| 25 | U32, | ||
| 26 | S32, | ||
| 27 | U64, | ||
| 28 | S64, | ||
| 29 | F32FTZRN, | ||
| 30 | F16x2FTZRN, | ||
| 31 | SD32, | ||
| 32 | SD64, | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum class AtomicOp : u64 { | ||
| 36 | ADD, | ||
| 37 | MIN, | ||
| 38 | MAX, | ||
| 39 | INC, | ||
| 40 | DEC, | ||
| 41 | AND, | ||
| 42 | OR, | ||
| 43 | XOR, | ||
| 44 | EXCH, | ||
| 45 | }; | ||
| 46 | |||
| 47 | enum class Clamp : u64 { | ||
| 48 | IGN, | ||
| 49 | Default, | ||
| 50 | TRAP, | ||
| 51 | }; | ||
| 52 | |||
| 53 | TextureType GetType(Type type) { | ||
| 54 | switch (type) { | ||
| 55 | case Type::_1D: | ||
| 56 | return TextureType::Color1D; | ||
| 57 | case Type::BUFFER_1D: | ||
| 58 | return TextureType::Buffer; | ||
| 59 | case Type::ARRAY_1D: | ||
| 60 | return TextureType::ColorArray1D; | ||
| 61 | case Type::_2D: | ||
| 62 | return TextureType::Color2D; | ||
| 63 | case Type::ARRAY_2D: | ||
| 64 | return TextureType::ColorArray2D; | ||
| 65 | case Type::_3D: | ||
| 66 | return TextureType::Color3D; | ||
| 67 | } | ||
| 68 | throw NotImplementedException("Invalid type {}", type); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 72 | switch (type) { | ||
| 73 | case Type::_1D: | ||
| 74 | case Type::BUFFER_1D: | ||
| 75 | return v.X(reg); | ||
| 76 | case Type::_2D: | ||
| 77 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 78 | case Type::_3D: | ||
| 79 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 80 | default: | ||
| 81 | break; | ||
| 82 | } | ||
| 83 | throw NotImplementedException("Invalid type {}", type); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Value ApplyAtomicOp(IR::IREmitter& ir, const IR::U32& handle, const IR::Value& coords, | ||
| 87 | const IR::Value& op_b, IR::TextureInstInfo info, AtomicOp op, | ||
| 88 | bool is_signed) { | ||
| 89 | switch (op) { | ||
| 90 | case AtomicOp::ADD: | ||
| 91 | return ir.ImageAtomicIAdd(handle, coords, op_b, info); | ||
| 92 | case AtomicOp::MIN: | ||
| 93 | return ir.ImageAtomicIMin(handle, coords, op_b, is_signed, info); | ||
| 94 | case AtomicOp::MAX: | ||
| 95 | return ir.ImageAtomicIMax(handle, coords, op_b, is_signed, info); | ||
| 96 | case AtomicOp::INC: | ||
| 97 | return ir.ImageAtomicInc(handle, coords, op_b, info); | ||
| 98 | case AtomicOp::DEC: | ||
| 99 | return ir.ImageAtomicDec(handle, coords, op_b, info); | ||
| 100 | case AtomicOp::AND: | ||
| 101 | return ir.ImageAtomicAnd(handle, coords, op_b, info); | ||
| 102 | case AtomicOp::OR: | ||
| 103 | return ir.ImageAtomicOr(handle, coords, op_b, info); | ||
| 104 | case AtomicOp::XOR: | ||
| 105 | return ir.ImageAtomicXor(handle, coords, op_b, info); | ||
| 106 | case AtomicOp::EXCH: | ||
| 107 | return ir.ImageAtomicExchange(handle, coords, op_b, info); | ||
| 108 | default: | ||
| 109 | throw NotImplementedException("Atomic Operation {}", op); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | ImageFormat Format(Size size) { | ||
| 114 | switch (size) { | ||
| 115 | case Size::U32: | ||
| 116 | case Size::S32: | ||
| 117 | case Size::SD32: | ||
| 118 | return ImageFormat::R32_UINT; | ||
| 119 | default: | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | throw NotImplementedException("Invalid size {}", size); | ||
| 123 | } | ||
| 124 | |||
| 125 | bool IsSizeInt32(Size size) { | ||
| 126 | switch (size) { | ||
| 127 | case Size::U32: | ||
| 128 | case Size::S32: | ||
| 129 | case Size::SD32: | ||
| 130 | return true; | ||
| 131 | default: | ||
| 132 | return false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | |||
| 136 | void ImageAtomOp(TranslatorVisitor& v, IR::Reg dest_reg, IR::Reg operand_reg, IR::Reg coord_reg, | ||
| 137 | IR::Reg bindless_reg, AtomicOp op, Clamp clamp, Size size, Type type, | ||
| 138 | u64 bound_offset, bool is_bindless, bool write_result) { | ||
| 139 | if (clamp != Clamp::IGN) { | ||
| 140 | throw NotImplementedException("Clamp {}", clamp); | ||
| 141 | } | ||
| 142 | if (!IsSizeInt32(size)) { | ||
| 143 | throw NotImplementedException("Size {}", size); | ||
| 144 | } | ||
| 145 | const bool is_signed{size == Size::S32}; | ||
| 146 | const ImageFormat format{Format(size)}; | ||
| 147 | const TextureType tex_type{GetType(type)}; | ||
| 148 | const IR::Value coords{MakeCoords(v, coord_reg, type)}; | ||
| 149 | |||
| 150 | const IR::U32 handle{is_bindless ? v.X(bindless_reg) | ||
| 151 | : v.ir.Imm32(static_cast<u32>(bound_offset * 4))}; | ||
| 152 | IR::TextureInstInfo info{}; | ||
| 153 | info.type.Assign(tex_type); | ||
| 154 | info.image_format.Assign(format); | ||
| 155 | |||
| 156 | // TODO: float/64-bit operand | ||
| 157 | const IR::Value op_b{v.X(operand_reg)}; | ||
| 158 | const IR::Value color{ApplyAtomicOp(v.ir, handle, coords, op_b, info, op, is_signed)}; | ||
| 159 | |||
| 160 | if (write_result) { | ||
| 161 | v.X(dest_reg, IR::U32{color}); | ||
| 162 | } | ||
| 163 | } | ||
| 164 | } // Anonymous namespace | ||
| 165 | |||
| 166 | void TranslatorVisitor::SUATOM(u64 insn) { | ||
| 167 | union { | ||
| 168 | u64 raw; | ||
| 169 | BitField<54, 1, u64> is_bindless; | ||
| 170 | BitField<29, 4, AtomicOp> op; | ||
| 171 | BitField<33, 3, Type> type; | ||
| 172 | BitField<51, 3, Size> size; | ||
| 173 | BitField<49, 2, Clamp> clamp; | ||
| 174 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 175 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 176 | BitField<20, 8, IR::Reg> operand_reg; | ||
| 177 | BitField<36, 13, u64> bound_offset; // !is_bindless | ||
| 178 | BitField<39, 8, IR::Reg> bindless_reg; // is_bindless | ||
| 179 | } const suatom{insn}; | ||
| 180 | |||
| 181 | ImageAtomOp(*this, suatom.dest_reg, suatom.operand_reg, suatom.coord_reg, suatom.bindless_reg, | ||
| 182 | suatom.op, suatom.clamp, suatom.size, suatom.type, suatom.bound_offset, | ||
| 183 | suatom.is_bindless != 0, true); | ||
| 184 | } | ||
| 185 | |||
| 186 | void TranslatorVisitor::SURED(u64 insn) { | ||
| 187 | // TODO: confirm offsets | ||
| 188 | union { | ||
| 189 | u64 raw; | ||
| 190 | BitField<51, 1, u64> is_bound; | ||
| 191 | BitField<21, 3, AtomicOp> op; | ||
| 192 | BitField<33, 3, Type> type; | ||
| 193 | BitField<20, 3, Size> size; | ||
| 194 | BitField<49, 2, Clamp> clamp; | ||
| 195 | BitField<0, 8, IR::Reg> operand_reg; | ||
| 196 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 197 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 198 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 199 | } const sured{insn}; | ||
| 200 | ImageAtomOp(*this, IR::Reg::RZ, sured.operand_reg, sured.coord_reg, sured.bindless_reg, | ||
| 201 | sured.op, sured.clamp, sured.size, sured.type, sured.bound_offset, | ||
| 202 | sured.is_bound == 0, false); | ||
| 203 | } | ||
| 204 | |||
| 205 | } // namespace Shader::Maxwell | ||
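Both SUATOM and SURED funnel into ImageAtomOp, and the only difference in the image handle is its origin: bindless accesses read it from a register, while bound accesses scale the 13-bit offset field by four, presumably converting a word offset into a constant-buffer byte offset. An illustrative model of that selection (plain integers, names ours):

    #include <cstdint>
    #include <cstdio>

    // Illustrative model of the handle selection inside ImageAtomOp above.
    uint32_t SurfaceHandle(bool is_bindless, uint32_t bindless_reg_value, uint64_t bound_offset) {
        return is_bindless ? bindless_reg_value : static_cast<uint32_t>(bound_offset * 4);
    }

    int main() {
        std::printf("%u\n", SurfaceHandle(false, 0, 12));      // bound: 12 * 4 = 48
        std::printf("%u\n", SurfaceHandle(true, 0xabcdu, 12)); // bindless: raw register value
    }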
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp new file mode 100644 index 000000000..681220a8d --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp | |||
| @@ -0,0 +1,281 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | #include <bit> | ||
| 7 | |||
| 8 | #include "common/bit_field.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 11 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 12 | |||
| 13 | namespace Shader::Maxwell { | ||
| 14 | namespace { | ||
| 15 | enum class Type : u64 { | ||
| 16 | _1D, | ||
| 17 | BUFFER_1D, | ||
| 18 | ARRAY_1D, | ||
| 19 | _2D, | ||
| 20 | ARRAY_2D, | ||
| 21 | _3D, | ||
| 22 | }; | ||
| 23 | |||
| 24 | constexpr unsigned R = 1 << 0; | ||
| 25 | constexpr unsigned G = 1 << 1; | ||
| 26 | constexpr unsigned B = 1 << 2; | ||
| 27 | constexpr unsigned A = 1 << 3; | ||
| 28 | |||
| 29 | constexpr std::array MASK{ | ||
| 30 | 0U, // | ||
| 31 | R, // | ||
| 32 | G, // | ||
| 33 | R | G, // | ||
| 34 | B, // | ||
| 35 | R | B, // | ||
| 36 | G | B, // | ||
| 37 | R | G | B, // | ||
| 38 | A, // | ||
| 39 | R | A, // | ||
| 40 | G | A, // | ||
| 41 | R | G | A, // | ||
| 42 | B | A, // | ||
| 43 | R | B | A, // | ||
| 44 | G | B | A, // | ||
| 45 | R | G | B | A, // | ||
| 46 | }; | ||
| 47 | |||
| 48 | enum class Size : u64 { | ||
| 49 | U8, | ||
| 50 | S8, | ||
| 51 | U16, | ||
| 52 | S16, | ||
| 53 | B32, | ||
| 54 | B64, | ||
| 55 | B128, | ||
| 56 | }; | ||
| 57 | |||
| 58 | enum class Clamp : u64 { | ||
| 59 | IGN, | ||
| 60 | Default, | ||
| 61 | TRAP, | ||
| 62 | }; | ||
| 63 | |||
| 64 | // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators | ||
| 65 | enum class LoadCache : u64 { | ||
| 66 | CA, // Cache at all levels, likely to be accessed again | ||
| 67 | CG, // Cache at global level (L2 and below, not L1) | ||
| 68 | CI, // ??? | ||
| 69 | CV, // Don't cache and fetch again (volatile) | ||
| 70 | }; | ||
| 71 | |||
| 72 | enum class StoreCache : u64 { | ||
| 73 | WB, // Cache write-back all coherent levels | ||
| 74 | CG, // Cache at global level (L2 and below, not L1) | ||
| 75 | CS, // Cache streaming, likely to be accessed once | ||
| 76 | WT, // Cache write-through (to system memory, volatile?) | ||
| 77 | }; | ||
| 78 | |||
| 79 | ImageFormat Format(Size size) { | ||
| 80 | switch (size) { | ||
| 81 | case Size::U8: | ||
| 82 | return ImageFormat::R8_UINT; | ||
| 83 | case Size::S8: | ||
| 84 | return ImageFormat::R8_SINT; | ||
| 85 | case Size::U16: | ||
| 86 | return ImageFormat::R16_UINT; | ||
| 87 | case Size::S16: | ||
| 88 | return ImageFormat::R16_SINT; | ||
| 89 | case Size::B32: | ||
| 90 | return ImageFormat::R32_UINT; | ||
| 91 | case Size::B64: | ||
| 92 | return ImageFormat::R32G32_UINT; | ||
| 93 | case Size::B128: | ||
| 94 | return ImageFormat::R32G32B32A32_UINT; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid size {}", size); | ||
| 97 | } | ||
| 98 | |||
| 99 | int SizeInRegs(Size size) { | ||
| 100 | switch (size) { | ||
| 101 | case Size::U8: | ||
| 102 | case Size::S8: | ||
| 103 | case Size::U16: | ||
| 104 | case Size::S16: | ||
| 105 | case Size::B32: | ||
| 106 | return 1; | ||
| 107 | case Size::B64: | ||
| 108 | return 2; | ||
| 109 | case Size::B128: | ||
| 110 | return 4; | ||
| 111 | } | ||
| 112 | throw NotImplementedException("Invalid size {}", size); | ||
| 113 | } | ||
| 114 | |||
| 115 | TextureType GetType(Type type) { | ||
| 116 | switch (type) { | ||
| 117 | case Type::_1D: | ||
| 118 | return TextureType::Color1D; | ||
| 119 | case Type::BUFFER_1D: | ||
| 120 | return TextureType::Buffer; | ||
| 121 | case Type::ARRAY_1D: | ||
| 122 | return TextureType::ColorArray1D; | ||
| 123 | case Type::_2D: | ||
| 124 | return TextureType::Color2D; | ||
| 125 | case Type::ARRAY_2D: | ||
| 126 | return TextureType::ColorArray2D; | ||
| 127 | case Type::_3D: | ||
| 128 | return TextureType::Color3D; | ||
| 129 | } | ||
| 130 | throw NotImplementedException("Invalid type {}", type); | ||
| 131 | } | ||
| 132 | |||
| 133 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, Type type) { | ||
| 134 | const auto array{[&](int index) { | ||
| 135 | return v.ir.BitFieldExtract(v.X(reg + index), v.ir.Imm32(0), v.ir.Imm32(16)); | ||
| 136 | }}; | ||
| 137 | switch (type) { | ||
| 138 | case Type::_1D: | ||
| 139 | case Type::BUFFER_1D: | ||
| 140 | return v.X(reg); | ||
| 141 | case Type::ARRAY_1D: | ||
| 142 | return v.ir.CompositeConstruct(v.X(reg), array(1)); | ||
| 143 | case Type::_2D: | ||
| 144 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 145 | case Type::ARRAY_2D: | ||
| 146 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), array(2)); | ||
| 147 | case Type::_3D: | ||
| 148 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 149 | } | ||
| 150 | throw NotImplementedException("Invalid type {}", type); | ||
| 151 | } | ||
| 152 | |||
| 153 | unsigned SwizzleMask(u64 swizzle) { | ||
| 154 | if (swizzle == 0 || swizzle >= MASK.size()) { | ||
| 155 | throw NotImplementedException("Invalid swizzle {}", swizzle); | ||
| 156 | } | ||
| 157 | return MASK[swizzle]; | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::Value MakeColor(IR::IREmitter& ir, IR::Reg reg, int num_regs) { | ||
| 161 | std::array<IR::U32, 4> colors; | ||
| 162 | for (int i = 0; i < num_regs; ++i) { | ||
| 163 | colors[static_cast<size_t>(i)] = ir.GetReg(reg + i); | ||
| 164 | } | ||
| 165 | for (int i = num_regs; i < 4; ++i) { | ||
| 166 | colors[static_cast<size_t>(i)] = ir.Imm32(0); | ||
| 167 | } | ||
| 168 | return ir.CompositeConstruct(colors[0], colors[1], colors[2], colors[3]); | ||
| 169 | } | ||
| 170 | } // Anonymous namespace | ||
| 171 | |||
| 172 | void TranslatorVisitor::SULD(u64 insn) { | ||
| 173 | union { | ||
| 174 | u64 raw; | ||
| 175 | BitField<51, 1, u64> is_bound; | ||
| 176 | BitField<52, 1, u64> d; | ||
| 177 | BitField<23, 1, u64> ba; | ||
| 178 | BitField<33, 3, Type> type; | ||
| 179 | BitField<24, 2, LoadCache> cache; | ||
| 180 | BitField<20, 3, Size> size; // .D | ||
| 181 | BitField<20, 4, u64> swizzle; // .P | ||
| 182 | BitField<49, 2, Clamp> clamp; | ||
| 183 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 184 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 185 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 186 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 187 | } const suld{insn}; | ||
| 188 | |||
| 189 | if (suld.clamp != Clamp::IGN) { | ||
| 190 | throw NotImplementedException("Clamp {}", suld.clamp.Value()); | ||
| 191 | } | ||
| 192 | if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) { | ||
| 193 | throw NotImplementedException("Cache {}", suld.cache.Value()); | ||
| 194 | } | ||
| 195 | const bool is_typed{suld.d != 0}; | ||
| 196 | if (is_typed && suld.ba != 0) { | ||
| 197 | throw NotImplementedException("BA"); | ||
| 198 | } | ||
| 199 | |||
| 200 | const ImageFormat format{is_typed ? Format(suld.size) : ImageFormat::Typeless}; | ||
| 201 | const TextureType type{GetType(suld.type)}; | ||
| 202 | const IR::Value coords{MakeCoords(*this, suld.coord_reg, suld.type)}; | ||
| 203 | const IR::U32 handle{suld.is_bound != 0 ? ir.Imm32(static_cast<u32>(suld.bound_offset * 4)) | ||
| 204 | : X(suld.bindless_reg)}; | ||
| 205 | IR::TextureInstInfo info{}; | ||
| 206 | info.type.Assign(type); | ||
| 207 | info.image_format.Assign(format); | ||
| 208 | |||
| 209 | const IR::Value result{ir.ImageRead(handle, coords, info)}; | ||
| 210 | IR::Reg dest_reg{suld.dest_reg}; | ||
| 211 | if (is_typed) { | ||
| 212 | const int num_regs{SizeInRegs(suld.size)}; | ||
| 213 | for (int i = 0; i < num_regs; ++i) { | ||
| 214 | X(dest_reg + i, IR::U32{ir.CompositeExtract(result, static_cast<size_t>(i))}); | ||
| 215 | } | ||
| 216 | } else { | ||
| 217 | const unsigned mask{SwizzleMask(suld.swizzle)}; | ||
| 218 | const int bits{std::popcount(mask)}; | ||
| 219 | if (!IR::IsAligned(dest_reg, bits == 3 ? 4 : static_cast<size_t>(bits))) { | ||
| 220 | throw NotImplementedException("Unaligned destination register"); | ||
| 221 | } | ||
| 222 | for (unsigned component = 0; component < 4; ++component) { | ||
| 223 | if (((mask >> component) & 1) == 0) { | ||
| 224 | continue; | ||
| 225 | } | ||
| 226 | X(dest_reg, IR::U32{ir.CompositeExtract(result, component)}); | ||
| 227 | ++dest_reg; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | } | ||
| 231 | |||
| 232 | void TranslatorVisitor::SUST(u64 insn) { | ||
| 233 | union { | ||
| 234 | u64 raw; | ||
| 235 | BitField<51, 1, u64> is_bound; | ||
| 236 | BitField<52, 1, u64> d; | ||
| 237 | BitField<23, 1, u64> ba; | ||
| 238 | BitField<33, 3, Type> type; | ||
| 239 | BitField<24, 2, StoreCache> cache; | ||
| 240 | BitField<20, 3, Size> size; // .D | ||
| 241 | BitField<20, 4, u64> swizzle; // .P | ||
| 242 | BitField<49, 2, Clamp> clamp; | ||
| 243 | BitField<0, 8, IR::Reg> data_reg; | ||
| 244 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 245 | BitField<36, 13, u64> bound_offset; // is_bound | ||
| 246 | BitField<39, 8, IR::Reg> bindless_reg; // !is_bound | ||
| 247 | } const sust{insn}; | ||
| 248 | |||
| 249 | if (sust.clamp != Clamp::IGN) { | ||
| 250 | throw NotImplementedException("Clamp {}", sust.clamp.Value()); | ||
| 251 | } | ||
| 252 | if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) { | ||
| 253 | throw NotImplementedException("Cache {}", sust.cache.Value()); | ||
| 254 | } | ||
| 255 | const bool is_typed{sust.d != 0}; | ||
| 256 | if (is_typed && sust.ba != 0) { | ||
| 257 | throw NotImplementedException("BA"); | ||
| 258 | } | ||
| 259 | const ImageFormat format{is_typed ? Format(sust.size) : ImageFormat::Typeless}; | ||
| 260 | const TextureType type{GetType(sust.type)}; | ||
| 261 | const IR::Value coords{MakeCoords(*this, sust.coord_reg, sust.type)}; | ||
| 262 | const IR::U32 handle{sust.is_bound != 0 ? ir.Imm32(static_cast<u32>(sust.bound_offset * 4)) | ||
| 263 | : X(sust.bindless_reg)}; | ||
| 264 | IR::TextureInstInfo info{}; | ||
| 265 | info.type.Assign(type); | ||
| 266 | info.image_format.Assign(format); | ||
| 267 | |||
| 268 | IR::Value color; | ||
| 269 | if (is_typed) { | ||
| 270 | color = MakeColor(ir, sust.data_reg, SizeInRegs(sust.size)); | ||
| 271 | } else { | ||
| 272 | const unsigned mask{SwizzleMask(sust.swizzle)}; | ||
| 273 | if (mask != 0xf) { | ||
| 274 | throw NotImplementedException("Non-full mask"); | ||
| 275 | } | ||
| 276 | color = MakeColor(ir, sust.data_reg, 4); | ||
| 277 | } | ||
| 278 | ir.ImageWrite(handle, coords, color, info); | ||
| 279 | } | ||
| 280 | |||
| 281 | } // namespace Shader::Maxwell | ||
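For untyped (.P) SULD the four-bit swizzle field indexes the MASK table, the popcount of the resulting component mask gives the number of destination registers, and a three-component result is still aligned as if it occupied four registers. A worked example of that arithmetic:

    #include <array>
    #include <bit>
    #include <cstdio>

    // Worked example of the untyped SULD swizzle handling above.
    int main() {
        constexpr unsigned R = 1, G = 2, B = 4, A = 8;
        constexpr std::array<unsigned, 16> mask_lut{
            0, R, G, R | G, B, R | B, G | B, R | G | B,
            A, R | A, G | A, R | G | A, B | A, R | B | A, G | B | A, R | G | B | A,
        };
        const unsigned swizzle = 7;              // selects R, G and B
        const unsigned mask = mask_lut[swizzle]; // 0b0111
        const int regs = std::popcount(mask);    // three destination registers
        const int alignment = regs == 3 ? 4 : regs;
        std::printf("mask=0x%x regs=%d alignment=%d\n", mask, regs, alignment);
    }

The alignment rounding mirrors the IR::IsAligned check in SULD, which treats a three-register write like a four-register one for destination alignment purposes.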
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp new file mode 100644 index 000000000..0046b5edd --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp | |||
| @@ -0,0 +1,236 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Blod : u64 { | ||
| 15 | None, | ||
| 16 | LZ, | ||
| 17 | LB, | ||
| 18 | LL, | ||
| 19 | INVALIDBLOD4, | ||
| 20 | INVALIDBLOD5, | ||
| 21 | LBA, | ||
| 22 | LLA, | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum class TextureType : u64 { | ||
| 26 | _1D, | ||
| 27 | ARRAY_1D, | ||
| 28 | _2D, | ||
| 29 | ARRAY_2D, | ||
| 30 | _3D, | ||
| 31 | ARRAY_3D, | ||
| 32 | CUBE, | ||
| 33 | ARRAY_CUBE, | ||
| 34 | }; | ||
| 35 | |||
| 36 | Shader::TextureType GetType(TextureType type) { | ||
| 37 | switch (type) { | ||
| 38 | case TextureType::_1D: | ||
| 39 | return Shader::TextureType::Color1D; | ||
| 40 | case TextureType::ARRAY_1D: | ||
| 41 | return Shader::TextureType::ColorArray1D; | ||
| 42 | case TextureType::_2D: | ||
| 43 | return Shader::TextureType::Color2D; | ||
| 44 | case TextureType::ARRAY_2D: | ||
| 45 | return Shader::TextureType::ColorArray2D; | ||
| 46 | case TextureType::_3D: | ||
| 47 | return Shader::TextureType::Color3D; | ||
| 48 | case TextureType::ARRAY_3D: | ||
| 49 | throw NotImplementedException("3D array texture type"); | ||
| 50 | case TextureType::CUBE: | ||
| 51 | return Shader::TextureType::ColorCube; | ||
| 52 | case TextureType::ARRAY_CUBE: | ||
| 53 | return Shader::TextureType::ColorArrayCube; | ||
| 54 | } | ||
| 55 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 56 | } | ||
| 57 | |||
| 58 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 59 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 60 | switch (type) { | ||
| 61 | case TextureType::_1D: | ||
| 62 | return v.F(reg); | ||
| 63 | case TextureType::ARRAY_1D: | ||
| 64 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 65 | case TextureType::_2D: | ||
| 66 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 67 | case TextureType::ARRAY_2D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 69 | case TextureType::_3D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 71 | case TextureType::ARRAY_3D: | ||
| 72 | throw NotImplementedException("3D array texture type"); | ||
| 73 | case TextureType::CUBE: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_CUBE: | ||
| 76 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 77 | } | ||
| 78 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 79 | } | ||
| 80 | |||
| 81 | IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) { | ||
| 82 | switch (blod) { | ||
| 83 | case Blod::None: | ||
| 84 | return v.ir.Imm32(0.0f); | ||
| 85 | case Blod::LZ: | ||
| 86 | return v.ir.Imm32(0.0f); | ||
| 87 | case Blod::LB: | ||
| 88 | case Blod::LL: | ||
| 89 | case Blod::LBA: | ||
| 90 | case Blod::LLA: | ||
| 91 | return v.F(reg++); | ||
| 92 | case Blod::INVALIDBLOD4: | ||
| 93 | case Blod::INVALIDBLOD5: | ||
| 94 | break; | ||
| 95 | } | ||
| 96 | throw NotImplementedException("Invalid blod {}", blod); | ||
| 97 | } | ||
| 98 | |||
| 99 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 100 | const IR::U32 value{v.X(reg++)}; | ||
| 101 | switch (type) { | ||
| 102 | case TextureType::_1D: | ||
| 103 | case TextureType::ARRAY_1D: | ||
| 104 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 105 | case TextureType::_2D: | ||
| 106 | case TextureType::ARRAY_2D: | ||
| 107 | return v.ir.CompositeConstruct( | ||
| 108 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 109 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 110 | case TextureType::_3D: | ||
| 111 | case TextureType::ARRAY_3D: | ||
| 112 | return v.ir.CompositeConstruct( | ||
| 113 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 114 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 116 | case TextureType::CUBE: | ||
| 117 | case TextureType::ARRAY_CUBE: | ||
| 118 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 119 | } | ||
| 120 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 121 | } | ||
| 122 | |||
| 123 | bool HasExplicitLod(Blod blod) { | ||
| 124 | switch (blod) { | ||
| 125 | case Blod::LL: | ||
| 126 | case Blod::LLA: | ||
| 127 | case Blod::LZ: | ||
| 128 | return true; | ||
| 129 | default: | ||
| 130 | return false; | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc, | ||
| 135 | std::optional<u32> cbuf_offset) { | ||
| 136 | union { | ||
| 137 | u64 raw; | ||
| 138 | BitField<35, 1, u64> ndv; | ||
| 139 | BitField<49, 1, u64> nodep; | ||
| 140 | BitField<50, 1, u64> dc; | ||
| 141 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 142 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 143 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 144 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 145 | BitField<28, 3, TextureType> type; | ||
| 146 | BitField<31, 4, u64> mask; | ||
| 147 | } const tex{insn}; | ||
| 148 | |||
| 149 | if (lc) { | ||
| 150 | throw NotImplementedException("LC"); | ||
| 151 | } | ||
| 152 | const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)}; | ||
| 153 | |||
| 154 | IR::Reg meta_reg{tex.meta_reg}; | ||
| 155 | IR::Value handle; | ||
| 156 | IR::Value offset; | ||
| 157 | IR::F32 dref; | ||
| 158 | IR::F32 lod_clamp; | ||
| 159 | if (cbuf_offset) { | ||
| 160 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 161 | } else { | ||
| 162 | handle = v.X(meta_reg++); | ||
| 163 | } | ||
| 164 | const IR::F32 lod{MakeLod(v, meta_reg, blod)}; | ||
| 165 | if (aoffi) { | ||
| 166 | offset = MakeOffset(v, meta_reg, tex.type); | ||
| 167 | } | ||
| 168 | if (tex.dc != 0) { | ||
| 169 | dref = v.F(meta_reg++); | ||
| 170 | } | ||
| 171 | IR::TextureInstInfo info{}; | ||
| 172 | info.type.Assign(GetType(tex.type)); | ||
| 173 | info.is_depth.Assign(tex.dc != 0 ? 1 : 0); | ||
| 174 | info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0); | ||
| 175 | info.has_lod_clamp.Assign(lc ? 1 : 0); | ||
| 176 | |||
| 177 | const IR::Value sample{[&]() -> IR::Value { | ||
| 178 | if (tex.dc == 0) { | ||
| 179 | if (HasExplicitLod(blod)) { | ||
| 180 | return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, info); | ||
| 181 | } else { | ||
| 182 | return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info); | ||
| 183 | } | ||
| 184 | } | ||
| 185 | if (HasExplicitLod(blod)) { | ||
| 186 | return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, info); | ||
| 187 | } else { | ||
| 188 | return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp, | ||
| 189 | info); | ||
| 190 | } | ||
| 191 | }()}; | ||
| 192 | |||
| 193 | IR::Reg dest_reg{tex.dest_reg}; | ||
| 194 | for (int element = 0; element < 4; ++element) { | ||
| 195 | if (((tex.mask >> element) & 1) == 0) { | ||
| 196 | continue; | ||
| 197 | } | ||
| 198 | IR::F32 value; | ||
| 199 | if (tex.dc != 0) { | ||
| 200 | value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f); | ||
| 201 | } else { | ||
| 202 | value = IR::F32{v.ir.CompositeExtract(sample, static_cast<size_t>(element))}; | ||
| 203 | } | ||
| 204 | v.F(dest_reg, value); | ||
| 205 | ++dest_reg; | ||
| 206 | } | ||
| 207 | if (tex.sparse_pred != IR::Pred::PT) { | ||
| 208 | v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 209 | } | ||
| 210 | } | ||
| 211 | } // Anonymous namespace | ||
| 212 | |||
| 213 | void TranslatorVisitor::TEX(u64 insn) { | ||
| 214 | union { | ||
| 215 | u64 raw; | ||
| 216 | BitField<54, 1, u64> aoffi; | ||
| 217 | BitField<55, 3, Blod> blod; | ||
| 218 | BitField<58, 1, u64> lc; | ||
| 219 | BitField<36, 13, u64> cbuf_offset; | ||
| 220 | } const tex{insn}; | ||
| 221 | |||
| 222 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset * 4)); | ||
| 223 | } | ||
| 224 | |||
| 225 | void TranslatorVisitor::TEX_b(u64 insn) { | ||
| 226 | union { | ||
| 227 | u64 raw; | ||
| 228 | BitField<36, 1, u64> aoffi; | ||
| 229 | BitField<37, 3, Blod> blod; | ||
| 230 | BitField<40, 1, u64> lc; | ||
| 231 | } const tex{insn}; | ||
| 232 | |||
| 233 | Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt); | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace Shader::Maxwell | ||
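TEX consumes its optional operands from consecutive meta registers in a fixed order: the bindless handle (TEX.B only), then the LOD or bias, then the packed AOFFI offset, then the depth-compare reference. The offset itself packs one signed 4-bit field per dimension, so each component lies in [-8, 7]. An illustrative sign extension of those fields (helper name ours):

    #include <cstdint>
    #include <cstdio>

    // Illustrative sign extension of the packed AOFFI fields MakeOffset extracts
    // above: one signed 4-bit offset per dimension, packed at 4-bit steps.
    int32_t SignedField4(uint32_t value, unsigned bit) {
        const uint32_t raw = (value >> bit) & 0xfu;
        return static_cast<int32_t>(raw << 28) >> 28; // shift up, arithmetic shift back down
    }

    int main() {
        const uint32_t packed = 0x1f9u; // x = 0x9 (-7), y = 0xf (-1), z = 0x1 (+1)
        std::printf("x=%d y=%d z=%d\n", SignedField4(packed, 0), SignedField4(packed, 4),
                    SignedField4(packed, 8));
    }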
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp new file mode 100644 index 000000000..154e7f1a1 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | |||
| @@ -0,0 +1,266 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | union Encoding { | ||
| 20 | u64 raw; | ||
| 21 | BitField<59, 1, Precision> precision; | ||
| 22 | BitField<53, 4, u64> encoding; | ||
| 23 | BitField<49, 1, u64> nodep; | ||
| 24 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 25 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 26 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 27 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 28 | BitField<36, 13, u64> cbuf_offset; | ||
| 29 | BitField<50, 3, u64> swizzle; | ||
| 30 | }; | ||
| 31 | |||
| 32 | constexpr unsigned R = 1; | ||
| 33 | constexpr unsigned G = 2; | ||
| 34 | constexpr unsigned B = 4; | ||
| 35 | constexpr unsigned A = 8; | ||
| 36 | |||
| 37 | constexpr std::array RG_LUT{ | ||
| 38 | R, // | ||
| 39 | G, // | ||
| 40 | B, // | ||
| 41 | A, // | ||
| 42 | R | G, // | ||
| 43 | R | A, // | ||
| 44 | G | A, // | ||
| 45 | B | A, // | ||
| 46 | }; | ||
| 47 | |||
| 48 | constexpr std::array RGBA_LUT{ | ||
| 49 | R | G | B, // | ||
| 50 | R | G | A, // | ||
| 51 | R | B | A, // | ||
| 52 | G | B | A, // | ||
| 53 | R | G | B | A, // | ||
| 54 | }; | ||
| 55 | |||
| 56 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 57 | if (!IR::IsAligned(reg, alignment)) { | ||
| 58 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <typename... Args> | ||
| 63 | IR::Value Composite(TranslatorVisitor& v, Args... regs) { | ||
| 64 | return v.ir.CompositeConstruct(v.F(regs)...); | ||
| 65 | } | ||
| 66 | |||
| 67 | IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { | ||
| 68 | return v.ir.ConvertUToF(32, 16, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16))); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding texs{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset * 4))}; | ||
| 74 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 75 | const IR::Reg reg_a{texs.src_reg_a}; | ||
| 76 | const IR::Reg reg_b{texs.src_reg_b}; | ||
| 77 | IR::TextureInstInfo info{}; | ||
| 78 | if (texs.precision == Precision::F16) { | ||
| 79 | info.relaxed_precision.Assign(1); | ||
| 80 | } | ||
| 81 | switch (texs.encoding) { | ||
| 82 | case 0: // 1D.LZ | ||
| 83 | info.type.Assign(TextureType::Color1D); | ||
| 84 | return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, info); | ||
| 85 | case 1: // 2D | ||
| 86 | info.type.Assign(TextureType::Color2D); | ||
| 87 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info); | ||
| 88 | case 2: // 2D.LZ | ||
| 89 | info.type.Assign(TextureType::Color2D); | ||
| 90 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, info); | ||
| 91 | case 3: // 2D.LL | ||
| 92 | CheckAlignment(reg_a, 2); | ||
| 93 | info.type.Assign(TextureType::Color2D); | ||
| 94 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {}, | ||
| 95 | info); | ||
| 96 | case 4: // 2D.DC | ||
| 97 | CheckAlignment(reg_a, 2); | ||
| 98 | info.type.Assign(TextureType::Color2D); | ||
| 99 | info.is_depth.Assign(1); | ||
| 100 | return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 101 | {}, {}, {}, info); | ||
| 102 | case 5: // 2D.LL.DC | ||
| 103 | CheckAlignment(reg_a, 2); | ||
| 104 | CheckAlignment(reg_b, 2); | ||
| 105 | info.type.Assign(TextureType::Color2D); | ||
| 106 | info.is_depth.Assign(1); | ||
| 107 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), | ||
| 108 | v.F(reg_b + 1), v.F(reg_b), {}, info); | ||
| 109 | case 6: // 2D.LZ.DC | ||
| 110 | CheckAlignment(reg_a, 2); | ||
| 111 | info.type.Assign(TextureType::Color2D); | ||
| 112 | info.is_depth.Assign(1); | ||
| 113 | return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), | ||
| 114 | zero, {}, info); | ||
| 115 | case 7: // ARRAY_2D | ||
| 116 | CheckAlignment(reg_a, 2); | ||
| 117 | info.type.Assign(TextureType::ColorArray2D); | ||
| 118 | return v.ir.ImageSampleImplicitLod( | ||
| 119 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 120 | {}, {}, {}, info); | ||
| 121 | case 8: // ARRAY_2D.LZ | ||
| 122 | CheckAlignment(reg_a, 2); | ||
| 123 | info.type.Assign(TextureType::ColorArray2D); | ||
| 124 | return v.ir.ImageSampleExplicitLod( | ||
| 125 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 126 | zero, {}, info); | ||
| 127 | case 9: // ARRAY_2D.LZ.DC | ||
| 128 | CheckAlignment(reg_a, 2); | ||
| 129 | CheckAlignment(reg_b, 2); | ||
| 130 | info.type.Assign(TextureType::ColorArray2D); | ||
| 131 | info.is_depth.Assign(1); | ||
| 132 | return v.ir.ImageSampleDrefExplicitLod( | ||
| 133 | handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))), | ||
| 134 | v.F(reg_b + 1), zero, {}, info); | ||
| 135 | case 10: // 3D | ||
| 136 | CheckAlignment(reg_a, 2); | ||
| 137 | info.type.Assign(TextureType::Color3D); | ||
| 138 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 139 | {}, info); | ||
| 140 | case 11: // 3D.LZ | ||
| 141 | CheckAlignment(reg_a, 2); | ||
| 142 | info.type.Assign(TextureType::Color3D); | ||
| 143 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {}, | ||
| 144 | info); | ||
| 145 | case 12: // CUBE | ||
| 146 | CheckAlignment(reg_a, 2); | ||
| 147 | info.type.Assign(TextureType::ColorCube); | ||
| 148 | return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {}, | ||
| 149 | {}, info); | ||
| 150 | case 13: // CUBE.LL | ||
| 151 | CheckAlignment(reg_a, 2); | ||
| 152 | CheckAlignment(reg_b, 2); | ||
| 153 | info.type.Assign(TextureType::ColorCube); | ||
| 154 | return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), | ||
| 155 | v.F(reg_b + 1), {}, info); | ||
| 156 | default: | ||
| 157 | throw NotImplementedException("Illegal encoding {}", texs.encoding.Value()); | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | unsigned Swizzle(u64 insn) { | ||
| 162 | const Encoding texs{insn}; | ||
| 163 | const size_t encoding{texs.swizzle}; | ||
| 164 | if (texs.dest_reg_b == IR::Reg::RZ) { | ||
| 165 | if (encoding >= RG_LUT.size()) { | ||
| 166 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 167 | } | ||
| 168 | return RG_LUT[encoding]; | ||
| 169 | } else { | ||
| 170 | if (encoding >= RGBA_LUT.size()) { | ||
| 171 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 172 | } | ||
| 173 | return RGBA_LUT[encoding]; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | |||
| 177 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 178 | const bool is_shadow{sample.Type() == IR::Type::F32}; | ||
| 179 | if (is_shadow) { | ||
| 180 | const bool is_alpha{component == 3}; | ||
| 181 | return is_alpha ? v.ir.Imm32(1.0f) : IR::F32{sample}; | ||
| 182 | } else { | ||
| 183 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 188 | const Encoding texs{insn}; | ||
| 189 | switch (index) { | ||
| 190 | case 0: | ||
| 191 | return texs.dest_reg_a; | ||
| 192 | case 1: | ||
| 193 | CheckAlignment(texs.dest_reg_a, 2); | ||
| 194 | return texs.dest_reg_a + 1; | ||
| 195 | case 2: | ||
| 196 | return texs.dest_reg_b; | ||
| 197 | case 3: | ||
| 198 | CheckAlignment(texs.dest_reg_b, 2); | ||
| 199 | return texs.dest_reg_b + 1; | ||
| 200 | } | ||
| 201 | throw LogicError("Invalid store index {}", index); | ||
| 202 | } | ||
| 203 | |||
| 204 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 205 | const unsigned swizzle{Swizzle(insn)}; | ||
| 206 | unsigned store_index{0}; | ||
| 207 | for (unsigned component = 0; component < 4; ++component) { | ||
| 208 | if (((swizzle >> component) & 1) == 0) { | ||
| 209 | continue; | ||
| 210 | } | ||
| 211 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 212 | v.F(dest, Extract(v, sample, component)); | ||
| 213 | ++store_index; | ||
| 214 | } | ||
| 215 | } | ||
| 216 | |||
| 217 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 218 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 219 | } | ||
| 220 | |||
| 221 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 222 | const unsigned swizzle{Swizzle(insn)}; | ||
| 223 | unsigned store_index{0}; | ||
| 224 | std::array<IR::F32, 4> swizzled; | ||
| 225 | for (unsigned component = 0; component < 4; ++component) { | ||
| 226 | if (((swizzle >> component) & 1) == 0) { | ||
| 227 | continue; | ||
| 228 | } | ||
| 229 | swizzled[store_index] = Extract(v, sample, component); | ||
| 230 | ++store_index; | ||
| 231 | } | ||
| 232 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 233 | const Encoding texs{insn}; | ||
| 234 | switch (store_index) { | ||
| 235 | case 1: | ||
| 236 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 237 | break; | ||
| 238 | case 2: | ||
| 239 | case 3: | ||
| 240 | case 4: | ||
| 241 | v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 242 | switch (store_index) { | ||
| 243 | case 2: | ||
| 244 | break; | ||
| 245 | case 3: | ||
| 246 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 247 | break; | ||
| 248 | case 4: | ||
| 249 | v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 250 | break; | ||
| 251 | } | ||
| 252 | break; | ||
| 253 | } | ||
| 254 | } | ||
| 255 | } // Anonymous namespace | ||
| 256 | |||
| 257 | void TranslatorVisitor::TEXS(u64 insn) { | ||
| 258 | const IR::Value sample{Sample(*this, insn)}; | ||
| 259 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 260 | Store32(*this, insn, sample); | ||
| 261 | } else { | ||
| 262 | Store16(*this, insn, sample); | ||
| 263 | } | ||
| 264 | } | ||
| 265 | |||
| 266 | } // namespace Shader::Maxwell | ||
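The swizzle selection above can be modelled outside the IR with plain integers: the 3-bit swizzle field indexes a two-component LUT when dest_reg_b is RZ and a four-component LUT otherwise, yielding the mask of components the instruction writes. The sketch below mirrors the LUT values from this file; SwizzleMask and the demo main are illustrative names, not part of the emulator.

#include <array>
#include <cstdint>
#include <cstdio>
#include <stdexcept>

constexpr unsigned R = 1, G = 2, B = 4, A = 8;
constexpr std::array RG_LUT{R, G, B, A, R | G, R | A, G | A, B | A};
constexpr std::array RGBA_LUT{R | G | B, R | G | A, R | B | A, G | B | A, R | G | B | A};

// Returns the component write mask selected by the 3-bit swizzle field.
unsigned SwizzleMask(std::uint64_t swizzle, bool dest_reg_b_is_rz) {
    if (dest_reg_b_is_rz) {
        if (swizzle >= RG_LUT.size()) {
            throw std::runtime_error("illegal RG encoding");
        }
        return RG_LUT[swizzle];
    }
    if (swizzle >= RGBA_LUT.size()) {
        throw std::runtime_error("illegal RGBA encoding");
    }
    return RGBA_LUT[swizzle];
}

int main() {
    std::printf("swizzle=4, RZ: mask=%u\n", SwizzleMask(4, true));   // R | G == 3
    std::printf("swizzle=4, !RZ: mask=%u\n", SwizzleMask(4, false)); // R | G | B | A == 15
}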
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp new file mode 100644 index 000000000..218cbc1a8 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather.cpp | |||
| @@ -0,0 +1,208 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | enum class OffsetType : u64 { | ||
| 27 | None = 0, | ||
| 28 | AOFFI, | ||
| 29 | PTP, | ||
| 30 | Invalid, | ||
| 31 | }; | ||
| 32 | |||
| 33 | enum class ComponentType : u64 { | ||
| 34 | R = 0, | ||
| 35 | G = 1, | ||
| 36 | B = 2, | ||
| 37 | A = 3, | ||
| 38 | }; | ||
| 39 | |||
| 40 | Shader::TextureType GetType(TextureType type) { | ||
| 41 | switch (type) { | ||
| 42 | case TextureType::_1D: | ||
| 43 | return Shader::TextureType::Color1D; | ||
| 44 | case TextureType::ARRAY_1D: | ||
| 45 | return Shader::TextureType::ColorArray1D; | ||
| 46 | case TextureType::_2D: | ||
| 47 | return Shader::TextureType::Color2D; | ||
| 48 | case TextureType::ARRAY_2D: | ||
| 49 | return Shader::TextureType::ColorArray2D; | ||
| 50 | case TextureType::_3D: | ||
| 51 | return Shader::TextureType::Color3D; | ||
| 52 | case TextureType::ARRAY_3D: | ||
| 53 | throw NotImplementedException("3D array texture type"); | ||
| 54 | case TextureType::CUBE: | ||
| 55 | return Shader::TextureType::ColorCube; | ||
| 56 | case TextureType::ARRAY_CUBE: | ||
| 57 | return Shader::TextureType::ColorArrayCube; | ||
| 58 | } | ||
| 59 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 60 | } | ||
| 61 | |||
| 62 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 63 | const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, 16, v.X(reg)); }}; | ||
| 64 | switch (type) { | ||
| 65 | case TextureType::_1D: | ||
| 66 | return v.F(reg); | ||
| 67 | case TextureType::ARRAY_1D: | ||
| 68 | return v.ir.CompositeConstruct(v.F(reg + 1), read_array()); | ||
| 69 | case TextureType::_2D: | ||
| 70 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 71 | case TextureType::ARRAY_2D: | ||
| 72 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), read_array()); | ||
| 73 | case TextureType::_3D: | ||
| 74 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 75 | case TextureType::ARRAY_3D: | ||
| 76 | throw NotImplementedException("3D array texture type"); | ||
| 77 | case TextureType::CUBE: | ||
| 78 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 79 | case TextureType::ARRAY_CUBE: | ||
| 80 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3), read_array()); | ||
| 81 | } | ||
| 82 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 83 | } | ||
| 84 | |||
| 85 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 86 | const IR::U32 value{v.X(reg++)}; | ||
| 87 | switch (type) { | ||
| 88 | case TextureType::_1D: | ||
| 89 | case TextureType::ARRAY_1D: | ||
| 90 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true); | ||
| 91 | case TextureType::_2D: | ||
| 92 | case TextureType::ARRAY_2D: | ||
| 93 | return v.ir.CompositeConstruct( | ||
| 94 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 95 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 96 | case TextureType::_3D: | ||
| 97 | case TextureType::ARRAY_3D: | ||
| 98 | return v.ir.CompositeConstruct( | ||
| 99 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 100 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true), | ||
| 101 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), v.ir.Imm32(6), true)); | ||
| 102 | case TextureType::CUBE: | ||
| 103 | case TextureType::ARRAY_CUBE: | ||
| 104 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 105 | } | ||
| 106 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 107 | } | ||
| 108 | |||
| 109 | std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) { | ||
| 110 | const IR::U32 value1{v.X(reg++)}; | ||
| 111 | const IR::U32 value2{v.X(reg++)}; | ||
| 112 | const IR::U32 bitsize{v.ir.Imm32(6)}; | ||
| 113 | const auto make_vector{[&v, &bitsize](const IR::U32& value) { | ||
| 114 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true), | ||
| 115 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true), | ||
| 116 | v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true), | ||
| 117 | v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true)); | ||
| 118 | }}; | ||
| 119 | return {make_vector(value1), make_vector(value2)}; | ||
| 120 | } | ||
| 121 | |||
| 122 | void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type, | ||
| 123 | bool is_bindless) { | ||
| 124 | union { | ||
| 125 | u64 raw; | ||
| 126 | BitField<35, 1, u64> ndv; | ||
| 127 | BitField<49, 1, u64> nodep; | ||
| 128 | BitField<50, 1, u64> dc; | ||
| 129 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 130 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 131 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 132 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 133 | BitField<28, 3, TextureType> type; | ||
| 134 | BitField<31, 4, u64> mask; | ||
| 135 | BitField<36, 13, u64> cbuf_offset; | ||
| 136 | } const tld4{insn}; | ||
| 137 | |||
| 138 | const IR::Value coords{MakeCoords(v, tld4.coord_reg, tld4.type)}; | ||
| 139 | |||
| 140 | IR::Reg meta_reg{tld4.meta_reg}; | ||
| 141 | IR::Value handle; | ||
| 142 | IR::Value offset; | ||
| 143 | IR::Value offset2; | ||
| 144 | IR::F32 dref; | ||
| 145 | if (!is_bindless) { | ||
| 146 | handle = v.ir.Imm32(static_cast<u32>(tld4.cbuf_offset.Value() * 4)); | ||
| 147 | } else { | ||
| 148 | handle = v.X(meta_reg++); | ||
| 149 | } | ||
| 150 | switch (offset_type) { | ||
| 151 | case OffsetType::None: | ||
| 152 | break; | ||
| 153 | case OffsetType::AOFFI: | ||
| 154 | offset = MakeOffset(v, meta_reg, tld4.type); | ||
| 155 | break; | ||
| 156 | case OffsetType::PTP: | ||
| 157 | std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg); | ||
| 158 | break; | ||
| 159 | default: | ||
| 160 | throw NotImplementedException("Invalid offset type {}", offset_type); | ||
| 161 | } | ||
| 162 | if (tld4.dc != 0) { | ||
| 163 | dref = v.F(meta_reg++); | ||
| 164 | } | ||
| 165 | IR::TextureInstInfo info{}; | ||
| 166 | info.type.Assign(GetType(tld4.type)); | ||
| 167 | info.is_depth.Assign(tld4.dc != 0 ? 1 : 0); | ||
| 168 | info.gather_component.Assign(static_cast<u32>(component_type)); | ||
| 169 | const IR::Value sample{[&] { | ||
| 170 | if (tld4.dc == 0) { | ||
| 171 | return v.ir.ImageGather(handle, coords, offset, offset2, info); | ||
| 172 | } | ||
| 173 | return v.ir.ImageGatherDref(handle, coords, offset, offset2, dref, info); | ||
| 174 | }()}; | ||
| 175 | |||
| 176 | IR::Reg dest_reg{tld4.dest_reg}; | ||
| 177 | for (size_t element = 0; element < 4; ++element) { | ||
| 178 | if (((tld4.mask >> element) & 1) == 0) { | ||
| 179 | continue; | ||
| 180 | } | ||
| 181 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 182 | ++dest_reg; | ||
| 183 | } | ||
| 184 | if (tld4.sparse_pred != IR::Pred::PT) { | ||
| 185 | v.ir.SetPred(tld4.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | } // Anonymous namespace | ||
| 189 | |||
| 190 | void TranslatorVisitor::TLD4(u64 insn) { | ||
| 191 | union { | ||
| 192 | u64 raw; | ||
| 193 | BitField<56, 2, ComponentType> component; | ||
| 194 | BitField<54, 2, OffsetType> offset; | ||
| 195 | } const tld4{insn}; | ||
| 196 | Impl(*this, insn, tld4.component, tld4.offset, false); | ||
| 197 | } | ||
| 198 | |||
| 199 | void TranslatorVisitor::TLD4_b(u64 insn) { | ||
| 200 | union { | ||
| 201 | u64 raw; | ||
| 202 | BitField<38, 2, ComponentType> component; | ||
| 203 | BitField<36, 2, OffsetType> offset; | ||
| 204 | } const tld4{insn}; | ||
| 205 | Impl(*this, insn, tld4.component, tld4.offset, true); | ||
| 206 | } | ||
| 207 | |||
| 208 | } // namespace Shader::Maxwell | ||
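The AOFFI path above packs one signed 6-bit texel offset per coordinate into a single source register, at bit positions 0, 8 and 16. A minimal standalone model of that decoding, with plain integers standing in for BitFieldExtract with the signed flag (SignExtract and the constants in main are illustrative, not the emulator's API):

#include <cstdint>
#include <cstdio>

// Extract a signed bit field of `bits` width starting at `pos`, sign-extending the result.
int SignExtract(std::uint32_t value, unsigned pos, unsigned bits) {
    const std::uint32_t field = (value >> pos) & ((1u << bits) - 1u);
    const std::uint32_t sign = 1u << (bits - 1u);
    return static_cast<int>((field ^ sign) - sign);
}

int main() {
    const std::uint32_t packed = 0x003F'0102; // z = -1 (0x3F), y = 1, x = 2
    std::printf("x=%d y=%d z=%d\n", SignExtract(packed, 0, 6), SignExtract(packed, 8, 6),
                SignExtract(packed, 16, 6));
}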
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp new file mode 100644 index 000000000..34efa2d50 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | |||
| @@ -0,0 +1,134 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F32, | ||
| 16 | F16, | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum class ComponentType : u64 { | ||
| 20 | R = 0, | ||
| 21 | G = 1, | ||
| 22 | B = 2, | ||
| 23 | A = 3, | ||
| 24 | }; | ||
| 25 | |||
| 26 | union Encoding { | ||
| 27 | u64 raw; | ||
| 28 | BitField<55, 1, Precision> precision; | ||
| 29 | BitField<52, 2, ComponentType> component_type; | ||
| 30 | BitField<51, 1, u64> aoffi; | ||
| 31 | BitField<50, 1, u64> dc; | ||
| 32 | BitField<49, 1, u64> nodep; | ||
| 33 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 34 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 35 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 36 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 37 | BitField<36, 13, u64> cbuf_offset; | ||
| 38 | }; | ||
| 39 | |||
| 40 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 41 | if (!IR::IsAligned(reg, alignment)) { | ||
| 42 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 47 | const IR::U32 value{v.X(reg)}; | ||
| 48 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true), | ||
| 49 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true)); | ||
| 50 | } | ||
| 51 | |||
| 52 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 53 | const Encoding tld4s{insn}; | ||
| 54 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tld4s.cbuf_offset * 4))}; | ||
| 55 | const IR::Reg reg_a{tld4s.src_reg_a}; | ||
| 56 | const IR::Reg reg_b{tld4s.src_reg_b}; | ||
| 57 | IR::TextureInstInfo info{}; | ||
| 58 | if (tld4s.precision == Precision::F16) { | ||
| 59 | info.relaxed_precision.Assign(1); | ||
| 60 | } | ||
| 61 | info.gather_component.Assign(static_cast<u32>(tld4s.component_type.Value())); | ||
| 62 | info.type.Assign(Shader::TextureType::Color2D); | ||
| 63 | info.is_depth.Assign(tld4s.dc != 0 ? 1 : 0); | ||
| 64 | IR::Value coords; | ||
| 65 | if (tld4s.aoffi != 0) { | ||
| 66 | CheckAlignment(reg_a, 2); | ||
| 67 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 68 | IR::Value offset = MakeOffset(v, reg_b); | ||
| 69 | if (tld4s.dc != 0) { | ||
| 70 | CheckAlignment(reg_b, 2); | ||
| 71 | IR::F32 dref = v.F(reg_b + 1); | ||
| 72 | return v.ir.ImageGatherDref(handle, coords, offset, {}, dref, info); | ||
| 73 | } | ||
| 74 | return v.ir.ImageGather(handle, coords, offset, {}, info); | ||
| 75 | } | ||
| 76 | if (tld4s.dc != 0) { | ||
| 77 | CheckAlignment(reg_a, 2); | ||
| 78 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1)); | ||
| 79 | IR::F32 dref = v.F(reg_b); | ||
| 80 | return v.ir.ImageGatherDref(handle, coords, {}, {}, dref, info); | ||
| 81 | } | ||
| 82 | coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_b)); | ||
| 83 | return v.ir.ImageGather(handle, coords, {}, {}, info); | ||
| 84 | } | ||
| 85 | |||
| 86 | IR::Reg RegStoreComponent32(u64 insn, size_t index) { | ||
| 87 | const Encoding tlds4{insn}; | ||
| 88 | switch (index) { | ||
| 89 | case 0: | ||
| 90 | return tlds4.dest_reg_a; | ||
| 91 | case 1: | ||
| 92 | CheckAlignment(tlds4.dest_reg_a, 2); | ||
| 93 | return tlds4.dest_reg_a + 1; | ||
| 94 | case 2: | ||
| 95 | return tlds4.dest_reg_b; | ||
| 96 | case 3: | ||
| 97 | CheckAlignment(tlds4.dest_reg_b, 2); | ||
| 98 | return tlds4.dest_reg_b + 1; | ||
| 99 | } | ||
| 100 | throw LogicError("Invalid store index {}", index); | ||
| 101 | } | ||
| 102 | |||
| 103 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 104 | for (size_t component = 0; component < 4; ++component) { | ||
| 105 | const IR::Reg dest{RegStoreComponent32(insn, component)}; | ||
| 106 | v.F(dest, IR::F32{v.ir.CompositeExtract(sample, component)}); | ||
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 111 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 112 | } | ||
| 113 | |||
| 114 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 115 | std::array<IR::F32, 4> swizzled; | ||
| 116 | for (size_t component = 0; component < 4; ++component) { | ||
| 117 | swizzled[component] = IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 118 | } | ||
| 119 | const Encoding tld4s{insn}; | ||
| 120 | v.X(tld4s.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 121 | v.X(tld4s.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 122 | } | ||
| 123 | } // Anonymous namespace | ||
| 124 | |||
| 125 | void TranslatorVisitor::TLD4S(u64 insn) { | ||
| 126 | const IR::Value sample{Sample(*this, insn)}; | ||
| 127 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 128 | Store32(*this, insn, sample); | ||
| 129 | } else { | ||
| 130 | Store16(*this, insn, sample); | ||
| 131 | } | ||
| 132 | } | ||
| 133 | |||
| 134 | } // namespace Shader::Maxwell | ||
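Both the F16 store here and the one in texture_fetch_swizzled.cpp write results through PackHalf2x16, which places the first component in the low 16 bits of the destination register and the second in the high 16 bits. A standalone sketch of just that layout, using already-encoded 16-bit values (PackHalves is an illustrative name and does not perform the float-to-half conversion itself):

#include <cstdint>
#include <cstdio>

// Combine two 16-bit encodings into one 32-bit register value:
// the first argument lands in the low half, the second in the high half.
std::uint32_t PackHalves(std::uint16_t low, std::uint16_t high) {
    return static_cast<std::uint32_t>(low) | (static_cast<std::uint32_t>(high) << 16);
}

int main() {
    // 0x3C00 is 1.0 and 0x4000 is 2.0 in IEEE half precision.
    std::printf("0x%08X\n", PackHalves(0x3C00, 0x4000)); // prints 0x40003C00
}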
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp new file mode 100644 index 000000000..c3fe3ffda --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_gradient.cpp | |||
| @@ -0,0 +1,182 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg, bool has_lod_clamp) { | ||
| 49 | const IR::U32 value{v.X(reg)}; | ||
| 50 | const u32 base{has_lod_clamp ? 12U : 16U}; | ||
| 51 | return v.ir.CompositeConstruct( | ||
| 52 | v.ir.BitFieldExtract(value, v.ir.Imm32(base), v.ir.Imm32(4), true), | ||
| 53 | v.ir.BitFieldExtract(value, v.ir.Imm32(base + 4), v.ir.Imm32(4), true)); | ||
| 54 | } | ||
| 55 | |||
| 56 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 57 | union { | ||
| 58 | u64 raw; | ||
| 59 | BitField<49, 1, u64> nodep; | ||
| 60 | BitField<35, 1, u64> aoffi; | ||
| 61 | BitField<50, 1, u64> lc; | ||
| 62 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 63 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 64 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 65 | BitField<20, 8, IR::Reg> derivate_reg; | ||
| 66 | BitField<28, 3, TextureType> type; | ||
| 67 | BitField<31, 4, u64> mask; | ||
| 68 | BitField<36, 13, u64> cbuf_offset; | ||
| 69 | } const txd{insn}; | ||
| 70 | |||
| 71 | const bool has_lod_clamp = txd.lc != 0; | ||
| 72 | if (has_lod_clamp) { | ||
| 73 | throw NotImplementedException("TXD.LC - CLAMP is not implemented"); | ||
| 74 | } | ||
| 75 | |||
| 76 | IR::Value coords; | ||
| 77 | u32 num_derivates{}; | ||
| 78 | IR::Reg base_reg{txd.coord_reg}; | ||
| 79 | IR::Reg last_reg; | ||
| 80 | IR::Value handle; | ||
| 81 | if (is_bindless) { | ||
| 82 | handle = v.X(base_reg++); | ||
| 83 | } else { | ||
| 84 | handle = v.ir.Imm32(static_cast<u32>(txd.cbuf_offset.Value() * 4)); | ||
| 85 | } | ||
| 86 | |||
| 87 | const auto read_array{[&]() -> IR::F32 { | ||
| 88 | const IR::U32 base{v.ir.Imm32(0)}; | ||
| 89 | const IR::U32 count{v.ir.Imm32(has_lod_clamp ? 12 : 16)}; | ||
| 90 | const IR::U32 array_index{v.ir.BitFieldExtract(v.X(last_reg), base, count)}; | ||
| 91 | return v.ir.ConvertUToF(32, 16, array_index); | ||
| 92 | }}; | ||
| 93 | switch (txd.type) { | ||
| 94 | case TextureType::_1D: { | ||
| 95 | coords = v.F(base_reg); | ||
| 96 | num_derivates = 1; | ||
| 97 | last_reg = base_reg + 1; | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case TextureType::ARRAY_1D: { | ||
| 101 | last_reg = base_reg + 1; | ||
| 102 | coords = v.ir.CompositeConstruct(v.F(base_reg), read_array()); | ||
| 103 | num_derivates = 1; | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | case TextureType::_2D: { | ||
| 107 | last_reg = base_reg + 2; | ||
| 108 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1)); | ||
| 109 | num_derivates = 2; | ||
| 110 | break; | ||
| 111 | } | ||
| 112 | case TextureType::ARRAY_2D: { | ||
| 113 | last_reg = base_reg + 2; | ||
| 114 | coords = v.ir.CompositeConstruct(v.F(base_reg), v.F(base_reg + 1), read_array()); | ||
| 115 | num_derivates = 2; | ||
| 116 | break; | ||
| 117 | } | ||
| 118 | default: | ||
| 119 | throw NotImplementedException("Invalid texture type"); | ||
| 120 | } | ||
| 121 | |||
| 122 | const IR::Reg derivate_reg{txd.derivate_reg}; | ||
| 123 | IR::Value derivates; | ||
| 124 | switch (num_derivates) { | ||
| 125 | case 1: { | ||
| 126 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1)); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case 2: { | ||
| 130 | derivates = v.ir.CompositeConstruct(v.F(derivate_reg), v.F(derivate_reg + 1), | ||
| 131 | v.F(derivate_reg + 2), v.F(derivate_reg + 3)); | ||
| 132 | break; | ||
| 133 | } | ||
| 134 | default: | ||
| 135 | throw NotImplementedException("Invalid texture type"); | ||
| 136 | } | ||
| 137 | |||
| 138 | IR::Value offset; | ||
| 139 | if (txd.aoffi != 0) { | ||
| 140 | offset = MakeOffset(v, last_reg, has_lod_clamp); | ||
| 141 | } | ||
| 142 | |||
| 143 | IR::F32 lod_clamp; | ||
| 144 | if (has_lod_clamp) { | ||
| 145 | // The LOD clamp is a 4.8 fixed-point value that has to be converted to float. | ||
| 146 | // A fixed-point value is converted with float(value) / float(1 << fraction_bits); | ||
| 147 | // in this case the fraction width is 8 bits. | ||
| 148 | const IR::F32 conv4_8fixp_f{v.ir.Imm32(static_cast<f32>(1U << 8))}; | ||
| 149 | const IR::F32 fixp_lc{v.ir.ConvertUToF( | ||
| 150 | 32, 16, v.ir.BitFieldExtract(v.X(last_reg), v.ir.Imm32(20), v.ir.Imm32(12)))}; | ||
| 151 | lod_clamp = v.ir.FPMul(fixp_lc, conv4_8fixp_f); | ||
| 152 | } | ||
| 153 | |||
| 154 | IR::TextureInstInfo info{}; | ||
| 155 | info.type.Assign(GetType(txd.type)); | ||
| 156 | info.num_derivates.Assign(num_derivates); | ||
| 157 | info.has_lod_clamp.Assign(has_lod_clamp ? 1 : 0); | ||
| 158 | const IR::Value sample{v.ir.ImageGradient(handle, coords, derivates, offset, lod_clamp, info)}; | ||
| 159 | |||
| 160 | IR::Reg dest_reg{txd.dest_reg}; | ||
| 161 | for (size_t element = 0; element < 4; ++element) { | ||
| 162 | if (((txd.mask >> element) & 1) == 0) { | ||
| 163 | continue; | ||
| 164 | } | ||
| 165 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 166 | ++dest_reg; | ||
| 167 | } | ||
| 168 | if (txd.sparse_pred != IR::Pred::PT) { | ||
| 169 | v.ir.SetPred(txd.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 170 | } | ||
| 171 | } | ||
| 172 | } // Anonymous namespace | ||
| 173 | |||
| 174 | void TranslatorVisitor::TXD(u64 insn) { | ||
| 175 | Impl(*this, insn, false); | ||
| 176 | } | ||
| 177 | |||
| 178 | void TranslatorVisitor::TXD_b(u64 insn) { | ||
| 179 | Impl(*this, insn, true); | ||
| 180 | } | ||
| 181 | |||
| 182 | } // namespace Shader::Maxwell | ||
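The lod clamp path above reads a 12-bit field and treats it as 4.8 fixed point. A standalone sketch of converting such a 4.8 fixed-point field to float with plain arithmetic (FixedPoint4_8ToFloat is an illustrative name; the emulator does the equivalent work through IR operations):

#include <cstdint>
#include <cstdio>

// Interpret the low 12 bits as 4 integer bits and 8 fractional bits.
float FixedPoint4_8ToFloat(std::uint32_t raw12) {
    return static_cast<float>(raw12 & 0xFFFu) / static_cast<float>(1u << 8);
}

int main() {
    std::printf("%f\n", FixedPoint4_8ToFloat(0x180)); // 0x1.80 in 4.8 fixed point == 1.5
}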
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp new file mode 100644 index 000000000..983058303 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load.cpp | |||
| @@ -0,0 +1,165 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | const auto read_array{ | ||
| 50 | [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.X(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||
| 73 | const IR::U32 value{v.X(reg++)}; | ||
| 74 | switch (type) { | ||
| 75 | case TextureType::_1D: | ||
| 76 | case TextureType::ARRAY_1D: | ||
| 77 | return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||
| 78 | case TextureType::_2D: | ||
| 79 | case TextureType::ARRAY_2D: | ||
| 80 | return v.ir.CompositeConstruct( | ||
| 81 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 82 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 83 | case TextureType::_3D: | ||
| 84 | case TextureType::ARRAY_3D: | ||
| 85 | return v.ir.CompositeConstruct( | ||
| 86 | v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 87 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||
| 88 | v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||
| 89 | case TextureType::CUBE: | ||
| 90 | case TextureType::ARRAY_CUBE: | ||
| 91 | throw NotImplementedException("Illegal offset on CUBE sample"); | ||
| 92 | } | ||
| 93 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 94 | } | ||
| 95 | |||
| 96 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 97 | union { | ||
| 98 | u64 raw; | ||
| 99 | BitField<49, 1, u64> nodep; | ||
| 100 | BitField<55, 1, u64> lod; | ||
| 101 | BitField<50, 1, u64> multisample; | ||
| 102 | BitField<35, 1, u64> aoffi; | ||
| 103 | BitField<54, 1, u64> clamp; | ||
| 104 | BitField<51, 3, IR::Pred> sparse_pred; | ||
| 105 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 106 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 107 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 108 | BitField<28, 3, TextureType> type; | ||
| 109 | BitField<31, 4, u64> mask; | ||
| 110 | BitField<36, 13, u64> cbuf_offset; | ||
| 111 | } const tld{insn}; | ||
| 112 | |||
| 113 | const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; | ||
| 114 | |||
| 115 | IR::Reg meta_reg{tld.meta_reg}; | ||
| 116 | IR::Value handle; | ||
| 117 | IR::Value offset; | ||
| 118 | IR::U32 lod; | ||
| 119 | IR::U32 multisample; | ||
| 120 | if (is_bindless) { | ||
| 121 | handle = v.X(meta_reg++); | ||
| 122 | } else { | ||
| 123 | handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||
| 124 | } | ||
| 125 | if (tld.lod != 0) { | ||
| 126 | lod = v.X(meta_reg++); | ||
| 127 | } else { | ||
| 128 | lod = v.ir.Imm32(0U); | ||
| 129 | } | ||
| 130 | if (tld.aoffi != 0) { | ||
| 131 | offset = MakeOffset(v, meta_reg, tld.type); | ||
| 132 | } | ||
| 133 | if (tld.multisample != 0) { | ||
| 134 | multisample = v.X(meta_reg++); | ||
| 135 | } | ||
| 136 | if (tld.clamp != 0) { | ||
| 137 | throw NotImplementedException("TLD.CL - CLAMP is not implmented"); | ||
| 138 | } | ||
| 139 | IR::TextureInstInfo info{}; | ||
| 140 | info.type.Assign(GetType(tld.type)); | ||
| 141 | const IR::Value sample{v.ir.ImageFetch(handle, coords, offset, lod, multisample, info)}; | ||
| 142 | |||
| 143 | IR::Reg dest_reg{tld.dest_reg}; | ||
| 144 | for (size_t element = 0; element < 4; ++element) { | ||
| 145 | if (((tld.mask >> element) & 1) == 0) { | ||
| 146 | continue; | ||
| 147 | } | ||
| 148 | v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||
| 149 | ++dest_reg; | ||
| 150 | } | ||
| 151 | if (tld.sparse_pred != IR::Pred::PT) { | ||
| 152 | v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||
| 153 | } | ||
| 154 | } | ||
| 155 | } // Anonymous namespace | ||
| 156 | |||
| 157 | void TranslatorVisitor::TLD(u64 insn) { | ||
| 158 | Impl(*this, insn, false); | ||
| 159 | } | ||
| 160 | |||
| 161 | void TranslatorVisitor::TLD_b(u64 insn) { | ||
| 162 | Impl(*this, insn, true); | ||
| 163 | } | ||
| 164 | |||
| 165 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp new file mode 100644 index 000000000..5dd7e31b2 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp | |||
| @@ -0,0 +1,242 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Precision : u64 { | ||
| 15 | F16, | ||
| 16 | F32, | ||
| 17 | }; | ||
| 18 | |||
| 19 | constexpr unsigned R = 1; | ||
| 20 | constexpr unsigned G = 2; | ||
| 21 | constexpr unsigned B = 4; | ||
| 22 | constexpr unsigned A = 8; | ||
| 23 | |||
| 24 | constexpr std::array RG_LUT{ | ||
| 25 | R, // | ||
| 26 | G, // | ||
| 27 | B, // | ||
| 28 | A, // | ||
| 29 | R | G, // | ||
| 30 | R | A, // | ||
| 31 | G | A, // | ||
| 32 | B | A, // | ||
| 33 | }; | ||
| 34 | |||
| 35 | constexpr std::array RGBA_LUT{ | ||
| 36 | R | G | B, // | ||
| 37 | R | G | A, // | ||
| 38 | R | B | A, // | ||
| 39 | G | B | A, // | ||
| 40 | R | G | B | A, // | ||
| 41 | }; | ||
| 42 | |||
| 43 | union Encoding { | ||
| 44 | u64 raw; | ||
| 45 | BitField<59, 1, Precision> precision; | ||
| 46 | BitField<54, 1, u64> aoffi; | ||
| 47 | BitField<53, 1, u64> lod; | ||
| 48 | BitField<55, 1, u64> ms; | ||
| 49 | BitField<49, 1, u64> nodep; | ||
| 50 | BitField<28, 8, IR::Reg> dest_reg_b; | ||
| 51 | BitField<0, 8, IR::Reg> dest_reg_a; | ||
| 52 | BitField<8, 8, IR::Reg> src_reg_a; | ||
| 53 | BitField<20, 8, IR::Reg> src_reg_b; | ||
| 54 | BitField<36, 13, u64> cbuf_offset; | ||
| 55 | BitField<50, 3, u64> swizzle; | ||
| 56 | BitField<53, 4, u64> encoding; | ||
| 57 | }; | ||
| 58 | |||
| 59 | void CheckAlignment(IR::Reg reg, size_t alignment) { | ||
| 60 | if (!IR::IsAligned(reg, alignment)) { | ||
| 61 | throw NotImplementedException("Unaligned source register {}", reg); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) { | ||
| 66 | const IR::U32 value{v.X(reg)}; | ||
| 67 | return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||
| 68 | v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||
| 69 | } | ||
| 70 | |||
| 71 | IR::Value Sample(TranslatorVisitor& v, u64 insn) { | ||
| 72 | const Encoding tlds{insn}; | ||
| 73 | const IR::U32 handle{v.ir.Imm32(static_cast<u32>(tlds.cbuf_offset * 4))}; | ||
| 74 | const IR::Reg reg_a{tlds.src_reg_a}; | ||
| 75 | const IR::Reg reg_b{tlds.src_reg_b}; | ||
| 76 | IR::Value coords; | ||
| 77 | IR::U32 lod{v.ir.Imm32(0U)}; | ||
| 78 | IR::Value offsets; | ||
| 79 | IR::U32 multisample; | ||
| 80 | Shader::TextureType texture_type{}; | ||
| 81 | switch (tlds.encoding) { | ||
| 82 | case 0: | ||
| 83 | texture_type = Shader::TextureType::Color1D; | ||
| 84 | coords = v.X(reg_a); | ||
| 85 | break; | ||
| 86 | case 1: | ||
| 87 | texture_type = Shader::TextureType::Color1D; | ||
| 88 | coords = v.X(reg_a); | ||
| 89 | lod = v.X(reg_b); | ||
| 90 | break; | ||
| 91 | case 2: | ||
| 92 | texture_type = Shader::TextureType::Color2D; | ||
| 93 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_b)); | ||
| 94 | break; | ||
| 95 | case 4: | ||
| 96 | CheckAlignment(reg_a, 2); | ||
| 97 | texture_type = Shader::TextureType::Color2D; | ||
| 98 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 99 | offsets = MakeOffset(v, reg_b); | ||
| 100 | break; | ||
| 101 | case 5: | ||
| 102 | CheckAlignment(reg_a, 2); | ||
| 103 | texture_type = Shader::TextureType::Color2D; | ||
| 104 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 105 | lod = v.X(reg_b); | ||
| 106 | break; | ||
| 107 | case 6: | ||
| 108 | CheckAlignment(reg_a, 2); | ||
| 109 | texture_type = Shader::TextureType::Color2D; | ||
| 110 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 111 | multisample = v.X(reg_b); | ||
| 112 | break; | ||
| 113 | case 7: | ||
| 114 | CheckAlignment(reg_a, 2); | ||
| 115 | texture_type = Shader::TextureType::Color3D; | ||
| 116 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1), v.X(reg_b)); | ||
| 117 | break; | ||
| 118 | case 8: { | ||
| 119 | CheckAlignment(reg_b, 2); | ||
| 120 | const IR::U32 array{v.ir.BitFieldExtract(v.X(reg_a), v.ir.Imm32(0), v.ir.Imm32(16))}; | ||
| 121 | texture_type = Shader::TextureType::ColorArray2D; | ||
| 122 | coords = v.ir.CompositeConstruct(v.X(reg_b), v.X(reg_b + 1), array); | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | case 12: | ||
| 126 | CheckAlignment(reg_a, 2); | ||
| 127 | CheckAlignment(reg_b, 2); | ||
| 128 | texture_type = Shader::TextureType::Color2D; | ||
| 129 | coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1)); | ||
| 130 | lod = v.X(reg_b); | ||
| 131 | offsets = MakeOffset(v, reg_b + 1); | ||
| 132 | break; | ||
| 133 | default: | ||
| 134 | throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value()); | ||
| 135 | } | ||
| 136 | IR::TextureInstInfo info{}; | ||
| 137 | if (tlds.precision == Precision::F16) { | ||
| 138 | info.relaxed_precision.Assign(1); | ||
| 139 | } | ||
| 140 | info.type.Assign(texture_type); | ||
| 141 | return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info); | ||
| 142 | } | ||
| 143 | |||
| 144 | unsigned Swizzle(u64 insn) { | ||
| 145 | const Encoding tlds{insn}; | ||
| 146 | const size_t encoding{tlds.swizzle}; | ||
| 147 | if (tlds.dest_reg_b == IR::Reg::RZ) { | ||
| 148 | if (encoding >= RG_LUT.size()) { | ||
| 149 | throw NotImplementedException("Illegal RG encoding {}", encoding); | ||
| 150 | } | ||
| 151 | return RG_LUT[encoding]; | ||
| 152 | } else { | ||
| 153 | if (encoding >= RGBA_LUT.size()) { | ||
| 154 | throw NotImplementedException("Illegal RGBA encoding {}", encoding); | ||
| 155 | } | ||
| 156 | return RGBA_LUT[encoding]; | ||
| 157 | } | ||
| 158 | } | ||
| 159 | |||
| 160 | IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) { | ||
| 161 | return IR::F32{v.ir.CompositeExtract(sample, component)}; | ||
| 162 | } | ||
| 163 | |||
| 164 | IR::Reg RegStoreComponent32(u64 insn, unsigned index) { | ||
| 165 | const Encoding tlds{insn}; | ||
| 166 | switch (index) { | ||
| 167 | case 0: | ||
| 168 | return tlds.dest_reg_a; | ||
| 169 | case 1: | ||
| 170 | CheckAlignment(tlds.dest_reg_a, 2); | ||
| 171 | return tlds.dest_reg_a + 1; | ||
| 172 | case 2: | ||
| 173 | return tlds.dest_reg_b; | ||
| 174 | case 3: | ||
| 175 | CheckAlignment(tlds.dest_reg_b, 2); | ||
| 176 | return tlds.dest_reg_b + 1; | ||
| 177 | } | ||
| 178 | throw LogicError("Invalid store index {}", index); | ||
| 179 | } | ||
| 180 | |||
| 181 | void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 182 | const unsigned swizzle{Swizzle(insn)}; | ||
| 183 | unsigned store_index{0}; | ||
| 184 | for (unsigned component = 0; component < 4; ++component) { | ||
| 185 | if (((swizzle >> component) & 1) == 0) { | ||
| 186 | continue; | ||
| 187 | } | ||
| 188 | const IR::Reg dest{RegStoreComponent32(insn, store_index)}; | ||
| 189 | v.F(dest, Extract(v, sample, component)); | ||
| 190 | ++store_index; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | |||
| 194 | IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { | ||
| 195 | return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); | ||
| 196 | } | ||
| 197 | |||
| 198 | void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { | ||
| 199 | const unsigned swizzle{Swizzle(insn)}; | ||
| 200 | unsigned store_index{0}; | ||
| 201 | std::array<IR::F32, 4> swizzled; | ||
| 202 | for (unsigned component = 0; component < 4; ++component) { | ||
| 203 | if (((swizzle >> component) & 1) == 0) { | ||
| 204 | continue; | ||
| 205 | } | ||
| 206 | swizzled[store_index] = Extract(v, sample, component); | ||
| 207 | ++store_index; | ||
| 208 | } | ||
| 209 | const IR::F32 zero{v.ir.Imm32(0.0f)}; | ||
| 210 | const Encoding tlds{insn}; | ||
| 211 | switch (store_index) { | ||
| 212 | case 1: | ||
| 213 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], zero)); | ||
| 214 | break; | ||
| 215 | case 2: | ||
| 216 | case 3: | ||
| 217 | case 4: | ||
| 218 | v.X(tlds.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); | ||
| 219 | switch (store_index) { | ||
| 220 | case 2: | ||
| 221 | break; | ||
| 222 | case 3: | ||
| 223 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], zero)); | ||
| 224 | break; | ||
| 225 | case 4: | ||
| 226 | v.X(tlds.dest_reg_b, Pack(v, swizzled[2], swizzled[3])); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } // Anonymous namespace | ||
| 233 | |||
| 234 | void TranslatorVisitor::TLDS(u64 insn) { | ||
| 235 | const IR::Value sample{Sample(*this, insn)}; | ||
| 236 | if (Encoding{insn}.precision == Precision::F32) { | ||
| 237 | Store32(*this, insn, sample); | ||
| 238 | } else { | ||
| 239 | Store16(*this, insn, sample); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp new file mode 100644 index 000000000..aea3c0e62 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_mipmap_level.cpp | |||
| @@ -0,0 +1,131 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | |||
| 15 | enum class TextureType : u64 { | ||
| 16 | _1D, | ||
| 17 | ARRAY_1D, | ||
| 18 | _2D, | ||
| 19 | ARRAY_2D, | ||
| 20 | _3D, | ||
| 21 | ARRAY_3D, | ||
| 22 | CUBE, | ||
| 23 | ARRAY_CUBE, | ||
| 24 | }; | ||
| 25 | |||
| 26 | Shader::TextureType GetType(TextureType type) { | ||
| 27 | switch (type) { | ||
| 28 | case TextureType::_1D: | ||
| 29 | return Shader::TextureType::Color1D; | ||
| 30 | case TextureType::ARRAY_1D: | ||
| 31 | return Shader::TextureType::ColorArray1D; | ||
| 32 | case TextureType::_2D: | ||
| 33 | return Shader::TextureType::Color2D; | ||
| 34 | case TextureType::ARRAY_2D: | ||
| 35 | return Shader::TextureType::ColorArray2D; | ||
| 36 | case TextureType::_3D: | ||
| 37 | return Shader::TextureType::Color3D; | ||
| 38 | case TextureType::ARRAY_3D: | ||
| 39 | throw NotImplementedException("3D array texture type"); | ||
| 40 | case TextureType::CUBE: | ||
| 41 | return Shader::TextureType::ColorCube; | ||
| 42 | case TextureType::ARRAY_CUBE: | ||
| 43 | return Shader::TextureType::ColorArrayCube; | ||
| 44 | } | ||
| 45 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 46 | } | ||
| 47 | |||
| 48 | IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||
| 49 | // The ISA reads an array component here, but it is not needed in high-level shading | ||
| 50 | // languages, so this information is dropped. | ||
| 51 | switch (type) { | ||
| 52 | case TextureType::_1D: | ||
| 53 | return v.F(reg); | ||
| 54 | case TextureType::ARRAY_1D: | ||
| 55 | return v.F(reg + 1); | ||
| 56 | case TextureType::_2D: | ||
| 57 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1)); | ||
| 58 | case TextureType::ARRAY_2D: | ||
| 59 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2)); | ||
| 60 | case TextureType::_3D: | ||
| 61 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 62 | case TextureType::ARRAY_3D: | ||
| 63 | throw NotImplementedException("3D array texture type"); | ||
| 64 | case TextureType::CUBE: | ||
| 65 | return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2)); | ||
| 66 | case TextureType::ARRAY_CUBE: | ||
| 67 | return v.ir.CompositeConstruct(v.F(reg + 1), v.F(reg + 2), v.F(reg + 3)); | ||
| 68 | } | ||
| 69 | throw NotImplementedException("Invalid texture type {}", type); | ||
| 70 | } | ||
| 71 | |||
| 72 | void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||
| 73 | union { | ||
| 74 | u64 raw; | ||
| 75 | BitField<49, 1, u64> nodep; | ||
| 76 | BitField<35, 1, u64> ndv; | ||
| 77 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 78 | BitField<8, 8, IR::Reg> coord_reg; | ||
| 79 | BitField<20, 8, IR::Reg> meta_reg; | ||
| 80 | BitField<28, 3, TextureType> type; | ||
| 81 | BitField<31, 4, u64> mask; | ||
| 82 | BitField<36, 13, u64> cbuf_offset; | ||
| 83 | } const tmml{insn}; | ||
| 84 | |||
| 85 | if ((tmml.mask & 0b1100) != 0) { | ||
| 86 | throw NotImplementedException("TMML BA results are not implmented"); | ||
| 87 | } | ||
| 88 | const IR::Value coords{MakeCoords(v, tmml.coord_reg, tmml.type)}; | ||
| 89 | |||
| 90 | IR::U32 handle; | ||
| 91 | IR::Reg meta_reg{tmml.meta_reg}; | ||
| 92 | if (is_bindless) { | ||
| 93 | handle = v.X(meta_reg++); | ||
| 94 | } else { | ||
| 95 | handle = v.ir.Imm32(static_cast<u32>(tmml.cbuf_offset.Value() * 4)); | ||
| 96 | } | ||
| 97 | IR::TextureInstInfo info{}; | ||
| 98 | info.type.Assign(GetType(tmml.type)); | ||
| 99 | const IR::Value sample{v.ir.ImageQueryLod(handle, coords, info)}; | ||
| 100 | |||
| 101 | IR::Reg dest_reg{tmml.dest_reg}; | ||
| 102 | for (size_t element = 0; element < 4; ++element) { | ||
| 103 | if (((tmml.mask >> element) & 1) == 0) { | ||
| 104 | continue; | ||
| 105 | } | ||
| 106 | IR::F32 value{v.ir.CompositeExtract(sample, element)}; | ||
| 107 | if (element < 2) { | ||
| 108 | IR::U32 casted_value; | ||
| 109 | if (element == 0) { | ||
| 110 | casted_value = v.ir.ConvertFToU(32, value); | ||
| 111 | } else { | ||
| 112 | casted_value = v.ir.ConvertFToS(16, value); | ||
| 113 | } | ||
| 114 | v.X(dest_reg, v.ir.ShiftLeftLogical(casted_value, v.ir.Imm32(8))); | ||
| 115 | } else { | ||
| 116 | v.F(dest_reg, value); | ||
| 117 | } | ||
| 118 | ++dest_reg; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | } // Anonymous namespace | ||
| 122 | |||
| 123 | void TranslatorVisitor::TMML(u64 insn) { | ||
| 124 | Impl(*this, insn, false); | ||
| 125 | } | ||
| 126 | |||
| 127 | void TranslatorVisitor::TMML_b(u64 insn) { | ||
| 128 | Impl(*this, insn, true); | ||
| 129 | } | ||
| 130 | |||
| 131 | } // namespace Shader::Maxwell | ||
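For the first two TMML results, the code above converts the queried LOD to an integer and shifts it left by 8 bits before storing it. A standalone model of that packing (PackLod is an illustrative name; it reproduces only the integer conversion and shift shown above):

#include <cstdint>
#include <cstdio>

// Convert the float LOD to an integer and place it in the upper bits, as in the code above.
std::uint32_t PackLod(float lod) {
    return static_cast<std::uint32_t>(lod) << 8;
}

int main() {
    std::printf("0x%X\n", PackLod(3.0f)); // prints 0x300
}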
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp new file mode 100644 index 000000000..0459e5473 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_query.cpp | |||
| @@ -0,0 +1,76 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/modifiers.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | namespace { | ||
| 14 | enum class Mode : u64 { | ||
| 15 | Dimension = 1, | ||
| 16 | TextureType = 2, | ||
| 17 | SamplePos = 5, | ||
| 18 | }; | ||
| 19 | |||
| 20 | IR::Value Query(TranslatorVisitor& v, const IR::U32& handle, Mode mode, IR::Reg src_reg) { | ||
| 21 | switch (mode) { | ||
| 22 | case Mode::Dimension: { | ||
| 23 | const IR::U32 lod{v.X(src_reg)}; | ||
| 24 | return v.ir.ImageQueryDimension(handle, lod); | ||
| 25 | } | ||
| 26 | case Mode::TextureType: | ||
| 27 | case Mode::SamplePos: | ||
| 28 | default: | ||
| 29 | throw NotImplementedException("Mode {}", mode); | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | void Impl(TranslatorVisitor& v, u64 insn, std::optional<u32> cbuf_offset) { | ||
| 34 | union { | ||
| 35 | u64 raw; | ||
| 36 | BitField<49, 1, u64> nodep; | ||
| 37 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 38 | BitField<8, 8, IR::Reg> src_reg; | ||
| 39 | BitField<22, 3, Mode> mode; | ||
| 40 | BitField<31, 4, u64> mask; | ||
| 41 | } const txq{insn}; | ||
| 42 | |||
| 43 | IR::Reg src_reg{txq.src_reg}; | ||
| 44 | IR::U32 handle; | ||
| 45 | if (cbuf_offset) { | ||
| 46 | handle = v.ir.Imm32(*cbuf_offset); | ||
| 47 | } else { | ||
| 48 | handle = v.X(src_reg); | ||
| 49 | ++src_reg; | ||
| 50 | } | ||
| 51 | const IR::Value query{Query(v, handle, txq.mode, src_reg)}; | ||
| 52 | IR::Reg dest_reg{txq.dest_reg}; | ||
| 53 | for (int element = 0; element < 4; ++element) { | ||
| 54 | if (((txq.mask >> element) & 1) == 0) { | ||
| 55 | continue; | ||
| 56 | } | ||
| 57 | v.X(dest_reg, IR::U32{v.ir.CompositeExtract(query, static_cast<size_t>(element))}); | ||
| 58 | ++dest_reg; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void TranslatorVisitor::TXQ(u64 insn) { | ||
| 64 | union { | ||
| 65 | u64 raw; | ||
| 66 | BitField<36, 13, u64> cbuf_offset; | ||
| 67 | } const txq{insn}; | ||
| 68 | |||
| 69 | Impl(*this, insn, static_cast<u32>(txq.cbuf_offset * 4)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void TranslatorVisitor::TXQ_b(u64 insn) { | ||
| 73 | Impl(*this, insn, std::nullopt); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp new file mode 100644 index 000000000..e1f4174cf --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.cpp | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/exception.h" | ||
| 6 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 7 | |||
| 8 | namespace Shader::Maxwell { | ||
| 9 | |||
| 10 | IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, VideoWidth width, | ||
| 11 | u32 selector, bool is_signed) { | ||
| 12 | switch (width) { | ||
| 13 | case VideoWidth::Byte: | ||
| 14 | case VideoWidth::Unknown: | ||
| 15 | return ir.BitFieldExtract(value, ir.Imm32(selector * 8), ir.Imm32(8), is_signed); | ||
| 16 | case VideoWidth::Short: | ||
| 17 | return ir.BitFieldExtract(value, ir.Imm32(selector * 16), ir.Imm32(16), is_signed); | ||
| 18 | case VideoWidth::Word: | ||
| 19 | return value; | ||
| 20 | default: | ||
| 21 | throw NotImplementedException("Unknown VideoWidth {}", width); | ||
| 22 | } | ||
| 23 | } | ||
| 24 | |||
| 25 | VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate) { | ||
| 26 | // Immediates must be in 16-bit format. | ||
| 27 | return is_immediate ? VideoWidth::Short : width; | ||
| 28 | } | ||
| 29 | |||
| 30 | } // namespace Shader::Maxwell | ||
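ExtractVideoOperandValue above selects an 8-bit or 16-bit lane from a 32-bit source according to the operand width and selector, or returns the whole register for word width. A standalone model of the unsigned case with plain integers (ExtractLane is an illustrative name; the signed case would additionally sign-extend the selected lane):

#include <cstdint>
#include <cstdio>

// Pick the `selector`-th lane of `lane_bits` width from a 32-bit value.
std::uint32_t ExtractLane(std::uint32_t value, unsigned lane_bits, unsigned selector) {
    if (lane_bits >= 32) {
        return value; // word width: the whole register
    }
    const std::uint32_t mask = (1u << lane_bits) - 1u;
    return (value >> (selector * lane_bits)) & mask;
}

int main() {
    std::printf("byte2=0x%X short1=0x%X\n", ExtractLane(0xAABBCCDDu, 8, 2),
                ExtractLane(0xAABBCCDDu, 16, 1)); // prints byte2=0xBB short1=0xAABB
}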
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h new file mode 100644 index 000000000..40c0b907c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_helper.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | enum class VideoWidth : u64 { | ||
| 12 | Byte, | ||
| 13 | Unknown, | ||
| 14 | Short, | ||
| 15 | Word, | ||
| 16 | }; | ||
| 17 | |||
| 18 | [[nodiscard]] IR::U32 ExtractVideoOperandValue(IR::IREmitter& ir, const IR::U32& value, | ||
| 19 | VideoWidth width, u32 selector, bool is_signed); | ||
| 20 | |||
| 21 | [[nodiscard]] VideoWidth GetVideoSourceWidth(VideoWidth width, bool is_immediate); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp new file mode 100644 index 000000000..78869601f --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_minimum_maximum.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | namespace { | ||
| 12 | enum class VideoMinMaxOps : u64 { | ||
| 13 | MRG_16H, | ||
| 14 | MRG_16L, | ||
| 15 | MRG_8B0, | ||
| 16 | MRG_8B2, | ||
| 17 | ACC, | ||
| 18 | MIN, | ||
| 19 | MAX, | ||
| 20 | }; | ||
| 21 | |||
| 22 | [[nodiscard]] IR::U32 ApplyVideoMinMaxOp(IR::IREmitter& ir, const IR::U32& lhs, const IR::U32& rhs, | ||
| 23 | VideoMinMaxOps op, bool is_signed) { | ||
| 24 | switch (op) { | ||
| 25 | case VideoMinMaxOps::MIN: | ||
| 26 | return ir.IMin(lhs, rhs, is_signed); | ||
| 27 | case VideoMinMaxOps::MAX: | ||
| 28 | return ir.IMax(lhs, rhs, is_signed); | ||
| 29 | default: | ||
| 30 | throw NotImplementedException("VMNMX op {}", op); | ||
| 31 | } | ||
| 32 | } | ||
| 33 | } // Anonymous namespace | ||
| 34 | |||
| 35 | void TranslatorVisitor::VMNMX(u64 insn) { | ||
| 36 | union { | ||
| 37 | u64 raw; | ||
| 38 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 39 | BitField<20, 16, u64> src_b_imm; | ||
| 40 | BitField<28, 2, u64> src_b_selector; | ||
| 41 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 42 | BitField<36, 2, u64> src_a_selector; | ||
| 43 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 44 | BitField<47, 1, u64> cc; | ||
| 45 | BitField<48, 1, u64> src_a_sign; | ||
| 46 | BitField<49, 1, u64> src_b_sign; | ||
| 47 | BitField<50, 1, u64> is_src_b_reg; | ||
| 48 | BitField<51, 3, VideoMinMaxOps> op; | ||
| 49 | BitField<54, 1, u64> dest_sign; | ||
| 50 | BitField<55, 1, u64> sat; | ||
| 51 | BitField<56, 1, u64> mx; | ||
| 52 | } const vmnmx{insn}; | ||
| 53 | |||
| 54 | if (vmnmx.cc != 0) { | ||
| 55 | throw NotImplementedException("VMNMX CC"); | ||
| 56 | } | ||
| 57 | if (vmnmx.sat != 0) { | ||
| 58 | throw NotImplementedException("VMNMX SAT"); | ||
| 59 | } | ||
| 60 | // Selectors were shown to default to 2 in unit tests | ||
| 61 | if (vmnmx.src_a_selector != 2) { | ||
| 62 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_a_selector.Value()); | ||
| 63 | } | ||
| 64 | if (vmnmx.src_b_selector != 2) { | ||
| 65 | throw NotImplementedException("VMNMX Selector {}", vmnmx.src_b_selector.Value()); | ||
| 66 | } | ||
| 67 | if (vmnmx.src_a_width != VideoWidth::Word) { | ||
| 68 | throw NotImplementedException("VMNMX Source Width {}", vmnmx.src_a_width.Value()); | ||
| 69 | } | ||
| 70 | |||
| 71 | const bool is_b_imm{vmnmx.is_src_b_reg == 0}; | ||
| 72 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 73 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmnmx.src_b_imm)) : GetReg20(insn)}; | ||
| 74 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 75 | |||
| 76 | const VideoWidth a_width{vmnmx.src_a_width}; | ||
| 77 | const VideoWidth b_width{GetVideoSourceWidth(vmnmx.src_b_width, is_b_imm)}; | ||
| 78 | |||
| 79 | const bool src_a_signed{vmnmx.src_a_sign != 0}; | ||
| 80 | const bool src_b_signed{vmnmx.src_b_sign != 0}; | ||
| 81 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, 0, src_a_signed)}; | ||
| 82 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, 0, src_b_signed)}; | ||
| 83 | |||
| 84 | // First operation's sign is only dependent on operand b's sign | ||
| 85 | const bool op_1_signed{src_b_signed}; | ||
| 86 | |||
| 87 | const IR::U32 lhs{vmnmx.mx != 0 ? ir.IMax(op_a, op_b, op_1_signed) | ||
| 88 | : ir.IMin(op_a, op_b, op_1_signed)}; | ||
| 89 | X(vmnmx.dest_reg, ApplyVideoMinMaxOp(ir, lhs, src_c, vmnmx.op, vmnmx.dest_sign != 0)); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
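Editor's note: VMNMX is lowered as two chained operations: the mx bit selects min or max between the extracted a and b operands (with the signedness taken from operand b only), and the op field then combines that result with source c using the destination sign. A standalone scalar model of the two stages, with illustrative operand values:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Min or max of two 32-bit values, interpreted as signed or unsigned.
std::uint32_t MinMax(std::uint32_t lhs, std::uint32_t rhs, bool is_signed, bool take_max) {
    if (is_signed) {
        const std::int32_t a = static_cast<std::int32_t>(lhs);
        const std::int32_t b = static_cast<std::int32_t>(rhs);
        return static_cast<std::uint32_t>(take_max ? std::max(a, b) : std::min(a, b));
    }
    return take_max ? std::max(lhs, rhs) : std::min(lhs, rhs);
}

int main() {
    const std::uint32_t op_a = 0xFFFFFFF0; // -16 when treated as signed
    const std::uint32_t op_b = 5;
    const std::uint32_t src_c = 2;
    // Stage 1: mx set -> max; signedness comes from operand b (signed here).
    const std::uint32_t stage1 = MinMax(op_a, op_b, true, true);     // 5
    // Stage 2: op == MIN, using the destination sign flag (signed here).
    const std::uint32_t result = MinMax(stage1, src_c, true, false); // 2
    std::printf("%u\n", result);
}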
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp new file mode 100644 index 000000000..cc2e6d6e6 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_multiply_add.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | void TranslatorVisitor::VMAD(u64 insn) { | ||
| 12 | union { | ||
| 13 | u64 raw; | ||
| 14 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 15 | BitField<20, 16, u64> src_b_imm; | ||
| 16 | BitField<28, 2, u64> src_b_selector; | ||
| 17 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 18 | BitField<36, 2, u64> src_a_selector; | ||
| 19 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 20 | BitField<47, 1, u64> cc; | ||
| 21 | BitField<48, 1, u64> src_a_sign; | ||
| 22 | BitField<49, 1, u64> src_b_sign; | ||
| 23 | BitField<50, 1, u64> is_src_b_reg; | ||
| 24 | BitField<51, 2, u64> scale; | ||
| 25 | BitField<53, 1, u64> src_c_neg; | ||
| 26 | BitField<54, 1, u64> src_a_neg; | ||
| 27 | BitField<55, 1, u64> sat; | ||
| 28 | } const vmad{insn}; | ||
| 29 | |||
| 30 | if (vmad.cc != 0) { | ||
| 31 | throw NotImplementedException("VMAD CC"); | ||
| 32 | } | ||
| 33 | if (vmad.sat != 0) { | ||
| 34 | throw NotImplementedException("VMAD SAT"); | ||
| 35 | } | ||
| 36 | if (vmad.scale != 0) { | ||
| 37 | throw NotImplementedException("VMAD SCALE"); | ||
| 38 | } | ||
| 39 | if (vmad.src_a_neg != 0 && vmad.src_c_neg != 0) { | ||
| 40 | throw NotImplementedException("VMAD PO"); | ||
| 41 | } | ||
| 42 | if (vmad.src_a_neg != 0 || vmad.src_c_neg != 0) { | ||
| 43 | throw NotImplementedException("VMAD NEG"); | ||
| 44 | } | ||
| 45 | const bool is_b_imm{vmad.is_src_b_reg == 0}; | ||
| 46 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 47 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vmad.src_b_imm)) : GetReg20(insn)}; | ||
| 48 | const IR::U32 src_c{GetReg39(insn)}; | ||
| 49 | |||
| 50 | const u32 a_selector{static_cast<u32>(vmad.src_a_selector)}; | ||
| 51 | // Immediate values can't have a selector | ||
| 52 | const u32 b_selector{is_b_imm ? 0U : static_cast<u32>(vmad.src_b_selector)}; | ||
| 53 | const VideoWidth a_width{vmad.src_a_width}; | ||
| 54 | const VideoWidth b_width{GetVideoSourceWidth(vmad.src_b_width, is_b_imm)}; | ||
| 55 | |||
| 56 | const bool src_a_signed{vmad.src_a_sign != 0}; | ||
| 57 | const bool src_b_signed{vmad.src_b_sign != 0}; | ||
| 58 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 59 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 60 | |||
| 61 | X(vmad.dest_reg, ir.IAdd(ir.IMul(op_a, op_b), src_c)); | ||
| 62 | } | ||
| 63 | |||
| 64 | } // namespace Shader::Maxwell | ||
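Editor's note: once the lanes are extracted, VMAD reduces to a 32-bit multiply of the a and b operands followed by an add of source c; the IR's IMul/IAdd here behave like ordinary 32-bit integer operations. A trivial scalar sketch with illustrative values:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t op_a = 3;    // extracted lane of source a
    const std::uint32_t op_b = 7;    // extracted lane of source b, or the 16-bit immediate
    const std::uint32_t src_c = 100; // accumulator operand
    const std::uint32_t result = op_a * op_b + src_c; // 121; wraps modulo 2^32
    std::printf("%u\n", result);
}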
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp new file mode 100644 index 000000000..1b66abc33 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/video_set_predicate.cpp | |||
| @@ -0,0 +1,92 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "shader_recompiler/exception.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/video_helper.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class VsetpCompareOp : u64 { | ||
| 14 | False = 0, | ||
| 15 | LessThan, | ||
| 16 | Equal, | ||
| 17 | LessThanEqual, | ||
| 18 | GreaterThan = 16, | ||
| 19 | NotEqual, | ||
| 20 | GreaterThanEqual, | ||
| 21 | True, | ||
| 22 | }; | ||
| 23 | |||
| 24 | CompareOp VsetpToShaderCompareOp(VsetpCompareOp op) { | ||
| 25 | switch (op) { | ||
| 26 | case VsetpCompareOp::False: | ||
| 27 | return CompareOp::False; | ||
| 28 | case VsetpCompareOp::LessThan: | ||
| 29 | return CompareOp::LessThan; | ||
| 30 | case VsetpCompareOp::Equal: | ||
| 31 | return CompareOp::Equal; | ||
| 32 | case VsetpCompareOp::LessThanEqual: | ||
| 33 | return CompareOp::LessThanEqual; | ||
| 34 | case VsetpCompareOp::GreaterThan: | ||
| 35 | return CompareOp::GreaterThan; | ||
| 36 | case VsetpCompareOp::NotEqual: | ||
| 37 | return CompareOp::NotEqual; | ||
| 38 | case VsetpCompareOp::GreaterThanEqual: | ||
| 39 | return CompareOp::GreaterThanEqual; | ||
| 40 | case VsetpCompareOp::True: | ||
| 41 | return CompareOp::True; | ||
| 42 | default: | ||
| 43 | throw NotImplementedException("Invalid compare op {}", op); | ||
| 44 | } | ||
| 45 | } | ||
| 46 | } // Anonymous namespace | ||
| 47 | |||
| 48 | void TranslatorVisitor::VSETP(u64 insn) { | ||
| 49 | union { | ||
| 50 | u64 raw; | ||
| 51 | BitField<0, 3, IR::Pred> dest_pred_b; | ||
| 52 | BitField<3, 3, IR::Pred> dest_pred_a; | ||
| 53 | BitField<20, 16, u64> src_b_imm; | ||
| 54 | BitField<28, 2, u64> src_b_selector; | ||
| 55 | BitField<29, 2, VideoWidth> src_b_width; | ||
| 56 | BitField<36, 2, u64> src_a_selector; | ||
| 57 | BitField<37, 2, VideoWidth> src_a_width; | ||
| 58 | BitField<39, 3, IR::Pred> bop_pred; | ||
| 59 | BitField<42, 1, u64> neg_bop_pred; | ||
| 60 | BitField<43, 5, VsetpCompareOp> compare_op; | ||
| 61 | BitField<45, 2, BooleanOp> bop; | ||
| 62 | BitField<48, 1, u64> src_a_sign; | ||
| 63 | BitField<49, 1, u64> src_b_sign; | ||
| 64 | BitField<50, 1, u64> is_src_b_reg; | ||
| 65 | } const vsetp{insn}; | ||
| 66 | |||
| 67 | const bool is_b_imm{vsetp.is_src_b_reg == 0}; | ||
| 68 | const IR::U32 src_a{GetReg8(insn)}; | ||
| 69 | const IR::U32 src_b{is_b_imm ? ir.Imm32(static_cast<u32>(vsetp.src_b_imm)) : GetReg20(insn)}; | ||
| 70 | |||
| 71 | const u32 a_selector{static_cast<u32>(vsetp.src_a_selector)}; | ||
| 72 | const u32 b_selector{static_cast<u32>(vsetp.src_b_selector)}; | ||
| 73 | const VideoWidth a_width{vsetp.src_a_width}; | ||
| 74 | const VideoWidth b_width{GetVideoSourceWidth(vsetp.src_b_width, is_b_imm)}; | ||
| 75 | |||
| 76 | const bool src_a_signed{vsetp.src_a_sign != 0}; | ||
| 77 | const bool src_b_signed{vsetp.src_b_sign != 0}; | ||
| 78 | const IR::U32 op_a{ExtractVideoOperandValue(ir, src_a, a_width, a_selector, src_a_signed)}; | ||
| 79 | const IR::U32 op_b{ExtractVideoOperandValue(ir, src_b, b_width, b_selector, src_b_signed)}; | ||
| 80 | |||
| 81 | // Compare operation's sign is only dependent on operand b's sign | ||
| 82 | const bool compare_signed{src_b_signed}; | ||
| 83 | const CompareOp compare_op{VsetpToShaderCompareOp(vsetp.compare_op)}; | ||
| 84 | const IR::U1 comparison{IntegerCompare(ir, op_a, op_b, compare_op, compare_signed)}; | ||
| 85 | const IR::U1 bop_pred{ir.GetPred(vsetp.bop_pred, vsetp.neg_bop_pred != 0)}; | ||
| 86 | const IR::U1 result_a{PredicateCombine(ir, comparison, bop_pred, vsetp.bop)}; | ||
| 87 | const IR::U1 result_b{PredicateCombine(ir, ir.LogicalNot(comparison), bop_pred, vsetp.bop)}; | ||
| 88 | ir.SetPred(vsetp.dest_pred_a, result_a); | ||
| 89 | ir.SetPred(vsetp.dest_pred_b, result_b); | ||
| 90 | } | ||
| 91 | |||
| 92 | } // namespace Shader::Maxwell | ||
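Editor's note: VSETP writes two predicates derived from one integer comparison: the comparison combined with the bop predicate, and the negated comparison combined the same way. A standalone sketch of that combination, assuming BooleanOp::AND for illustration (the instruction also supports other boolean ops); the values are illustrative.

#include <cstdio>

// Combine a comparison result with the auxiliary predicate; only the AND case is modelled.
bool CombineAnd(bool comparison, bool bop_pred) {
    return comparison && bop_pred;
}

int main() {
    const bool comparison = 7 < 9; // op_a < op_b, signedness taken from operand b
    const bool bop_pred = true;    // the (optionally negated) predicate from bits 39..41
    const bool result_a = CombineAnd(comparison, bop_pred);  // written to dest_pred_a
    const bool result_b = CombineAnd(!comparison, bop_pred); // written to dest_pred_b
    std::printf("result_a=%d result_b=%d\n", result_a, result_b);
}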
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp new file mode 100644 index 000000000..7ce370f09 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/vote.cpp | |||
| @@ -0,0 +1,54 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/bit_field.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 8 | |||
| 9 | namespace Shader::Maxwell { | ||
| 10 | namespace { | ||
| 11 | enum class VoteOp : u64 { | ||
| 12 | ALL, | ||
| 13 | ANY, | ||
| 14 | EQ, | ||
| 15 | }; | ||
| 16 | |||
| 17 | [[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) { | ||
| 18 | switch (vote_op) { | ||
| 19 | case VoteOp::ALL: | ||
| 20 | return ir.VoteAll(pred); | ||
| 21 | case VoteOp::ANY: | ||
| 22 | return ir.VoteAny(pred); | ||
| 23 | case VoteOp::EQ: | ||
| 24 | return ir.VoteEqual(pred); | ||
| 25 | default: | ||
| 26 | throw NotImplementedException("Invalid VOTE op {}", vote_op); | ||
| 27 | } | ||
| 28 | } | ||
| 29 | |||
| 30 | void Vote(TranslatorVisitor& v, u64 insn) { | ||
| 31 | union { | ||
| 32 | u64 insn; | ||
| 33 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 34 | BitField<39, 3, IR::Pred> pred_a; | ||
| 35 | BitField<42, 1, u64> neg_pred_a; | ||
| 36 | BitField<45, 3, IR::Pred> pred_b; | ||
| 37 | BitField<48, 2, VoteOp> vote_op; | ||
| 38 | } const vote{insn}; | ||
| 39 | |||
| 40 | const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)}; | ||
| 41 | v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op)); | ||
| 42 | v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred)); | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | void TranslatorVisitor::VOTE(u64 insn) { | ||
| 47 | Vote(*this, insn); | ||
| 48 | } | ||
| 49 | |||
| 50 | void TranslatorVisitor::VOTE_vtg(u64) { | ||
| 51 | LOG_WARNING(Shader, "(STUBBED) called"); | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace Shader::Maxwell | ||
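Editor's note: the three vote modes map to subgroup reductions over the per-thread predicate, and the destination register additionally receives the ballot. A standalone reference of the usual warp-vote semantics over a simulated 32-thread warp (the per-thread values are illustrative):

#include <array>
#include <cstdint>
#include <cstdio>

int main() {
    std::array<bool, 32> pred{};
    pred[0] = pred[3] = pred[7] = true; // per-thread source predicate

    std::uint32_t ballot = 0;
    bool all = true;
    bool any = false;
    for (int lane = 0; lane < 32; ++lane) {
        ballot |= pred[lane] ? (1u << lane) : 0u; // bit per lane, like SubgroupBallot
        all = all && pred[lane];                  // VOTE.ALL
        any = any || pred[lane];                  // VOTE.ANY
    }
    const bool eq = all || !any; // VOTE.EQ: every lane agrees on the predicate

    std::printf("ballot=0x%08X all=%d any=%d eq=%d\n", ballot, all, any, eq);
}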
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp new file mode 100644 index 000000000..550fed55c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp | |||
| @@ -0,0 +1,69 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <optional> | ||
| 6 | |||
| 7 | #include "common/bit_field.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | |||
| 11 | namespace Shader::Maxwell { | ||
| 12 | namespace { | ||
| 13 | enum class ShuffleMode : u64 { | ||
| 14 | IDX, | ||
| 15 | UP, | ||
| 16 | DOWN, | ||
| 17 | BFLY, | ||
| 18 | }; | ||
| 19 | |||
| 20 | [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||
| 21 | const IR::U32& index, const IR::U32& mask, | ||
| 22 | ShuffleMode shfl_op) { | ||
| 23 | const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||
| 24 | const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||
| 25 | switch (shfl_op) { | ||
| 26 | case ShuffleMode::IDX: | ||
| 27 | return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||
| 28 | case ShuffleMode::UP: | ||
| 29 | return ir.ShuffleUp(value, index, clamp, seg_mask); | ||
| 30 | case ShuffleMode::DOWN: | ||
| 31 | return ir.ShuffleDown(value, index, clamp, seg_mask); | ||
| 32 | case ShuffleMode::BFLY: | ||
| 33 | return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||
| 34 | default: | ||
| 35 | throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||
| 36 | } | ||
| 37 | } | ||
| 38 | |||
| 39 | void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||
| 40 | union { | ||
| 41 | u64 insn; | ||
| 42 | BitField<0, 8, IR::Reg> dest_reg; | ||
| 43 | BitField<8, 8, IR::Reg> src_reg; | ||
| 44 | BitField<30, 2, ShuffleMode> mode; | ||
| 45 | BitField<48, 3, IR::Pred> pred; | ||
| 46 | } const shfl{insn}; | ||
| 47 | |||
| 48 | const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||
| 49 | v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||
| 50 | v.X(shfl.dest_reg, result); | ||
| 51 | } | ||
| 52 | } // Anonymous namespace | ||
| 53 | |||
| 54 | void TranslatorVisitor::SHFL(u64 insn) { | ||
| 55 | union { | ||
| 56 | u64 insn; | ||
| 57 | BitField<20, 5, u64> src_a_imm; | ||
| 58 | BitField<28, 1, u64> src_a_flag; | ||
| 59 | BitField<29, 1, u64> src_b_flag; | ||
| 60 | BitField<34, 13, u64> src_b_imm; | ||
| 61 | } const flags{insn}; | ||
| 62 | const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||
| 63 | : GetReg20(insn)}; | ||
| 64 | const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||
| 65 | : GetReg39(insn)}; | ||
| 66 | Shuffle(*this, insn, src_a, src_b); | ||
| 67 | } | ||
| 68 | |||
| 69 | } // namespace Shader::Maxwell | ||
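Editor's note: SHFL packs two fields into its mask operand: a 5-bit clamp in bits 0..4 and a 5-bit segmentation mask in bits 8..12, which ShuffleOperation extracts before dispatching on the mode. The sketch below models the IDX-mode source-lane and in-bounds computation within a 32-thread warp, following the usual warp-shuffle semantics; the lane values are illustrative.

#include <cstdint>
#include <cstdio>

int main() {
    // Hypothetical mask: clamp = 31, segmentation mask = 0x18 (8-lane segments).
    const std::uint32_t mask = (0x18u << 8) | 0x1Fu;
    const std::uint32_t clamp = mask & 0x1Fu;           // bits 0..4
    const std::uint32_t seg_mask = (mask >> 8) & 0x1Fu; // bits 8..12

    const std::uint32_t self_lane = 5; // this thread's lane id
    const std::uint32_t index = 12;    // requested source lane for SHFL.IDX

    const std::uint32_t min_lane = self_lane & seg_mask;                   // start of this segment
    const std::uint32_t max_lane = min_lane | (clamp & ~seg_mask);         // last reachable lane
    const std::uint32_t src_lane = min_lane | (index & ~seg_mask & 0x1Fu); // lane actually read
    const bool in_bounds = src_lane <= max_lane; // becomes the predicate SHFL writes

    std::printf("src_lane=%u in_bounds=%d\n", src_lane, in_bounds);
}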
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.cpp b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp new file mode 100644 index 000000000..8e3c4c5d5 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.cpp | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "shader_recompiler/environment.h" | ||
| 6 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 7 | #include "shader_recompiler/frontend/maxwell/decode.h" | ||
| 8 | #include "shader_recompiler/frontend/maxwell/location.h" | ||
| 9 | #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 11 | |||
| 12 | namespace Shader::Maxwell { | ||
| 13 | |||
| 14 | template <auto method> | ||
| 15 | static void Invoke(TranslatorVisitor& visitor, Location pc, u64 insn) { | ||
| 16 | using MethodType = decltype(method); | ||
| 17 | if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, Location, u64>) { | ||
| 18 | (visitor.*method)(pc, insn); | ||
| 19 | } else if constexpr (std::is_invocable_r_v<void, MethodType, TranslatorVisitor&, u64>) { | ||
| 20 | (visitor.*method)(insn); | ||
| 21 | } else { | ||
| 22 | (visitor.*method)(); | ||
| 23 | } | ||
| 24 | } | ||
| 25 | |||
| 26 | void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end) { | ||
| 27 | if (location_begin == location_end) { | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | TranslatorVisitor visitor{env, *block}; | ||
| 31 | for (Location pc = location_begin; pc != location_end; ++pc) { | ||
| 32 | const u64 insn{env.ReadInstruction(pc.Offset())}; | ||
| 33 | try { | ||
| 34 | const Opcode opcode{Decode(insn)}; | ||
| 35 | switch (opcode) { | ||
| 36 | #define INST(name, cute, mask) \ | ||
| 37 | case Opcode::name: \ | ||
| 38 | Invoke<&TranslatorVisitor::name>(visitor, pc, insn); \ | ||
| 39 | break; | ||
| 40 | #include "shader_recompiler/frontend/maxwell/maxwell.inc" | ||
| 41 | #undef INST | ||
| 42 | default: | ||
| 43 | throw LogicError("Invalid opcode {}", opcode); | ||
| 44 | } | ||
| 45 | } catch (Exception& exception) { | ||
| 46 | exception.Prepend(fmt::format("Translate {}: ", Decode(insn))); | ||
| 47 | throw; | ||
| 48 | } | ||
| 49 | } | ||
| 50 | } | ||
| 51 | |||
| 52 | } // namespace Shader::Maxwell | ||
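Editor's note: Invoke selects at compile time how to call each opcode handler based on its parameter list, using std::is_invocable_r_v in an if constexpr chain. A self-contained sketch of the same pattern with a stand-in visitor type (all names and values here are illustrative):

#include <cstdio>
#include <type_traits>

struct Visitor {
    void WithPcAndInsn(int pc, unsigned long long insn) { std::printf("pc=%d insn=%llx\n", pc, insn); }
    void WithInsn(unsigned long long insn) { std::printf("insn=%llx\n", insn); }
    void Plain() { std::printf("no operands\n"); }
};

template <auto method>
void Invoke(Visitor& visitor, int pc, unsigned long long insn) {
    using MethodType = decltype(method);
    if constexpr (std::is_invocable_r_v<void, MethodType, Visitor&, int, unsigned long long>) {
        (visitor.*method)(pc, insn); // handler wants the program counter and the instruction
    } else if constexpr (std::is_invocable_r_v<void, MethodType, Visitor&, unsigned long long>) {
        (visitor.*method)(insn);     // handler only wants the raw instruction
    } else {
        (visitor.*method)();         // handler takes no operands
    }
}

int main() {
    Visitor v;
    Invoke<&Visitor::WithPcAndInsn>(v, 8, 0x123);
    Invoke<&Visitor::WithInsn>(v, 8, 0x123);
    Invoke<&Visitor::Plain>(v, 8, 0x123);
}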
diff --git a/src/shader_recompiler/frontend/maxwell/translate/translate.h b/src/shader_recompiler/frontend/maxwell/translate/translate.h new file mode 100644 index 000000000..a3edd2e46 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/translate.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | |||
| 10 | namespace Shader::Maxwell { | ||
| 11 | |||
| 12 | void Translate(Environment& env, IR::Block* block, u32 location_begin, u32 location_end); | ||
| 13 | |||
| 14 | } // namespace Shader::Maxwell | ||
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp new file mode 100644 index 000000000..c067d459c --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp | |||
| @@ -0,0 +1,223 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <memory> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/settings.h" | ||
| 10 | #include "shader_recompiler/exception.h" | ||
| 11 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 12 | #include "shader_recompiler/frontend/ir/post_order.h" | ||
| 13 | #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||
| 14 | #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||
| 15 | #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||
| 16 | #include "shader_recompiler/host_translate_info.h" | ||
| 17 | #include "shader_recompiler/ir_opt/passes.h" | ||
| 18 | |||
| 19 | namespace Shader::Maxwell { | ||
| 20 | namespace { | ||
| 21 | IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) { | ||
| 22 | size_t num_syntax_blocks{}; | ||
| 23 | for (const auto& node : syntax_list) { | ||
| 24 | if (node.type == IR::AbstractSyntaxNode::Type::Block) { | ||
| 25 | ++num_syntax_blocks; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | IR::BlockList blocks; | ||
| 29 | blocks.reserve(num_syntax_blocks); | ||
| 30 | for (const auto& node : syntax_list) { | ||
| 31 | if (node.type == IR::AbstractSyntaxNode::Type::Block) { | ||
| 32 | blocks.push_back(node.data.block); | ||
| 33 | } | ||
| 34 | } | ||
| 35 | return blocks; | ||
| 36 | } | ||
| 37 | |||
| 38 | void RemoveUnreachableBlocks(IR::Program& program) { | ||
| 39 | // Some blocks might be unreachable if a function call exists unconditionally; | ||
| 40 | // if this happens, the number of blocks and post-order blocks will mismatch. | ||
| 41 | if (program.blocks.size() == program.post_order_blocks.size()) { | ||
| 42 | return; | ||
| 43 | } | ||
| 44 | const auto begin{program.blocks.begin() + 1}; | ||
| 45 | const auto end{program.blocks.end()}; | ||
| 46 | const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }}; | ||
| 47 | program.blocks.erase(std::remove_if(begin, end, pred), end); | ||
| 48 | } | ||
| 49 | |||
| 50 | void CollectInterpolationInfo(Environment& env, IR::Program& program) { | ||
| 51 | if (program.stage != Stage::Fragment) { | ||
| 52 | return; | ||
| 53 | } | ||
| 54 | const ProgramHeader& sph{env.SPH()}; | ||
| 55 | for (size_t index = 0; index < IR::NUM_GENERICS; ++index) { | ||
| 56 | std::optional<PixelImap> imap; | ||
| 57 | for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) { | ||
| 58 | if (value == PixelImap::Unused) { | ||
| 59 | continue; | ||
| 60 | } | ||
| 61 | if (imap && imap != value) { | ||
| 62 | throw NotImplementedException("Per component interpolation"); | ||
| 63 | } | ||
| 64 | imap = value; | ||
| 65 | } | ||
| 66 | if (!imap) { | ||
| 67 | continue; | ||
| 68 | } | ||
| 69 | program.info.interpolation[index] = [&] { | ||
| 70 | switch (*imap) { | ||
| 71 | case PixelImap::Unused: | ||
| 72 | case PixelImap::Perspective: | ||
| 73 | return Interpolation::Smooth; | ||
| 74 | case PixelImap::Constant: | ||
| 75 | return Interpolation::Flat; | ||
| 76 | case PixelImap::ScreenLinear: | ||
| 77 | return Interpolation::NoPerspective; | ||
| 78 | } | ||
| 79 | throw NotImplementedException("Unknown interpolation {}", *imap); | ||
| 80 | }(); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 84 | void AddNVNStorageBuffers(IR::Program& program) { | ||
| 85 | if (!program.info.uses_global_memory) { | ||
| 86 | return; | ||
| 87 | } | ||
| 88 | const u32 driver_cbuf{0}; | ||
| 89 | const u32 descriptor_size{0x10}; | ||
| 90 | const u32 num_buffers{16}; | ||
| 91 | const u32 base{[&] { | ||
| 92 | switch (program.stage) { | ||
| 93 | case Stage::VertexA: | ||
| 94 | case Stage::VertexB: | ||
| 95 | return 0x110u; | ||
| 96 | case Stage::TessellationControl: | ||
| 97 | return 0x210u; | ||
| 98 | case Stage::TessellationEval: | ||
| 99 | return 0x310u; | ||
| 100 | case Stage::Geometry: | ||
| 101 | return 0x410u; | ||
| 102 | case Stage::Fragment: | ||
| 103 | return 0x510u; | ||
| 104 | case Stage::Compute: | ||
| 105 | return 0x310u; | ||
| 106 | } | ||
| 107 | throw InvalidArgument("Invalid stage {}", program.stage); | ||
| 108 | }()}; | ||
| 109 | auto& descs{program.info.storage_buffers_descriptors}; | ||
| 110 | for (u32 index = 0; index < num_buffers; ++index) { | ||
| 111 | if (!program.info.nvn_buffer_used[index]) { | ||
| 112 | continue; | ||
| 113 | } | ||
| 114 | const u32 offset{base + index * descriptor_size}; | ||
| 115 | const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)}; | ||
| 116 | if (it != descs.end()) { | ||
| 117 | it->is_written |= program.info.stores_global_memory; | ||
| 118 | continue; | ||
| 119 | } | ||
| 120 | descs.push_back({ | ||
| 121 | .cbuf_index = driver_cbuf, | ||
| 122 | .cbuf_offset = offset, | ||
| 123 | .count = 1, | ||
| 124 | .is_written = program.info.stores_global_memory, | ||
| 125 | }); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | } // Anonymous namespace | ||
| 129 | |||
| 130 | IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||
| 131 | Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | ||
| 132 | IR::Program program; | ||
| 133 | program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | ||
| 134 | program.blocks = GenerateBlocks(program.syntax_list); | ||
| 135 | program.post_order_blocks = PostOrder(program.syntax_list.front()); | ||
| 136 | program.stage = env.ShaderStage(); | ||
| 137 | program.local_memory_size = env.LocalMemorySize(); | ||
| 138 | switch (program.stage) { | ||
| 139 | case Stage::TessellationControl: { | ||
| 140 | const ProgramHeader& sph{env.SPH()}; | ||
| 141 | program.invocations = sph.common2.threads_per_input_primitive; | ||
| 142 | break; | ||
| 143 | } | ||
| 144 | case Stage::Geometry: { | ||
| 145 | const ProgramHeader& sph{env.SPH()}; | ||
| 146 | program.output_topology = sph.common3.output_topology; | ||
| 147 | program.output_vertices = sph.common4.max_output_vertices; | ||
| 148 | program.invocations = sph.common2.threads_per_input_primitive; | ||
| 149 | program.is_geometry_passthrough = sph.common0.geometry_passthrough != 0; | ||
| 150 | if (program.is_geometry_passthrough) { | ||
| 151 | const auto& mask{env.GpPassthroughMask()}; | ||
| 152 | for (size_t i = 0; i < program.info.passthrough.mask.size(); ++i) { | ||
| 153 | program.info.passthrough.mask[i] = ((mask[i / 32] >> (i % 32)) & 1) == 0; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | break; | ||
| 157 | } | ||
| 158 | case Stage::Compute: | ||
| 159 | program.workgroup_size = env.WorkgroupSize(); | ||
| 160 | program.shared_memory_size = env.SharedMemorySize(); | ||
| 161 | break; | ||
| 162 | default: | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | RemoveUnreachableBlocks(program); | ||
| 166 | |||
| 167 | // Replace instructions before the SSA rewrite | ||
| 168 | if (!host_info.support_float16) { | ||
| 169 | Optimization::LowerFp16ToFp32(program); | ||
| 170 | } | ||
| 171 | if (!host_info.support_int64) { | ||
| 172 | Optimization::LowerInt64ToInt32(program); | ||
| 173 | } | ||
| 174 | Optimization::SsaRewritePass(program); | ||
| 175 | |||
| 176 | Optimization::GlobalMemoryToStorageBufferPass(program); | ||
| 177 | Optimization::TexturePass(env, program); | ||
| 178 | |||
| 179 | Optimization::ConstantPropagationPass(program); | ||
| 180 | Optimization::DeadCodeEliminationPass(program); | ||
| 181 | if (Settings::values.renderer_debug) { | ||
| 182 | Optimization::VerificationPass(program); | ||
| 183 | } | ||
| 184 | Optimization::CollectShaderInfoPass(env, program); | ||
| 185 | CollectInterpolationInfo(env, program); | ||
| 186 | AddNVNStorageBuffers(program); | ||
| 187 | return program; | ||
| 188 | } | ||
| 189 | |||
| 190 | IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 191 | Environment& env_vertex_b) { | ||
| 192 | IR::Program result{}; | ||
| 193 | Optimization::VertexATransformPass(vertex_a); | ||
| 194 | Optimization::VertexBTransformPass(vertex_b); | ||
| 195 | for (const auto& term : vertex_a.syntax_list) { | ||
| 196 | if (term.type != IR::AbstractSyntaxNode::Type::Return) { | ||
| 197 | result.syntax_list.push_back(term); | ||
| 198 | } | ||
| 199 | } | ||
| 200 | result.syntax_list.insert(result.syntax_list.end(), vertex_b.syntax_list.begin(), | ||
| 201 | vertex_b.syntax_list.end()); | ||
| 202 | result.blocks = GenerateBlocks(result.syntax_list); | ||
| 203 | result.post_order_blocks = vertex_b.post_order_blocks; | ||
| 204 | for (const auto& block : vertex_a.post_order_blocks) { | ||
| 205 | result.post_order_blocks.push_back(block); | ||
| 206 | } | ||
| 207 | result.stage = Stage::VertexB; | ||
| 208 | result.info = vertex_a.info; | ||
| 209 | result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); | ||
| 210 | result.info.loads.mask |= vertex_b.info.loads.mask; | ||
| 211 | result.info.stores.mask |= vertex_b.info.stores.mask; | ||
| 212 | |||
| 213 | Optimization::JoinTextureInfo(result.info, vertex_b.info); | ||
| 214 | Optimization::JoinStorageInfo(result.info, vertex_b.info); | ||
| 215 | Optimization::DeadCodeEliminationPass(result); | ||
| 216 | if (Settings::values.renderer_debug) { | ||
| 217 | Optimization::VerificationPass(result); | ||
| 218 | } | ||
| 219 | Optimization::CollectShaderInfoPass(env_vertex_b, result); | ||
| 220 | return result; | ||
| 221 | } | ||
| 222 | |||
| 223 | } // namespace Shader::Maxwell | ||
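Editor's note: AddNVNStorageBuffers synthesizes storage-buffer descriptors at fixed offsets inside driver constant buffer 0: a per-stage base offset (0x510 for fragment shaders in the code above) plus 0x10 bytes per buffer slot, for up to 16 slots. A standalone sketch of that address computation:

#include <cstdint>
#include <cstdio>

int main() {
    const std::uint32_t descriptor_size = 0x10; // bytes per NVN buffer descriptor
    const std::uint32_t fragment_base = 0x510;  // base offset used for Stage::Fragment
    for (std::uint32_t index = 0; index < 16; ++index) {
        const std::uint32_t offset = fragment_base + index * descriptor_size;
        std::printf("ssbo[%2u] descriptor at c0[0x%03X]\n", index, offset);
    }
}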
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.h b/src/shader_recompiler/frontend/maxwell/translate_program.h new file mode 100644 index 000000000..a84814811 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate_program.h | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "shader_recompiler/environment.h" | ||
| 8 | #include "shader_recompiler/frontend/ir/basic_block.h" | ||
| 9 | #include "shader_recompiler/frontend/ir/program.h" | ||
| 10 | #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||
| 11 | #include "shader_recompiler/host_translate_info.h" | ||
| 12 | #include "shader_recompiler/object_pool.h" | ||
| 13 | |||
| 14 | namespace Shader::Maxwell { | ||
| 15 | |||
| 16 | [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, | ||
| 17 | ObjectPool<IR::Block>& block_pool, Environment& env, | ||
| 18 | Flow::CFG& cfg, const HostTranslateInfo& host_info); | ||
| 19 | |||
| 20 | [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||
| 21 | Environment& env_vertex_b); | ||
| 22 | |||
| 23 | } // namespace Shader::Maxwell | ||