Diffstat (limited to 'src/video_core/shader')
51 files changed, 0 insertions, 10654 deletions
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
deleted file mode 100644
index db11144c7..000000000
--- a/src/video_core/shader/ast.cpp
+++ /dev/null
@@ -1,752 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <string>
#include <string_view>

#include <fmt/format.h>

#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/shader/ast.h"
#include "video_core/shader/expr.h"

namespace VideoCommon::Shader {

ASTZipper::ASTZipper() = default;

void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
    ASSERT(new_first->manager == nullptr);
    first = new_first;
    last = new_first;

    ASTNode current = first;
    while (current) {
        current->manager = this;
        current->parent = parent;
        last = current;
        current = current->next;
    }
}

void ASTZipper::PushBack(const ASTNode new_node) {
    ASSERT(new_node->manager == nullptr);
    new_node->previous = last;
    if (last) {
        last->next = new_node;
    }
    new_node->next.reset();
    last = new_node;
    if (!first) {
        first = new_node;
    }
    new_node->manager = this;
}

void ASTZipper::PushFront(const ASTNode new_node) {
    ASSERT(new_node->manager == nullptr);
    new_node->previous.reset();
    new_node->next = first;
    if (first) {
        first->previous = new_node;
    }
    // Update the tail only when the list was empty.
    if (!last) {
        last = new_node;
    }
    first = new_node;
    new_node->manager = this;
}

void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
    ASSERT(new_node->manager == nullptr);
    if (!at_node) {
        PushFront(new_node);
        return;
    }
    const ASTNode next = at_node->next;
    if (next) {
        next->previous = new_node;
    }
    new_node->previous = at_node;
    if (at_node == last) {
        last = new_node;
    }
    new_node->next = next;
    at_node->next = new_node;
    new_node->manager = this;
}

void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
    ASSERT(new_node->manager == nullptr);
    if (!at_node) {
        PushBack(new_node);
        return;
    }
    const ASTNode previous = at_node->previous;
    if (previous) {
        previous->next = new_node;
    }
    new_node->next = at_node;
    if (at_node == first) {
        first = new_node;
    }
    new_node->previous = previous;
    at_node->previous = new_node;
    new_node->manager = this;
}

void ASTZipper::DetachTail(ASTNode node) {
    ASSERT(node->manager == this);
    if (node == first) {
        first.reset();
        last.reset();
        return;
    }

    last = node->previous;
    last->next.reset();
    node->previous.reset();

    ASTNode current = std::move(node);
    while (current) {
        current->manager = nullptr;
        current->parent.reset();
        current = current->next;
    }
}

void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
    ASSERT(start->manager == this && end->manager == this);
    if (start == end) {
        DetachSingle(start);
        return;
    }
    const ASTNode prev = start->previous;
    const ASTNode post = end->next;
    if (!prev) {
        first = post;
    } else {
        prev->next = post;
    }
    if (!post) {
        last = prev;
    } else {
        post->previous = prev;
    }
    start->previous.reset();
    end->next.reset();
    ASTNode current = start;
    bool found = false;
    while (current) {
        current->manager = nullptr;
        current->parent.reset();
        found |= current == end;
        current = current->next;
    }
    ASSERT(found);
}

void ASTZipper::DetachSingle(const ASTNode node) {
    ASSERT(node->manager == this);
    const ASTNode prev = node->previous;
    const ASTNode post = node->next;
    node->previous.reset();
    node->next.reset();
    if (!prev) {
        first = post;
    } else {
        prev->next = post;
    }
    if (!post) {
        last = prev;
    } else {
        post->previous = prev;
    }

    node->manager = nullptr;
    node->parent.reset();
}

void ASTZipper::Remove(const ASTNode node) {
    ASSERT(node->manager == this);
    const ASTNode next = node->next;
    const ASTNode previous = node->previous;
    if (previous) {
        previous->next = next;
    }
    if (next) {
        next->previous = previous;
    }
    node->parent.reset();
    node->manager = nullptr;
    if (node == last) {
        last = previous;
    }
    if (node == first) {
        first = next;
    }
}

class ExprPrinter final {
public:
    void operator()(const ExprAnd& expr) {
        inner += "( ";
        std::visit(*this, *expr.operand1);
        inner += " && ";
        std::visit(*this, *expr.operand2);
        inner += ')';
    }

    void operator()(const ExprOr& expr) {
        inner += "( ";
        std::visit(*this, *expr.operand1);
        inner += " || ";
        std::visit(*this, *expr.operand2);
        inner += ')';
    }

    void operator()(const ExprNot& expr) {
        inner += "!";
        std::visit(*this, *expr.operand1);
    }

    void operator()(const ExprPredicate& expr) {
        inner += fmt::format("P{}", expr.predicate);
    }

    void operator()(const ExprCondCode& expr) {
        inner += fmt::format("CC{}", expr.cc);
    }

    void operator()(const ExprVar& expr) {
        inner += fmt::format("V{}", expr.var_index);
    }

    void operator()(const ExprBoolean& expr) {
        inner += expr.value ? "true" : "false";
    }

    void operator()(const ExprGprEqual& expr) {
        inner += fmt::format("(gpr_{} == {})", expr.gpr, expr.value);
    }

    const std::string& GetResult() const {
        return inner;
    }

private:
    std::string inner;
};

class ASTPrinter {
public:
    void operator()(const ASTProgram& ast) {
        scope++;
        inner += "program {\n";
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        inner += "}\n";
        scope--;
    }

    void operator()(const ASTIfThen& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
        scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        scope--;
        inner += fmt::format("{}}}\n", Indent());
    }

    void operator()(const ASTIfElse& ast) {
        inner += Indent();
        inner += "else {\n";

        scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        scope--;

        inner += Indent();
        inner += "}\n";
    }

    void operator()(const ASTBlockEncoded& ast) {
        inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
    }

    void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
        inner += Indent();
        inner += "Block;\n";
    }

    void operator()(const ASTVarSet& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
    }

    void operator()(const ASTLabel& ast) {
        inner += fmt::format("Label_{}:\n", ast.index);
    }

    void operator()(const ASTGoto& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner +=
            fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
    }

    void operator()(const ASTDoWhile& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}do {{\n", Indent());
        scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        scope--;
        inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
    }

    void operator()(const ASTReturn& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
                             ast.kills ? "discard" : "exit");
    }

    void operator()(const ASTBreak& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
    }

    void Visit(const ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
    }

    const std::string& GetResult() const {
        return inner;
    }

private:
    std::string_view Indent() {
        if (space_segment_scope == scope) {
            return space_segment;
        }

        // Ensure that we don't exceed our view.
        ASSERT(scope * 2 < spaces.size());

        space_segment = spaces.substr(0, scope * 2);
        space_segment_scope = scope;
        return space_segment;
    }
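
    // Illustrative note (not in the original sources): indentation is two
    // spaces per scope level, so e.g. scope == 3 yields a six-character view.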

    std::string inner{};
    std::string_view space_segment;

    u32 scope{};
    u32 space_segment_scope{};

    static constexpr std::string_view spaces{"                                                "};
};

std::string ASTManager::Print() const {
    ASTPrinter printer{};
    printer.Visit(main_node);
    return printer.GetResult();
}

ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
    : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}

ASTManager::~ASTManager() {
    Clear();
}

void ASTManager::Init() {
    main_node = ASTBase::Make<ASTProgram>(ASTNode{});
    program = std::get_if<ASTProgram>(main_node->GetInnerData());
    false_condition = MakeExpr<ExprBoolean>(false);
}

void ASTManager::DeclareLabel(u32 address) {
    const auto pair = labels_map.emplace(address, labels_count);
    if (pair.second) {
        labels_count++;
        labels.resize(labels_count);
    }
}

void ASTManager::InsertLabel(u32 address) {
    const u32 index = labels_map[address];
    const ASTNode label = ASTBase::Make<ASTLabel>(main_node, index);
    labels[index] = label;
    program->nodes.PushBack(label);
}

void ASTManager::InsertGoto(Expr condition, u32 address) {
    const u32 index = labels_map[address];
    const ASTNode goto_node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), index);
    gotos.push_back(goto_node);
    program->nodes.PushBack(goto_node);
}

void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
    ASTNode block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
    program->nodes.PushBack(std::move(block));
}

void ASTManager::InsertReturn(Expr condition, bool kills) {
    ASTNode node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
    program->nodes.PushBack(std::move(node));
}

// The decompile algorithm is based on
// "Taming control flow: A structured approach to eliminating goto statements"
// by AM Erosa and LJ Hendren, 1994. In general, the idea is to get gotos to be
// on the same structured level as the label to which they jump. This is done
// through outward/inward movements and lifting. Once they are at the same
// level, they can be enclosed in an "if" structure or a "do-while" structure.
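//
// An illustrative sketch (not part of the original comment): once a backward
// goto and its label are siblings, in the notation of ASTPrinter,
//
//     Label_1:
//     Block(0, 16);
//     (P0) -> goto Label_1;
//
// is enclosed as
//
//     do {
//         Block(0, 16);
//     } while (P0);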
void ASTManager::Decompile() {
    auto it = gotos.begin();
    while (it != gotos.end()) {
        const ASTNode goto_node = *it;
        const auto label_index = goto_node->GetGotoLabel();
        if (!label_index) {
            return;
        }
        const ASTNode label = labels[*label_index];
        if (!full_decompile) {
            // We only decompile backward jumps
            if (!IsBackwardsJump(goto_node, label)) {
                it++;
                continue;
            }
        }
        if (IndirectlyRelated(goto_node, label)) {
            while (!DirectlyRelated(goto_node, label)) {
                MoveOutward(goto_node);
            }
        }
        if (DirectlyRelated(goto_node, label)) {
            u32 goto_level = goto_node->GetLevel();
            const u32 label_level = label->GetLevel();
            while (label_level < goto_level) {
                MoveOutward(goto_node);
                goto_level--;
            }
            // TODO(Blinkhawk): Implement Lifting and Inward Movements
        }
        if (label->GetParent() == goto_node->GetParent()) {
            bool is_loop = false;
            ASTNode current = goto_node->GetPrevious();
            while (current) {
                if (current == label) {
                    is_loop = true;
                    break;
                }
                current = current->GetPrevious();
            }

            if (is_loop) {
                EncloseDoWhile(goto_node, label);
            } else {
                EncloseIfThen(goto_node, label);
            }
            it = gotos.erase(it);
            continue;
        }
        it++;
    }
    if (full_decompile) {
        for (const ASTNode& label : labels) {
            auto& manager = label->GetManager();
            manager.Remove(label);
        }
        labels.clear();
    } else {
        auto label_it = labels.begin();
        while (label_it != labels.end()) {
            bool can_remove = true;
            ASTNode label = *label_it;
            for (const ASTNode& goto_node : gotos) {
                const auto label_index = goto_node->GetGotoLabel();
                if (!label_index) {
                    return;
                }
                ASTNode& glabel = labels[*label_index];
                if (glabel == label) {
                    can_remove = false;
                    break;
                }
            }
            if (can_remove) {
                label->MarkLabelUnused();
            }
            // Advance to the next label so the scan terminates.
            ++label_it;
        }
    }
}

bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
    u32 goto_level = goto_node->GetLevel();
    u32 label_level = label_node->GetLevel();
    while (goto_level > label_level) {
        goto_level--;
        goto_node = goto_node->GetParent();
    }
    while (label_level > goto_level) {
        label_level--;
        label_node = label_node->GetParent();
    }
    while (goto_node->GetParent() != label_node->GetParent()) {
        goto_node = goto_node->GetParent();
        label_node = label_node->GetParent();
    }
    ASTNode current = goto_node->GetPrevious();
    while (current) {
        if (current == label_node) {
            return true;
        }
        current = current->GetPrevious();
    }
    return false;
}

bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
    return !(first->GetParent() == second->GetParent() || DirectlyRelated(first, second));
}

bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
    if (first->GetParent() == second->GetParent()) {
        return false;
    }
    const u32 first_level = first->GetLevel();
    const u32 second_level = second->GetLevel();
    u32 min_level;
    u32 max_level;
    ASTNode max;
    ASTNode min;
    if (first_level > second_level) {
        min_level = second_level;
        min = second;
        max_level = first_level;
        max = first;
    } else {
        min_level = first_level;
        min = first;
        max_level = second_level;
        max = second;
    }

    while (max_level > min_level) {
        max_level--;
        max = max->GetParent();
    }

    return min->GetParent() == max->GetParent();
}

void ASTManager::ShowCurrentState(std::string_view state) const {
    LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
    SanityCheck();
}

void ASTManager::SanityCheck() const {
    for (const auto& label : labels) {
        if (!label->GetParent()) {
            LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
        }
    }
}

void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode loop_start = label->GetNext();
    if (loop_start == goto_node) {
        zipper.Remove(goto_node);
        return;
    }
    const ASTNode parent = label->GetParent();
    const Expr condition = goto_node->GetGotoCondition();
    zipper.DetachSegment(loop_start, goto_node);
    const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
    ASTZipper* sub_zipper = do_while_node->GetSubNodes();
    sub_zipper->Init(loop_start, do_while_node);
    zipper.InsertAfter(do_while_node, label);
    sub_zipper->Remove(goto_node);
}

void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode if_end = label->GetPrevious();
    if (if_end == goto_node) {
        zipper.Remove(goto_node);
        return;
    }
    const ASTNode prev = goto_node->GetPrevious();
    const Expr condition = goto_node->GetGotoCondition();
    bool do_else = false;
    if (!disable_else_derivation && prev->IsIfThen()) {
        const Expr if_condition = prev->GetIfCondition();
        do_else = ExprAreEqual(if_condition, condition);
    }
    const ASTNode parent = label->GetParent();
    zipper.DetachSegment(goto_node, if_end);
    ASTNode if_node;
    if (do_else) {
        if_node = ASTBase::Make<ASTIfElse>(parent);
    } else {
        Expr neg_condition = MakeExprNot(condition);
        if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
    }
    ASTZipper* sub_zipper = if_node->GetSubNodes();
    sub_zipper->Init(goto_node, if_node);
    zipper.InsertAfter(if_node, prev);
    sub_zipper->Remove(goto_node);
}

void ASTManager::MoveOutward(ASTNode goto_node) {
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode parent = goto_node->GetParent();
    ASTZipper& zipper2 = parent->GetManager();
    const ASTNode grandpa = parent->GetParent();
    const bool is_loop = parent->IsLoop();
    const bool is_else = parent->IsIfElse();
    const bool is_if = parent->IsIfThen();

    const ASTNode prev = goto_node->GetPrevious();
    const ASTNode post = goto_node->GetNext();

    const Expr condition = goto_node->GetGotoCondition();
    zipper.DetachSingle(goto_node);
    if (is_loop) {
        const u32 var_index = NewVariable();
        const Expr var_condition = MakeExpr<ExprVar>(var_index);
        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
        zipper2.InsertBefore(var_node_init, parent);
        zipper.InsertAfter(var_node, prev);
        goto_node->SetGotoCondition(var_condition);
        const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
        zipper.InsertAfter(break_node, var_node);
    } else if (is_if || is_else) {
        const u32 var_index = NewVariable();
        const Expr var_condition = MakeExpr<ExprVar>(var_index);
        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
        if (is_if) {
            zipper2.InsertBefore(var_node_init, parent);
        } else {
            zipper2.InsertBefore(var_node_init, parent->GetPrevious());
        }
        zipper.InsertAfter(var_node, prev);
        goto_node->SetGotoCondition(var_condition);
        if (post) {
            zipper.DetachTail(post);
            const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
            ASTZipper* sub_zipper = if_node->GetSubNodes();
            sub_zipper->Init(post, if_node);
            zipper.InsertAfter(if_node, var_node);
        }
    } else {
        UNREACHABLE();
    }
    const ASTNode next = parent->GetNext();
    if (is_if && next && next->IsIfElse()) {
        zipper2.InsertAfter(goto_node, next);
        goto_node->SetParent(grandpa);
        return;
    }
    zipper2.InsertAfter(goto_node, parent);
    goto_node->SetParent(grandpa);
}
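
// Illustrative sketch of the loop case above (not from the original sources):
// moving a goto out of a do-while introduces a state variable and a break,
// e.g. in Print() notation
//
//     do {
//         Block(A);
//         (P0) -> goto Label_0;
//         Block(B);
//     } while (P1);
//
// becomes
//
//     V2 := false;
//     do {
//         Block(A);
//         V2 := P0;
//         (V2) -> break;
//         Block(B);
//     } while (P1);
//     (V2) -> goto Label_0;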

class ASTClearer {
public:
    ASTClearer() = default;

    void operator()(const ASTProgram& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()(const ASTIfThen& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()(const ASTIfElse& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}

    void operator()(ASTBlockDecoded& ast) {
        ast.nodes.clear();
    }

    void operator()([[maybe_unused]] const ASTVarSet& ast) {}

    void operator()([[maybe_unused]] const ASTLabel& ast) {}

    void operator()([[maybe_unused]] const ASTGoto& ast) {}

    void operator()(const ASTDoWhile& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }

    void operator()([[maybe_unused]] const ASTReturn& ast) {}

    void operator()([[maybe_unused]] const ASTBreak& ast) {}

    void Visit(const ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
        node->Clear();
    }
};

void ASTManager::Clear() {
    if (!main_node) {
        return;
    }
    ASTClearer clearer{};
    clearer.Visit(main_node);
    main_node.reset();
    program = nullptr;
    labels_map.clear();
    labels.clear();
    gotos.clear();
}

} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
deleted file mode 100644
index dc49b369e..000000000
--- a/src/video_core/shader/ast.h
+++ /dev/null
@@ -1,398 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <functional>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <unordered_map>
#include <variant>
#include <vector>

#include "video_core/shader/expr.h"
#include "video_core/shader/node.h"

namespace VideoCommon::Shader {

class ASTBase;
class ASTBlockDecoded;
class ASTBlockEncoded;
class ASTBreak;
class ASTDoWhile;
class ASTGoto;
class ASTIfElse;
class ASTIfThen;
class ASTLabel;
class ASTProgram;
class ASTReturn;
class ASTVarSet;

using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
                             ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;

using ASTNode = std::shared_ptr<ASTBase>;

enum class ASTZipperType : u32 {
    Program,
    IfThen,
    IfElse,
    Loop,
};

class ASTZipper final {
public:
    explicit ASTZipper();

    void Init(ASTNode first, ASTNode parent);

    ASTNode GetFirst() const {
        return first;
    }

    ASTNode GetLast() const {
        return last;
    }

    void PushBack(ASTNode new_node);
    void PushFront(ASTNode new_node);
    void InsertAfter(ASTNode new_node, ASTNode at_node);
    void InsertBefore(ASTNode new_node, ASTNode at_node);
    void DetachTail(ASTNode node);
    void DetachSingle(ASTNode node);
    void DetachSegment(ASTNode start, ASTNode end);
    void Remove(ASTNode node);

    ASTNode first;
    ASTNode last;
};
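
// Usage sketch (illustrative only, not from the original sources): a zipper
// manages the doubly linked list of siblings under one parent node, e.g.
//
//     ASTZipper& children = *parent->GetSubNodes();
//     children.PushBack(node_a);
//     children.InsertAfter(node_b, node_a); // list is now {node_a, node_b}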

class ASTProgram {
public:
    ASTZipper nodes{};
};

class ASTIfThen {
public:
    explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
    Expr condition;
    ASTZipper nodes{};
};

class ASTIfElse {
public:
    ASTZipper nodes{};
};

class ASTBlockEncoded {
public:
    explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
    u32 start;
    u32 end;
};

class ASTBlockDecoded {
public:
    explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
    NodeBlock nodes;
};

class ASTVarSet {
public:
    explicit ASTVarSet(u32 index_, Expr condition_)
        : index{index_}, condition{std::move(condition_)} {}

    u32 index;
    Expr condition;
};

class ASTLabel {
public:
    explicit ASTLabel(u32 index_) : index{index_} {}
    u32 index;
    bool unused{};
};

class ASTGoto {
public:
    explicit ASTGoto(Expr condition_, u32 label_)
        : condition{std::move(condition_)}, label{label_} {}

    Expr condition;
    u32 label;
};

class ASTDoWhile {
public:
    explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
    Expr condition;
    ASTZipper nodes{};
};

class ASTReturn {
public:
    explicit ASTReturn(Expr condition_, bool kills_)
        : condition{std::move(condition_)}, kills{kills_} {}

    Expr condition;
    bool kills;
};

class ASTBreak {
public:
    explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
    Expr condition;
};

class ASTBase {
public:
    explicit ASTBase(ASTNode parent_, ASTData data_)
        : data{std::move(data_)}, parent{std::move(parent_)} {}

    template <class U, class... Args>
    static ASTNode Make(ASTNode parent, Args&&... args) {
        return std::make_shared<ASTBase>(std::move(parent),
                                         ASTData(U(std::forward<Args>(args)...)));
    }
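
    // Illustrative usage (mirrors ASTManager::InsertLabel in ast.cpp):
    //     ASTNode label = ASTBase::Make<ASTLabel>(parent, index);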

    void SetParent(ASTNode new_parent) {
        parent = std::move(new_parent);
    }

    ASTNode& GetParent() {
        return parent;
    }

    const ASTNode& GetParent() const {
        return parent;
    }

    u32 GetLevel() const {
        u32 level = 0;
        auto next_parent = parent;
        while (next_parent) {
            next_parent = next_parent->GetParent();
            level++;
        }
        return level;
    }

    ASTData* GetInnerData() {
        return &data;
    }

    const ASTData* GetInnerData() const {
        return &data;
    }

    ASTNode GetNext() const {
        return next;
    }

    ASTNode GetPrevious() const {
        return previous;
    }

    ASTZipper& GetManager() {
        return *manager;
    }

    const ASTZipper& GetManager() const {
        return *manager;
    }

    std::optional<u32> GetGotoLabel() const {
        if (const auto* inner = std::get_if<ASTGoto>(&data)) {
            return {inner->label};
        }
        return std::nullopt;
    }

    Expr GetGotoCondition() const {
        if (const auto* inner = std::get_if<ASTGoto>(&data)) {
            return inner->condition;
        }
        return nullptr;
    }

    void MarkLabelUnused() {
        if (auto* inner = std::get_if<ASTLabel>(&data)) {
            inner->unused = true;
        }
    }

    bool IsLabelUnused() const {
        if (const auto* inner = std::get_if<ASTLabel>(&data)) {
            return inner->unused;
        }
        return true;
    }

    std::optional<u32> GetLabelIndex() const {
        if (const auto* inner = std::get_if<ASTLabel>(&data)) {
            return {inner->index};
        }
        return std::nullopt;
    }

    Expr GetIfCondition() const {
        if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
            return inner->condition;
        }
        return nullptr;
    }

    void SetGotoCondition(Expr new_condition) {
        if (auto* inner = std::get_if<ASTGoto>(&data)) {
            inner->condition = std::move(new_condition);
        }
    }

    bool IsIfThen() const {
        return std::holds_alternative<ASTIfThen>(data);
    }

    bool IsIfElse() const {
        return std::holds_alternative<ASTIfElse>(data);
    }

    bool IsBlockEncoded() const {
        return std::holds_alternative<ASTBlockEncoded>(data);
    }

    void TransformBlockEncoded(NodeBlock&& nodes) {
        data = ASTBlockDecoded(std::move(nodes));
    }

    bool IsLoop() const {
        return std::holds_alternative<ASTDoWhile>(data);
    }

    ASTZipper* GetSubNodes() {
        if (std::holds_alternative<ASTProgram>(data)) {
            return &std::get_if<ASTProgram>(&data)->nodes;
        }
        if (std::holds_alternative<ASTIfThen>(data)) {
            return &std::get_if<ASTIfThen>(&data)->nodes;
        }
        if (std::holds_alternative<ASTIfElse>(data)) {
            return &std::get_if<ASTIfElse>(&data)->nodes;
        }
        if (std::holds_alternative<ASTDoWhile>(data)) {
            return &std::get_if<ASTDoWhile>(&data)->nodes;
        }
        return nullptr;
    }

    void Clear() {
        next.reset();
        previous.reset();
        parent.reset();
        manager = nullptr;
    }

private:
    friend class ASTZipper;

    ASTData data;
    ASTNode parent;
    ASTNode next;
    ASTNode previous;
    ASTZipper* manager{};
};

class ASTManager final {
public:
    explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
    ~ASTManager();

    ASTManager(const ASTManager& o) = delete;
    ASTManager& operator=(const ASTManager& other) = delete;

    ASTManager(ASTManager&& other) noexcept = default;
    ASTManager& operator=(ASTManager&& other) noexcept = default;

    void Init();

    void DeclareLabel(u32 address);

    void InsertLabel(u32 address);

    void InsertGoto(Expr condition, u32 address);

    void InsertBlock(u32 start_address, u32 end_address);

    void InsertReturn(Expr condition, bool kills);

    std::string Print() const;

    void Decompile();

    void ShowCurrentState(std::string_view state) const;

    void SanityCheck() const;

    void Clear();

    bool IsFullyDecompiled() const {
        if (full_decompile) {
            return gotos.empty();
        }

        for (ASTNode goto_node : gotos) {
            auto label_index = goto_node->GetGotoLabel();
            if (!label_index) {
                return false;
            }
            ASTNode glabel = labels[*label_index];
            if (IsBackwardsJump(goto_node, glabel)) {
                return false;
            }
        }
        return true;
    }

    ASTNode GetProgram() const {
        return main_node;
    }

    u32 GetVariables() const {
        return variables;
    }

    const std::vector<ASTNode>& GetLabels() const {
        return labels;
    }

private:
    bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;

    bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;

    bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;

    void EncloseDoWhile(ASTNode goto_node, ASTNode label);

    void EncloseIfThen(ASTNode goto_node, ASTNode label);

    void MoveOutward(ASTNode goto_node);

    u32 NewVariable() {
        return variables++;
    }

    bool full_decompile{};
    bool disable_else_derivation{};
    std::unordered_map<u32, u32> labels_map{};
    u32 labels_count{};
    std::vector<ASTNode> labels{};
    std::list<ASTNode> gotos{};
    u32 variables{};
    ASTProgram* program{};
    ASTNode main_node{};
    Expr false_condition{};
};

} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
deleted file mode 100644
index 02adcf9c7..000000000
--- a/src/video_core/shader/async_shaders.cpp
+++ /dev/null
@@ -1,234 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <condition_variable>
#include <mutex>
#include <thread>
#include <vector>

#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/shader/async_shaders.h"

namespace VideoCommon::Shader {

AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}

AsyncShaders::~AsyncShaders() {
    KillWorkers();
}

void AsyncShaders::AllocateWorkers() {
    // Use at least one thread
    u32 num_workers = 1;

    // Deduce how many more threads we can use
    const u32 thread_count = std::thread::hardware_concurrency();
    if (thread_count >= 8) {
        // Increase async workers by 1 for every 2 threads >= 8
        num_workers += 1 + (thread_count - 8) / 2;
    }
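    // For example: 8 hardware threads yield 2 workers, 12 yield 4, 16 yield 6.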

    // If we already have workers queued, ignore
    if (num_workers == worker_threads.size()) {
        return;
    }

    // If workers already exist, clear them
    if (!worker_threads.empty()) {
        FreeWorkers();
    }

    // Create workers
    for (std::size_t i = 0; i < num_workers; i++) {
        context_list.push_back(emu_window.CreateSharedContext());
        worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
                                    context_list[i].get());
    }
}

void AsyncShaders::FreeWorkers() {
    // Mark all threads to quit
    is_thread_exiting.store(true);
    cv.notify_all();
    for (auto& thread : worker_threads) {
        thread.join();
    }
    // Clear our shared contexts
    context_list.clear();

    // Clear our worker threads
    worker_threads.clear();
}

void AsyncShaders::KillWorkers() {
    is_thread_exiting.store(true);
    cv.notify_all();
    for (auto& thread : worker_threads) {
        thread.detach();
    }
    // Clear our shared contexts
    context_list.clear();

    // Clear our worker threads
    worker_threads.clear();
}

bool AsyncShaders::HasWorkQueued() const {
    return !pending_queue.empty();
}

bool AsyncShaders::HasCompletedWork() const {
    std::shared_lock lock{completed_mutex};
    return !finished_work.empty();
}

bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
    const auto& regs = gpu.Maxwell3D().regs;

    // If something is using depth, we can assume that the game is not rendering anything that
    // will be used only once.
    if (regs.zeta_enable) {
        return true;
    }

    // If the game is using a small index count, we can assume these are full-screen quads.
    // Usually these shaders are only used once for building textures, so we can assume they
    // can't be built async.
    if (regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6) {
        return false;
    }

    return true;
}
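
// Illustrative consequence of the heuristic above (not from the original
// sources): a depth-tested draw of any size is considered async-safe, while a
// 6-index full-screen quad without depth is compiled synchronously.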

std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
    std::vector<Result> results;
    {
        std::unique_lock lock{completed_mutex};
        results = std::move(finished_work);
        finished_work.clear();
    }
    return results;
}

void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                     Tegra::Engines::ShaderType shader_type, u64 uid,
                                     std::vector<u64> code, std::vector<u64> code_b,
                                     u32 main_offset, CompilerSettings compiler_settings,
                                     const Registry& registry, VAddr cpu_addr) {
    std::unique_lock lock(queue_mutex);
    pending_queue.push({
        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
        .device = &device,
        .shader_type = shader_type,
        .uid = uid,
        .code = std::move(code),
        .code_b = std::move(code_b),
        .main_offset = main_offset,
        .compiler_settings = compiler_settings,
        .registry = registry,
        .cpu_address = cpu_addr,
        .pp_cache = nullptr,
        .vk_device = nullptr,
        .scheduler = nullptr,
        .descriptor_pool = nullptr,
        .update_descriptor_queue = nullptr,
        .bindings{},
        .program{},
        .key{},
        .num_color_buffers = 0,
    });
    cv.notify_one();
}

void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                     const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
                                     Vulkan::VKDescriptorPool& descriptor_pool,
                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                                     Vulkan::SPIRVProgram program,
                                     Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
    std::unique_lock lock(queue_mutex);
    pending_queue.push({
        .backend = Backend::Vulkan,
        .device = nullptr,
        .shader_type{},
        .uid = 0,
        .code{},
        .code_b{},
        .main_offset = 0,
        .compiler_settings{},
        .registry{},
        .cpu_address = 0,
        .pp_cache = pp_cache,
        .vk_device = &device,
        .scheduler = &scheduler,
        .descriptor_pool = &descriptor_pool,
        .update_descriptor_queue = &update_descriptor_queue,
        .bindings = std::move(bindings),
        .program = std::move(program),
        .key = key,
        .num_color_buffers = num_color_buffers,
    });
    cv.notify_one();
}

void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
        std::unique_lock lock{queue_mutex};
        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
        if (is_thread_exiting) {
            return;
        }

        // Partial lock to allow all threads to read at the same time
        if (!HasWorkQueued()) {
            continue;
        }
        // Another thread beat us, just unlock and wait for the next load
        if (pending_queue.empty()) {
            continue;
        }

        // Pull work from queue
        WorkerParams work = std::move(pending_queue.front());
        pending_queue.pop();
        lock.unlock();

        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
            const auto scope = context->Acquire();
            auto program =
                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
            Result result{};
            result.backend = work.backend;
            result.cpu_address = work.cpu_address;
            result.uid = work.uid;
            result.code = std::move(work.code);
            result.code_b = std::move(work.code_b);
            result.shader_type = work.shader_type;

            if (work.backend == Backend::OpenGL) {
                result.program.opengl = std::move(program->source_program);
            } else if (work.backend == Backend::GLASM) {
                result.program.glasm = std::move(program->assembly_program);
            }

            {
                std::unique_lock complete_lock(completed_mutex);
                finished_work.push_back(std::move(result));
            }
        } else if (work.backend == Backend::Vulkan) {
            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                *work.vk_device, *work.scheduler, *work.descriptor_pool,
                *work.update_descriptor_queue, work.key, work.bindings, work.program,
                work.num_color_buffers);

            work.pp_cache->EmplacePipeline(std::move(pipeline));
        }
    }
}

} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
deleted file mode 100644
index 7fdff6e56..000000000
--- a/src/video_core/shader/async_shaders.h
+++ /dev/null
@@ -1,138 +0,0 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <optional>
#include <queue>
#include <shared_mutex>
#include <thread>
#include <vector>

#include <glad/glad.h>

#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/vulkan_common/vulkan_device.h"

namespace Core::Frontend {
class EmuWindow;
class GraphicsContext;
} // namespace Core::Frontend

namespace Tegra {
class GPU;
}

namespace Vulkan {
class VKPipelineCache;
}

namespace VideoCommon::Shader {

class AsyncShaders {
public:
    enum class Backend {
        OpenGL,
        GLASM,
        Vulkan,
    };

    struct ResultPrograms {
        OpenGL::OGLProgram opengl;
        OpenGL::OGLAssemblyProgram glasm;
    };

    struct Result {
        u64 uid;
        VAddr cpu_address;
        Backend backend;
        ResultPrograms program;
        std::vector<u64> code;
        std::vector<u64> code_b;
        Tegra::Engines::ShaderType shader_type;
    };

    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
    ~AsyncShaders();

    /// Start up shader worker threads
    void AllocateWorkers();

    /// Clear the shader queue and kill all worker threads
    void FreeWorkers();

    /// Force-end all threads
    void KillWorkers();

    /// Check to see if any shaders have actually been compiled
    [[nodiscard]] bool HasCompletedWork() const;

    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot
    /// build every shader async, as some shaders are only built and executed once. We try to
    /// "guess" which shaders would be used only once.
    [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;

    /// Pulls completed compiled shaders
    [[nodiscard]] std::vector<Result> GetCompletedWork();

    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
                           CompilerSettings compiler_settings, const Registry& registry,
                           VAddr cpu_addr);

    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
                           Vulkan::VKScheduler& scheduler,
                           Vulkan::VKDescriptorPool& descriptor_pool,
                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                           std::vector<VkDescriptorSetLayoutBinding> bindings,
                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
                           u32 num_color_buffers);

private:
    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);

    /// Check our worker queue to see if we have any work queued already
    [[nodiscard]] bool HasWorkQueued() const;

    struct WorkerParams {
        Backend backend;
        // For OGL
        const OpenGL::Device* device;
        Tegra::Engines::ShaderType shader_type;
        u64 uid;
        std::vector<u64> code;
        std::vector<u64> code_b;
        u32 main_offset;
        CompilerSettings compiler_settings;
        std::optional<Registry> registry;
        VAddr cpu_address;

        // For Vulkan
        Vulkan::VKPipelineCache* pp_cache;
        const Vulkan::Device* vk_device;
        Vulkan::VKScheduler* scheduler;
        Vulkan::VKDescriptorPool* descriptor_pool;
        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
        std::vector<VkDescriptorSetLayoutBinding> bindings;
        Vulkan::SPIRVProgram program;
        Vulkan::GraphicsPipelineCacheKey key;
        u32 num_color_buffers;
    };

    std::condition_variable cv;
    mutable std::mutex queue_mutex;
    mutable std::shared_mutex completed_mutex;
    std::atomic<bool> is_thread_exiting{};
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
    std::vector<std::thread> worker_threads;
    std::queue<WorkerParams> pending_queue;
    std::vector<Result> finished_work;
    Core::Frontend::EmuWindow& emu_window;
};

} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
deleted file mode 100644
index cddcbd4f0..000000000
--- a/src/video_core/shader/compiler_settings.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "video_core/shader/compiler_settings.h"

namespace VideoCommon::Shader {

std::string CompileDepthAsString(const CompileDepth cd) {
    switch (cd) {
    case CompileDepth::BruteForce:
        return "Brute Force Compile";
    case CompileDepth::FlowStack:
        return "Simple Flow Stack Mode";
    case CompileDepth::NoFlowStack:
        return "Remove Flow Stack";
    case CompileDepth::DecompileBackwards:
        return "Decompile Backward Jumps";
    case CompileDepth::FullDecompile:
        return "Full Decompilation";
    default:
        return "Unknown Compiler Process";
    }
}

} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
deleted file mode 100644
index 916018c01..000000000
--- a/src/video_core/shader/compiler_settings.h
+++ /dev/null
@@ -1,26 +0,0 @@
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | |||
| 9 | namespace VideoCommon::Shader { | ||
| 10 | |||
| 11 | enum class CompileDepth : u32 { | ||
| 12 | BruteForce = 0, | ||
| 13 | FlowStack = 1, | ||
| 14 | NoFlowStack = 2, | ||
| 15 | DecompileBackwards = 3, | ||
| 16 | FullDecompile = 4, | ||
| 17 | }; | ||
| 18 | |||
| 19 | std::string CompileDepthAsString(CompileDepth cd); | ||
| 20 | |||
| 21 | struct CompilerSettings { | ||
| 22 | CompileDepth depth{CompileDepth::NoFlowStack}; | ||
| 23 | bool disable_else_derivation{true}; | ||
| 24 | }; | ||
| 25 | |||
| 26 | } // namespace VideoCommon::Shader | ||
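A small usage sketch for these settings (hypothetical call site):

    #include "video_core/shader/compiler_settings.h"

    VideoCommon::Shader::CompilerSettings MakeFullDecompileSettings() {
        using namespace VideoCommon::Shader;
        CompilerSettings settings{};
        settings.depth = CompileDepth::FullDecompile; // try to remove every goto
        settings.disable_else_derivation = false;     // also derive else branches
        // CompileDepthAsString(settings.depth) == "Full Decompilation"
        return settings;
    }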
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp deleted file mode 100644 index 43d965f2f..000000000 --- a/src/video_core/shader/control_flow.cpp +++ /dev/null | |||
| @@ -1,751 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <list> | ||
| 6 | #include <map> | ||
| 7 | #include <set> | ||
| 8 | #include <stack> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "video_core/shader/ast.h" | ||
| 15 | #include "video_core/shader/control_flow.h" | ||
| 16 | #include "video_core/shader/memory_util.h" | ||
| 17 | #include "video_core/shader/registry.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | namespace { | ||
| 23 | |||
| 24 | using Tegra::Shader::Instruction; | ||
| 25 | using Tegra::Shader::OpCode; | ||
| 26 | |||
| 27 | constexpr s32 unassigned_branch = -2; | ||
| 28 | |||
| 29 | struct Query { | ||
| 30 | u32 address{}; | ||
| 31 | std::stack<u32> ssy_stack{}; | ||
| 32 | std::stack<u32> pbk_stack{}; | ||
| 33 | }; | ||
| 34 | |||
| 35 | struct BlockStack { | ||
| 36 | BlockStack() = default; | ||
| 37 | explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {} | ||
| 38 | std::stack<u32> ssy_stack{}; | ||
| 39 | std::stack<u32> pbk_stack{}; | ||
| 40 | }; | ||
| 41 | |||
| 42 | template <typename T, typename... Args> | ||
| 43 | BlockBranchInfo MakeBranchInfo(Args&&... args) { | ||
| 44 | static_assert(std::is_convertible_v<T, BranchData>); | ||
| 45 | return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); | ||
| 46 | } | ||
| 47 | |||
| 48 | bool BlockBranchIsIgnored(BlockBranchInfo first) { | ||
| 49 | bool ignore = false; | ||
| 50 | if (std::holds_alternative<SingleBranch>(*first)) { | ||
| 51 | const auto branch = std::get_if<SingleBranch>(first.get()); | ||
| 52 | ignore = branch->ignore; | ||
| 53 | } | ||
| 54 | return ignore; | ||
| 55 | } | ||
| 56 | |||
| 57 | struct BlockInfo { | ||
| 58 | u32 start{}; | ||
| 59 | u32 end{}; | ||
| 60 | bool visited{}; | ||
| 61 | BlockBranchInfo branch{}; | ||
| 62 | |||
| 63 | bool IsInside(const u32 address) const { | ||
| 64 | return start <= address && address <= end; | ||
| 65 | } | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct CFGRebuildState { | ||
| 69 | explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_) | ||
| 70 | : program_code{program_code_}, registry{registry_}, start{start_} {} | ||
| 71 | |||
| 72 | const ProgramCode& program_code; | ||
| 73 | Registry& registry; | ||
| 74 | u32 start{}; | ||
| 75 | std::vector<BlockInfo> block_info; | ||
| 76 | std::list<u32> inspect_queries; | ||
| 77 | std::list<Query> queries; | ||
| 78 | std::unordered_map<u32, u32> registered; | ||
| 79 | std::set<u32> labels; | ||
| 80 | std::map<u32, u32> ssy_labels; | ||
| 81 | std::map<u32, u32> pbk_labels; | ||
| 82 | std::unordered_map<u32, BlockStack> stacks; | ||
| 83 | ASTManager* manager{}; | ||
| 84 | }; | ||
| 85 | |||
| 86 | enum class BlockCollision : u32 { None, Found, Inside }; | ||
| 87 | |||
| 88 | std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) { | ||
| 89 | const auto& blocks = state.block_info; | ||
| 90 | for (u32 index = 0; index < blocks.size(); index++) { | ||
| 91 | if (blocks[index].start == address) { | ||
| 92 | return {BlockCollision::Found, index}; | ||
| 93 | } | ||
| 94 | if (blocks[index].IsInside(address)) { | ||
| 95 | return {BlockCollision::Inside, index}; | ||
| 96 | } | ||
| 97 | } | ||
| 98 | return {BlockCollision::None, 0xFFFFFFFF}; | ||
| 99 | } | ||
| 100 | |||
| 101 | struct ParseInfo { | ||
| 102 | BlockBranchInfo branch_info{}; | ||
| 103 | u32 end_address{}; | ||
| 104 | }; | ||
| 105 | |||
| 106 | BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { | ||
| 107 | auto& it = state.block_info.emplace_back(); | ||
| 108 | it.start = start; | ||
| 109 | it.end = end; | ||
| 110 | const u32 index = static_cast<u32>(state.block_info.size() - 1); | ||
| 111 | state.registered.insert({start, index}); | ||
| 112 | return it; | ||
| 113 | } | ||
| 114 | |||
| 115 | Pred GetPredicate(u32 index, bool negated) { | ||
| 116 | return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL)); | ||
| 117 | } | ||
| 118 | |||
| 119 | enum class ParseResult : u32 { | ||
| 120 | ControlCaught, | ||
| 121 | BlockEnd, | ||
| 122 | AbnormalFlow, | ||
| 123 | }; | ||
| 124 | |||
| 125 | struct BranchIndirectInfo { | ||
| 126 | u32 buffer{}; | ||
| 127 | u32 offset{}; | ||
| 128 | u32 entries{}; | ||
| 129 | s32 relative_position{}; | ||
| 130 | }; | ||
| 131 | |||
| 132 | struct BufferInfo { | ||
| 133 | u32 index; | ||
| 134 | u32 offset; | ||
| 135 | }; | ||
| 136 | |||
| 137 | std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) { | ||
| 138 | const Instruction instr = state.program_code[pos]; | ||
| 139 | const auto opcode = OpCode::Decode(instr); | ||
| 140 | if (opcode->get().GetId() != OpCode::Id::BRX) { | ||
| 141 | return std::nullopt; | ||
| 142 | } | ||
| 143 | if (instr.brx.constant_buffer != 0) { | ||
| 144 | return std::nullopt; | ||
| 145 | } | ||
| 146 | --pos; | ||
| 147 | return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value()); | ||
| 148 | } | ||
| 149 | |||
| 150 | template <typename Result, typename TestCallable, typename PackCallable> | ||
| 151 | // requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&> | ||
| 152 | // requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&> | ||
| 153 | std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test, | ||
| 154 | PackCallable pack) { | ||
| 155 | for (; pos >= state.start; --pos) { | ||
| 156 | if (IsSchedInstruction(pos, state.start)) { | ||
| 157 | continue; | ||
| 158 | } | ||
| 159 | const Instruction instr = state.program_code[pos]; | ||
| 160 | const auto opcode = OpCode::Decode(instr); | ||
| 161 | if (!opcode) { | ||
| 162 | continue; | ||
| 163 | } | ||
| 164 | if (test(instr, opcode->get())) { | ||
| 165 | --pos; | ||
| 166 | return std::make_optional(pack(instr, opcode->get())); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | return std::nullopt; | ||
| 170 | } | ||
| 171 | |||
| 172 | std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos, | ||
| 173 | u64 brx_tracked_register) { | ||
| 174 | return TrackInstruction<std::pair<BufferInfo, u64>>( | ||
| 175 | state, pos, | ||
| 176 | [brx_tracked_register](auto instr, const auto& opcode) { | ||
| 177 | return opcode.GetId() == OpCode::Id::LD_C && | ||
| 178 | instr.gpr0.Value() == brx_tracked_register && | ||
| 179 | instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single; | ||
| 180 | }, | ||
| 181 | [](auto instr, const auto& opcode) { | ||
| 182 | const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()), | ||
| 183 | static_cast<u32>(instr.cbuf36.GetOffset())}; | ||
| 184 | return std::make_pair(info, instr.gpr8.Value()); | ||
| 185 | }); | ||
| 186 | } | ||
| 187 | |||
| 188 | std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos, | ||
| 189 | u64 ldc_tracked_register) { | ||
| 190 | return TrackInstruction<u64>( | ||
| 191 | state, pos, | ||
| 192 | [ldc_tracked_register](auto instr, const auto& opcode) { | ||
| 193 | return opcode.GetId() == OpCode::Id::SHL_IMM && | ||
| 194 | instr.gpr0.Value() == ldc_tracked_register; | ||
| 195 | }, | ||
| 196 | [](auto instr, const auto&) { return instr.gpr8.Value(); }); | ||
| 197 | } | ||
| 198 | |||
| 199 | std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos, | ||
| 200 | u64 shl_tracked_register) { | ||
| 201 | return TrackInstruction<u32>( | ||
| 202 | state, pos, | ||
| 203 | [shl_tracked_register](auto instr, const auto& opcode) { | ||
| 204 | return opcode.GetId() == OpCode::Id::IMNMX_IMM && | ||
| 205 | instr.gpr0.Value() == shl_tracked_register; | ||
| 206 | }, | ||
| 207 | [](auto instr, const auto&) { | ||
| 208 | return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1); | ||
| 209 | }); | ||
| 210 | } | ||
| 211 | |||
| 212 | std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) { | ||
| 213 | const auto brx_info = GetBRXInfo(state, pos); | ||
| 214 | if (!brx_info) { | ||
| 215 | return std::nullopt; | ||
| 216 | } | ||
| 217 | const auto [relative_position, brx_tracked_register] = *brx_info; | ||
| 218 | |||
| 219 | const auto ldc_info = TrackLDC(state, pos, brx_tracked_register); | ||
| 220 | if (!ldc_info) { | ||
| 221 | return std::nullopt; | ||
| 222 | } | ||
| 223 | const auto [buffer_info, ldc_tracked_register] = *ldc_info; | ||
| 224 | |||
| 225 | const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register); | ||
| 226 | if (!shl_tracked_register) { | ||
| 227 | return std::nullopt; | ||
| 228 | } | ||
| 229 | |||
| 230 | const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register); | ||
| 231 | if (!entries) { | ||
| 232 | return std::nullopt; | ||
| 233 | } | ||
| 234 | |||
| 235 | return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position}; | ||
| 236 | } | ||
| 237 | |||
| 238 | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||
| 239 | u32 offset = static_cast<u32>(address); | ||
| 240 | const u32 end_address = static_cast<u32>(state.program_code.size()); | ||
| 241 | ParseInfo parse_info{}; | ||
| 242 | SingleBranch single_branch{}; | ||
| 243 | |||
| 244 | const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) { | ||
| 245 | const auto pair = rebuild_state.labels.emplace(label_address); | ||
| 246 | if (pair.second) { | ||
| 247 | rebuild_state.inspect_queries.push_back(label_address); | ||
| 248 | } | ||
| 249 | }; | ||
| 250 | |||
| 251 | while (true) { | ||
| 252 | if (offset >= end_address) { | ||
| 253 | // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||
| 254 | ASSERT_MSG(false, "Shader exceeded the program size limit!"); | ||
| 255 | |||
| 256 | single_branch.address = exit_branch; | ||
| 257 | single_branch.ignore = false; | ||
| 258 | break; | ||
| 259 | } | ||
| 260 | if (state.registered.contains(offset)) { | ||
| 261 | single_branch.address = offset; | ||
| 262 | single_branch.ignore = true; | ||
| 263 | break; | ||
| 264 | } | ||
| 265 | if (IsSchedInstruction(offset, state.start)) { | ||
| 266 | offset++; | ||
| 267 | continue; | ||
| 268 | } | ||
| 269 | const Instruction instr = {state.program_code[offset]}; | ||
| 270 | const auto opcode = OpCode::Decode(instr); | ||
| 271 | if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { | ||
| 272 | offset++; | ||
| 273 | continue; | ||
| 274 | } | ||
| 275 | |||
| 276 | switch (opcode->get().GetId()) { | ||
| 277 | case OpCode::Id::EXIT: { | ||
| 278 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 279 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 280 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 281 | offset++; | ||
| 282 | continue; | ||
| 283 | } | ||
| 284 | const ConditionCode cc = instr.flow_condition_code; | ||
| 285 | single_branch.condition.cc = cc; | ||
| 286 | if (cc == ConditionCode::F) { | ||
| 287 | offset++; | ||
| 288 | continue; | ||
| 289 | } | ||
| 290 | single_branch.address = exit_branch; | ||
| 291 | single_branch.kill = false; | ||
| 292 | single_branch.is_sync = false; | ||
| 293 | single_branch.is_brk = false; | ||
| 294 | single_branch.ignore = false; | ||
| 295 | parse_info.end_address = offset; | ||
| 296 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 297 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 298 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 299 | |||
| 300 | return {ParseResult::ControlCaught, parse_info}; | ||
| 301 | } | ||
| 302 | case OpCode::Id::BRA: { | ||
| 303 | if (instr.bra.constant_buffer != 0) { | ||
| 304 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 305 | } | ||
| 306 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 307 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 308 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 309 | offset++; | ||
| 310 | continue; | ||
| 311 | } | ||
| 312 | const ConditionCode cc = instr.flow_condition_code; | ||
| 313 | single_branch.condition.cc = cc; | ||
| 314 | if (cc == ConditionCode::F) { | ||
| 315 | offset++; | ||
| 316 | continue; | ||
| 317 | } | ||
| 318 | const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||
| 319 | if (branch_offset == 0) { | ||
| 320 | single_branch.address = exit_branch; | ||
| 321 | } else { | ||
| 322 | single_branch.address = branch_offset; | ||
| 323 | } | ||
| 324 | insert_label(state, branch_offset); | ||
| 325 | single_branch.kill = false; | ||
| 326 | single_branch.is_sync = false; | ||
| 327 | single_branch.is_brk = false; | ||
| 328 | single_branch.ignore = false; | ||
| 329 | parse_info.end_address = offset; | ||
| 330 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 331 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 332 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 333 | |||
| 334 | return {ParseResult::ControlCaught, parse_info}; | ||
| 335 | } | ||
| 336 | case OpCode::Id::SYNC: { | ||
| 337 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 338 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 339 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 340 | offset++; | ||
| 341 | continue; | ||
| 342 | } | ||
| 343 | const ConditionCode cc = instr.flow_condition_code; | ||
| 344 | single_branch.condition.cc = cc; | ||
| 345 | if (cc == ConditionCode::F) { | ||
| 346 | offset++; | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | single_branch.address = unassigned_branch; | ||
| 350 | single_branch.kill = false; | ||
| 351 | single_branch.is_sync = true; | ||
| 352 | single_branch.is_brk = false; | ||
| 353 | single_branch.ignore = false; | ||
| 354 | parse_info.end_address = offset; | ||
| 355 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 356 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 357 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 358 | |||
| 359 | return {ParseResult::ControlCaught, parse_info}; | ||
| 360 | } | ||
| 361 | case OpCode::Id::BRK: { | ||
| 362 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 363 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 364 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 365 | offset++; | ||
| 366 | continue; | ||
| 367 | } | ||
| 368 | const ConditionCode cc = instr.flow_condition_code; | ||
| 369 | single_branch.condition.cc = cc; | ||
| 370 | if (cc == ConditionCode::F) { | ||
| 371 | offset++; | ||
| 372 | continue; | ||
| 373 | } | ||
| 374 | single_branch.address = unassigned_branch; | ||
| 375 | single_branch.kill = false; | ||
| 376 | single_branch.is_sync = false; | ||
| 377 | single_branch.is_brk = true; | ||
| 378 | single_branch.ignore = false; | ||
| 379 | parse_info.end_address = offset; | ||
| 380 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 381 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 382 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 383 | |||
| 384 | return {ParseResult::ControlCaught, parse_info}; | ||
| 385 | } | ||
| 386 | case OpCode::Id::KIL: { | ||
| 387 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 388 | single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||
| 389 | if (single_branch.condition.predicate == Pred::NeverExecute) { | ||
| 390 | offset++; | ||
| 391 | continue; | ||
| 392 | } | ||
| 393 | const ConditionCode cc = instr.flow_condition_code; | ||
| 394 | single_branch.condition.cc = cc; | ||
| 395 | if (cc == ConditionCode::F) { | ||
| 396 | offset++; | ||
| 397 | continue; | ||
| 398 | } | ||
| 399 | single_branch.address = exit_branch; | ||
| 400 | single_branch.kill = true; | ||
| 401 | single_branch.is_sync = false; | ||
| 402 | single_branch.is_brk = false; | ||
| 403 | single_branch.ignore = false; | ||
| 404 | parse_info.end_address = offset; | ||
| 405 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 406 | single_branch.condition, single_branch.address, single_branch.kill, | ||
| 407 | single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||
| 408 | |||
| 409 | return {ParseResult::ControlCaught, parse_info}; | ||
| 410 | } | ||
| 411 | case OpCode::Id::SSY: { | ||
| 412 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 413 | insert_label(state, target); | ||
| 414 | state.ssy_labels.emplace(offset, target); | ||
| 415 | break; | ||
| 416 | } | ||
| 417 | case OpCode::Id::PBK: { | ||
| 418 | const u32 target = offset + instr.bra.GetBranchTarget(); | ||
| 419 | insert_label(state, target); | ||
| 420 | state.pbk_labels.emplace(offset, target); | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | case OpCode::Id::BRX: { | ||
| 424 | const auto tmp = TrackBranchIndirectInfo(state, offset); | ||
| 425 | if (!tmp) { | ||
| 426 | LOG_WARNING(HW_GPU, "BRX tracking unsuccessful"); | ||
| 427 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 428 | } | ||
| 429 | |||
| 430 | const auto result = *tmp; | ||
| 431 | const s32 pc_target = offset + result.relative_position; | ||
| 432 | std::vector<CaseBranch> branches; | ||
| 433 | for (u32 i = 0; i < result.entries; i++) { | ||
| 434 | auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4); | ||
| 435 | if (!key) { | ||
| 436 | return {ParseResult::AbnormalFlow, parse_info}; | ||
| 437 | } | ||
| 438 | u32 value = *key; | ||
| 439 | u32 target = static_cast<u32>((value >> 3) + pc_target); | ||
| 440 | insert_label(state, target); | ||
| 441 | branches.emplace_back(value, target); | ||
| 442 | } | ||
| 443 | parse_info.end_address = offset; | ||
| 444 | parse_info.branch_info = MakeBranchInfo<MultiBranch>( | ||
| 445 | static_cast<u32>(instr.gpr8.Value()), std::move(branches)); | ||
| 446 | |||
| 447 | return {ParseResult::ControlCaught, parse_info}; | ||
| 448 | } | ||
| 449 | default: | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | |||
| 453 | offset++; | ||
| 454 | } | ||
| 455 | single_branch.kill = false; | ||
| 456 | single_branch.is_sync = false; | ||
| 457 | single_branch.is_brk = false; | ||
| 458 | parse_info.end_address = offset - 1; | ||
| 459 | parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||
| 460 | single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, | ||
| 461 | single_branch.is_brk, single_branch.ignore); | ||
| 462 | return {ParseResult::BlockEnd, parse_info}; | ||
| 463 | } | ||
| 464 | |||
| 465 | bool TryInspectAddress(CFGRebuildState& state) { | ||
| 466 | if (state.inspect_queries.empty()) { | ||
| 467 | return false; | ||
| 468 | } | ||
| 469 | |||
| 470 | const u32 address = state.inspect_queries.front(); | ||
| 471 | state.inspect_queries.pop_front(); | ||
| 472 | const auto [result, block_index] = TryGetBlock(state, address); | ||
| 473 | switch (result) { | ||
| 474 | case BlockCollision::Found: { | ||
| 475 | return true; | ||
| 476 | } | ||
| 477 | case BlockCollision::Inside: { | ||
| 478 | // This case is the tricky one: | ||
| 479 | // We need to split the block into two separate blocks | ||
| 480 | const u32 end = state.block_info[block_index].end; | ||
| 481 | BlockInfo& new_block = CreateBlockInfo(state, address, end); | ||
| 482 | BlockInfo& current_block = state.block_info[block_index]; | ||
| 483 | current_block.end = address - 1; | ||
| 484 | new_block.branch = std::move(current_block.branch); | ||
| 485 | BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); | ||
| 486 | const auto branch = std::get_if<SingleBranch>(forward_branch.get()); | ||
| 487 | branch->address = address; | ||
| 488 | branch->ignore = true; | ||
| 489 | current_block.branch = std::move(forward_branch); | ||
| 490 | return true; | ||
| 491 | } | ||
| 492 | default: | ||
| 493 | break; | ||
| 494 | } | ||
| 495 | const auto [parse_result, parse_info] = ParseCode(state, address); | ||
| 496 | if (parse_result == ParseResult::AbnormalFlow) { | ||
| 497 | // On AbnormalFlow, return false to abort the CFG reconstruction | ||
| 498 | return false; | ||
| 499 | } | ||
| 500 | |||
| 501 | BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||
| 502 | block_info.branch = parse_info.branch_info; | ||
| 503 | if (std::holds_alternative<SingleBranch>(*block_info.branch)) { | ||
| 504 | const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); | ||
| 505 | if (branch->condition.IsUnconditional()) { | ||
| 506 | return true; | ||
| 507 | } | ||
| 508 | const u32 fallthrough_address = parse_info.end_address + 1; | ||
| 509 | state.inspect_queries.push_front(fallthrough_address); | ||
| 510 | return true; | ||
| 511 | } | ||
| 512 | return true; | ||
| 513 | } | ||
| 514 | |||
| 515 | bool TryQuery(CFGRebuildState& state) { | ||
| 516 | const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels, | ||
| 517 | BlockInfo& block) { | ||
| 518 | auto gather_start = labels.lower_bound(block.start); | ||
| 519 | const auto gather_end = labels.upper_bound(block.end); | ||
| 520 | while (gather_start != gather_end) { | ||
| 521 | cc.push(gather_start->second); | ||
| 522 | ++gather_start; | ||
| 523 | } | ||
| 524 | }; | ||
| 525 | if (state.queries.empty()) { | ||
| 526 | return false; | ||
| 527 | } | ||
| 528 | |||
| 529 | Query& q = state.queries.front(); | ||
| 530 | const u32 block_index = state.registered[q.address]; | ||
| 531 | BlockInfo& block = state.block_info[block_index]; | ||
| 532 | // If the block has been visited, check that the stacks match; otherwise gather | ||
| 533 | // the ssy/pbk labels into the current stack and check whether the branch at the | ||
| 534 | // end of the block consumes a label. Schedule new queries accordingly. | ||
| 535 | if (block.visited) { | ||
| 536 | BlockStack& stack = state.stacks[q.address]; | ||
| 537 | const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) && | ||
| 538 | (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack); | ||
| 539 | state.queries.pop_front(); | ||
| 540 | return all_okay; | ||
| 541 | } | ||
| 542 | block.visited = true; | ||
| 543 | state.stacks.insert_or_assign(q.address, BlockStack{q}); | ||
| 544 | |||
| 545 | Query q2(q); | ||
| 546 | state.queries.pop_front(); | ||
| 547 | gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||
| 548 | gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||
| 549 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 550 | auto* branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 551 | if (!branch->condition.IsUnconditional()) { | ||
| 552 | q2.address = block.end + 1; | ||
| 553 | state.queries.push_back(q2); | ||
| 554 | } | ||
| 555 | |||
| 556 | auto& conditional_query = state.queries.emplace_back(q2); | ||
| 557 | if (branch->is_sync) { | ||
| 558 | if (branch->address == unassigned_branch) { | ||
| 559 | branch->address = conditional_query.ssy_stack.top(); | ||
| 560 | } | ||
| 561 | conditional_query.ssy_stack.pop(); | ||
| 562 | } | ||
| 563 | if (branch->is_brk) { | ||
| 564 | if (branch->address == unassigned_branch) { | ||
| 565 | branch->address = conditional_query.pbk_stack.top(); | ||
| 566 | } | ||
| 567 | conditional_query.pbk_stack.pop(); | ||
| 568 | } | ||
| 569 | conditional_query.address = branch->address; | ||
| 570 | return true; | ||
| 571 | } | ||
| 572 | |||
| 573 | const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 574 | for (const auto& branch_case : multi_branch->branches) { | ||
| 575 | auto& conditional_query = state.queries.emplace_back(q2); | ||
| 576 | conditional_query.address = branch_case.address; | ||
| 577 | } | ||
| 578 | |||
| 579 | return true; | ||
| 580 | } | ||
| 581 | |||
| 582 | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { | ||
| 583 | const auto get_expr = [](const Condition& cond) -> Expr { | ||
| 584 | Expr result; | ||
| 585 | if (cond.cc != ConditionCode::T) { | ||
| 586 | result = MakeExpr<ExprCondCode>(cond.cc); | ||
| 587 | } | ||
| 588 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 589 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 590 | bool negate = false; | ||
| 591 | if (pred > 7) { | ||
| 592 | negate = true; | ||
| 593 | pred -= 8; | ||
| 594 | } | ||
| 595 | Expr extra = MakeExpr<ExprPredicate>(pred); | ||
| 596 | if (negate) { | ||
| 597 | extra = MakeExpr<ExprNot>(std::move(extra)); | ||
| 598 | } | ||
| 599 | if (result) { | ||
| 600 | return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); | ||
| 601 | } | ||
| 602 | return extra; | ||
| 603 | } | ||
| 604 | if (result) { | ||
| 605 | return result; | ||
| 606 | } | ||
| 607 | return MakeExpr<ExprBoolean>(true); | ||
| 608 | }; | ||
| 609 | |||
| 610 | if (std::holds_alternative<SingleBranch>(*branch_info)) { | ||
| 611 | const auto* branch = std::get_if<SingleBranch>(branch_info.get()); | ||
| 612 | if (branch->address < 0) { | ||
| 613 | if (branch->kill) { | ||
| 614 | mm.InsertReturn(get_expr(branch->condition), true); | ||
| 615 | return; | ||
| 616 | } | ||
| 617 | mm.InsertReturn(get_expr(branch->condition), false); | ||
| 618 | return; | ||
| 619 | } | ||
| 620 | mm.InsertGoto(get_expr(branch->condition), branch->address); | ||
| 621 | return; | ||
| 622 | } | ||
| 623 | const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); | ||
| 624 | for (const auto& branch_case : multi_branch->branches) { | ||
| 625 | mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), | ||
| 626 | branch_case.address); | ||
| 627 | } | ||
| 628 | } | ||
| 629 | |||
| 630 | void DecompileShader(CFGRebuildState& state) { | ||
| 631 | state.manager->Init(); | ||
| 632 | for (auto label : state.labels) { | ||
| 633 | state.manager->DeclareLabel(label); | ||
| 634 | } | ||
| 635 | for (const auto& block : state.block_info) { | ||
| 636 | if (state.labels.contains(block.start)) { | ||
| 637 | state.manager->InsertLabel(block.start); | ||
| 638 | } | ||
| 639 | const bool ignore = BlockBranchIsIgnored(block.branch); | ||
| 640 | const u32 end = ignore ? block.end + 1 : block.end; | ||
| 641 | state.manager->InsertBlock(block.start, end); | ||
| 642 | if (!ignore) { | ||
| 643 | InsertBranch(*state.manager, block.branch); | ||
| 644 | } | ||
| 645 | } | ||
| 646 | state.manager->Decompile(); | ||
| 647 | } | ||
| 648 | |||
| 649 | } // Anonymous namespace | ||
| 650 | |||
| 651 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||
| 652 | const CompilerSettings& settings, | ||
| 653 | Registry& registry) { | ||
| 654 | auto result_out = std::make_unique<ShaderCharacteristics>(); | ||
| 655 | if (settings.depth == CompileDepth::BruteForce) { | ||
| 656 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 657 | return result_out; | ||
| 658 | } | ||
| 659 | |||
| 660 | CFGRebuildState state{program_code, start_address, registry}; | ||
| 661 | // Inspect Code and generate blocks | ||
| 662 | state.labels.clear(); | ||
| 663 | state.labels.emplace(start_address); | ||
| 664 | state.inspect_queries.push_back(state.start); | ||
| 665 | while (!state.inspect_queries.empty()) { | ||
| 666 | if (!TryInspectAddress(state)) { | ||
| 667 | result_out->settings.depth = CompileDepth::BruteForce; | ||
| 668 | return result_out; | ||
| 669 | } | ||
| 670 | } | ||
| 671 | |||
| 672 | bool use_flow_stack = true; | ||
| 673 | |||
| 674 | bool decompiled = false; | ||
| 675 | |||
| 676 | if (settings.depth != CompileDepth::FlowStack) { | ||
| 677 | // Decompile Stacks | ||
| 678 | state.queries.push_back(Query{state.start, {}, {}}); | ||
| 679 | decompiled = true; | ||
| 680 | while (!state.queries.empty()) { | ||
| 681 | if (!TryQuery(state)) { | ||
| 682 | decompiled = false; | ||
| 683 | break; | ||
| 684 | } | ||
| 685 | } | ||
| 686 | } | ||
| 687 | |||
| 688 | use_flow_stack = !decompiled; | ||
| 689 | |||
| 690 | // Sort and organize results | ||
| 691 | std::sort(state.block_info.begin(), state.block_info.end(), | ||
| 692 | [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); | ||
| 693 | if (decompiled && settings.depth != CompileDepth::NoFlowStack) { | ||
| 694 | ASTManager manager{settings.depth != CompileDepth::DecompileBackwards, | ||
| 695 | settings.disable_else_derivation}; | ||
| 696 | state.manager = &manager; | ||
| 697 | DecompileShader(state); | ||
| 698 | decompiled = state.manager->IsFullyDecompiled(); | ||
| 699 | if (!decompiled) { | ||
| 700 | if (settings.depth == CompileDepth::FullDecompile) { | ||
| 701 | LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:"); | ||
| 702 | } else { | ||
| 703 | LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:"); | ||
| 704 | } | ||
| 705 | state.manager->ShowCurrentState("Of Shader"); | ||
| 706 | state.manager->Clear(); | ||
| 707 | } else { | ||
| 708 | auto characteristics = std::make_unique<ShaderCharacteristics>(); | ||
| 709 | characteristics->start = start_address; | ||
| 710 | characteristics->settings.depth = settings.depth; | ||
| 711 | characteristics->manager = std::move(manager); | ||
| 712 | characteristics->end = state.block_info.back().end + 1; | ||
| 713 | return characteristics; | ||
| 714 | } | ||
| 715 | } | ||
| 716 | |||
| 717 | result_out->start = start_address; | ||
| 718 | result_out->settings.depth = | ||
| 719 | use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack; | ||
| 720 | result_out->blocks.clear(); | ||
| 721 | for (auto& block : state.block_info) { | ||
| 722 | ShaderBlock new_block{}; | ||
| 723 | new_block.start = block.start; | ||
| 724 | new_block.end = block.end; | ||
| 725 | new_block.ignore_branch = BlockBranchIsIgnored(block.branch); | ||
| 726 | if (!new_block.ignore_branch) { | ||
| 727 | new_block.branch = block.branch; | ||
| 728 | } | ||
| 729 | result_out->end = std::max(result_out->end, block.end); | ||
| 730 | result_out->blocks.push_back(new_block); | ||
| 731 | } | ||
| 732 | if (!use_flow_stack) { | ||
| 733 | result_out->labels = std::move(state.labels); | ||
| 734 | return result_out; | ||
| 735 | } | ||
| 736 | |||
| 737 | auto back = result_out->blocks.begin(); | ||
| 738 | auto next = std::next(back); | ||
| 739 | while (next != result_out->blocks.end()) { | ||
| 740 | if (!state.labels.contains(next->start) && next->start == back->end + 1) { | ||
| 741 | back->end = next->end; | ||
| 742 | next = result_out->blocks.erase(next); | ||
| 743 | continue; | ||
| 744 | } | ||
| 745 | back = next; | ||
| 746 | ++next; | ||
| 747 | } | ||
| 748 | |||
| 749 | return result_out; | ||
| 750 | } | ||
| 751 | } // namespace VideoCommon::Shader | ||
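ScanFlow is the entry point of this translation unit; a condensed caller sketch (mirroring how ShaderIR::Decode, further below, consumes the result):

    auto info = ScanFlow(program_code, main_offset, settings, registry);
    if (info->settings.depth != settings.depth) {
        // CFG reconstruction fell back to a simpler mode (FlowStack or BruteForce).
    }
    for (const ShaderBlock& block : info->blocks) {
        // Each block spans [block.start, block.end]; unless block.ignore_branch is
        // set, block.branch holds the SingleBranch/MultiBranch that terminates it.
    }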
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h deleted file mode 100644 index 37bf96492..000000000 --- a/src/video_core/shader/control_flow.h +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <list> | ||
| 8 | #include <optional> | ||
| 9 | #include <set> | ||
| 10 | #include <variant> | ||
| 11 | |||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/ast.h" | ||
| 14 | #include "video_core/shader/compiler_settings.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::ConditionCode; | ||
| 21 | using Tegra::Shader::Pred; | ||
| 22 | |||
| 23 | constexpr s32 exit_branch = -1; | ||
| 24 | |||
| 25 | struct Condition { | ||
| 26 | Pred predicate{Pred::UnusedIndex}; | ||
| 27 | ConditionCode cc{ConditionCode::T}; | ||
| 28 | |||
| 29 | bool IsUnconditional() const { | ||
| 30 | return predicate == Pred::UnusedIndex && cc == ConditionCode::T; | ||
| 31 | } | ||
| 32 | |||
| 33 | bool operator==(const Condition& other) const { | ||
| 34 | return std::tie(predicate, cc) == std::tie(other.predicate, other.cc); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool operator!=(const Condition& other) const { | ||
| 38 | return !operator==(other); | ||
| 39 | } | ||
| 40 | }; | ||
| 41 | |||
| 42 | class SingleBranch { | ||
| 43 | public: | ||
| 44 | SingleBranch() = default; | ||
| 45 | explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_, | ||
| 46 | bool is_brk_, bool ignore_) | ||
| 47 | : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_}, | ||
| 48 | ignore{ignore_} {} | ||
| 49 | |||
| 50 | bool operator==(const SingleBranch& b) const { | ||
| 51 | return std::tie(condition, address, kill, is_sync, is_brk, ignore) == | ||
| 52 | std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); | ||
| 53 | } | ||
| 54 | |||
| 55 | bool operator!=(const SingleBranch& b) const { | ||
| 56 | return !operator==(b); | ||
| 57 | } | ||
| 58 | |||
| 59 | Condition condition{}; | ||
| 60 | s32 address{exit_branch}; | ||
| 61 | bool kill{}; | ||
| 62 | bool is_sync{}; | ||
| 63 | bool is_brk{}; | ||
| 64 | bool ignore{}; | ||
| 65 | }; | ||
| 66 | |||
| 67 | struct CaseBranch { | ||
| 68 | explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value{cmp_value_}, address{address_} {} | ||
| 69 | u32 cmp_value; | ||
| 70 | u32 address; | ||
| 71 | }; | ||
| 72 | |||
| 73 | class MultiBranch { | ||
| 74 | public: | ||
| 75 | explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_) | ||
| 76 | : gpr{gpr_}, branches{std::move(branches_)} {} | ||
| 77 | |||
| 78 | u32 gpr{}; | ||
| 79 | std::vector<CaseBranch> branches{}; | ||
| 80 | }; | ||
| 81 | |||
| 82 | using BranchData = std::variant<SingleBranch, MultiBranch>; | ||
| 83 | using BlockBranchInfo = std::shared_ptr<BranchData>; | ||
| 84 | |||
| 85 | bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); | ||
| 86 | |||
| 87 | struct ShaderBlock { | ||
| 88 | u32 start{}; | ||
| 89 | u32 end{}; | ||
| 90 | bool ignore_branch{}; | ||
| 91 | BlockBranchInfo branch{}; | ||
| 92 | |||
| 93 | bool operator==(const ShaderBlock& sb) const { | ||
| 94 | return std::tie(start, end, ignore_branch) == | ||
| 95 | std::tie(sb.start, sb.end, sb.ignore_branch) && | ||
| 96 | BlockBranchInfoAreEqual(branch, sb.branch); | ||
| 97 | } | ||
| 98 | |||
| 99 | bool operator!=(const ShaderBlock& sb) const { | ||
| 100 | return !operator==(sb); | ||
| 101 | } | ||
| 102 | }; | ||
| 103 | |||
| 104 | struct ShaderCharacteristics { | ||
| 105 | std::list<ShaderBlock> blocks{}; | ||
| 106 | std::set<u32> labels{}; | ||
| 107 | u32 start{}; | ||
| 108 | u32 end{}; | ||
| 109 | ASTManager manager{true, true}; | ||
| 110 | CompilerSettings settings{}; | ||
| 111 | }; | ||
| 112 | |||
| 113 | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||
| 114 | const CompilerSettings& settings, | ||
| 115 | Registry& registry); | ||
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
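Because BlockBranchInfo is a shared_ptr to a variant, consumers dispatch on it like this (minimal sketch using only the types declared above):

    void VisitBranch(const ShaderBlock& block) {
        if (block.ignore_branch || !block.branch) {
            return;
        }
        if (const auto* single = std::get_if<SingleBranch>(block.branch.get())) {
            // A negative single->address encodes an exit (or kill), not a jump target.
        } else if (const auto* multi = std::get_if<MultiBranch>(block.branch.get())) {
            for (const CaseBranch& c : multi->branches) {
                // Taken when register multi->gpr equals c.cmp_value; jumps to c.address.
            }
        }
    }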
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp deleted file mode 100644 index 6576d1208..000000000 --- a/src/video_core/shader/decode.cpp +++ /dev/null | |||
| @@ -1,368 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <limits> | ||
| 7 | #include <set> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/assert.h" | ||
| 12 | #include "common/common_types.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/engines/shader_header.h" | ||
| 15 | #include "video_core/shader/control_flow.h" | ||
| 16 | #include "video_core/shader/memory_util.h" | ||
| 17 | #include "video_core/shader/node_helper.h" | ||
| 18 | #include "video_core/shader/shader_ir.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | using Tegra::Shader::Instruction; | ||
| 23 | using Tegra::Shader::OpCode; | ||
| 24 | |||
| 25 | namespace { | ||
| 26 | |||
| 27 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver, | ||
| 28 | const std::list<SamplerEntry>& used_samplers) { | ||
| 29 | if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | u32 count{}; | ||
| 33 | std::vector<u32> bound_offsets; | ||
| 34 | for (const auto& sampler : used_samplers) { | ||
| 35 | if (sampler.is_bindless) { | ||
| 36 | continue; | ||
| 37 | } | ||
| 38 | ++count; | ||
| 39 | bound_offsets.emplace_back(sampler.offset); | ||
| 40 | } | ||
| 41 | if (count > 1) { | ||
| 42 | gpu_driver.DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 43 | } | ||
| 44 | } | ||
| 45 | |||
| 46 | std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce, | ||
| 47 | VideoCore::GuestDriverProfile& gpu_driver, | ||
| 48 | const std::list<SamplerEntry>& used_samplers) { | ||
| 49 | const u32 base_offset = sampler_to_deduce.offset; | ||
| 50 | u32 max_offset{std::numeric_limits<u32>::max()}; | ||
| 51 | for (const auto& sampler : used_samplers) { | ||
| 52 | if (sampler.is_bindless) { | ||
| 53 | continue; | ||
| 54 | } | ||
| 55 | if (sampler.offset > base_offset) { | ||
| 56 | max_offset = std::min(sampler.offset, max_offset); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | if (max_offset == std::numeric_limits<u32>::max()) { | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize(); | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
| 67 | class ASTDecoder { | ||
| 68 | public: | ||
| 69 | explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {} | ||
| 70 | |||
| 71 | void operator()(ASTProgram& ast) { | ||
| 72 | ASTNode current = ast.nodes.GetFirst(); | ||
| 73 | while (current) { | ||
| 74 | Visit(current); | ||
| 75 | current = current->GetNext(); | ||
| 76 | } | ||
| 77 | } | ||
| 78 | |||
| 79 | void operator()(ASTIfThen& ast) { | ||
| 80 | ASTNode current = ast.nodes.GetFirst(); | ||
| 81 | while (current) { | ||
| 82 | Visit(current); | ||
| 83 | current = current->GetNext(); | ||
| 84 | } | ||
| 85 | } | ||
| 86 | |||
| 87 | void operator()(ASTIfElse& ast) { | ||
| 88 | ASTNode current = ast.nodes.GetFirst(); | ||
| 89 | while (current) { | ||
| 90 | Visit(current); | ||
| 91 | current = current->GetNext(); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | void operator()(ASTBlockEncoded& ast) {} | ||
| 96 | |||
| 97 | void operator()(ASTBlockDecoded& ast) {} | ||
| 98 | |||
| 99 | void operator()(ASTVarSet& ast) {} | ||
| 100 | |||
| 101 | void operator()(ASTLabel& ast) {} | ||
| 102 | |||
| 103 | void operator()(ASTGoto& ast) {} | ||
| 104 | |||
| 105 | void operator()(ASTDoWhile& ast) { | ||
| 106 | ASTNode current = ast.nodes.GetFirst(); | ||
| 107 | while (current) { | ||
| 108 | Visit(current); | ||
| 109 | current = current->GetNext(); | ||
| 110 | } | ||
| 111 | } | ||
| 112 | |||
| 113 | void operator()(ASTReturn& ast) {} | ||
| 114 | |||
| 115 | void operator()(ASTBreak& ast) {} | ||
| 116 | |||
| 117 | void Visit(ASTNode& node) { | ||
| 118 | std::visit(*this, *node->GetInnerData()); | ||
| 119 | if (node->IsBlockEncoded()) { | ||
| 120 | auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData()); | ||
| 121 | NodeBlock bb = ir.DecodeRange(block->start, block->end); | ||
| 122 | node->TransformBlockEncoded(std::move(bb)); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | private: | ||
| 127 | ShaderIR& ir; | ||
| 128 | }; | ||
| 129 | |||
| 130 | void ShaderIR::Decode() { | ||
| 131 | std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||
| 132 | |||
| 133 | decompiled = false; | ||
| 134 | auto info = ScanFlow(program_code, main_offset, settings, registry); | ||
| 135 | auto& shader_info = *info; | ||
| 136 | coverage_begin = shader_info.start; | ||
| 137 | coverage_end = shader_info.end; | ||
| 138 | switch (shader_info.settings.depth) { | ||
| 139 | case CompileDepth::FlowStack: { | ||
| 140 | for (const auto& block : shader_info.blocks) { | ||
| 141 | basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); | ||
| 142 | } | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | case CompileDepth::NoFlowStack: { | ||
| 146 | disable_flow_stack = true; | ||
| 147 | const auto insert_block = [this](NodeBlock& nodes, u32 label) { | ||
| 148 | if (label == static_cast<u32>(exit_branch)) { | ||
| 149 | return; | ||
| 150 | } | ||
| 151 | basic_blocks.insert({label, nodes}); | ||
| 152 | }; | ||
| 153 | const auto& blocks = shader_info.blocks; | ||
| 154 | NodeBlock current_block; | ||
| 155 | u32 current_label = static_cast<u32>(exit_branch); | ||
| 156 | for (const auto& block : blocks) { | ||
| 157 | if (shader_info.labels.contains(block.start)) { | ||
| 158 | insert_block(current_block, current_label); | ||
| 159 | current_block.clear(); | ||
| 160 | current_label = block.start; | ||
| 161 | } | ||
| 162 | if (!block.ignore_branch) { | ||
| 163 | DecodeRangeInner(current_block, block.start, block.end); | ||
| 164 | InsertControlFlow(current_block, block); | ||
| 165 | } else { | ||
| 166 | DecodeRangeInner(current_block, block.start, block.end + 1); | ||
| 167 | } | ||
| 168 | } | ||
| 169 | insert_block(current_block, current_label); | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | case CompileDepth::DecompileBackwards: | ||
| 173 | case CompileDepth::FullDecompile: { | ||
| 174 | program_manager = std::move(shader_info.manager); | ||
| 175 | disable_flow_stack = true; | ||
| 176 | decompiled = true; | ||
| 177 | ASTDecoder decoder{*this}; | ||
| 178 | ASTNode program = GetASTProgram(); | ||
| 179 | decoder.Visit(program); | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | default: | ||
| 183 | LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!"); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case CompileDepth::BruteForce: { | ||
| 186 | const auto shader_end = static_cast<u32>(program_code.size()); | ||
| 187 | coverage_begin = main_offset; | ||
| 188 | coverage_end = shader_end; | ||
| 189 | for (u32 label = main_offset; label < shader_end; ++label) { | ||
| 190 | basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||
| 191 | } | ||
| 192 | break; | ||
| 193 | } | ||
| 194 | } | ||
| 195 | if (settings.depth != shader_info.settings.depth) { | ||
| 196 | LOG_WARNING( | ||
| 197 | HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"", | ||
| 198 | CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth)); | ||
| 199 | } | ||
| 200 | } | ||
| 201 | |||
| 202 | NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) { | ||
| 203 | NodeBlock basic_block; | ||
| 204 | DecodeRangeInner(basic_block, begin, end); | ||
| 205 | return basic_block; | ||
| 206 | } | ||
| 207 | |||
| 208 | void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) { | ||
| 209 | for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) { | ||
| 210 | pc = DecodeInstr(bb, pc); | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | ||
| 215 | const auto apply_conditions = [&](const Condition& cond, Node n) -> Node { | ||
| 216 | Node result = n; | ||
| 217 | if (cond.cc != ConditionCode::T) { | ||
| 218 | result = Conditional(GetConditionCode(cond.cc), {result}); | ||
| 219 | } | ||
| 220 | if (cond.predicate != Pred::UnusedIndex) { | ||
| 221 | u32 pred = static_cast<u32>(cond.predicate); | ||
| 222 | const bool is_neg = pred > 7; | ||
| 223 | if (is_neg) { | ||
| 224 | pred -= 8; | ||
| 225 | } | ||
| 226 | result = Conditional(GetPredicate(pred, is_neg), {result}); | ||
| 227 | } | ||
| 228 | return result; | ||
| 229 | }; | ||
| 230 | if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||
| 231 | auto branch = std::get_if<SingleBranch>(block.branch.get()); | ||
| 232 | if (branch->address < 0) { | ||
| 233 | if (branch->kill) { | ||
| 234 | Node n = Operation(OperationCode::Discard); | ||
| 235 | n = apply_conditions(branch->condition, n); | ||
| 236 | bb.push_back(n); | ||
| 237 | global_code.push_back(n); | ||
| 238 | return; | ||
| 239 | } | ||
| 240 | Node n = Operation(OperationCode::Exit); | ||
| 241 | n = apply_conditions(branch->condition, n); | ||
| 242 | bb.push_back(n); | ||
| 243 | global_code.push_back(n); | ||
| 244 | return; | ||
| 245 | } | ||
| 246 | Node n = Operation(OperationCode::Branch, Immediate(branch->address)); | ||
| 247 | n = apply_conditions(branch->condition, n); | ||
| 248 | bb.push_back(n); | ||
| 249 | global_code.push_back(n); | ||
| 250 | return; | ||
| 251 | } | ||
| 252 | auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||
| 253 | Node op_a = GetRegister(multi_branch->gpr); | ||
| 254 | for (auto& branch_case : multi_branch->branches) { | ||
| 255 | Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); | ||
| 256 | Node op_b = Immediate(branch_case.cmp_value); | ||
| 257 | Node condition = | ||
| 258 | GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false, op_a, op_b); | ||
| 259 | auto result = Conditional(condition, {n}); | ||
| 260 | bb.push_back(result); | ||
| 261 | global_code.push_back(result); | ||
| 262 | } | ||
| 263 | } | ||
| 264 | |||
| 265 | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||
| 266 | // Ignore sched instructions when generating code. | ||
| 267 | if (IsSchedInstruction(pc, main_offset)) { | ||
| 268 | return pc + 1; | ||
| 269 | } | ||
| 270 | |||
| 271 | const Instruction instr = {program_code[pc]}; | ||
| 272 | const auto opcode = OpCode::Decode(instr); | ||
| 273 | const u32 nv_address = ConvertAddressToNvidiaSpace(pc); | ||
| 274 | |||
| 275 | // Decoding failure | ||
| 276 | if (!opcode) { | ||
| 277 | UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value); | ||
| 278 | bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})", | ||
| 279 | nv_address, instr.value))); | ||
| 280 | return pc + 1; | ||
| 281 | } | ||
| 282 | |||
| 283 | bb.push_back(Comment( | ||
| 284 | fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value))); | ||
| 285 | |||
| 286 | using Tegra::Shader::Pred; | ||
| 287 | UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute, | ||
| 288 | "NeverExecute predicate not implemented"); | ||
| 289 | |||
| 290 | static const std::map<OpCode::Type, u32 (ShaderIR::*)(NodeBlock&, u32)> decoders = { | ||
| 291 | {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic}, | ||
| 292 | {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate}, | ||
| 293 | {OpCode::Type::Bfe, &ShaderIR::DecodeBfe}, | ||
| 294 | {OpCode::Type::Bfi, &ShaderIR::DecodeBfi}, | ||
| 295 | {OpCode::Type::Shift, &ShaderIR::DecodeShift}, | ||
| 296 | {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger}, | ||
| 297 | {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate}, | ||
| 298 | {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf}, | ||
| 299 | {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate}, | ||
| 300 | {OpCode::Type::Ffma, &ShaderIR::DecodeFfma}, | ||
| 301 | {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, | ||
| 302 | {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, | ||
| 303 | {OpCode::Type::Warp, &ShaderIR::DecodeWarp}, | ||
| 304 | {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, | ||
| 305 | {OpCode::Type::Texture, &ShaderIR::DecodeTexture}, | ||
| 306 | {OpCode::Type::Image, &ShaderIR::DecodeImage}, | ||
| 307 | {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, | ||
| 308 | {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, | ||
| 309 | {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, | ||
| 310 | {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister}, | ||
| 311 | {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate}, | ||
| 312 | {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate}, | ||
| 313 | {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet}, | ||
| 314 | {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet}, | ||
| 315 | {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet}, | ||
| 316 | {OpCode::Type::Video, &ShaderIR::DecodeVideo}, | ||
| 317 | {OpCode::Type::Xmad, &ShaderIR::DecodeXmad}, | ||
| 318 | }; | ||
| 319 | |||
| 320 | std::vector<Node> tmp_block; | ||
| 321 | if (const auto decoder = decoders.find(opcode->get().GetType()); decoder != decoders.end()) { | ||
| 322 | pc = (this->*decoder->second)(tmp_block, pc); | ||
| 323 | } else { | ||
| 324 | pc = DecodeOther(tmp_block, pc); | ||
| 325 | } | ||
| 326 | |||
| 327 | // Some instructions (like SSY) don't have a predicate field; they are always | ||
| 328 | // executed unconditionally. | ||
| 329 | const bool can_be_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()); | ||
| 330 | const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||
| 331 | |||
| 332 | if (can_be_predicated && pred_index != static_cast<u32>(Pred::UnusedIndex)) { | ||
| 333 | const Node conditional = | ||
| 334 | Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(tmp_block)); | ||
| 335 | global_code.push_back(conditional); | ||
| 336 | bb.push_back(conditional); | ||
| 337 | } else { | ||
| 338 | for (auto& node : tmp_block) { | ||
| 339 | global_code.push_back(node); | ||
| 340 | bb.push_back(node); | ||
| 341 | } | ||
| 342 | } | ||
| 343 | |||
| 344 | return pc + 1; | ||
| 345 | } | ||
| 346 | |||
| 347 | void ShaderIR::PostDecode() { | ||
| 348 | // Deduce texture handler size if needed | ||
| 349 | auto gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 350 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||
| 351 | // Deduce Indexed Samplers | ||
| 352 | if (!uses_indexed_samplers) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | for (auto& sampler : used_samplers) { | ||
| 356 | if (!sampler.is_indexed) { | ||
| 357 | continue; | ||
| 358 | } | ||
| 359 | if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||
| 360 | sampler.size = *size; | ||
| 361 | } else { | ||
| 362 | LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||
| 363 | sampler.size = 1; | ||
| 364 | } | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | } // namespace VideoCommon::Shader | ||
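The indexed-sampler size deduction above is plain offset arithmetic; a standalone restatement (assumptions: u32 and std::optional available as in the file above, sampler offsets counted in words, handler size in bytes):

    std::optional<u32> DeduceIndexedSamplerSize(u32 base_offset, u32 next_offset,
                                                u32 handler_size) {
        if (next_offset <= base_offset || handler_size == 0) {
            return std::nullopt;
        }
        return ((next_offset - base_offset) * 4) / handler_size;
    }
    // e.g. next bound sampler 16 words past the indexed one, 8-byte handlers:
    // DeduceIndexedSamplerSize(8, 24, 8) == 8 entries.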
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp deleted file mode 100644 index 15eb700e7..000000000 --- a/src/video_core/shader/decode/arithmetic.cpp +++ /dev/null | |||
| @@ -1,166 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::SubOp; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | Node op_a = GetRegister(instr.gpr8); | ||
| 23 | |||
| 24 | Node op_b = [&] { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return GetImmediate19(instr); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::MOV_C: | ||
| 36 | case OpCode::Id::MOV_R: { | ||
| 37 | // MOV has neither 'abs' nor 'neg' bits. | ||
| 38 | SetRegister(bb, instr.gpr0, op_b); | ||
| 39 | break; | ||
| 40 | } | ||
| 41 | case OpCode::Id::FMUL_C: | ||
| 42 | case OpCode::Id::FMUL_R: | ||
| 43 | case OpCode::Id::FMUL_IMM: { | ||
| 44 | // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit. | ||
| 45 | if (instr.fmul.tab5cb8_2 != 0) { | ||
| 46 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented", | ||
| 47 | instr.fmul.tab5cb8_2.Value()); | ||
| 48 | } | ||
| 49 | if (instr.fmul.tab5c68_0 != 1) { | ||
| 50 | LOG_DEBUG(HW_GPU, "FMUL tab5cb8_0({}) is not implemented", | ||
| 51 | instr.fmul.tab5c68_0.Value()); | ||
| 52 | } | ||
| 53 | |||
| 54 | op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b); | ||
| 55 | |||
| 56 | static constexpr std::array FmulPostFactor = { | ||
| 57 | 1.000f, // None | ||
| 58 | 0.500f, // Divide 2 | ||
| 59 | 0.250f, // Divide 4 | ||
| 60 | 0.125f, // Divide 8 | ||
| 61 | 8.000f, // Mul 8 | ||
| 62 | 4.000f, // Mul 4 | ||
| 63 | 2.000f, // Mul 2 | ||
| 64 | }; | ||
| 65 | |||
| 66 | if (instr.fmul.postfactor != 0) { | ||
| 67 | op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a, | ||
| 68 | Immediate(FmulPostFactor[instr.fmul.postfactor])); | ||
| 69 | } | ||
| 70 | |||
| 71 | // TODO(Rodrigo): Should precise be used when there's a postfactor? | ||
| 72 | Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b); | ||
| 73 | |||
| 74 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 75 | |||
| 76 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 77 | SetRegister(bb, instr.gpr0, value); | ||
| 78 | break; | ||
| 79 | } | ||
| 80 | case OpCode::Id::FADD_C: | ||
| 81 | case OpCode::Id::FADD_R: | ||
| 82 | case OpCode::Id::FADD_IMM: { | ||
| 83 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 84 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 85 | |||
| 86 | Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 87 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 88 | |||
| 89 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 90 | SetRegister(bb, instr.gpr0, value); | ||
| 91 | break; | ||
| 92 | } | ||
| 93 | case OpCode::Id::MUFU: { | ||
| 94 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 95 | |||
| 96 | Node value = [&]() { | ||
| 97 | switch (instr.sub_op) { | ||
| 98 | case SubOp::Cos: | ||
| 99 | return Operation(OperationCode::FCos, PRECISE, op_a); | ||
| 100 | case SubOp::Sin: | ||
| 101 | return Operation(OperationCode::FSin, PRECISE, op_a); | ||
| 102 | case SubOp::Ex2: | ||
| 103 | return Operation(OperationCode::FExp2, PRECISE, op_a); | ||
| 104 | case SubOp::Lg2: | ||
| 105 | return Operation(OperationCode::FLog2, PRECISE, op_a); | ||
| 106 | case SubOp::Rcp: | ||
| 107 | return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a); | ||
| 108 | case SubOp::Rsq: | ||
| 109 | return Operation(OperationCode::FInverseSqrt, PRECISE, op_a); | ||
| 110 | case SubOp::Sqrt: | ||
| 111 | return Operation(OperationCode::FSqrt, PRECISE, op_a); | ||
| 112 | default: | ||
| 113 | UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value()); | ||
| 114 | return Immediate(0); | ||
| 115 | } | ||
| 116 | }(); | ||
| 117 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 118 | |||
| 119 | SetRegister(bb, instr.gpr0, value); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::FMNMX_C: | ||
| 123 | case OpCode::Id::FMNMX_R: | ||
| 124 | case OpCode::Id::FMNMX_IMM: { | ||
| 125 | op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a); | ||
| 126 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 127 | |||
| 128 | const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0); | ||
| 129 | |||
| 130 | const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b); | ||
| 131 | const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b); | ||
| 132 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 133 | |||
| 134 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | break; | ||
| 137 | } | ||
| 138 | case OpCode::Id::FCMP_RR: | ||
| 139 | case OpCode::Id::FCMP_RC: | ||
| 140 | case OpCode::Id::FCMP_IMMR: { | ||
| 141 | UNIMPLEMENTED_IF(instr.fcmp.ftz == 0); | ||
| 142 | Node op_c = GetRegister(instr.gpr39); | ||
| 143 | Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f)); | ||
| 144 | SetRegister( | ||
| 145 | bb, instr.gpr0, | ||
| 146 | Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b))); | ||
| 147 | break; | ||
| 148 | } | ||
| 149 | case OpCode::Id::RRO_C: | ||
| 150 | case OpCode::Id::RRO_R: | ||
| 151 | case OpCode::Id::RRO_IMM: { | ||
| 152 | LOG_DEBUG(HW_GPU, "(STUBBED) RRO used"); | ||
| 153 | |||
| 154 | // Currently RRO is only implemented as a register move. | ||
| 155 | op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b); | ||
| 156 | SetRegister(bb, instr.gpr0, op_b); | ||
| 157 | break; | ||
| 158 | } | ||
| 159 | default: | ||
| 160 | UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName()); | ||
| 161 | } | ||
| 162 | |||
| 163 | return pc; | ||
| 164 | } | ||
| 165 | |||
| 166 | } // namespace VideoCommon::Shader | ||
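
Note: for reference, a minimal standalone sketch of the FMUL postfactor semantics decoded above. The 3-bit postfactor field selects a scale from the table, and the scale is applied to the first operand before the main multiply, matching the IR emitted in the FMUL case. The function name and scalar model are illustrative only, not yuzu API.

    #include <array>

    constexpr std::array<float, 7> kFmulPostFactor = {
        1.000f, // None
        0.500f, // Divide 2
        0.250f, // Divide 4
        0.125f, // Divide 8
        8.000f, // Mul 8
        4.000f, // Mul 4
        2.000f, // Mul 2
    };

    // Scalar model of the IR above: scale op_a first, then multiply by op_b.
    inline float FmulWithPostfactor(float a, float b, unsigned postfactor) {
        return (a * kFmulPostFactor[postfactor]) * b;
    }
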
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp deleted file mode 100644 index 88103fede..000000000 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ /dev/null | |||
| @@ -1,101 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::HalfType; | ||
| 15 | using Tegra::Shader::Instruction; | ||
| 16 | using Tegra::Shader::OpCode; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | bool negate_a = false; | ||
| 23 | bool negate_b = false; | ||
| 24 | bool absolute_a = false; | ||
| 25 | bool absolute_b = false; | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::HADD2_R: | ||
| 29 | if (instr.alu_half.ftz == 0) { | ||
| 30 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 31 | } | ||
| 32 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 33 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 34 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 35 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 36 | break; | ||
| 37 | case OpCode::Id::HADD2_C: | ||
| 38 | if (instr.alu_half.ftz == 0) { | ||
| 39 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 40 | } | ||
| 41 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 42 | negate_b = ((instr.value >> 56) & 1) != 0; | ||
| 43 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 44 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HMUL2_R: | ||
| 47 | negate_a = ((instr.value >> 43) & 1) != 0; | ||
| 48 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 49 | absolute_b = ((instr.value >> 30) & 1) != 0; | ||
| 50 | break; | ||
| 51 | case OpCode::Id::HMUL2_C: | ||
| 52 | negate_b = ((instr.value >> 31) & 1) != 0; | ||
| 53 | absolute_a = ((instr.value >> 44) & 1) != 0; | ||
| 54 | absolute_b = ((instr.value >> 54) & 1) != 0; | ||
| 55 | break; | ||
| 56 | default: | ||
| 57 | UNREACHABLE(); | ||
| 58 | break; | ||
| 59 | } | ||
| 60 | |||
| 61 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); | ||
| 62 | op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a); | ||
| 63 | |||
| 64 | auto [type_b, op_b] = [this, instr, opcode]() -> std::pair<HalfType, Node> { | ||
| 65 | switch (opcode->get().GetId()) { | ||
| 66 | case OpCode::Id::HADD2_C: | ||
| 67 | case OpCode::Id::HMUL2_C: | ||
| 68 | return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 69 | case OpCode::Id::HADD2_R: | ||
| 70 | case OpCode::Id::HMUL2_R: | ||
| 71 | return {instr.alu_half.type_b, GetRegister(instr.gpr20)}; | ||
| 72 | default: | ||
| 73 | UNREACHABLE(); | ||
| 74 | return {HalfType::F32, Immediate(0)}; | ||
| 75 | } | ||
| 76 | }(); | ||
| 77 | op_b = UnpackHalfFloat(op_b, type_b); | ||
| 78 | op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b); | ||
| 79 | |||
| 80 | Node value = [this, opcode, op_a, op_b = op_b] { // op_b rebound: structured bindings cannot be captured before C++20 | ||
| 81 | switch (opcode->get().GetId()) { | ||
| 82 | case OpCode::Id::HADD2_C: | ||
| 83 | case OpCode::Id::HADD2_R: | ||
| 84 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 85 | case OpCode::Id::HMUL2_C: | ||
| 86 | case OpCode::Id::HMUL2_R: | ||
| 87 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 88 | default: | ||
| 89 | UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName()); | ||
| 90 | return Immediate(0); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | value = GetSaturatedHalfFloat(value, instr.alu_half.saturate); | ||
| 94 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge); | ||
| 95 | |||
| 96 | SetRegister(bb, instr.gpr0, value); | ||
| 97 | |||
| 98 | return pc; | ||
| 99 | } | ||
| 100 | |||
| 101 | } // namespace VideoCommon::Shader | ||
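
Note: a minimal sketch of the packed-half layout this decoder operates on: HADD2/HMUL2 treat one 32-bit register as two fp16 lanes, which the IR above unpacks, operates on, and merges back into the destination. UnpackHalf2 and PackHalf2 are hypothetical names for illustration, not yuzu API.

    #include <cstdint>
    #include <utility>

    // Split a 32-bit register into its low and high fp16 bit patterns.
    inline std::pair<uint16_t, uint16_t> UnpackHalf2(uint32_t reg) {
        return {static_cast<uint16_t>(reg & 0xFFFF), static_cast<uint16_t>(reg >> 16)};
    }

    // Recombine two fp16 bit patterns into one 32-bit register value.
    inline uint32_t PackHalf2(uint16_t low, uint16_t high) {
        return static_cast<uint32_t>(low) | (static_cast<uint32_t>(high) << 16);
    }
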
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp deleted file mode 100644 index d179b9873..000000000 --- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp +++ /dev/null | |||
| @@ -1,54 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) { | ||
| 22 | if (instr.alu_half_imm.ftz == 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | } else { | ||
| 26 | if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) { | ||
| 27 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 28 | } | ||
| 29 | } | ||
| 30 | |||
| 31 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a); | ||
| 32 | op_a = GetOperandAbsNegHalf(op_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a); | ||
| 33 | |||
| 34 | const Node op_b = UnpackHalfImmediate(instr, true); | ||
| 35 | |||
| 36 | Node value = [&]() { | ||
| 37 | switch (opcode->get().GetId()) { | ||
| 38 | case OpCode::Id::HADD2_IMM: | ||
| 39 | return Operation(OperationCode::HAdd, PRECISE, op_a, op_b); | ||
| 40 | case OpCode::Id::HMUL2_IMM: | ||
| 41 | return Operation(OperationCode::HMul, PRECISE, op_a, op_b); | ||
| 42 | default: | ||
| 43 | UNREACHABLE(); | ||
| 44 | return Immediate(0); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate); | ||
| 49 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge); | ||
| 50 | SetRegister(bb, instr.gpr0, value); | ||
| 51 | return pc; | ||
| 52 | } | ||
| 53 | |||
| 54 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/arithmetic_immediate.cpp b/src/video_core/shader/decode/arithmetic_immediate.cpp deleted file mode 100644 index f1875967c..000000000 --- a/src/video_core/shader/decode/arithmetic_immediate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | switch (opcode->get().GetId()) { | ||
| 21 | case OpCode::Id::MOV32_IMM: { | ||
| 22 | SetRegister(bb, instr.gpr0, GetImmediate32(instr)); | ||
| 23 | break; | ||
| 24 | } | ||
| 25 | case OpCode::Id::FMUL32_IMM: { | ||
| 26 | Node value = | ||
| 27 | Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr)); | ||
| 28 | value = GetSaturatedFloat(value, instr.fmul32.saturate); | ||
| 29 | |||
| 30 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 31 | SetRegister(bb, instr.gpr0, value); | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | case OpCode::Id::FADD32I: { | ||
| 35 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a, | ||
| 36 | instr.fadd32i.negate_a); | ||
| 37 | const Node op_b = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b, | ||
| 38 | instr.fadd32i.negate_b); | ||
| 39 | |||
| 40 | const Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b); | ||
| 41 | SetInternalFlagsFromFloat(bb, value, instr.op_32.generates_cc); | ||
| 42 | SetRegister(bb, instr.gpr0, value); | ||
| 43 | break; | ||
| 44 | } | ||
| 45 | default: | ||
| 46 | UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}", | ||
| 47 | opcode->get().GetName()); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
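
Note: the abs/neg operand modifiers used by FADD32I above apply the absolute value first and the negation second, as in the GetOperandAbsNegFloat helper. A scalar sketch with an illustrative name, not yuzu API:

    #include <cmath>

    inline float ApplyAbsNeg(float value, bool absolute, bool negate) {
        if (absolute) {
            value = std::fabs(value);
        }
        if (negate) {
            value = -value;
        }
        return value;
    }

    // e.g. ApplyAbsNeg(-2.0f, true, true) == -2.0f: abs yields 2.0f, then negation flips it.
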
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp deleted file mode 100644 index 7b5bb7003..000000000 --- a/src/video_core/shader/decode/arithmetic_integer.cpp +++ /dev/null | |||
| @@ -1,375 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::IAdd3Height; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::Register; | ||
| 18 | |||
| 19 | u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) { | ||
| 20 | const Instruction instr = {program_code[pc]}; | ||
| 21 | const auto opcode = OpCode::Decode(instr); | ||
| 22 | |||
| 23 | Node op_a = GetRegister(instr.gpr8); | ||
| 24 | Node op_b = [&]() { | ||
| 25 | if (instr.is_b_imm) { | ||
| 26 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 27 | } else if (instr.is_b_gpr) { | ||
| 28 | return GetRegister(instr.gpr20); | ||
| 29 | } else { | ||
| 30 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 31 | } | ||
| 32 | }(); | ||
| 33 | |||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::IADD_C: | ||
| 36 | case OpCode::Id::IADD_R: | ||
| 37 | case OpCode::Id::IADD_IMM: { | ||
| 38 | UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT"); | ||
| 39 | UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC"); | ||
| 40 | |||
| 41 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 42 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 43 | |||
| 44 | Node value = Operation(OperationCode::UAdd, op_a, op_b); | ||
| 45 | |||
| 46 | if (instr.iadd.x) { | ||
| 47 | Node carry = GetInternalFlag(InternalFlag::Carry); | ||
| 48 | Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0)); | ||
| 49 | value = Operation(OperationCode::UAdd, std::move(value), std::move(x)); | ||
| 50 | } | ||
| 51 | |||
| 52 | if (instr.generates_cc) { | ||
| 53 | const Node i0 = Immediate(0); | ||
| 54 | |||
| 55 | Node zero = Operation(OperationCode::LogicalIEqual, value, i0); | ||
| 56 | Node sign = Operation(OperationCode::LogicalILessThan, value, i0); | ||
| 57 | Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b); | ||
| 58 | |||
| 59 | Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0); | ||
| 60 | Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0); | ||
| 61 | Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b)); | ||
| 62 | Node overflow = Operation(OperationCode::LogicalAnd, pos, sign); | ||
| 63 | |||
| 64 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zero)); | ||
| 65 | SetInternalFlag(bb, InternalFlag::Sign, std::move(sign)); | ||
| 66 | SetInternalFlag(bb, InternalFlag::Carry, std::move(carry)); | ||
| 67 | SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow)); | ||
| 68 | } | ||
| 69 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::IADD3_C: | ||
| 73 | case OpCode::Id::IADD3_R: | ||
| 74 | case OpCode::Id::IADD3_IMM: { | ||
| 75 | Node op_c = GetRegister(instr.gpr39); | ||
| 76 | |||
| 77 | const auto ApplyHeight = [&](IAdd3Height height, Node value) { | ||
| 78 | switch (height) { | ||
| 79 | case IAdd3Height::None: | ||
| 80 | return value; | ||
| 81 | case IAdd3Height::LowerHalfWord: | ||
| 82 | return BitfieldExtract(value, 0, 16); | ||
| 83 | case IAdd3Height::UpperHalfWord: | ||
| 84 | return BitfieldExtract(value, 16, 16); | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height); | ||
| 87 | return Immediate(0); | ||
| 88 | } | ||
| 89 | }; | ||
| 90 | |||
| 91 | if (opcode->get().GetId() == OpCode::Id::IADD3_R) { | ||
| 92 | op_a = ApplyHeight(instr.iadd3.height_a, op_a); | ||
| 93 | op_b = ApplyHeight(instr.iadd3.height_b, op_b); | ||
| 94 | op_c = ApplyHeight(instr.iadd3.height_c, op_c); | ||
| 95 | } | ||
| 96 | |||
| 97 | op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true); | ||
| 98 | op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true); | ||
| 99 | op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true); | ||
| 100 | |||
| 101 | const Node value = [&] { | ||
| 102 | Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b); | ||
| 103 | if (opcode->get().GetId() != OpCode::Id::IADD3_R) { | ||
| 104 | return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c); | ||
| 105 | } | ||
| 106 | const Node shifted = [&] { | ||
| 107 | switch (instr.iadd3.mode) { | ||
| 108 | case Tegra::Shader::IAdd3Mode::RightShift: | ||
| 109 | // TODO(tech4me): According to | ||
| 110 | // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3 | ||
| 111 | // the addition between op_a and op_b should be done in uint33; more | ||
| 112 | // investigation is required. | ||
| 113 | return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab, | ||
| 114 | Immediate(16)); | ||
| 115 | case Tegra::Shader::IAdd3Mode::LeftShift: | ||
| 116 | return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab, | ||
| 117 | Immediate(16)); | ||
| 118 | default: | ||
| 119 | return add_ab; | ||
| 120 | } | ||
| 121 | }(); | ||
| 122 | return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c); | ||
| 123 | }(); | ||
| 124 | |||
| 125 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 126 | SetRegister(bb, instr.gpr0, value); | ||
| 127 | break; | ||
| 128 | } | ||
| 129 | case OpCode::Id::ISCADD_C: | ||
| 130 | case OpCode::Id::ISCADD_R: | ||
| 131 | case OpCode::Id::ISCADD_IMM: { | ||
| 132 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 133 | "Condition codes generation in ISCADD is not implemented"); | ||
| 134 | |||
| 135 | op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true); | ||
| 136 | op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true); | ||
| 137 | |||
| 138 | const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount)); | ||
| 139 | const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift); | ||
| 140 | const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b); | ||
| 141 | |||
| 142 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 143 | SetRegister(bb, instr.gpr0, value); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | case OpCode::Id::POPC_C: | ||
| 147 | case OpCode::Id::POPC_R: | ||
| 148 | case OpCode::Id::POPC_IMM: { | ||
| 149 | if (instr.popc.invert) { | ||
| 150 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 151 | } | ||
| 152 | const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b); | ||
| 153 | SetRegister(bb, instr.gpr0, value); | ||
| 154 | break; | ||
| 155 | } | ||
| 156 | case OpCode::Id::FLO_R: | ||
| 157 | case OpCode::Id::FLO_C: | ||
| 158 | case OpCode::Id::FLO_IMM: { | ||
| 159 | Node value; | ||
| 160 | if (instr.flo.invert) { | ||
| 161 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 162 | } | ||
| 163 | if (instr.flo.is_signed) { | ||
| 164 | value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 165 | } else { | ||
| 166 | value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b)); | ||
| 167 | } | ||
| 168 | if (instr.flo.sh) { | ||
| 169 | value = | ||
| 170 | Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31)); | ||
| 171 | } | ||
| 172 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 173 | break; | ||
| 174 | } | ||
| 175 | case OpCode::Id::SEL_C: | ||
| 176 | case OpCode::Id::SEL_R: | ||
| 177 | case OpCode::Id::SEL_IMM: { | ||
| 178 | const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0); | ||
| 179 | const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b); | ||
| 180 | SetRegister(bb, instr.gpr0, value); | ||
| 181 | break; | ||
| 182 | } | ||
| 183 | case OpCode::Id::ICMP_CR: | ||
| 184 | case OpCode::Id::ICMP_R: | ||
| 185 | case OpCode::Id::ICMP_RC: | ||
| 186 | case OpCode::Id::ICMP_IMM: { | ||
| 187 | const Node zero = Immediate(0); | ||
| 188 | |||
| 189 | const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> { | ||
| 190 | switch (opcode->get().GetId()) { | ||
| 191 | case OpCode::Id::ICMP_CR: | ||
| 192 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 193 | GetRegister(instr.gpr39)}; | ||
| 194 | case OpCode::Id::ICMP_R: | ||
| 195 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 196 | case OpCode::Id::ICMP_RC: | ||
| 197 | return {GetRegister(instr.gpr39), | ||
| 198 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 199 | case OpCode::Id::ICMP_IMM: | ||
| 200 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 201 | default: | ||
| 202 | UNREACHABLE(); | ||
| 203 | return {zero, zero}; | ||
| 204 | } | ||
| 205 | }(); | ||
| 206 | const Node op_lhs = GetRegister(instr.gpr8); | ||
| 207 | const Node comparison = | ||
| 208 | GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero); | ||
| 209 | SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs)); | ||
| 210 | break; | ||
| 211 | } | ||
| 212 | case OpCode::Id::LOP_C: | ||
| 213 | case OpCode::Id::LOP_R: | ||
| 214 | case OpCode::Id::LOP_IMM: { | ||
| 215 | if (instr.alu.lop.invert_a) | ||
| 216 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); | ||
| 217 | if (instr.alu.lop.invert_b) | ||
| 218 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); | ||
| 219 | |||
| 220 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b, | ||
| 221 | instr.alu.lop.pred_result_mode, instr.alu.lop.pred48, | ||
| 222 | instr.generates_cc); | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | case OpCode::Id::LOP3_C: | ||
| 226 | case OpCode::Id::LOP3_R: | ||
| 227 | case OpCode::Id::LOP3_IMM: { | ||
| 228 | const Node op_c = GetRegister(instr.gpr39); | ||
| 229 | const Node lut = [&]() { | ||
| 230 | if (opcode->get().GetId() == OpCode::Id::LOP3_R) { | ||
| 231 | return Immediate(instr.alu.lop3.GetImmLut28()); | ||
| 232 | } else { | ||
| 233 | return Immediate(instr.alu.lop3.GetImmLut48()); | ||
| 234 | } | ||
| 235 | }(); | ||
| 236 | |||
| 237 | WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IMNMX_C: | ||
| 241 | case OpCode::Id::IMNMX_R: | ||
| 242 | case OpCode::Id::IMNMX_IMM: { | ||
| 243 | UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None); | ||
| 244 | |||
| 245 | const bool is_signed = instr.imnmx.is_signed; | ||
| 246 | |||
| 247 | const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0); | ||
| 248 | const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b); | ||
| 249 | const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b); | ||
| 250 | const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max); | ||
| 251 | |||
| 252 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 253 | SetRegister(bb, instr.gpr0, value); | ||
| 254 | break; | ||
| 255 | } | ||
| 256 | case OpCode::Id::LEA_R2: | ||
| 257 | case OpCode::Id::LEA_R1: | ||
| 258 | case OpCode::Id::LEA_IMM: | ||
| 259 | case OpCode::Id::LEA_RZ: | ||
| 260 | case OpCode::Id::LEA_HI: { | ||
| 261 | auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> { | ||
| 262 | switch (opcode->get().GetId()) { | ||
| 263 | case OpCode::Id::LEA_R2: { | ||
| 264 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39), | ||
| 265 | Immediate(static_cast<u32>(instr.lea.r2.entry_a))}; | ||
| 266 | } | ||
| 267 | case OpCode::Id::LEA_R1: { | ||
| 268 | const bool neg = instr.lea.r1.neg != 0; | ||
| 269 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 270 | GetRegister(instr.gpr20), | ||
| 271 | Immediate(static_cast<u32>(instr.lea.r1.entry_a))}; | ||
| 272 | } | ||
| 273 | case OpCode::Id::LEA_IMM: { | ||
| 274 | const bool neg = instr.lea.imm.neg != 0; | ||
| 275 | return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 276 | Immediate(static_cast<u32>(instr.lea.imm.entry_a)), | ||
| 277 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 278 | } | ||
| 279 | case OpCode::Id::LEA_RZ: { | ||
| 280 | const bool neg = instr.lea.rz.neg != 0; | ||
| 281 | return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset), | ||
| 282 | GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true), | ||
| 283 | Immediate(static_cast<u32>(instr.lea.rz.entry_a))}; | ||
| 284 | } | ||
| 285 | case OpCode::Id::LEA_HI: | ||
| 286 | default: | ||
| 287 | UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName()); | ||
| 288 | |||
| 289 | return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8), | ||
| 290 | Immediate(static_cast<u32>(instr.lea.imm.entry_b))}; | ||
| 291 | } | ||
| 292 | }(); | ||
| 293 | |||
| 294 | UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex), | ||
| 295 | "Unhandled LEA Predicate"); | ||
| 296 | |||
| 297 | Node value = | ||
| 298 | Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_)); | ||
| 299 | value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value)); | ||
| 300 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 301 | |||
| 302 | break; | ||
| 303 | } | ||
| 304 | default: | ||
| 305 | UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName()); | ||
| 306 | } | ||
| 307 | |||
| 308 | return pc; | ||
| 309 | } | ||
| 310 | |||
| 311 | void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c, | ||
| 312 | Node imm_lut, bool sets_cc) { | ||
| 313 | const Node lop3_fast = [&](const Node na, const Node nb, const Node nc, const Node ttbl) { | ||
| 314 | Node value = Immediate(0); | ||
| 315 | const ImmediateNode imm = std::get<ImmediateNode>(*ttbl); | ||
| 316 | if (imm.GetValue() & 0x01) { | ||
| 317 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 318 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 319 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 320 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 321 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 322 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 323 | } | ||
| 324 | if (imm.GetValue() & 0x02) { | ||
| 325 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 326 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 327 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, b); | ||
| 328 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 329 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 330 | } | ||
| 331 | if (imm.GetValue() & 0x04) { | ||
| 332 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 333 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 334 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 335 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 336 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 337 | } | ||
| 338 | if (imm.GetValue() & 0x08) { | ||
| 339 | const Node a = Operation(OperationCode::IBitwiseNot, na); | ||
| 340 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, a, nb); | ||
| 341 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 342 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 343 | } | ||
| 344 | if (imm.GetValue() & 0x10) { | ||
| 345 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 346 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 347 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 348 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 349 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 350 | } | ||
| 351 | if (imm.GetValue() & 0x20) { | ||
| 352 | const Node b = Operation(OperationCode::IBitwiseNot, nb); | ||
| 353 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, b); | ||
| 354 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 355 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 356 | } | ||
| 357 | if (imm.GetValue() & 0x40) { | ||
| 358 | const Node c = Operation(OperationCode::IBitwiseNot, nc); | ||
| 359 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 360 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, c); | ||
| 361 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 362 | } | ||
| 363 | if (imm.GetValue() & 0x80) { | ||
| 364 | Node r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, na, nb); | ||
| 365 | r = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, r, nc); | ||
| 366 | value = Operation(OperationCode::IBitwiseOr, value, r); | ||
| 367 | } | ||
| 368 | return value; | ||
| 369 | }(op_a, op_b, op_c, imm_lut); | ||
| 370 | |||
| 371 | SetInternalFlagsFromInteger(bb, lop3_fast, sets_cc); | ||
| 372 | SetRegister(bb, dest, lop3_fast); | ||
| 373 | } | ||
| 374 | |||
| 375 | } // namespace VideoCommon::Shader | ||
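
Note: the per-bit expansion in WriteLop3Instruction above is LOP3's 8-entry truth table: bit (a << 2 | b << 1 | c) of the immediate LUT decides the output for each input combination. A compact scalar equivalent (illustrative, not yuzu API):

    #include <cstdint>

    inline uint32_t Lop3(uint32_t a, uint32_t b, uint32_t c, uint8_t lut) {
        uint32_t value = 0;
        for (unsigned i = 0; i < 8; ++i) {
            if (((lut >> i) & 1) == 0) {
                continue;
            }
            // Select each operand or its complement according to the minterm index.
            const uint32_t ta = (i & 4) ? a : ~a;
            const uint32_t tb = (i & 2) ? b : ~b;
            const uint32_t tc = (i & 1) ? c : ~c;
            value |= ta & tb & tc;
        }
        return value;
    }

    // Example: lut = 0xE8 selects minterms {011, 101, 110, 111}, i.e. maj(a, b, c).
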
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp deleted file mode 100644 index 73580277a..000000000 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ /dev/null | |||
| @@ -1,99 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::LogicOperation; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::PredicateResultMode; | ||
| 18 | using Tegra::Shader::Register; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | Node op_a = GetRegister(instr.gpr8); | ||
| 25 | Node op_b = Immediate(static_cast<s32>(instr.alu.imm20_32)); | ||
| 26 | |||
| 27 | switch (opcode->get().GetId()) { | ||
| 28 | case OpCode::Id::IADD32I: { | ||
| 29 | UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); | ||
| 30 | |||
| 31 | op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); | ||
| 32 | |||
| 33 | Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 34 | |||
| 35 | SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); | ||
| 36 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | case OpCode::Id::LOP32I: { | ||
| 40 | if (instr.alu.lop32i.invert_a) { | ||
| 41 | op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); | ||
| 42 | } | ||
| 43 | |||
| 44 | if (instr.alu.lop32i.invert_b) { | ||
| 45 | op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); | ||
| 46 | } | ||
| 47 | |||
| 48 | WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), | ||
| 49 | std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, | ||
| 50 | instr.op_32.generates_cc != 0); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | default: | ||
| 54 | UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}", | ||
| 55 | opcode->get().GetName()); | ||
| 56 | } | ||
| 57 | |||
| 58 | return pc; | ||
| 59 | } | ||
| 60 | |||
| 61 | void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, | ||
| 62 | Node op_b, PredicateResultMode predicate_mode, Pred predicate, | ||
| 63 | bool sets_cc) { | ||
| 64 | Node result = [&] { | ||
| 65 | switch (logic_op) { | ||
| 66 | case LogicOperation::And: | ||
| 67 | return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 68 | case LogicOperation::Or: | ||
| 69 | return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 70 | case LogicOperation::Xor: | ||
| 71 | return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); | ||
| 72 | case LogicOperation::PassB: | ||
| 73 | return op_b; | ||
| 74 | default: | ||
| 75 | UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op); | ||
| 76 | return Immediate(0); | ||
| 77 | } | ||
| 78 | }(); | ||
| 79 | |||
| 80 | SetInternalFlagsFromInteger(bb, result, sets_cc); | ||
| 81 | SetRegister(bb, dest, result); | ||
| 82 | |||
| 83 | // Write the predicate value depending on the predicate mode. | ||
| 84 | switch (predicate_mode) { | ||
| 85 | case PredicateResultMode::None: | ||
| 86 | // Do nothing. | ||
| 87 | return; | ||
| 88 | case PredicateResultMode::NotZero: { | ||
| 89 | // Set the predicate to true if the result is not zero. | ||
| 90 | Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); | ||
| 91 | SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); | ||
| 92 | break; | ||
| 93 | } | ||
| 94 | default: | ||
| 95 | UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | } // namespace VideoCommon::Shader | ||
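
Note: a scalar sketch of the PredicateResultMode::NotZero path in WriteLogicOperation above: alongside the register write, the predicate receives (result != 0). Names and the struct shape are illustrative, not yuzu API.

    #include <cstdint>

    struct LogicOpResult {
        uint32_t value;  // always written to the destination register
        bool predicate;  // written only when the mode is NotZero
    };

    inline LogicOpResult LogicAndWithPredicate(uint32_t op_a, uint32_t op_b) {
        const uint32_t value = op_a & op_b; // stands in for And/Or/Xor/PassB
        return {value, value != 0};
    }
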
diff --git a/src/video_core/shader/decode/bfe.cpp b/src/video_core/shader/decode/bfe.cpp deleted file mode 100644 index 8e3b46e8e..000000000 --- a/src/video_core/shader/decode/bfe.cpp +++ /dev/null | |||
| @@ -1,77 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | Node op_a = GetRegister(instr.gpr8); | ||
| 21 | Node op_b = [&] { | ||
| 22 | switch (opcode->get().GetId()) { | ||
| 23 | case OpCode::Id::BFE_R: | ||
| 24 | return GetRegister(instr.gpr20); | ||
| 25 | case OpCode::Id::BFE_C: | ||
| 26 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 27 | case OpCode::Id::BFE_IMM: | ||
| 28 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 29 | default: | ||
| 30 | UNREACHABLE(); | ||
| 31 | return Immediate(0); | ||
| 32 | } | ||
| 33 | }(); | ||
| 34 | |||
| 35 | UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition code generation in BFE is not implemented"); | ||
| 36 | |||
| 37 | const bool is_signed = instr.bfe.is_signed; | ||
| 38 | |||
| 39 | // Reverse the bits using the reverse parallel method from | ||
| 40 | // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel | ||
| 41 | // Note for later: a faster method may be possible. | ||
| 42 | if (instr.bfe.brev) { | ||
| 43 | const auto swap = [&](u32 s, u32 mask) { | ||
| 44 | Node v1 = | ||
| 45 | SignedOperation(OperationCode::ILogicalShiftRight, is_signed, op_a, Immediate(s)); | ||
| 46 | if (mask != 0) { | ||
| 47 | v1 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v1), | ||
| 48 | Immediate(mask)); | ||
| 49 | } | ||
| 50 | Node v2 = op_a; | ||
| 51 | if (mask != 0) { | ||
| 52 | v2 = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(v2), | ||
| 53 | Immediate(mask)); | ||
| 54 | } | ||
| 55 | v2 = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(v2), | ||
| 56 | Immediate(s)); | ||
| 57 | return SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(v1), | ||
| 58 | std::move(v2)); | ||
| 59 | }; | ||
| 60 | op_a = swap(1, 0x55555555U); | ||
| 61 | op_a = swap(2, 0x33333333U); | ||
| 62 | op_a = swap(4, 0x0F0F0F0FU); | ||
| 63 | op_a = swap(8, 0x00FF00FFU); | ||
| 64 | op_a = swap(16, 0); | ||
| 65 | } | ||
| 66 | |||
| 67 | const auto offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 68 | Immediate(0), Immediate(8)); | ||
| 69 | const auto bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_b, | ||
| 70 | Immediate(8), Immediate(8)); | ||
| 71 | auto result = SignedOperation(OperationCode::IBitfieldExtract, is_signed, op_a, offset, bits); | ||
| 72 | SetRegister(bb, instr.gpr0, std::move(result)); | ||
| 73 | |||
| 74 | return pc; | ||
| 75 | } | ||
| 76 | |||
| 77 | } // namespace VideoCommon::Shader | ||
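
Note: the swap() ladder above is the cited reverse parallel method specialized to 32 bits; as a plain function (ignoring the signed-shift distinction the IR preserves through SignedOperation) it reads:

    #include <cstdint>

    inline uint32_t ReverseBits32(uint32_t v) {
        v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1); // swap adjacent bits
        v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2); // swap bit pairs
        v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4); // swap nibbles
        v = ((v >> 8) & 0x00FF00FFu) | ((v & 0x00FF00FFu) << 8); // swap bytes
        v = (v >> 16) | (v << 16);                               // swap half-words
        return v;
    }
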
diff --git a/src/video_core/shader/decode/bfi.cpp b/src/video_core/shader/decode/bfi.cpp deleted file mode 100644 index 70d1c055b..000000000 --- a/src/video_core/shader/decode/bfi.cpp +++ /dev/null | |||
| @@ -1,45 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | const auto [packed_shift, base] = [&]() -> std::pair<Node, Node> { | ||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::BFI_RC: | ||
| 23 | return {GetRegister(instr.gpr39), | ||
| 24 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 25 | case OpCode::Id::BFI_IMM_R: | ||
| 26 | return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)}; | ||
| 27 | default: | ||
| 28 | UNREACHABLE(); | ||
| 29 | return {Immediate(0), Immediate(0)}; | ||
| 30 | } | ||
| 31 | }(); | ||
| 32 | const Node insert = GetRegister(instr.gpr8); | ||
| 33 | const Node offset = BitfieldExtract(packed_shift, 0, 8); | ||
| 34 | const Node bits = BitfieldExtract(packed_shift, 8, 8); | ||
| 35 | |||
| 36 | const Node value = | ||
| 37 | Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits); | ||
| 38 | |||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | SetRegister(bb, instr.gpr0, value); | ||
| 41 | |||
| 42 | return pc; | ||
| 43 | } | ||
| 44 | |||
| 45 | } // namespace VideoCommon::Shader | ||
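
Note: BFI packs the insert offset in bits [0, 8) and the field width in bits [8, 16) of packed_shift, as the two BitfieldExtract calls above show. A scalar model of the insert itself (illustrative, assumes offset + bits <= 32):

    #include <cstdint>

    inline uint32_t BitfieldInsert(uint32_t base, uint32_t insert, uint32_t packed_shift) {
        const uint32_t offset = packed_shift & 0xFF;
        const uint32_t bits = (packed_shift >> 8) & 0xFF;
        if (bits == 0) {
            return base;
        }
        // Build the field mask without shifting by 32, then splice the field in.
        const uint32_t mask = ((bits >= 32 ? 0u : (1u << bits)) - 1u) << offset;
        return (base & ~mask) | ((insert << offset) & mask);
    }
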
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp deleted file mode 100644 index fea7a54df..000000000 --- a/src/video_core/shader/decode/conversion.cpp +++ /dev/null | |||
| @@ -1,321 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <limits> | ||
| 6 | #include <optional> | ||
| 7 | #include <utility> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | ||
| 12 | #include "video_core/shader/node_helper.h" | ||
| 13 | #include "video_core/shader/shader_ir.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::Register; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | constexpr OperationCode GetFloatSelector(u64 selector) { | ||
| 24 | return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1; | ||
| 25 | } | ||
| 26 | |||
| 27 | constexpr u32 SizeInBits(Register::Size size) { | ||
| 28 | switch (size) { | ||
| 29 | case Register::Size::Byte: | ||
| 30 | return 8; | ||
| 31 | case Register::Size::Short: | ||
| 32 | return 16; | ||
| 33 | case Register::Size::Word: | ||
| 34 | return 32; | ||
| 35 | case Register::Size::Long: | ||
| 36 | return 64; | ||
| 37 | } | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
| 41 | constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size, | ||
| 42 | Register::Size dst_size, | ||
| 43 | bool src_signed, | ||
| 44 | bool dst_signed) { | ||
| 45 | const u32 dst_bits = SizeInBits(dst_size); | ||
| 46 | if (src_size == Register::Size::Word && dst_size == Register::Size::Word) { | ||
| 47 | if (src_signed == dst_signed) { | ||
| 48 | return std::nullopt; | ||
| 49 | } | ||
| 50 | return std::make_pair(0, std::numeric_limits<s32>::max()); | ||
| 51 | } | ||
| 52 | if (dst_signed) { | ||
| 53 | // Signed destination: clamp to [-(2^(bits-1)), 2^(bits-1) - 1], e.g. [-128, 127] for a byte | ||
| 54 | return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1); | ||
| 55 | } else { | ||
| 56 | // Unsigned destination | ||
| 57 | if (dst_bits == 32) { | ||
| 58 | // Avoid shifting by 32; that is undefined behavior | ||
| 59 | return std::make_pair(0, s32(std::numeric_limits<u32>::max())); | ||
| 60 | } | ||
| 61 | return std::make_pair(0, (1 << dst_bits) - 1); | ||
| 62 | } | ||
| 63 | } | ||
| 64 | |||
| 65 | } // Anonymous namespace | ||
| 66 | |||
| 67 | u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||
| 68 | const Instruction instr = {program_code[pc]}; | ||
| 69 | const auto opcode = OpCode::Decode(instr); | ||
| 70 | |||
| 71 | switch (opcode->get().GetId()) { | ||
| 72 | case OpCode::Id::I2I_R: | ||
| 73 | case OpCode::Id::I2I_C: | ||
| 74 | case OpCode::Id::I2I_IMM: { | ||
| 75 | const bool src_signed = instr.conversion.is_input_signed; | ||
| 76 | const bool dst_signed = instr.conversion.is_output_signed; | ||
| 77 | const Register::Size src_size = instr.conversion.src_size; | ||
| 78 | const Register::Size dst_size = instr.conversion.dst_size; | ||
| 79 | const u32 selector = static_cast<u32>(instr.conversion.int_src.selector); | ||
| 80 | |||
| 81 | Node value = [this, instr, opcode] { | ||
| 82 | switch (opcode->get().GetId()) { | ||
| 83 | case OpCode::Id::I2I_R: | ||
| 84 | return GetRegister(instr.gpr20); | ||
| 85 | case OpCode::Id::I2I_C: | ||
| 86 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 87 | case OpCode::Id::I2I_IMM: | ||
| 88 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 89 | default: | ||
| 90 | UNREACHABLE(); | ||
| 91 | return Immediate(0); | ||
| 92 | } | ||
| 93 | }(); | ||
| 94 | |||
| 95 | // Ensure the source selector is valid | ||
| 96 | switch (instr.conversion.src_size) { | ||
| 97 | case Register::Size::Byte: | ||
| 98 | break; | ||
| 99 | case Register::Size::Short: | ||
| 100 | ASSERT(selector == 0 || selector == 2); | ||
| 101 | break; | ||
| 102 | default: | ||
| 103 | ASSERT(selector == 0); | ||
| 104 | break; | ||
| 105 | } | ||
| 106 | |||
| 107 | if (src_size != Register::Size::Word || selector != 0) { | ||
| 108 | value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value), | ||
| 109 | Immediate(selector * 8), Immediate(SizeInBits(src_size))); | ||
| 110 | } | ||
| 111 | |||
| 112 | value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a, | ||
| 113 | instr.conversion.negate_a, src_signed); | ||
| 114 | |||
| 115 | if (instr.alu.saturate_d) { | ||
| 116 | if (src_signed && !dst_signed) { | ||
| 117 | Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value, | ||
| 118 | Immediate(1 << (SizeInBits(src_size) - 1))); | ||
| 119 | value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0), | ||
| 120 | std::move(value)); | ||
| 121 | |||
| 122 | // Simplify generated expressions; this can be removed without semantic impact | ||
| 123 | SetTemporary(bb, 0, std::move(value)); | ||
| 124 | value = GetTemporary(0); | ||
| 125 | |||
| 126 | if (dst_size != Register::Size::Word) { | ||
| 127 | const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 128 | Node is_large = | ||
| 129 | Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit); | ||
| 130 | value = Operation(OperationCode::Select, std::move(is_large), limit, | ||
| 131 | std::move(value)); | ||
| 132 | } | ||
| 133 | } else if (const std::optional bounds = | ||
| 134 | IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) { | ||
| 135 | value = SignedOperation(OperationCode::IMax, src_signed, std::move(value), | ||
| 136 | Immediate(bounds->first)); | ||
| 137 | value = SignedOperation(OperationCode::IMin, src_signed, std::move(value), | ||
| 138 | Immediate(bounds->second)); | ||
| 139 | } | ||
| 140 | } else if (dst_size != Register::Size::Word) { | ||
| 141 | // No saturation, we only have to mask the result | ||
| 142 | Node mask = Immediate((1 << SizeInBits(dst_size)) - 1); | ||
| 143 | value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask)); | ||
| 144 | } | ||
| 145 | |||
| 146 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 147 | SetRegister(bb, instr.gpr0, std::move(value)); | ||
| 148 | break; | ||
| 149 | } | ||
| 150 | case OpCode::Id::I2F_R: | ||
| 151 | case OpCode::Id::I2F_C: | ||
| 152 | case OpCode::Id::I2F_IMM: { | ||
| 153 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 154 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 155 | "Condition codes generation in I2F is not implemented"); | ||
| 156 | |||
| 157 | Node value = [&] { | ||
| 158 | switch (opcode->get().GetId()) { | ||
| 159 | case OpCode::Id::I2F_R: | ||
| 160 | return GetRegister(instr.gpr20); | ||
| 161 | case OpCode::Id::I2F_C: | ||
| 162 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 163 | case OpCode::Id::I2F_IMM: | ||
| 164 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 165 | default: | ||
| 166 | UNREACHABLE(); | ||
| 167 | return Immediate(0); | ||
| 168 | } | ||
| 169 | }(); | ||
| 170 | |||
| 171 | const bool input_signed = instr.conversion.is_input_signed; | ||
| 172 | |||
| 173 | if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) { | ||
| 174 | ASSERT(instr.conversion.src_size == Register::Size::Byte || | ||
| 175 | instr.conversion.src_size == Register::Size::Short); | ||
| 176 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 177 | ASSERT(offset == 0 || offset == 2); | ||
| 178 | } | ||
| 179 | value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed, | ||
| 180 | std::move(value), Immediate(offset * 8)); | ||
| 181 | } | ||
| 182 | |||
| 183 | value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed); | ||
| 184 | value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed); | ||
| 185 | value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value); | ||
| 186 | value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||
| 187 | |||
| 188 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 189 | |||
| 190 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 191 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 192 | } | ||
| 193 | |||
| 194 | SetRegister(bb, instr.gpr0, value); | ||
| 195 | break; | ||
| 196 | } | ||
| 197 | case OpCode::Id::F2F_R: | ||
| 198 | case OpCode::Id::F2F_C: | ||
| 199 | case OpCode::Id::F2F_IMM: { | ||
| 200 | UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||
| 201 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 202 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 203 | "Condition codes generation in F2F is not implemented"); | ||
| 204 | |||
| 205 | Node value = [&]() { | ||
| 206 | switch (opcode->get().GetId()) { | ||
| 207 | case OpCode::Id::F2F_R: | ||
| 208 | return GetRegister(instr.gpr20); | ||
| 209 | case OpCode::Id::F2F_C: | ||
| 210 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 211 | case OpCode::Id::F2F_IMM: | ||
| 212 | return GetImmediate19(instr); | ||
| 213 | default: | ||
| 214 | UNREACHABLE(); | ||
| 215 | return Immediate(0); | ||
| 216 | } | ||
| 217 | }(); | ||
| 218 | |||
| 219 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 220 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 221 | std::move(value)); | ||
| 222 | } else { | ||
| 223 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 227 | |||
| 228 | value = [&] { | ||
| 229 | if (instr.conversion.src_size != instr.conversion.dst_size) { | ||
| 230 | // Rounding operations only matter when the source and destination conversion sizes | ||
| 231 | // are the same. | ||
| 232 | return value; | ||
| 233 | } | ||
| 234 | switch (instr.conversion.f2f.GetRoundingMode()) { | ||
| 235 | case Tegra::Shader::F2fRoundingOp::None: | ||
| 236 | return value; | ||
| 237 | case Tegra::Shader::F2fRoundingOp::Round: | ||
| 238 | return Operation(OperationCode::FRoundEven, value); | ||
| 239 | case Tegra::Shader::F2fRoundingOp::Floor: | ||
| 240 | return Operation(OperationCode::FFloor, value); | ||
| 241 | case Tegra::Shader::F2fRoundingOp::Ceil: | ||
| 242 | return Operation(OperationCode::FCeil, value); | ||
| 243 | case Tegra::Shader::F2fRoundingOp::Trunc: | ||
| 244 | return Operation(OperationCode::FTrunc, value); | ||
| 245 | default: | ||
| 246 | UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||
| 247 | instr.conversion.f2f.rounding.Value()); | ||
| 248 | return value; | ||
| 249 | } | ||
| 250 | }(); | ||
| 251 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 252 | |||
| 253 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 254 | |||
| 255 | if (instr.conversion.dst_size == Register::Size::Short) { | ||
| 256 | value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||
| 257 | } | ||
| 258 | |||
| 259 | SetRegister(bb, instr.gpr0, value); | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::F2I_R: | ||
| 263 | case OpCode::Id::F2I_C: | ||
| 264 | case OpCode::Id::F2I_IMM: { | ||
| 265 | UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 267 | "Condition codes generation in F2I is not implemented"); | ||
| 268 | Node value = [&]() { | ||
| 269 | switch (opcode->get().GetId()) { | ||
| 270 | case OpCode::Id::F2I_R: | ||
| 271 | return GetRegister(instr.gpr20); | ||
| 272 | case OpCode::Id::F2I_C: | ||
| 273 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 274 | case OpCode::Id::F2I_IMM: | ||
| 275 | return GetImmediate19(instr); | ||
| 276 | default: | ||
| 277 | UNREACHABLE(); | ||
| 278 | return Immediate(0); | ||
| 279 | } | ||
| 280 | }(); | ||
| 281 | |||
| 282 | if (instr.conversion.src_size == Register::Size::Short) { | ||
| 283 | value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE, | ||
| 284 | std::move(value)); | ||
| 285 | } else { | ||
| 286 | ASSERT(instr.conversion.float_src.selector == 0); | ||
| 287 | } | ||
| 288 | |||
| 289 | value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||
| 290 | |||
| 291 | value = [&]() { | ||
| 292 | switch (instr.conversion.f2i.rounding) { | ||
| 293 | case Tegra::Shader::F2iRoundingOp::RoundEven: | ||
| 294 | return Operation(OperationCode::FRoundEven, PRECISE, value); | ||
| 295 | case Tegra::Shader::F2iRoundingOp::Floor: | ||
| 296 | return Operation(OperationCode::FFloor, PRECISE, value); | ||
| 297 | case Tegra::Shader::F2iRoundingOp::Ceil: | ||
| 298 | return Operation(OperationCode::FCeil, PRECISE, value); | ||
| 299 | case Tegra::Shader::F2iRoundingOp::Trunc: | ||
| 300 | return Operation(OperationCode::FTrunc, PRECISE, value); | ||
| 301 | default: | ||
| 302 | UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}", | ||
| 303 | instr.conversion.f2i.rounding.Value()); | ||
| 304 | return Immediate(0); | ||
| 305 | } | ||
| 306 | }(); | ||
| 307 | const bool is_signed = instr.conversion.is_output_signed; | ||
| 308 | value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value); | ||
| 309 | value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed); | ||
| 310 | |||
| 311 | SetRegister(bb, instr.gpr0, value); | ||
| 312 | break; | ||
| 313 | } | ||
| 314 | default: | ||
| 315 | UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName()); | ||
| 316 | } | ||
| 317 | |||
| 318 | return pc; | ||
| 319 | } | ||
| 320 | |||
| 321 | } // namespace VideoCommon::Shader | ||
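
Note: two concrete instances of the clamps IntegerSaturateBounds above produces for I2I with .SAT, written as plain saturating conversions (illustrative, not yuzu API):

    #include <algorithm>
    #include <cstdint>

    // I2I s32 -> u8 with .SAT: clamp to [0, 255].
    inline uint8_t SaturateS32ToU8(int32_t v) {
        return static_cast<uint8_t>(std::clamp(v, 0, 255));
    }

    // I2I s32 -> s16 with .SAT: clamp to [-32768, 32767].
    inline int16_t SaturateS32ToS16(int32_t v) {
        return static_cast<int16_t>(std::clamp(v, -32768, 32767));
    }
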
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp deleted file mode 100644 index 5973588d6..000000000 --- a/src/video_core/shader/decode/ffma.cpp +++ /dev/null | |||
| @@ -1,62 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | const auto opcode = OpCode::Decode(instr); | ||
| 19 | |||
| 20 | UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented"); | ||
| 21 | if (instr.ffma.tab5980_0 != 1) { | ||
| 22 | LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value()); | ||
| 23 | } | ||
| 24 | if (instr.ffma.tab5980_1 != 0) { | ||
| 25 | LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value()); | ||
| 26 | } | ||
| 27 | |||
| 28 | const Node op_a = GetRegister(instr.gpr8); | ||
| 29 | |||
| 30 | auto [op_b, op_c] = [&]() -> std::tuple<Node, Node> { | ||
| 31 | switch (opcode->get().GetId()) { | ||
| 32 | case OpCode::Id::FFMA_CR: { | ||
| 33 | return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 34 | GetRegister(instr.gpr39)}; | ||
| 35 | } | ||
| 36 | case OpCode::Id::FFMA_RR: | ||
| 37 | return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 38 | case OpCode::Id::FFMA_RC: { | ||
| 39 | return {GetRegister(instr.gpr39), | ||
| 40 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 41 | } | ||
| 42 | case OpCode::Id::FFMA_IMM: | ||
| 43 | return {GetImmediate19(instr), GetRegister(instr.gpr39)}; | ||
| 44 | default: | ||
| 45 | UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName()); | ||
| 46 | return {Immediate(0), Immediate(0)}; | ||
| 47 | } | ||
| 48 | }(); | ||
| 49 | |||
| 50 | op_b = GetOperandAbsNegFloat(op_b, false, instr.ffma.negate_b); | ||
| 51 | op_c = GetOperandAbsNegFloat(op_c, false, instr.ffma.negate_c); | ||
| 52 | |||
| 53 | Node value = Operation(OperationCode::FFma, PRECISE, op_a, op_b, op_c); | ||
| 54 | value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||
| 55 | |||
| 56 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 57 | SetRegister(bb, instr.gpr0, value); | ||
| 58 | |||
| 59 | return pc; | ||
| 60 | } | ||
| 61 | |||
| 62 | } // namespace VideoCommon::Shader | ||
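The FFma node built by this decoder denotes a fused multiply-add, i.e. a * b + c rounded once. A scalar sketch of those semantics, using std::fma as a stand-in for the backend's fused operation and applying the negate modifiers first as the deleted code does (EmulateFfma is a hypothetical helper):

    #include <cmath>

    float EmulateFfma(float a, float b, float c, bool negate_b, bool negate_c) {
        // GetOperandAbsNegFloat(op, false, negate) only negates here; abs is unused.
        if (negate_b) {
            b = -b;
        }
        if (negate_c) {
            c = -c;
        }
        return std::fma(a, b, c); // a * b + c with a single rounding step
    }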
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp deleted file mode 100644 index 5614e8a0d..000000000 --- a/src/video_core/shader/decode/float_set.cpp +++ /dev/null | |||
| @@ -1,58 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0, | ||
| 20 | instr.fset.neg_a != 0); | ||
| 21 | |||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0); | ||
| 33 | |||
| 34 | // The fset instruction sets the register to float 1.0 (when the bf bit is set) or to | ||
| 35 | // integer -1 (when bf is clear) if the condition is true, and to 0 otherwise. | ||
| 36 | const Node second_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0); | ||
| 37 | |||
| 38 | const OperationCode combiner = GetPredicateCombiner(instr.fset.op); | ||
| 39 | const Node first_pred = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b); | ||
| 40 | |||
| 41 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 42 | |||
| 43 | const Node true_value = instr.fset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 44 | const Node false_value = instr.fset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 45 | const Node value = | ||
| 46 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 47 | |||
| 48 | if (instr.fset.bf) { | ||
| 49 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 50 | } else { | ||
| 51 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 52 | } | ||
| 53 | SetRegister(bb, instr.gpr0, value); | ||
| 54 | |||
| 55 | return pc; | ||
| 56 | } | ||
| 57 | |||
| 58 | } // namespace VideoCommon::Shader | ||
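As the comment in the decoder notes, FSET's result encoding depends on the bf bit: a boolean float (1.0/0.0) when set, an integer mask (-1/0) when clear. A small sketch of that encoding over a raw 32-bit register, assuming C++20 for std::bit_cast (FsetResult is a hypothetical helper):

    #include <bit>
    #include <cstdint>

    uint32_t FsetResult(bool condition, bool bf) {
        if (bf) {
            // Boolean-float mode: the IEEE-754 bit pattern of 1.0f or 0.0f.
            return std::bit_cast<uint32_t>(condition ? 1.0f : 0.0f);
        }
        // Integer mode: all bits set (two's-complement -1) or zero.
        return condition ? 0xFFFFFFFFu : 0u;
    }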
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp deleted file mode 100644 index 200c2c983..000000000 --- a/src/video_core/shader/decode/float_set_predicate.cpp +++ /dev/null | |||
| @@ -1,57 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0, | ||
| 21 | instr.fsetp.neg_a != 0); | ||
| 22 | Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return GetImmediate19(instr); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b); | ||
| 32 | |||
| 33 | // We can't use the constant predicate as destination. | ||
| 34 | ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 35 | |||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b)); | ||
| 38 | const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0); | ||
| 39 | |||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | |||
| 43 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 44 | SetPredicate(bb, instr.fsetp.pred3, value); | ||
| 45 | |||
| 46 | if (instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 47 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 48 | // if enabled | ||
| 49 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 50 | const Node second_value = Operation(combiner, negated_pred, second_pred); | ||
| 51 | SetPredicate(bb, instr.fsetp.pred0, second_value); | ||
| 52 | } | ||
| 53 | |||
| 54 | return pc; | ||
| 55 | } | ||
| 56 | |||
| 57 | } // namespace VideoCommon::Shader | ||
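FSETP thus performs up to two predicate writes from a single comparison: the primary destination receives cmp OP pred39 and the optional secondary receives !cmp OP pred39. A minimal sketch with bools standing in for hardware predicates and the combiner reduced to a callable:

    #include <functional>

    void EmulateFsetp(bool comparison, bool second_pred, bool pred0_enabled,
                      const std::function<bool(bool, bool)>& combiner,
                      bool& pred3, bool& pred0) {
        pred3 = combiner(comparison, second_pred);      // primary predicate
        if (pred0_enabled) {
            pred0 = combiner(!comparison, second_pred); // secondary predicate
        }
    }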
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp deleted file mode 100644 index fa83108cd..000000000 --- a/src/video_core/shader/decode/half_set.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <array> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "common/logging/log.h" | ||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | using std::move; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PredCondition; | ||
| 20 | |||
| 21 | u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { | ||
| 22 | const Instruction instr = {program_code[pc]}; | ||
| 23 | const auto opcode = OpCode::Decode(instr); | ||
| 24 | |||
| 25 | PredCondition cond{}; | ||
| 26 | bool bf = false; | ||
| 27 | bool ftz = false; | ||
| 28 | bool neg_a = false; | ||
| 29 | bool abs_a = false; | ||
| 30 | bool neg_b = false; | ||
| 31 | bool abs_b = false; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSET2_C: | ||
| 34 | case OpCode::Id::HSET2_IMM: | ||
| 35 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 36 | bf = instr.Bit(53); | ||
| 37 | ftz = instr.Bit(54); | ||
| 38 | neg_a = instr.Bit(43); | ||
| 39 | abs_a = instr.Bit(44); | ||
| 40 | neg_b = instr.Bit(56); | ||
| 41 | abs_b = instr.Bit(54); | ||
| 42 | break; | ||
| 43 | case OpCode::Id::HSET2_R: | ||
| 44 | cond = instr.hsetp2.reg.cond; | ||
| 45 | bf = instr.Bit(49); | ||
| 46 | ftz = instr.Bit(50); | ||
| 47 | neg_a = instr.Bit(43); | ||
| 48 | abs_a = instr.Bit(44); | ||
| 49 | neg_b = instr.Bit(31); | ||
| 50 | abs_b = instr.Bit(30); | ||
| 51 | break; | ||
| 52 | default: | ||
| 53 | UNREACHABLE(); | ||
| 54 | } | ||
| 55 | |||
| 56 | Node op_b = [this, instr, opcode] { | ||
| 57 | switch (opcode->get().GetId()) { | ||
| 58 | case OpCode::Id::HSET2_C: | ||
| 59 | // Flag as unimplemented, since this path is untested. | ||
| 60 | UNIMPLEMENTED_MSG("HSET2_C is not implemented"); | ||
| 61 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 62 | case OpCode::Id::HSET2_R: | ||
| 63 | return GetRegister(instr.gpr20); | ||
| 64 | case OpCode::Id::HSET2_IMM: | ||
| 65 | return UnpackHalfImmediate(instr, true); | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return Node{}; | ||
| 69 | } | ||
| 70 | }(); | ||
| 71 | |||
| 72 | if (!ftz) { | ||
| 73 | LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); | ||
| 74 | } | ||
| 75 | |||
| 76 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); | ||
| 77 | op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); | ||
| 78 | |||
| 79 | switch (opcode->get().GetId()) { | ||
| 80 | case OpCode::Id::HSET2_R: | ||
| 81 | op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); | ||
| 82 | [[fallthrough]]; | ||
| 83 | case OpCode::Id::HSET2_C: | ||
| 84 | op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); | ||
| 85 | break; | ||
| 86 | default: | ||
| 87 | break; | ||
| 88 | } | ||
| 89 | |||
| 90 | Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); | ||
| 91 | |||
| 92 | Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 93 | |||
| 94 | const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); | ||
| 95 | |||
| 96 | // HSET2 operates on each half float in the pack. | ||
| 97 | std::array<Node, 2> values; | ||
| 98 | for (u32 i = 0; i < 2; ++i) { | ||
| 99 | const u32 raw_value = bf ? 0x3c00 : 0xffff; | ||
| 100 | Node true_value = Immediate(raw_value << (i * 16)); | ||
| 101 | Node false_value = Immediate(0); | ||
| 102 | |||
| 103 | Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); | ||
| 104 | Node predicate = Operation(combiner, comparison, second_pred); | ||
| 105 | values[i] = | ||
| 106 | Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); | ||
| 110 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 111 | |||
| 112 | return pc; | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
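The per-lane loop above assembles the packed HSET2 result by OR-ing a 16-bit pattern into each half: half-precision 1.0 (0x3c00) in boolean-float mode, an all-ones mask otherwise. A host-side sketch of that packing, with the predicate-combining stage already folded into the two lane booleans (Hset2Result is a hypothetical helper):

    #include <cstdint>

    uint32_t Hset2Result(bool lane0, bool lane1, bool bf) {
        const uint32_t raw = bf ? 0x3c00u : 0xffffu;
        uint32_t value = 0;
        if (lane0) {
            value |= raw; // low 16-bit lane
        }
        if (lane1) {
            value |= raw << 16; // high 16-bit lane
        }
        return value;
    }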
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp deleted file mode 100644 index 310655619..000000000 --- a/src/video_core/shader/decode/half_set_predicate.cpp +++ /dev/null | |||
| @@ -1,80 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | |||
| 18 | u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 19 | const Instruction instr = {program_code[pc]}; | ||
| 20 | const auto opcode = OpCode::Decode(instr); | ||
| 21 | |||
| 22 | if (instr.hsetp2.ftz != 0) { | ||
| 23 | LOG_DEBUG(HW_GPU, "{} with FTZ is not implemented", opcode->get().GetName()); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a); | ||
| 27 | op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a); | ||
| 28 | |||
| 29 | Tegra::Shader::PredCondition cond{}; | ||
| 30 | bool h_and{}; | ||
| 31 | Node op_b{}; | ||
| 32 | switch (opcode->get().GetId()) { | ||
| 33 | case OpCode::Id::HSETP2_C: | ||
| 34 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 35 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 36 | op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 37 | instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b); | ||
| 38 | // F32 is hardcoded in hardware | ||
| 39 | op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32); | ||
| 40 | break; | ||
| 41 | case OpCode::Id::HSETP2_IMM: | ||
| 42 | cond = instr.hsetp2.cbuf_and_imm.cond; | ||
| 43 | h_and = instr.hsetp2.cbuf_and_imm.h_and; | ||
| 44 | op_b = UnpackHalfImmediate(instr, true); | ||
| 45 | break; | ||
| 46 | case OpCode::Id::HSETP2_R: | ||
| 47 | cond = instr.hsetp2.reg.cond; | ||
| 48 | h_and = instr.hsetp2.reg.h_and; | ||
| 49 | op_b = | ||
| 50 | GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b), | ||
| 51 | instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b); | ||
| 52 | break; | ||
| 53 | default: | ||
| 54 | UNREACHABLE(); | ||
| 55 | op_b = Immediate(0); | ||
| 56 | } | ||
| 57 | |||
| 58 | const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op); | ||
| 59 | const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred); | ||
| 60 | |||
| 61 | const auto Write = [&](u64 dest, Node src) { | ||
| 62 | SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred)); | ||
| 63 | }; | ||
| 64 | |||
| 65 | const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b); | ||
| 66 | const u64 first = instr.hsetp2.pred3; | ||
| 67 | const u64 second = instr.hsetp2.pred0; | ||
| 68 | if (h_and) { | ||
| 69 | Node joined = Operation(OperationCode::LogicalAnd2, comparison); | ||
| 70 | Write(first, joined); | ||
| 71 | Write(second, Operation(OperationCode::LogicalNegate, std::move(joined))); | ||
| 72 | } else { | ||
| 73 | Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U))); | ||
| 74 | Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U))); | ||
| 75 | } | ||
| 76 | |||
| 77 | return pc; | ||
| 78 | } | ||
| 79 | |||
| 80 | } // namespace VideoCommon::Shader | ||
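The tail of HSETP2 has two write modes: with h_and set, both lane comparisons collapse into a single boolean and its negation; otherwise each destination predicate picks one lane. A sketch of just that selection, omitting the combiner/pred39 stage for brevity:

    #include <array>

    void Hsetp2Write(const std::array<bool, 2>& comparison, bool h_and,
                     bool& first, bool& second) {
        if (h_and) {
            const bool joined = comparison[0] && comparison[1]; // LogicalAnd2
            first = joined;
            second = !joined;
        } else {
            first = comparison[0];  // LogicalPick2 lane 0
            second = comparison[1]; // LogicalPick2 lane 1
        }
    }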
diff --git a/src/video_core/shader/decode/hfma2.cpp b/src/video_core/shader/decode/hfma2.cpp deleted file mode 100644 index 5b44cb79c..000000000 --- a/src/video_core/shader/decode/hfma2.cpp +++ /dev/null | |||
| @@ -1,73 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <tuple> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using Tegra::Shader::HalfPrecision; | ||
| 16 | using Tegra::Shader::HalfType; | ||
| 17 | using Tegra::Shader::Instruction; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | |||
| 20 | u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) { | ||
| 21 | const Instruction instr = {program_code[pc]}; | ||
| 22 | const auto opcode = OpCode::Decode(instr); | ||
| 23 | |||
| 24 | if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) { | ||
| 25 | DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None); | ||
| 26 | } else { | ||
| 27 | DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None); | ||
| 28 | } | ||
| 29 | |||
| 30 | constexpr auto identity = HalfType::H0_H1; | ||
| 31 | bool neg_b{}, neg_c{}; | ||
| 32 | auto [saturate, type_b, op_b, type_c, | ||
| 33 | op_c] = [&]() -> std::tuple<bool, HalfType, Node, HalfType, Node> { | ||
| 34 | switch (opcode->get().GetId()) { | ||
| 35 | case OpCode::Id::HFMA2_CR: | ||
| 36 | neg_b = instr.hfma2.negate_b; | ||
| 37 | neg_c = instr.hfma2.negate_c; | ||
| 38 | return {instr.hfma2.saturate, HalfType::F32, | ||
| 39 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 40 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 41 | case OpCode::Id::HFMA2_RC: | ||
| 42 | neg_b = instr.hfma2.negate_b; | ||
| 43 | neg_c = instr.hfma2.negate_c; | ||
| 44 | return {instr.hfma2.saturate, instr.hfma2.type_reg39, GetRegister(instr.gpr39), | ||
| 45 | HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 46 | case OpCode::Id::HFMA2_RR: | ||
| 47 | neg_b = instr.hfma2.rr.negate_b; | ||
| 48 | neg_c = instr.hfma2.rr.negate_c; | ||
| 49 | return {instr.hfma2.rr.saturate, instr.hfma2.type_b, GetRegister(instr.gpr20), | ||
| 50 | instr.hfma2.rr.type_c, GetRegister(instr.gpr39)}; | ||
| 51 | case OpCode::Id::HFMA2_IMM_R: | ||
| 52 | neg_c = instr.hfma2.negate_c; | ||
| 53 | return {instr.hfma2.saturate, identity, UnpackHalfImmediate(instr, true), | ||
| 54 | instr.hfma2.type_reg39, GetRegister(instr.gpr39)}; | ||
| 55 | default: | ||
| 56 | return {false, identity, Immediate(0), identity, Immediate(0)}; | ||
| 57 | } | ||
| 58 | }(); | ||
| 59 | |||
| 60 | const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a); | ||
| 61 | op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b); | ||
| 62 | op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c); | ||
| 63 | |||
| 64 | Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c); | ||
| 65 | value = GetSaturatedHalfFloat(value, saturate); | ||
| 66 | value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge); | ||
| 67 | |||
| 68 | SetRegister(bb, instr.gpr0, value); | ||
| 69 | |||
| 70 | return pc; | ||
| 71 | } | ||
| 72 | |||
| 73 | } // namespace VideoCommon::Shader | ||
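HFma is the packed counterpart of FFma: a fused multiply-add applied independently to each 16-bit lane. A sketch with the lanes promoted to float for readability; real hardware keeps f16 precision, and the HalfMerge step with the previous gpr0 contents is omitted here (EmulateHFma is a hypothetical helper):

    #include <array>
    #include <cmath>

    std::array<float, 2> EmulateHFma(const std::array<float, 2>& a,
                                     const std::array<float, 2>& b,
                                     const std::array<float, 2>& c) {
        return {std::fma(a[0], b[0], c[0]),  // low lane
                std::fma(a[1], b[1], c[1])}; // high lane
    }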
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp deleted file mode 100644 index 5470e8cf4..000000000 --- a/src/video_core/shader/decode/image.cpp +++ /dev/null | |||
| @@ -1,536 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | #include "video_core/textures/texture.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::PredCondition; | ||
| 23 | using Tegra::Shader::StoreType; | ||
| 24 | using Tegra::Texture::ComponentType; | ||
| 25 | using Tegra::Texture::TextureFormat; | ||
| 26 | using Tegra::Texture::TICEntry; | ||
| 27 | |||
| 28 | namespace { | ||
| 29 | |||
| 30 | ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | ||
| 31 | std::size_t component) { | ||
| 32 | const TextureFormat format{descriptor.format}; | ||
| 33 | switch (format) { | ||
| 34 | case TextureFormat::R16G16B16A16: | ||
| 35 | case TextureFormat::R32G32B32A32: | ||
| 36 | case TextureFormat::R32G32B32: | ||
| 37 | case TextureFormat::R32G32: | ||
| 38 | case TextureFormat::R16G16: | ||
| 39 | case TextureFormat::R32: | ||
| 40 | case TextureFormat::R16: | ||
| 41 | case TextureFormat::R8: | ||
| 42 | case TextureFormat::R1: | ||
| 43 | if (component == 0) { | ||
| 44 | return descriptor.r_type; | ||
| 45 | } | ||
| 46 | if (component == 1) { | ||
| 47 | return descriptor.g_type; | ||
| 48 | } | ||
| 49 | if (component == 2) { | ||
| 50 | return descriptor.b_type; | ||
| 51 | } | ||
| 52 | if (component == 3) { | ||
| 53 | return descriptor.a_type; | ||
| 54 | } | ||
| 55 | break; | ||
| 56 | case TextureFormat::A8R8G8B8: | ||
| 57 | if (component == 0) { | ||
| 58 | return descriptor.a_type; | ||
| 59 | } | ||
| 60 | if (component == 1) { | ||
| 61 | return descriptor.r_type; | ||
| 62 | } | ||
| 63 | if (component == 2) { | ||
| 64 | return descriptor.g_type; | ||
| 65 | } | ||
| 66 | if (component == 3) { | ||
| 67 | return descriptor.b_type; | ||
| 68 | } | ||
| 69 | break; | ||
| 70 | case TextureFormat::A2B10G10R10: | ||
| 71 | case TextureFormat::A4B4G4R4: | ||
| 72 | case TextureFormat::A5B5G5R1: | ||
| 73 | case TextureFormat::A1B5G5R5: | ||
| 74 | if (component == 0) { | ||
| 75 | return descriptor.a_type; | ||
| 76 | } | ||
| 77 | if (component == 1) { | ||
| 78 | return descriptor.b_type; | ||
| 79 | } | ||
| 80 | if (component == 2) { | ||
| 81 | return descriptor.g_type; | ||
| 82 | } | ||
| 83 | if (component == 3) { | ||
| 84 | return descriptor.r_type; | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | case TextureFormat::R32_B24G8: | ||
| 88 | if (component == 0) { | ||
| 89 | return descriptor.r_type; | ||
| 90 | } | ||
| 91 | if (component == 1) { | ||
| 92 | return descriptor.b_type; | ||
| 93 | } | ||
| 94 | if (component == 2) { | ||
| 95 | return descriptor.g_type; | ||
| 96 | } | ||
| 97 | break; | ||
| 98 | case TextureFormat::B5G6R5: | ||
| 99 | case TextureFormat::B6G5R5: | ||
| 100 | case TextureFormat::B10G11R11: | ||
| 101 | if (component == 0) { | ||
| 102 | return descriptor.b_type; | ||
| 103 | } | ||
| 104 | if (component == 1) { | ||
| 105 | return descriptor.g_type; | ||
| 106 | } | ||
| 107 | if (component == 2) { | ||
| 108 | return descriptor.r_type; | ||
| 109 | } | ||
| 110 | break; | ||
| 111 | case TextureFormat::R24G8: | ||
| 112 | case TextureFormat::R8G24: | ||
| 113 | case TextureFormat::R8G8: | ||
| 114 | case TextureFormat::G4R4: | ||
| 115 | if (component == 0) { | ||
| 116 | return descriptor.g_type; | ||
| 117 | } | ||
| 118 | if (component == 1) { | ||
| 119 | return descriptor.r_type; | ||
| 120 | } | ||
| 121 | break; | ||
| 122 | default: | ||
| 123 | break; | ||
| 124 | } | ||
| 125 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 126 | return ComponentType::FLOAT; | ||
| 127 | } | ||
| 128 | |||
| 129 | bool IsComponentEnabled(std::size_t component_mask, std::size_t component) { | ||
| 130 | constexpr u8 R = 0b0001; | ||
| 131 | constexpr u8 G = 0b0010; | ||
| 132 | constexpr u8 B = 0b0100; | ||
| 133 | constexpr u8 A = 0b1000; | ||
| 134 | constexpr std::array<u8, 16> mask = { | ||
| 135 | 0, (R), (G), (R | G), (B), (R | B), (G | B), (R | G | B), | ||
| 136 | (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)}; | ||
| 137 | return std::bitset<4>{mask.at(component_mask)}.test(component); | ||
| 138 | } | ||
| 139 | |||
| 140 | u32 GetComponentSize(TextureFormat format, std::size_t component) { | ||
| 141 | switch (format) { | ||
| 142 | case TextureFormat::R32G32B32A32: | ||
| 143 | return 32; | ||
| 144 | case TextureFormat::R16G16B16A16: | ||
| 145 | return 16; | ||
| 146 | case TextureFormat::R32G32B32: | ||
| 147 | return component <= 2 ? 32 : 0; | ||
| 148 | case TextureFormat::R32G32: | ||
| 149 | return component <= 1 ? 32 : 0; | ||
| 150 | case TextureFormat::R16G16: | ||
| 151 | return component <= 1 ? 16 : 0; | ||
| 152 | case TextureFormat::R32: | ||
| 153 | return component == 0 ? 32 : 0; | ||
| 154 | case TextureFormat::R16: | ||
| 155 | return component == 0 ? 16 : 0; | ||
| 156 | case TextureFormat::R8: | ||
| 157 | return component == 0 ? 8 : 0; | ||
| 158 | case TextureFormat::R1: | ||
| 159 | return component == 0 ? 1 : 0; | ||
| 160 | case TextureFormat::A8R8G8B8: | ||
| 161 | return 8; | ||
| 162 | case TextureFormat::A2B10G10R10: | ||
| 163 | return (component == 3 || component == 2 || component == 1) ? 10 : 2; | ||
| 164 | case TextureFormat::A4B4G4R4: | ||
| 165 | return 4; | ||
| 166 | case TextureFormat::A5B5G5R1: | ||
| 167 | return (component == 0 || component == 1 || component == 2) ? 5 : 1; | ||
| 168 | case TextureFormat::A1B5G5R5: | ||
| 169 | return (component == 1 || component == 2 || component == 3) ? 5 : 1; | ||
| 170 | case TextureFormat::R32_B24G8: | ||
| 171 | if (component == 0) { | ||
| 172 | return 32; | ||
| 173 | } | ||
| 174 | if (component == 1) { | ||
| 175 | return 24; | ||
| 176 | } | ||
| 177 | if (component == 2) { | ||
| 178 | return 8; | ||
| 179 | } | ||
| 180 | return 0; | ||
| 181 | case TextureFormat::B5G6R5: | ||
| 182 | if (component == 0 || component == 2) { | ||
| 183 | return 5; | ||
| 184 | } | ||
| 185 | if (component == 1) { | ||
| 186 | return 6; | ||
| 187 | } | ||
| 188 | return 0; | ||
| 189 | case TextureFormat::B6G5R5: | ||
| 190 | if (component == 1 || component == 2) { | ||
| 191 | return 5; | ||
| 192 | } | ||
| 193 | if (component == 0) { | ||
| 194 | return 6; | ||
| 195 | } | ||
| 196 | return 0; | ||
| 197 | case TextureFormat::B10G11R11: | ||
| 198 | if (component == 1 || component == 2) { | ||
| 199 | return 11; | ||
| 200 | } | ||
| 201 | if (component == 0) { | ||
| 202 | return 10; | ||
| 203 | } | ||
| 204 | return 0; | ||
| 205 | case TextureFormat::R24G8: | ||
| 206 | if (component == 0) { | ||
| 207 | return 8; | ||
| 208 | } | ||
| 209 | if (component == 1) { | ||
| 210 | return 24; | ||
| 211 | } | ||
| 212 | return 0; | ||
| 213 | case TextureFormat::R8G24: | ||
| 214 | if (component == 0) { | ||
| 215 | return 24; | ||
| 216 | } | ||
| 217 | if (component == 1) { | ||
| 218 | return 8; | ||
| 219 | } | ||
| 220 | return 0; | ||
| 221 | case TextureFormat::R8G8: | ||
| 222 | return (component == 0 || component == 1) ? 8 : 0; | ||
| 223 | case TextureFormat::G4R4: | ||
| 224 | return (component == 0 || component == 1) ? 4 : 0; | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 227 | return 0; | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | std::size_t GetImageComponentMask(TextureFormat format) { | ||
| 232 | constexpr u8 R = 0b0001; | ||
| 233 | constexpr u8 G = 0b0010; | ||
| 234 | constexpr u8 B = 0b0100; | ||
| 235 | constexpr u8 A = 0b1000; | ||
| 236 | switch (format) { | ||
| 237 | case TextureFormat::R32G32B32A32: | ||
| 238 | case TextureFormat::R16G16B16A16: | ||
| 239 | case TextureFormat::A8R8G8B8: | ||
| 240 | case TextureFormat::A2B10G10R10: | ||
| 241 | case TextureFormat::A4B4G4R4: | ||
| 242 | case TextureFormat::A5B5G5R1: | ||
| 243 | case TextureFormat::A1B5G5R5: | ||
| 244 | return std::size_t{R | G | B | A}; | ||
| 245 | case TextureFormat::R32G32B32: | ||
| 246 | case TextureFormat::R32_B24G8: | ||
| 247 | case TextureFormat::B5G6R5: | ||
| 248 | case TextureFormat::B6G5R5: | ||
| 249 | case TextureFormat::B10G11R11: | ||
| 250 | return std::size_t{R | G | B}; | ||
| 251 | case TextureFormat::R32G32: | ||
| 252 | case TextureFormat::R16G16: | ||
| 253 | case TextureFormat::R24G8: | ||
| 254 | case TextureFormat::R8G24: | ||
| 255 | case TextureFormat::R8G8: | ||
| 256 | case TextureFormat::G4R4: | ||
| 257 | return std::size_t{R | G}; | ||
| 258 | case TextureFormat::R32: | ||
| 259 | case TextureFormat::R16: | ||
| 260 | case TextureFormat::R8: | ||
| 261 | case TextureFormat::R1: | ||
| 262 | return std::size_t{R}; | ||
| 263 | default: | ||
| 264 | UNIMPLEMENTED_MSG("Texture format not implemented={}", format); | ||
| 265 | return std::size_t{R | G | B | A}; | ||
| 266 | } | ||
| 267 | } | ||
| 268 | |||
| 269 | std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||
| 270 | switch (image_type) { | ||
| 271 | case Tegra::Shader::ImageType::Texture1D: | ||
| 272 | case Tegra::Shader::ImageType::TextureBuffer: | ||
| 273 | return 1; | ||
| 274 | case Tegra::Shader::ImageType::Texture1DArray: | ||
| 275 | case Tegra::Shader::ImageType::Texture2D: | ||
| 276 | return 2; | ||
| 277 | case Tegra::Shader::ImageType::Texture2DArray: | ||
| 278 | case Tegra::Shader::ImageType::Texture3D: | ||
| 279 | return 3; | ||
| 280 | } | ||
| 281 | UNREACHABLE(); | ||
| 282 | return 1; | ||
| 283 | } | ||
| 284 | } // Anonymous namespace | ||
| 285 | |||
| 286 | std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, | ||
| 287 | Node original_value) { | ||
| 288 | switch (component_type) { | ||
| 289 | case ComponentType::SNORM: { | ||
| 290 | // range [-1.0, 1.0] | ||
| 291 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 292 | Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); | ||
| 293 | cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); | ||
| 294 | return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; | ||
| 295 | } | ||
| 296 | case ComponentType::SINT: | ||
| 297 | case ComponentType::UNORM: { | ||
| 298 | bool is_signed = component_type == ComponentType::SINT; | ||
| 299 | // range [0.0, 1.0] | ||
| 300 | auto cnv_value = Operation(OperationCode::FMul, original_value, | ||
| 301 | Immediate(static_cast<float>(1 << component_size) - 1.f)); | ||
| 302 | return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), | ||
| 303 | is_signed}; | ||
| 304 | } | ||
| 305 | case ComponentType::UINT: // range [0, (1 << component_size) - 1] | ||
| 306 | return {std::move(original_value), false}; | ||
| 307 | case ComponentType::FLOAT: | ||
| 308 | if (component_size == 16) { | ||
| 309 | return {Operation(OperationCode::HCastFloat, original_value), true}; | ||
| 310 | } else { | ||
| 311 | return {std::move(original_value), true}; | ||
| 312 | } | ||
| 313 | default: | ||
| 314 | UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type); | ||
| 315 | return {std::move(original_value), true}; | ||
| 316 | } | ||
| 317 | } | ||
| 318 | |||
| 319 | u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||
| 320 | const Instruction instr = {program_code[pc]}; | ||
| 321 | const auto opcode = OpCode::Decode(instr); | ||
| 322 | |||
| 323 | const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { | ||
| 324 | std::vector<Node> coords; | ||
| 325 | const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; | ||
| 326 | coords.reserve(num_coords); | ||
| 327 | for (std::size_t i = 0; i < num_coords; ++i) { | ||
| 328 | coords.push_back(GetRegister(instr.gpr8.Value() + i)); | ||
| 329 | } | ||
| 330 | return coords; | ||
| 331 | }; | ||
| 332 | |||
| 333 | switch (opcode->get().GetId()) { | ||
| 334 | case OpCode::Id::SULD: { | ||
| 335 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 336 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 337 | |||
| 338 | const auto type{instr.suldst.image_type}; | ||
| 339 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 340 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 341 | image.MarkRead(); | ||
| 342 | |||
| 343 | if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { | ||
| 344 | u32 indexer = 0; | ||
| 345 | for (u32 element = 0; element < 4; ++element) { | ||
| 346 | if (!instr.suldst.IsComponentEnabled(element)) { | ||
| 347 | continue; | ||
| 348 | } | ||
| 349 | MetaImage meta{image, {}, element}; | ||
| 350 | Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); | ||
| 351 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 352 | } | ||
| 353 | for (u32 i = 0; i < indexer; ++i) { | ||
| 354 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 355 | } | ||
| 356 | } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { | ||
| 357 | UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && | ||
| 358 | instr.suldst.GetStoreDataLayout() != StoreType::Bits64); | ||
| 359 | |||
| 360 | auto descriptor = [this, instr] { | ||
| 361 | std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor; | ||
| 362 | if (instr.suldst.is_immediate) { | ||
| 363 | sampler_descriptor = | ||
| 364 | registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); | ||
| 365 | } else { | ||
| 366 | const Node image_register = GetRegister(instr.gpr39); | ||
| 367 | const auto result = TrackCbuf(image_register, global_code, | ||
| 368 | static_cast<s64>(global_code.size())); | ||
| 369 | const auto buffer = std::get<1>(result); | ||
| 370 | const auto offset = std::get<2>(result); | ||
| 371 | sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset); | ||
| 372 | } | ||
| 373 | if (!sampler_descriptor) { | ||
| 374 | UNREACHABLE_MSG("Failed to obtain image descriptor"); | ||
| 375 | } | ||
| 376 | return *sampler_descriptor; | ||
| 377 | }(); | ||
| 378 | |||
| 379 | const auto comp_mask = GetImageComponentMask(descriptor.format); | ||
| 380 | |||
| 381 | switch (instr.suldst.GetStoreDataLayout()) { | ||
| 382 | case StoreType::Bits32: | ||
| 383 | case StoreType::Bits64: { | ||
| 384 | u32 indexer = 0; | ||
| 385 | u32 shifted_counter = 0; | ||
| 386 | Node value = Immediate(0); | ||
| 387 | for (u32 element = 0; element < 4; ++element) { | ||
| 388 | if (!IsComponentEnabled(comp_mask, element)) { | ||
| 389 | continue; | ||
| 390 | } | ||
| 391 | const auto component_type = GetComponentType(descriptor, element); | ||
| 392 | const auto component_size = GetComponentSize(descriptor.format, element); | ||
| 393 | MetaImage meta{image, {}, element}; | ||
| 394 | |||
| 395 | auto [converted_value, is_signed] = GetComponentValue( | ||
| 396 | component_type, component_size, | ||
| 397 | Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); | ||
| 398 | |||
| 399 | // shift element to correct position | ||
| 400 | const auto shifted = shifted_counter; | ||
| 401 | if (shifted > 0) { | ||
| 402 | converted_value = | ||
| 403 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, | ||
| 404 | std::move(converted_value), Immediate(shifted)); | ||
| 405 | } | ||
| 406 | shifted_counter += component_size; | ||
| 407 | |||
| 408 | // add value into result | ||
| 409 | value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); | ||
| 410 | |||
| 411 | // once a full 32-bit word has been accumulated, save it into a temporary | ||
| 412 | if (shifted_counter >= 32) { | ||
| 413 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 414 | // reset the counter and value to start packing the next word | ||
| 415 | value = Immediate(0); | ||
| 416 | shifted_counter = 0; | ||
| 417 | } | ||
| 418 | } | ||
| 419 | for (u32 i = 0; i < indexer; ++i) { | ||
| 420 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 421 | } | ||
| 422 | break; | ||
| 423 | } | ||
| 424 | default: | ||
| 425 | UNREACHABLE(); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | } | ||
| 429 | break; | ||
| 430 | } | ||
| 431 | case OpCode::Id::SUST: { | ||
| 432 | UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||
| 433 | UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||
| 434 | Tegra::Shader::OutOfBoundsStore::Ignore); | ||
| 435 | UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA | ||
| 436 | |||
| 437 | std::vector<Node> values; | ||
| 438 | constexpr std::size_t hardcoded_size{4}; | ||
| 439 | for (std::size_t i = 0; i < hardcoded_size; ++i) { | ||
| 440 | values.push_back(GetRegister(instr.gpr0.Value() + i)); | ||
| 441 | } | ||
| 442 | |||
| 443 | const auto type{instr.suldst.image_type}; | ||
| 444 | auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) | ||
| 445 | : GetBindlessImage(instr.gpr39, type)}; | ||
| 446 | image.MarkWrite(); | ||
| 447 | |||
| 448 | MetaImage meta{image, std::move(values)}; | ||
| 449 | bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | case OpCode::Id::SUATOM: { | ||
| 453 | UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); | ||
| 454 | |||
| 455 | const OperationCode operation_code = [instr] { | ||
| 456 | switch (instr.suatom_d.operation_type) { | ||
| 457 | case Tegra::Shader::ImageAtomicOperationType::S32: | ||
| 458 | case Tegra::Shader::ImageAtomicOperationType::U32: | ||
| 459 | switch (instr.suatom_d.operation) { | ||
| 460 | case Tegra::Shader::ImageAtomicOperation::Add: | ||
| 461 | return OperationCode::AtomicImageAdd; | ||
| 462 | case Tegra::Shader::ImageAtomicOperation::And: | ||
| 463 | return OperationCode::AtomicImageAnd; | ||
| 464 | case Tegra::Shader::ImageAtomicOperation::Or: | ||
| 465 | return OperationCode::AtomicImageOr; | ||
| 466 | case Tegra::Shader::ImageAtomicOperation::Xor: | ||
| 467 | return OperationCode::AtomicImageXor; | ||
| 468 | case Tegra::Shader::ImageAtomicOperation::Exch: | ||
| 469 | return OperationCode::AtomicImageExchange; | ||
| 470 | default: | ||
| 471 | break; | ||
| 472 | } | ||
| 473 | break; | ||
| 474 | default: | ||
| 475 | break; | ||
| 476 | } | ||
| 477 | UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}", | ||
| 478 | static_cast<u64>(instr.suatom_d.operation.Value()), | ||
| 479 | static_cast<u64>(instr.suatom_d.operation_type.Value())); | ||
| 480 | return OperationCode::AtomicImageAdd; | ||
| 481 | }(); | ||
| 482 | |||
| 483 | Node value = GetRegister(instr.gpr0); | ||
| 484 | |||
| 485 | const auto type = instr.suatom_d.image_type; | ||
| 486 | auto& image = GetImage(instr.image, type); | ||
| 487 | image.MarkAtomic(); | ||
| 488 | |||
| 489 | MetaImage meta{image, {std::move(value)}}; | ||
| 490 | SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); | ||
| 491 | break; | ||
| 492 | } | ||
| 493 | default: | ||
| 494 | UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName()); | ||
| 495 | } | ||
| 496 | |||
| 497 | return pc; | ||
| 498 | } | ||
| 499 | |||
| 500 | ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) { | ||
| 501 | const auto offset = static_cast<u32>(image.index.Value()); | ||
| 502 | |||
| 503 | const auto it = | ||
| 504 | std::find_if(std::begin(used_images), std::end(used_images), | ||
| 505 | [offset](const ImageEntry& entry) { return entry.offset == offset; }); | ||
| 506 | if (it != std::end(used_images)) { | ||
| 507 | ASSERT(!it->is_bindless && it->type == type); | ||
| 508 | return *it; | ||
| 509 | } | ||
| 510 | |||
| 511 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 512 | return used_images.emplace_back(next_index, offset, type); | ||
| 513 | } | ||
| 514 | |||
| 515 | ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { | ||
| 516 | const Node image_register = GetRegister(reg); | ||
| 517 | const auto result = | ||
| 518 | TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); | ||
| 519 | |||
| 520 | const auto buffer = std::get<1>(result); | ||
| 521 | const auto offset = std::get<2>(result); | ||
| 522 | |||
| 523 | const auto it = std::find_if(std::begin(used_images), std::end(used_images), | ||
| 524 | [buffer, offset](const ImageEntry& entry) { | ||
| 525 | return entry.buffer == buffer && entry.offset == offset; | ||
| 526 | }); | ||
| 527 | if (it != std::end(used_images)) { | ||
| 528 | ASSERT(it->is_bindless && it->type == type); | ||
| 529 | return *it; | ||
| 530 | } | ||
| 531 | |||
| 532 | const auto next_index = static_cast<u32>(used_images.size()); | ||
| 533 | return used_images.emplace_back(next_index, offset, buffer, type); | ||
| 534 | } | ||
| 535 | |||
| 536 | } // namespace VideoCommon::Shader | ||
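The SULD D_BA path rebuilds raw texel words by shifting each enabled component to a running bit offset and flushing a destination word whenever 32 bits have accumulated. A scalar sketch of that packing loop, assuming component sizes that sum to a multiple of 32 (PackComponents is a hypothetical helper):

    #include <cstdint>
    #include <utility>
    #include <vector>

    std::vector<uint32_t> PackComponents(
        const std::vector<std::pair<uint32_t, uint32_t>>& components) {
        // Each pair holds (raw component bits, component size in bits).
        std::vector<uint32_t> words;
        uint32_t word = 0;
        uint32_t shifted = 0;
        for (const auto& [value, size] : components) {
            word |= value << shifted; // shift the component to its position
            shifted += size;
            if (shifted >= 32) {      // a full word is ready; flush it
                words.push_back(word);
                word = 0;
                shifted = 0;
            }
        }
        return words;
    }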
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp deleted file mode 100644 index 59809bcd8..000000000 --- a/src/video_core/shader/decode/integer_set.cpp +++ /dev/null | |||
| @@ -1,49 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/common_types.h" | ||
| 6 | #include "video_core/engines/shader_bytecode.h" | ||
| 7 | #include "video_core/shader/node_helper.h" | ||
| 8 | #include "video_core/shader/shader_ir.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | |||
| 12 | using Tegra::Shader::Instruction; | ||
| 13 | using Tegra::Shader::OpCode; | ||
| 14 | |||
| 15 | u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) { | ||
| 16 | const Instruction instr = {program_code[pc]}; | ||
| 17 | |||
| 18 | const Node op_a = GetRegister(instr.gpr8); | ||
| 19 | const Node op_b = [&]() { | ||
| 20 | if (instr.is_b_imm) { | ||
| 21 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 22 | } else if (instr.is_b_gpr) { | ||
| 23 | return GetRegister(instr.gpr20); | ||
| 24 | } else { | ||
| 25 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 26 | } | ||
| 27 | }(); | ||
| 28 | |||
| 29 | // The iset instruction sets the register to float 1.0 (when the bf bit is set) or to integer | ||
| 30 | // -1 (when bf is clear) if the condition is true, and to 0 otherwise. | ||
| 31 | const Node second_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0); | ||
| 32 | const Node first_pred = | ||
| 33 | GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, op_a, op_b); | ||
| 34 | |||
| 35 | const OperationCode combiner = GetPredicateCombiner(instr.iset.op); | ||
| 36 | |||
| 37 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 38 | |||
| 39 | const Node true_value = instr.iset.bf ? Immediate(1.0f) : Immediate(-1); | ||
| 40 | const Node false_value = instr.iset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 41 | const Node value = | ||
| 42 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 43 | |||
| 44 | SetRegister(bb, instr.gpr0, value); | ||
| 45 | |||
| 46 | return pc; | ||
| 47 | } | ||
| 48 | |||
| 49 | } // namespace VideoCommon::Shader | ||
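The immediate form calls GetSignedImm20_20, which by its name sign-extends the instruction's 20-bit immediate field into a 32-bit value; the helper below is an inferred illustration of that behavior, not code recovered from the deleted headers:

    #include <cstdint>

    int32_t SignExtend20(uint32_t imm20) {
        // Move bit 19 into the sign position, then arithmetic-shift back down
        // (well-defined for signed operands since C++20).
        return static_cast<int32_t>(imm20 << 12) >> 12;
    }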
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp deleted file mode 100644 index 25e48fef8..000000000 --- a/src/video_core/shader/decode/integer_set_predicate.cpp +++ /dev/null | |||
| @@ -1,53 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | |||
| 20 | const Node op_a = GetRegister(instr.gpr8); | ||
| 21 | |||
| 22 | const Node op_b = [&]() { | ||
| 23 | if (instr.is_b_imm) { | ||
| 24 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 25 | } else if (instr.is_b_gpr) { | ||
| 26 | return GetRegister(instr.gpr20); | ||
| 27 | } else { | ||
| 28 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 29 | } | ||
| 30 | }(); | ||
| 31 | |||
| 32 | // We can't use the constant predicate as destination. | ||
| 33 | ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 34 | |||
| 35 | const Node second_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0); | ||
| 36 | const Node predicate = | ||
| 37 | GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, op_a, op_b); | ||
| 38 | |||
| 39 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 40 | const OperationCode combiner = GetPredicateCombiner(instr.isetp.op); | ||
| 41 | const Node value = Operation(combiner, predicate, second_pred); | ||
| 42 | SetPredicate(bb, instr.isetp.pred3, value); | ||
| 43 | |||
| 44 | if (instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 45 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if enabled | ||
| 46 | const Node negated_pred = Operation(OperationCode::LogicalNegate, predicate); | ||
| 47 | SetPredicate(bb, instr.isetp.pred0, Operation(combiner, negated_pred, second_pred)); | ||
| 48 | } | ||
| 49 | |||
| 50 | return pc; | ||
| 51 | } | ||
| 52 | |||
| 53 | } // namespace VideoCommon::Shader | ||
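GetPredicateCombiner, shared by these set/setp decoders, is assumed to map the instruction's predicate operation onto a logical AND, OR, or XOR of its two inputs. A hypothetical stand-in with an abbreviated enum:

    enum class PredOp { And, Or, Xor };

    bool CombinePredicates(PredOp op, bool a, bool b) {
        switch (op) {
        case PredOp::And:
            return a && b;
        case PredOp::Or:
            return a || b;
        case PredOp::Xor:
            return a != b;
        }
        return false;
    }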
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp deleted file mode 100644 index 7728f600e..000000000 --- a/src/video_core/shader/decode/memory.cpp +++ /dev/null | |||
| @@ -1,493 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <fmt/format.h> | ||
| 10 | |||
| 11 | #include "common/alignment.h" | ||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/logging/log.h" | ||
| 15 | #include "video_core/engines/shader_bytecode.h" | ||
| 16 | #include "video_core/shader/node_helper.h" | ||
| 17 | #include "video_core/shader/shader_ir.h" | ||
| 18 | |||
| 19 | namespace VideoCommon::Shader { | ||
| 20 | |||
| 21 | using std::move; | ||
| 22 | using Tegra::Shader::AtomicOp; | ||
| 23 | using Tegra::Shader::AtomicType; | ||
| 24 | using Tegra::Shader::Attribute; | ||
| 25 | using Tegra::Shader::GlobalAtomicType; | ||
| 26 | using Tegra::Shader::Instruction; | ||
| 27 | using Tegra::Shader::OpCode; | ||
| 28 | using Tegra::Shader::Register; | ||
| 29 | using Tegra::Shader::StoreType; | ||
| 30 | |||
| 31 | namespace { | ||
| 32 | |||
| 33 | OperationCode GetAtomOperation(AtomicOp op) { | ||
| 34 | switch (op) { | ||
| 35 | case AtomicOp::Add: | ||
| 36 | return OperationCode::AtomicIAdd; | ||
| 37 | case AtomicOp::Min: | ||
| 38 | return OperationCode::AtomicIMin; | ||
| 39 | case AtomicOp::Max: | ||
| 40 | return OperationCode::AtomicIMax; | ||
| 41 | case AtomicOp::And: | ||
| 42 | return OperationCode::AtomicIAnd; | ||
| 43 | case AtomicOp::Or: | ||
| 44 | return OperationCode::AtomicIOr; | ||
| 45 | case AtomicOp::Xor: | ||
| 46 | return OperationCode::AtomicIXor; | ||
| 47 | case AtomicOp::Exch: | ||
| 48 | return OperationCode::AtomicIExchange; | ||
| 49 | default: | ||
| 50 | UNIMPLEMENTED_MSG("op={}", op); | ||
| 51 | return OperationCode::AtomicIAdd; | ||
| 52 | } | ||
| 53 | } | ||
| 54 | |||
| 55 | bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { | ||
| 56 | return uniform_type == Tegra::Shader::UniformType::UnsignedByte || | ||
| 57 | uniform_type == Tegra::Shader::UniformType::UnsignedShort; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { | ||
| 61 | switch (uniform_type) { | ||
| 62 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 63 | return 0b11; | ||
| 64 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 65 | return 0b10; | ||
| 66 | default: | ||
| 67 | UNREACHABLE(); | ||
| 68 | return 0; | ||
| 69 | } | ||
| 70 | } | ||
| 71 | |||
| 72 | u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | ||
| 73 | switch (uniform_type) { | ||
| 74 | case Tegra::Shader::UniformType::UnsignedByte: | ||
| 75 | return 8; | ||
| 76 | case Tegra::Shader::UniformType::UnsignedShort: | ||
| 77 | return 16; | ||
| 78 | case Tegra::Shader::UniformType::Single: | ||
| 79 | return 32; | ||
| 80 | case Tegra::Shader::UniformType::Double: | ||
| 81 | return 64; | ||
| 82 | case Tegra::Shader::UniformType::Quad: | ||
| 83 | case Tegra::Shader::UniformType::UnsignedQuad: | ||
| 84 | return 128; | ||
| 85 | default: | ||
| 86 | UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type); | ||
| 87 | return 32; | ||
| 88 | } | ||
| 89 | } | ||
| 90 | |||
| 91 | Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | ||
| 92 | Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | ||
| 93 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 94 | return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); | ||
| 95 | } | ||
| 96 | |||
| 97 | Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | ||
| 98 | Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); | ||
| 99 | offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||
| 100 | return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), | ||
| 101 | Immediate(size)); | ||
| 102 | } | ||
| 103 | |||
| 104 | Node Sign16Extend(Node value) { | ||
| 105 | Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | ||
| 106 | Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); | ||
| 107 | Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | ||
| 108 | return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); | ||
| 109 | } | ||
| 110 | |||
| 111 | } // Anonymous namespace | ||
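    // Illustrative host-side sketch of ExtractUnaligned's semantics, not from
    // the deleted source: the low address bits give a byte offset inside the
    // loaded word, which becomes the bit offset of the extracted field. Uses
    // the u32 alias from common/common_types.h included above.
    [[maybe_unused]] static u32 ExtractUnalignedScalar(u32 value, u32 address, u32 mask,
                                                       u32 size) {
        const u32 bit_offset = (address & mask) * 8; // bytes -> bits
        if (size >= 32) {
            return value >> bit_offset;
        }
        return (value >> bit_offset) & ((1u << size) - 1);
    }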
| 112 | |||
| 113 | u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | ||
| 114 | const Instruction instr = {program_code[pc]}; | ||
| 115 | const auto opcode = OpCode::Decode(instr); | ||
| 116 | |||
| 117 | switch (opcode->get().GetId()) { | ||
| 118 | case OpCode::Id::LD_A: { | ||
| 119 | // Note: shouldn't this use flat interpolation mode, i.e. no interpolation applied? | ||
| 120 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 121 | "Indirect attribute loads are not supported"); | ||
| 122 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 123 | "Unaligned attribute loads are not supported"); | ||
| 124 | UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() && | ||
| 125 | instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word, | ||
| 126 | "Non-32 bits PHYS reads are not implemented"); | ||
| 127 | |||
| 128 | const Node buffer{GetRegister(instr.gpr39)}; | ||
| 129 | |||
| 130 | u64 next_element = instr.attribute.fmt20.element; | ||
| 131 | auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 132 | |||
| 133 | const auto LoadNextElement = [&](u32 reg_offset) { | ||
| 134 | const Node attribute{instr.attribute.fmt20.IsPhysical() | ||
| 135 | ? GetPhysicalInputAttribute(instr.gpr8, buffer) | ||
| 136 | : GetInputAttribute(static_cast<Attribute::Index>(next_index), | ||
| 137 | next_element, buffer)}; | ||
| 138 | |||
| 139 | SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute); | ||
| 140 | |||
| 141 | // Load the next attribute element into the following register. If the element | ||
| 142 | // to load goes beyond the vec4 size, load the first element of the next | ||
| 143 | // attribute. | ||
| 144 | next_element = (next_element + 1) % 4; | ||
| 145 | next_index = next_index + (next_element == 0 ? 1 : 0); | ||
| 146 | }; | ||
| 147 | |||
| 148 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 149 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 150 | LoadNextElement(reg_offset); | ||
| 151 | } | ||
| 152 | break; | ||
| 153 | } | ||
| 154 | case OpCode::Id::LD_C: { | ||
| 155 | UNIMPLEMENTED_IF(instr.ld_c.unknown != 0); | ||
| 156 | |||
| 157 | Node index = GetRegister(instr.gpr8); | ||
| 158 | |||
| 159 | const Node op_a = | ||
| 160 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 161 | |||
| 162 | switch (instr.ld_c.type.Value()) { | ||
| 163 | case Tegra::Shader::UniformType::Single: | ||
| 164 | SetRegister(bb, instr.gpr0, op_a); | ||
| 165 | break; | ||
| 166 | |||
| 167 | case Tegra::Shader::UniformType::Double: { | ||
| 168 | const Node op_b = | ||
| 169 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index); | ||
| 170 | |||
| 171 | SetTemporary(bb, 0, op_a); | ||
| 172 | SetTemporary(bb, 1, op_b); | ||
| 173 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 174 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1)); | ||
| 175 | break; | ||
| 176 | } | ||
| 177 | default: | ||
| 178 | UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value()); | ||
| 179 | } | ||
| 180 | break; | ||
| 181 | } | ||
| 182 | case OpCode::Id::LD_L: | ||
| 183 | LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown); | ||
| 184 | [[fallthrough]]; | ||
| 185 | case OpCode::Id::LD_S: { | ||
| 186 | const auto GetAddress = [&](s32 offset) { | ||
| 187 | ASSERT(offset % 4 == 0); | ||
| 188 | const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 189 | return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); | ||
| 190 | }; | ||
| 191 | const auto GetMemory = [&](s32 offset) { | ||
| 192 | return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) | ||
| 193 | : GetLocalMemory(GetAddress(offset)); | ||
| 194 | }; | ||
| 195 | |||
| 196 | switch (instr.ldst_sl.type.Value()) { | ||
| 197 | case StoreType::Signed16: | ||
| 198 | SetRegister(bb, instr.gpr0, | ||
| 199 | Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); | ||
| 200 | break; | ||
| 201 | case StoreType::Bits32: | ||
| 202 | case StoreType::Bits64: | ||
| 203 | case StoreType::Bits128: { | ||
| 204 | const u32 count = [&] { | ||
| 205 | switch (instr.ldst_sl.type.Value()) { | ||
| 206 | case StoreType::Bits32: | ||
| 207 | return 1; | ||
| 208 | case StoreType::Bits64: | ||
| 209 | return 2; | ||
| 210 | case StoreType::Bits128: | ||
| 211 | return 4; | ||
| 212 | default: | ||
| 213 | UNREACHABLE(); | ||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | }(); | ||
| 217 | for (u32 i = 0; i < count; ++i) { | ||
| 218 | SetTemporary(bb, i, GetMemory(i * 4)); | ||
| 219 | } | ||
| 220 | for (u32 i = 0; i < count; ++i) { | ||
| 221 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 222 | } | ||
| 223 | break; | ||
| 224 | } | ||
| 225 | default: | ||
| 226 | UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(), | ||
| 227 | instr.ldst_sl.type.Value()); | ||
| 228 | } | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | case OpCode::Id::LD: | ||
| 232 | case OpCode::Id::LDG: { | ||
| 233 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 234 | switch (opcode->get().GetId()) { | ||
| 235 | case OpCode::Id::LD: | ||
| 236 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); | ||
| 237 | return instr.generic.type; | ||
| 238 | case OpCode::Id::LDG: | ||
| 239 | return instr.ldg.type; | ||
| 240 | default: | ||
| 241 | UNREACHABLE(); | ||
| 242 | return {}; | ||
| 243 | } | ||
| 244 | }(); | ||
| 245 | |||
| 246 | const auto [real_address_base, base_address, descriptor] = | ||
| 247 | TrackGlobalMemory(bb, instr, true, false); | ||
| 248 | |||
| 249 | const u32 size = GetMemorySize(type); | ||
| 250 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 251 | if (!real_address_base || !base_address) { | ||
| 252 | // Tracking failed, load zeroes. | ||
| 253 | for (u32 i = 0; i < count; ++i) { | ||
| 254 | SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f)); | ||
| 255 | } | ||
| 256 | break; | ||
| 257 | } | ||
| 258 | |||
| 259 | for (u32 i = 0; i < count; ++i) { | ||
| 260 | const Node it_offset = Immediate(i * 4); | ||
| 261 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 262 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 263 | |||
| 264 | // To handle unaligned loads, get the bytes used to dereference global memory | ||
| 265 | // and extract those bytes from the loaded u32. | ||
| 266 | if (IsUnaligned(type)) { | ||
| 267 | gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); | ||
| 268 | } | ||
| 269 | |||
| 270 | SetTemporary(bb, i, gmem); | ||
| 271 | } | ||
| 272 | |||
| 273 | for (u32 i = 0; i < count; ++i) { | ||
| 274 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 275 | } | ||
| 276 | break; | ||
| 277 | } | ||
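
The LD/LDG path above loads `Common::AlignUp(size, 32) / 32` consecutive 32-bit words and, for 8- and 16-bit types, extracts the addressed sub-word from the loaded u32. A minimal host-side sketch of that arithmetic; the names are illustrative, and `ExtractUnaligned` here only models the role of the helper of the same name, not its actual implementation:

```cpp
#include <cstdint>

// Registers needed for a load of `size_in_bits` (8/16-bit types still
// occupy one register; 64-bit types two; 128-bit types four).
constexpr uint32_t WordCount(uint32_t size_in_bits) {
    return (size_in_bits + 31) / 32; // same as Common::AlignUp(size, 32) / 32
}

// Pick the addressed byte/halfword out of an aligned 32-bit load.
// `mask` selects the address bits that index the sub-word: 0b11 for
// bytes, 0b10 for halfwords, matching the role of GetUnalignedMask.
constexpr uint32_t ExtractUnaligned(uint32_t loaded, uint32_t address,
                                    uint32_t mask, uint32_t size_in_bits) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t value_mask =
        size_in_bits >= 32 ? ~0u : (1u << size_in_bits) - 1u;
    return (loaded >> bit_offset) & value_mask;
}

static_assert(WordCount(16) == 1 && WordCount(64) == 2 && WordCount(128) == 4);
static_assert(ExtractUnaligned(0xAABBCCDDu, 2, 0b10, 16) == 0xAABBu);
```
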
| 278 | case OpCode::Id::ST_A: { | ||
| 279 | UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, | ||
| 280 | "Indirect attribute loads are not supported"); | ||
| 281 | UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0, | ||
| 282 | "Unaligned attribute loads are not supported"); | ||
| 283 | |||
| 284 | u64 element = instr.attribute.fmt20.element; | ||
| 285 | auto index = static_cast<u64>(instr.attribute.fmt20.index.Value()); | ||
| 286 | |||
| 287 | const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1; | ||
| 288 | for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) { | ||
| 289 | Node dest; | ||
| 290 | if (instr.attribute.fmt20.patch) { | ||
| 291 | const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element); | ||
| 292 | dest = MakeNode<PatchNode>(offset); | ||
| 293 | } else { | ||
| 294 | dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element, | ||
| 295 | GetRegister(instr.gpr39)); | ||
| 296 | } | ||
| 297 | const auto src = GetRegister(instr.gpr0.Value() + reg_offset); | ||
| 298 | |||
| 299 | bb.push_back(Operation(OperationCode::Assign, dest, src)); | ||
| 300 | |||
| 301 | // Store the next register into the following attribute element. If the element to | ||
| 302 | // store goes beyond the vec4 size, move on to the first element of the next attribute. | ||
| 303 | element = (element + 1) % 4; | ||
| 304 | index = index + (element == 0 ? 1 : 0); | ||
| 305 | } | ||
| 306 | break; | ||
| 307 | } | ||
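
The element/index bookkeeping at the end of the ST_A loop walks output attributes as packed vec4s. A tiny sketch of just that advance rule, with illustrative standalone types rather than yuzu's:

```cpp
#include <cstdint>

// Attribute outputs are vec4s: after element 3 of one attribute index,
// the next register maps to element 0 of the following index.
struct AttributeCursor {
    uint64_t index;
    uint64_t element;
};

constexpr AttributeCursor Advance(AttributeCursor cursor) {
    cursor.element = (cursor.element + 1) % 4;
    if (cursor.element == 0) {
        ++cursor.index;
    }
    return cursor;
}

static_assert(Advance({5, 3}).index == 6 && Advance({5, 3}).element == 0);
```
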
| 308 | case OpCode::Id::ST_L: | ||
| 309 | LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value()); | ||
| 310 | [[fallthrough]]; | ||
| 311 | case OpCode::Id::ST_S: { | ||
| 312 | const auto GetAddress = [&](s32 offset) { | ||
| 313 | ASSERT(offset % 4 == 0); | ||
| 314 | const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset); | ||
| 315 | return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); | ||
| 316 | }; | ||
| 317 | |||
| 318 | const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; | ||
| 319 | const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; | ||
| 320 | const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; | ||
| 321 | |||
| 322 | switch (instr.ldst_sl.type.Value()) { | ||
| 323 | case StoreType::Bits128: | ||
| 324 | (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); | ||
| 325 | (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); | ||
| 326 | [[fallthrough]]; | ||
| 327 | case StoreType::Bits64: | ||
| 328 | (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); | ||
| 329 | [[fallthrough]]; | ||
| 330 | case StoreType::Bits32: | ||
| 331 | (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); | ||
| 332 | break; | ||
| 333 | case StoreType::Unsigned16: | ||
| 334 | case StoreType::Signed16: { | ||
| 335 | Node address = GetAddress(0); | ||
| 336 | Node memory = (this->*get_memory)(address); | ||
| 337 | (this->*set_memory)( | ||
| 338 | bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); | ||
| 339 | break; | ||
| 340 | } | ||
| 341 | default: | ||
| 342 | UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), | ||
| 343 | instr.ldst_sl.type.Value()); | ||
| 344 | } | ||
| 345 | break; | ||
| 346 | } | ||
| 347 | case OpCode::Id::ST: | ||
| 348 | case OpCode::Id::STG: { | ||
| 349 | const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { | ||
| 350 | switch (opcode->get().GetId()) { | ||
| 351 | case OpCode::Id::ST: | ||
| 352 | UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); | ||
| 353 | return instr.generic.type; | ||
| 354 | case OpCode::Id::STG: | ||
| 355 | return instr.stg.type; | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | return {}; | ||
| 359 | } | ||
| 360 | }(); | ||
| 361 | |||
| 362 | // Unaligned stores are a read-modify-write, so mark the memory as read too. | ||
| 363 | const bool is_read = IsUnaligned(type); | ||
| 364 | const auto [real_address_base, base_address, descriptor] = | ||
| 365 | TrackGlobalMemory(bb, instr, is_read, true); | ||
| 366 | if (!real_address_base || !base_address) { | ||
| 367 | // Tracking failed, skip the store. | ||
| 368 | break; | ||
| 369 | } | ||
| 370 | |||
| 371 | const u32 size = GetMemorySize(type); | ||
| 372 | const u32 count = Common::AlignUp(size, 32) / 32; | ||
| 373 | for (u32 i = 0; i < count; ++i) { | ||
| 374 | const Node it_offset = Immediate(i * 4); | ||
| 375 | const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); | ||
| 376 | const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 377 | Node value = GetRegister(instr.gpr0.Value() + i); | ||
| 378 | |||
| 379 | if (IsUnaligned(type)) { | ||
| 380 | const u32 mask = GetUnalignedMask(type); | ||
| 381 | value = InsertUnaligned(gmem, move(value), real_address, mask, size); | ||
| 382 | } | ||
| 383 | |||
| 384 | bb.push_back(Operation(OperationCode::Assign, gmem, value)); | ||
| 385 | } | ||
| 386 | break; | ||
| 387 | } | ||
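
Unaligned stores above are lowered as a read-modify-write: the containing 32-bit word is loaded, the new bytes are spliced in, and the word is written back. A host-side model of that splice, mirroring the role of `InsertUnaligned` with illustrative names rather than the real helper's implementation:

```cpp
#include <cstdint>

// Replace the addressed byte/halfword inside an aligned 32-bit word.
// `mask` selects the address bits that locate the sub-word, as with
// GetUnalignedMask above; `size_in_bits` is 8 or 16.
constexpr uint32_t InsertUnaligned(uint32_t word, uint32_t value,
                                   uint32_t address, uint32_t mask,
                                   uint32_t size_in_bits) {
    const uint32_t bit_offset = (address & mask) * 8;
    const uint32_t value_mask = ((1u << size_in_bits) - 1u) << bit_offset;
    return (word & ~value_mask) | ((value << bit_offset) & value_mask);
}

// Storing 0x1234 to the upper halfword of 0xAABBCCDD yields 0x1234CCDD.
static_assert(InsertUnaligned(0xAABBCCDDu, 0x1234u, 2, 0b10, 16) == 0x1234CCDDu);
```
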
| 388 | case OpCode::Id::RED: { | ||
| 389 | UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", | ||
| 390 | instr.red.type.Value()); | ||
| 391 | const auto [real_address, base_address, descriptor] = | ||
| 392 | TrackGlobalMemory(bb, instr, true, true); | ||
| 393 | if (!real_address || !base_address) { | ||
| 394 | // Tracking failed, skip atomic. | ||
| 395 | break; | ||
| 396 | } | ||
| 397 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 398 | Node value = GetRegister(instr.gpr0); | ||
| 399 | bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value))); | ||
| 400 | break; | ||
| 401 | } | ||
| 402 | case OpCode::Id::ATOM: { | ||
| 403 | UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || | ||
| 404 | instr.atom.operation == AtomicOp::Dec || | ||
| 405 | instr.atom.operation == AtomicOp::SafeAdd, | ||
| 406 | "operation={}", instr.atom.operation.Value()); | ||
| 407 | UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || | ||
| 408 | instr.atom.type == GlobalAtomicType::U64 || | ||
| 409 | instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || | ||
| 410 | instr.atom.type == GlobalAtomicType::F32_FTZ_RN, | ||
| 411 | "type={}", instr.atom.type.Value()); | ||
| 412 | |||
| 413 | const auto [real_address, base_address, descriptor] = | ||
| 414 | TrackGlobalMemory(bb, instr, true, true); | ||
| 415 | if (!real_address || !base_address) { | ||
| 416 | // Tracking failed, skip atomic. | ||
| 417 | break; | ||
| 418 | } | ||
| 419 | |||
| 420 | const bool is_signed = | ||
| 421 | instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; | ||
| 422 | Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||
| 423 | SetRegister(bb, instr.gpr0, | ||
| 424 | SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, | ||
| 425 | GetRegister(instr.gpr20))); | ||
| 426 | break; | ||
| 427 | } | ||
| 428 | case OpCode::Id::ATOMS: { | ||
| 429 | UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc || | ||
| 430 | instr.atoms.operation == AtomicOp::Dec, | ||
| 431 | "operation={}", instr.atoms.operation.Value()); | ||
| 432 | UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 || | ||
| 433 | instr.atoms.type == AtomicType::U64, | ||
| 434 | "type={}", instr.atoms.type.Value()); | ||
| 435 | const bool is_signed = | ||
| 436 | instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | ||
| 437 | const s32 offset = instr.atoms.GetImmediateOffset(); | ||
| 438 | Node address = GetRegister(instr.gpr8); | ||
| 439 | address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); | ||
| 440 | SetRegister(bb, instr.gpr0, | ||
| 441 | SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, | ||
| 442 | GetSharedMemory(move(address)), GetRegister(instr.gpr20))); | ||
| 443 | break; | ||
| 444 | } | ||
| 445 | case OpCode::Id::AL2P: { | ||
| 446 | // Ignore al2p.direction; it is not needed for this implementation. | ||
| 447 | |||
| 448 | // Calculate emulation fake physical address. | ||
| 449 | const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))}; | ||
| 450 | const Node reg{GetRegister(instr.gpr8)}; | ||
| 451 | const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)}; | ||
| 452 | |||
| 453 | // Set the fake address to target register. | ||
| 454 | SetRegister(bb, instr.gpr0, fake_address); | ||
| 455 | |||
| 456 | // Signal the shader IR to declare all possible attributes and varyings | ||
| 457 | uses_physical_attributes = true; | ||
| 458 | break; | ||
| 459 | } | ||
| 460 | default: | ||
| 461 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 462 | } | ||
| 463 | |||
| 464 | return pc; | ||
| 465 | } | ||
| 466 | |||
| 467 | std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, | ||
| 468 | Instruction instr, | ||
| 469 | bool is_read, bool is_write) { | ||
| 470 | const auto addr_register{GetRegister(instr.gmem.gpr)}; | ||
| 471 | const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; | ||
| 472 | |||
| 473 | const auto [base_address, index, offset] = | ||
| 474 | TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size())); | ||
| 475 | ASSERT_OR_EXECUTE_MSG( | ||
| 476 | base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); }, | ||
| 477 | "Global memory tracking failed"); | ||
| 478 | |||
| 479 | bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset))); | ||
| 480 | |||
| 481 | const GlobalMemoryBase descriptor{index, offset}; | ||
| 482 | const auto& entry = used_global_memory.try_emplace(descriptor).first; | ||
| 483 | auto& usage = entry->second; | ||
| 484 | usage.is_written |= is_write; | ||
| 485 | usage.is_read |= is_read; | ||
| 486 | |||
| 487 | const auto real_address = | ||
| 488 | Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); | ||
| 489 | |||
| 490 | return {real_address, base_address, descriptor}; | ||
| 491 | } | ||
| 492 | |||
| 493 | } // namespace VideoCommon::Shader | ||
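
`TrackGlobalMemory` folds every global access onto one descriptor per constant-buffer base and OR-merges the read/write usage across all instructions that reach it. A compact model of that bookkeeping, using standalone illustrative types in place of yuzu's:

```cpp
#include <compare>
#include <cstdint>
#include <map>

struct GlobalMemoryBase {
    uint32_t cbuf_index;
    uint32_t cbuf_offset;
    auto operator<=>(const GlobalMemoryBase&) const = default;
};

struct GlobalMemoryUsage {
    bool is_read = false;
    bool is_written = false;
};

// Every LD/ST/ATOM tracked to the same cbuf base shares one entry whose
// flags accumulate, like the used_global_memory.try_emplace call above.
void RecordUsage(std::map<GlobalMemoryBase, GlobalMemoryUsage>& used,
                 const GlobalMemoryBase& base, bool is_read, bool is_write) {
    auto& usage = used.try_emplace(base).first->second;
    usage.is_read |= is_read;
    usage.is_written |= is_write;
}
```
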
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp deleted file mode 100644 index 5f88537bc..000000000 --- a/src/video_core/shader/decode/other.cpp +++ /dev/null | |||
| @@ -1,322 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "common/logging/log.h" | ||
| 8 | #include "video_core/engines/shader_bytecode.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using std::move; | ||
| 15 | using Tegra::Shader::ConditionCode; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::IpaInterpMode; | ||
| 18 | using Tegra::Shader::OpCode; | ||
| 19 | using Tegra::Shader::PixelImap; | ||
| 20 | using Tegra::Shader::Register; | ||
| 21 | using Tegra::Shader::SystemVariable; | ||
| 22 | |||
| 23 | using Index = Tegra::Shader::Attribute::Index; | ||
| 24 | |||
| 25 | u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | ||
| 26 | const Instruction instr = {program_code[pc]}; | ||
| 27 | const auto opcode = OpCode::Decode(instr); | ||
| 28 | |||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::NOP: { | ||
| 31 | UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T); | ||
| 32 | UNIMPLEMENTED_IF(instr.nop.trigger != 0); | ||
| 33 | // With the previous preconditions, this instruction is a no-operation. | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | case OpCode::Id::EXIT: { | ||
| 37 | const ConditionCode cc = instr.flow_condition_code; | ||
| 38 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc); | ||
| 39 | |||
| 40 | switch (instr.flow.cond) { | ||
| 41 | case Tegra::Shader::FlowCondition::Always: | ||
| 42 | bb.push_back(Operation(OperationCode::Exit)); | ||
| 43 | if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) { | ||
| 44 | // If this is an unconditional exit then just end processing here, | ||
| 45 | // otherwise we have to account for the possibility of the condition | ||
| 46 | // not being met, so continue processing the next instruction. | ||
| 47 | pc = MAX_PROGRAM_LENGTH - 1; | ||
| 48 | } | ||
| 49 | break; | ||
| 50 | |||
| 51 | case Tegra::Shader::FlowCondition::Fcsm_Tr: | ||
| 52 | // TODO(bunnei): What is this used for? If we assume this condition is not | ||
| 53 | // satisfied, dual vertex shaders in Farming Simulator make more sense | ||
| 54 | UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr"); | ||
| 55 | break; | ||
| 56 | |||
| 57 | default: | ||
| 58 | UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value()); | ||
| 59 | } | ||
| 60 | break; | ||
| 61 | } | ||
| 62 | case OpCode::Id::KIL: { | ||
| 63 | UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always); | ||
| 64 | |||
| 65 | const ConditionCode cc = instr.flow_condition_code; | ||
| 66 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc); | ||
| 67 | |||
| 68 | bb.push_back(Operation(OperationCode::Discard)); | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | case OpCode::Id::S2R: { | ||
| 72 | const Node value = [this, instr] { | ||
| 73 | switch (instr.sys20) { | ||
| 74 | case SystemVariable::LaneId: | ||
| 75 | return Operation(OperationCode::ThreadId); | ||
| 76 | case SystemVariable::InvocationId: | ||
| 77 | return Operation(OperationCode::InvocationId); | ||
| 78 | case SystemVariable::Ydirection: | ||
| 79 | uses_y_negate = true; | ||
| 80 | return Operation(OperationCode::YNegate); | ||
| 81 | case SystemVariable::InvocationInfo: | ||
| 82 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | ||
| 83 | return Immediate(0x00ff'0000U); | ||
| 84 | case SystemVariable::WscaleFactorXY: | ||
| 85 | UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented"); | ||
| 86 | return Immediate(0U); | ||
| 87 | case SystemVariable::WscaleFactorZ: | ||
| 88 | UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented"); | ||
| 89 | return Immediate(0U); | ||
| 90 | case SystemVariable::Tid: { | ||
| 91 | Node val = Immediate(0); | ||
| 92 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9); | ||
| 93 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9); | ||
| 94 | val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5); | ||
| 95 | return val; | ||
| 96 | } | ||
| 97 | case SystemVariable::TidX: | ||
| 98 | return Operation(OperationCode::LocalInvocationIdX); | ||
| 99 | case SystemVariable::TidY: | ||
| 100 | return Operation(OperationCode::LocalInvocationIdY); | ||
| 101 | case SystemVariable::TidZ: | ||
| 102 | return Operation(OperationCode::LocalInvocationIdZ); | ||
| 103 | case SystemVariable::CtaIdX: | ||
| 104 | return Operation(OperationCode::WorkGroupIdX); | ||
| 105 | case SystemVariable::CtaIdY: | ||
| 106 | return Operation(OperationCode::WorkGroupIdY); | ||
| 107 | case SystemVariable::CtaIdZ: | ||
| 108 | return Operation(OperationCode::WorkGroupIdZ); | ||
| 109 | case SystemVariable::EqMask: | ||
| 110 | case SystemVariable::LtMask: | ||
| 111 | case SystemVariable::LeMask: | ||
| 112 | case SystemVariable::GtMask: | ||
| 113 | case SystemVariable::GeMask: | ||
| 114 | uses_warps = true; | ||
| 115 | switch (instr.sys20) { | ||
| 116 | case SystemVariable::EqMask: | ||
| 117 | return Operation(OperationCode::ThreadEqMask); | ||
| 118 | case SystemVariable::LtMask: | ||
| 119 | return Operation(OperationCode::ThreadLtMask); | ||
| 120 | case SystemVariable::LeMask: | ||
| 121 | return Operation(OperationCode::ThreadLeMask); | ||
| 122 | case SystemVariable::GtMask: | ||
| 123 | return Operation(OperationCode::ThreadGtMask); | ||
| 124 | case SystemVariable::GeMask: | ||
| 125 | return Operation(OperationCode::ThreadGeMask); | ||
| 126 | default: | ||
| 127 | UNREACHABLE(); | ||
| 128 | return Immediate(0u); | ||
| 129 | } | ||
| 130 | default: | ||
| 131 | UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value()); | ||
| 132 | return Immediate(0u); | ||
| 133 | } | ||
| 134 | }(); | ||
| 135 | SetRegister(bb, instr.gpr0, value); | ||
| 136 | |||
| 137 | break; | ||
| 138 | } | ||
| 139 | case OpCode::Id::BRA: { | ||
| 140 | Node branch; | ||
| 141 | if (instr.bra.constant_buffer == 0) { | ||
| 142 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 143 | branch = Operation(OperationCode::Branch, Immediate(target)); | ||
| 144 | } else { | ||
| 145 | const u32 target = pc + 1; | ||
| 146 | const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset()); | ||
| 147 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 148 | PRECISE, op_a, Immediate(3)); | ||
| 149 | const Node operand = | ||
| 150 | Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 151 | branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::ConditionCode cc = instr.flow_condition_code; | ||
| 155 | if (cc != Tegra::Shader::ConditionCode::T) { | ||
| 156 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 157 | } else { | ||
| 158 | bb.push_back(branch); | ||
| 159 | } | ||
| 160 | break; | ||
| 161 | } | ||
| 162 | case OpCode::Id::BRX: { | ||
| 163 | Node operand; | ||
| 164 | if (instr.brx.constant_buffer != 0) { | ||
| 165 | const s32 target = pc + 1; | ||
| 166 | const Node index = GetRegister(instr.gpr8); | ||
| 167 | const Node op_a = | ||
| 168 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index); | ||
| 169 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 170 | PRECISE, op_a, Immediate(3)); | ||
| 171 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 172 | } else { | ||
| 173 | const s32 target = pc + instr.brx.GetBranchExtend(); | ||
| 174 | const Node op_a = GetRegister(instr.gpr8); | ||
| 175 | const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true, | ||
| 176 | PRECISE, op_a, Immediate(3)); | ||
| 177 | operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target)); | ||
| 178 | } | ||
| 179 | const Node branch = Operation(OperationCode::BranchIndirect, operand); | ||
| 180 | |||
| 181 | const ConditionCode cc = instr.flow_condition_code; | ||
| 182 | if (cc != ConditionCode::T) { | ||
| 183 | bb.push_back(Conditional(GetConditionCode(cc), {branch})); | ||
| 184 | } else { | ||
| 185 | bb.push_back(branch); | ||
| 186 | } | ||
| 187 | break; | ||
| 188 | } | ||
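
Both the constant-buffer form of BRA and BRX compute their indirect target the same way: the operand is arithmetically shifted right by 3, presumably converting a byte offset over 8-byte instructions into an instruction index, then added to a base pc. As a plain-arithmetic sketch of that computation:

```cpp
#include <cstdint>

// Branch operands hold byte offsets; instructions are 8 bytes wide, so
// an arithmetic shift right by 3 converts them to instruction indices.
constexpr int32_t IndirectBranchTarget(int32_t operand, int32_t base_pc) {
    return (operand >> 3) + base_pc;
}

static_assert(IndirectBranchTarget(0x40, 10) == 18); // 8 instructions ahead
```
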
| 189 | case OpCode::Id::SSY: { | ||
| 190 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 191 | "Constant buffer flow is not supported"); | ||
| 192 | |||
| 193 | if (disable_flow_stack) { | ||
| 194 | break; | ||
| 195 | } | ||
| 196 | |||
| 197 | // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC. | ||
| 198 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 199 | bb.push_back( | ||
| 200 | Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); | ||
| 201 | break; | ||
| 202 | } | ||
| 203 | case OpCode::Id::PBK: { | ||
| 204 | UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||
| 205 | "Constant buffer PBK is not supported"); | ||
| 206 | |||
| 207 | if (disable_flow_stack) { | ||
| 208 | break; | ||
| 209 | } | ||
| 210 | |||
| 211 | // PBK pushes to a stack the address where BRK will jump to. | ||
| 212 | const u32 target = pc + instr.bra.GetBranchTarget(); | ||
| 213 | bb.push_back( | ||
| 214 | Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); | ||
| 215 | break; | ||
| 216 | } | ||
| 217 | case OpCode::Id::SYNC: { | ||
| 218 | const ConditionCode cc = instr.flow_condition_code; | ||
| 219 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc); | ||
| 220 | |||
| 221 | if (decompiled) { | ||
| 222 | break; | ||
| 223 | } | ||
| 224 | |||
| 225 | // The SYNC opcode jumps to the address previously set by the SSY opcode | ||
| 226 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | case OpCode::Id::BRK: { | ||
| 230 | const ConditionCode cc = instr.flow_condition_code; | ||
| 231 | UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc); | ||
| 232 | if (decompiled) { | ||
| 233 | break; | ||
| 234 | } | ||
| 235 | |||
| 236 | // The BRK opcode jumps to the address previously set by the PBK opcode | ||
| 237 | bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||
| 238 | break; | ||
| 239 | } | ||
| 240 | case OpCode::Id::IPA: { | ||
| 241 | const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff; | ||
| 242 | const auto attribute = instr.attribute.fmt28; | ||
| 243 | const Index index = attribute.index; | ||
| 244 | |||
| 245 | Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8) | ||
| 246 | : GetInputAttribute(index, attribute.element); | ||
| 247 | |||
| 248 | // Code taken from Ryujinx. | ||
| 249 | if (index >= Index::Attribute_0 && index <= Index::Attribute_31) { | ||
| 250 | const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0); | ||
| 251 | if (header.ps.GetPixelImap(location) == PixelImap::Perspective) { | ||
| 252 | Node position_w = GetInputAttribute(Index::Position, 3); | ||
| 253 | value = Operation(OperationCode::FMul, move(value), move(position_w)); | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | if (instr.ipa.interp_mode == IpaInterpMode::Multiply) { | ||
| 258 | value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20)); | ||
| 259 | } | ||
| 260 | |||
| 261 | value = GetSaturatedFloat(move(value), instr.ipa.saturate); | ||
| 262 | |||
| 263 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 264 | break; | ||
| 265 | } | ||
| 266 | case OpCode::Id::OUT_R: { | ||
| 267 | UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex, | ||
| 268 | "Stream buffer is not supported"); | ||
| 269 | |||
| 270 | if (instr.out.emit) { | ||
| 271 | // gpr0 is used to store the next address and gpr8 contains the address to emit. | ||
| 272 | // Hardware uses pointers here, but we just ignore them. | ||
| 273 | bb.push_back(Operation(OperationCode::EmitVertex)); | ||
| 274 | SetRegister(bb, instr.gpr0, Immediate(0)); | ||
| 275 | } | ||
| 276 | if (instr.out.cut) { | ||
| 277 | bb.push_back(Operation(OperationCode::EndPrimitive)); | ||
| 278 | } | ||
| 279 | break; | ||
| 280 | } | ||
| 281 | case OpCode::Id::ISBERD: { | ||
| 282 | UNIMPLEMENTED_IF(instr.isberd.o != 0); | ||
| 283 | UNIMPLEMENTED_IF(instr.isberd.skew != 0); | ||
| 284 | UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None); | ||
| 285 | UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None); | ||
| 286 | LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete"); | ||
| 287 | SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8)); | ||
| 288 | break; | ||
| 289 | } | ||
| 290 | case OpCode::Id::BAR: { | ||
| 291 | UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0"); | ||
| 292 | bb.push_back(Operation(OperationCode::Barrier)); | ||
| 293 | break; | ||
| 294 | } | ||
| 295 | case OpCode::Id::MEMBAR: { | ||
| 296 | UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default); | ||
| 297 | const OperationCode type = [instr] { | ||
| 298 | switch (instr.membar.type) { | ||
| 299 | case Tegra::Shader::MembarType::CTA: | ||
| 300 | return OperationCode::MemoryBarrierGroup; | ||
| 301 | case Tegra::Shader::MembarType::GL: | ||
| 302 | return OperationCode::MemoryBarrierGlobal; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value()); | ||
| 305 | return OperationCode::MemoryBarrierGlobal; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | bb.push_back(Operation(type)); | ||
| 309 | break; | ||
| 310 | } | ||
| 311 | case OpCode::Id::DEPBAR: { | ||
| 312 | LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed"); | ||
| 313 | break; | ||
| 314 | } | ||
| 315 | default: | ||
| 316 | UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); | ||
| 317 | } | ||
| 318 | |||
| 319 | return pc; | ||
| 320 | } | ||
| 321 | |||
| 322 | } // namespace VideoCommon::Shader | ||
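
The S2R Tid case above packs the three local invocation id components into a single register with bitfield inserts: x at bit 0 (9 bits), y at bit 16 (9 bits), z at bit 26 (5 bits). A standalone model of that packing, with illustrative helper names:

```cpp
#include <cstdint>

// Write `count` bits of `value` into `base` starting at `offset`,
// the same operation BitfieldInsert performs in the IR.
constexpr uint32_t InsertBits(uint32_t base, uint32_t value,
                              uint32_t offset, uint32_t count) {
    const uint32_t mask = ((1u << count) - 1u) << offset;
    return (base & ~mask) | ((value << offset) & mask);
}

constexpr uint32_t PackTid(uint32_t x, uint32_t y, uint32_t z) {
    uint32_t tid = 0;
    tid = InsertBits(tid, x, 0, 9);
    tid = InsertBits(tid, y, 16, 9);
    tid = InsertBits(tid, z, 26, 5);
    return tid;
}

static_assert(PackTid(3, 2, 1) == (3u | (2u << 16) | (1u << 26)));
```
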
diff --git a/src/video_core/shader/decode/predicate_set_predicate.cpp b/src/video_core/shader/decode/predicate_set_predicate.cpp deleted file mode 100644 index 9290d22eb..000000000 --- a/src/video_core/shader/decode/predicate_set_predicate.cpp +++ /dev/null | |||
| @@ -1,68 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | switch (opcode->get().GetId()) { | ||
| 22 | case OpCode::Id::PSETP: { | ||
| 23 | const Node op_a = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0); | ||
| 24 | const Node op_b = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0); | ||
| 25 | |||
| 26 | // We can't use the constant predicate as destination. | ||
| 27 | ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 28 | |||
| 29 | const Node second_pred = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0); | ||
| 30 | |||
| 31 | const OperationCode combiner = GetPredicateCombiner(instr.psetp.op); | ||
| 32 | const Node predicate = Operation(combiner, op_a, op_b); | ||
| 33 | |||
| 34 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 35 | SetPredicate(bb, instr.psetp.pred3, Operation(combiner, predicate, second_pred)); | ||
| 36 | |||
| 37 | if (instr.psetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 38 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, if | ||
| 39 | // enabled | ||
| 40 | SetPredicate(bb, instr.psetp.pred0, | ||
| 41 | Operation(combiner, Operation(OperationCode::LogicalNegate, predicate), | ||
| 42 | second_pred)); | ||
| 43 | } | ||
| 44 | break; | ||
| 45 | } | ||
| 46 | case OpCode::Id::CSETP: { | ||
| 47 | const Node pred = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0); | ||
| 48 | const Node condition_code = GetConditionCode(instr.csetp.cc); | ||
| 49 | |||
| 50 | const OperationCode combiner = GetPredicateCombiner(instr.csetp.op); | ||
| 51 | |||
| 52 | if (instr.csetp.pred3 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 53 | SetPredicate(bb, instr.csetp.pred3, Operation(combiner, condition_code, pred)); | ||
| 54 | } | ||
| 55 | if (instr.csetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 56 | const Node neg_cc = Operation(OperationCode::LogicalNegate, condition_code); | ||
| 57 | SetPredicate(bb, instr.csetp.pred0, Operation(combiner, neg_cc, pred)); | ||
| 58 | } | ||
| 59 | break; | ||
| 60 | } | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName()); | ||
| 63 | } | ||
| 64 | |||
| 65 | return pc; | ||
| 66 | } | ||
| 67 | |||
| 68 | } // namespace VideoCommon::Shader | ||
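
PSETP above computes `(pred12 OP pred29)` once, then combines that value and its negation with the third predicate to produce the primary and secondary destinations. A boolean model of that lowering, where the combiner argument stands in for the opcode-selected AND/OR/XOR:

```cpp
#include <functional>

struct PsetpResult {
    bool primary;   // written to pred3
    bool secondary; // written to pred0 when it is not the unused index
};

PsetpResult Psetp(bool pred12, bool pred29, bool pred39,
                  const std::function<bool(bool, bool)>& combiner) {
    const bool combined = combiner(pred12, pred29);
    return {combiner(combined, pred39), combiner(!combined, pred39)};
}
```

Calling `Psetp(a, b, c, std::logical_and<bool>{})` reproduces the common AND-combiner case.
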
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp deleted file mode 100644 index 84dbc50fe..000000000 --- a/src/video_core/shader/decode/predicate_set_register.cpp +++ /dev/null | |||
| @@ -1,46 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | |||
| 16 | u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) { | ||
| 17 | const Instruction instr = {program_code[pc]}; | ||
| 18 | |||
| 19 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 20 | "Condition codes generation in PSET is not implemented"); | ||
| 21 | |||
| 22 | const Node op_a = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0); | ||
| 23 | const Node op_b = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0); | ||
| 24 | const Node first_pred = Operation(GetPredicateCombiner(instr.pset.cond), op_a, op_b); | ||
| 25 | |||
| 26 | const Node second_pred = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0); | ||
| 27 | |||
| 28 | const OperationCode combiner = GetPredicateCombiner(instr.pset.op); | ||
| 29 | const Node predicate = Operation(combiner, first_pred, second_pred); | ||
| 30 | |||
| 31 | const Node true_value = instr.pset.bf ? Immediate(1.0f) : Immediate(0xffffffff); | ||
| 32 | const Node false_value = instr.pset.bf ? Immediate(0.0f) : Immediate(0); | ||
| 33 | const Node value = | ||
| 34 | Operation(OperationCode::Select, PRECISE, predicate, true_value, false_value); | ||
| 35 | |||
| 36 | if (instr.pset.bf) { | ||
| 37 | SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||
| 38 | } else { | ||
| 39 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 40 | } | ||
| 41 | SetRegister(bb, instr.gpr0, value); | ||
| 42 | |||
| 43 | return pc; | ||
| 44 | } | ||
| 45 | |||
| 46 | } // namespace VideoCommon::Shader | ||
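
The BF bit above selects between a float destination (1.0f/0.0f) and an integer mask (0xffffffff/0). A host-side model of that select; the function name is illustrative:

```cpp
#include <bit>
#include <cstdint>

// PSET writes 1.0f/0.0f when BF is set, otherwise an all-ones/zero mask.
uint32_t PsetValue(bool predicate, bool bf) {
    if (bf) {
        return std::bit_cast<uint32_t>(predicate ? 1.0f : 0.0f);
    }
    return predicate ? 0xffffffffu : 0u;
}
```
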
diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp deleted file mode 100644 index 6116c31aa..000000000 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ /dev/null | |||
| @@ -1,86 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <utility> | ||
| 6 | |||
| 7 | #include "common/assert.h" | ||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | ||
| 12 | |||
| 13 | namespace VideoCommon::Shader { | ||
| 14 | |||
| 15 | using std::move; | ||
| 16 | using Tegra::Shader::Instruction; | ||
| 17 | using Tegra::Shader::OpCode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | constexpr u64 NUM_CONDITION_CODES = 4; | ||
| 21 | constexpr u64 NUM_PREDICATES = 7; | ||
| 22 | } // namespace | ||
| 23 | |||
| 24 | u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { | ||
| 25 | const Instruction instr = {program_code[pc]}; | ||
| 26 | const auto opcode = OpCode::Decode(instr); | ||
| 27 | |||
| 28 | Node apply_mask = [this, opcode, instr] { | ||
| 29 | switch (opcode->get().GetId()) { | ||
| 30 | case OpCode::Id::R2P_IMM: | ||
| 31 | case OpCode::Id::P2R_IMM: | ||
| 32 | return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); | ||
| 33 | default: | ||
| 34 | UNREACHABLE(); | ||
| 35 | return Immediate(0); | ||
| 36 | } | ||
| 37 | }(); | ||
| 38 | |||
| 39 | const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; | ||
| 40 | |||
| 41 | const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc; | ||
| 42 | const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES; | ||
| 43 | const auto get_entry = [this, cc](u64 entry) { | ||
| 44 | return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry); | ||
| 45 | }; | ||
| 46 | |||
| 47 | switch (opcode->get().GetId()) { | ||
| 48 | case OpCode::Id::R2P_IMM: { | ||
| 49 | Node mask = GetRegister(instr.gpr8); | ||
| 50 | |||
| 51 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 52 | const u32 shift = static_cast<u32>(entry); | ||
| 53 | |||
| 54 | Node apply = BitfieldExtract(apply_mask, shift, 1); | ||
| 55 | Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0)); | ||
| 56 | |||
| 57 | Node compare = BitfieldExtract(mask, offset + shift, 1); | ||
| 58 | Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0)); | ||
| 59 | |||
| 60 | Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value)); | ||
| 61 | bb.push_back(Conditional(condition, {move(code)})); | ||
| 62 | } | ||
| 63 | break; | ||
| 64 | } | ||
| 65 | case OpCode::Id::P2R_IMM: { | ||
| 66 | Node value = Immediate(0); | ||
| 67 | for (u64 entry = 0; entry < num_entries; ++entry) { | ||
| 68 | Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry), | ||
| 69 | Immediate(0)); | ||
| 70 | value = Operation(OperationCode::UBitwiseOr, move(value), move(bit)); | ||
| 71 | } | ||
| 72 | value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask); | ||
| 73 | value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8); | ||
| 74 | |||
| 75 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | default: | ||
| 79 | UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName()); | ||
| 80 | break; | ||
| 81 | } | ||
| 82 | |||
| 83 | return pc; | ||
| 84 | } | ||
| 85 | |||
| 86 | } // namespace VideoCommon::Shader | ||
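
P2R_IMM above ORs the predicates into a bit string, masks it with the immediate, and splices the result into one byte lane of gpr8; R2P is the inverse, reading bits back out of the same lane. A compact model of the P2R path, with illustrative names:

```cpp
#include <array>
#include <cstdint>

// Gather seven predicates into a byte, apply the immediate mask, and
// replace the selected byte lane of the source register with it.
constexpr uint32_t P2R(const std::array<bool, 7>& preds, uint32_t gpr8,
                       uint32_t immediate_mask, uint32_t byte_index) {
    uint32_t value = 0;
    for (uint32_t entry = 0; entry < preds.size(); ++entry) {
        value |= preds[entry] ? (1u << entry) : 0u;
    }
    value &= immediate_mask;

    const uint32_t offset = byte_index * 8;
    const uint32_t lane = 0xffu << offset;
    return (gpr8 & ~lane) | ((value << offset) & lane);
}
```
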
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp deleted file mode 100644 index a53819c15..000000000 --- a/src/video_core/shader/decode/shift.cpp +++ /dev/null | |||
| @@ -1,153 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::ShfType; | ||
| 17 | using Tegra::Shader::ShfXmode; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | Node IsFull(Node shift) { | ||
| 22 | return Operation(OperationCode::LogicalIEqual, move(shift), Immediate(32)); | ||
| 23 | } | ||
| 24 | |||
| 25 | Node Shift(OperationCode opcode, Node value, Node shift) { | ||
| 26 | Node shifted = Operation(opcode, move(value), shift); | ||
| 27 | return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node ClampShift(Node shift, s32 size = 32) { | ||
| 31 | shift = Operation(OperationCode::IMax, move(shift), Immediate(0)); | ||
| 32 | return Operation(OperationCode::IMin, move(shift), Immediate(size)); | ||
| 33 | } | ||
| 34 | |||
| 35 | Node WrapShift(Node shift, s32 size = 32) { | ||
| 36 | return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(size - 1)); | ||
| 37 | } | ||
| 38 | |||
| 39 | Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 40 | // These values are used when the shift value is less than 32 | ||
| 41 | Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift); | ||
| 42 | Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift); | ||
| 43 | Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low)); | ||
| 44 | |||
| 45 | if (type == ShfType::Bits32) { | ||
| 46 | // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 47 | return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less)); | ||
| 48 | } | ||
| 49 | |||
| 50 | // And these are used when the shift is 32 or larger | ||
| 51 | const bool is_signed = type == ShfType::S64; | ||
| 52 | const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed); | ||
| 53 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 54 | Node greater = Shift(opcode, high, move(reduced)); | ||
| 55 | |||
| 56 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 57 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 58 | |||
| 59 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 60 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 61 | } | ||
| 62 | |||
| 63 | Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) { | ||
| 64 | // These values are used when the shift value is less than 32 | ||
| 65 | Node less_low = Operation(OperationCode::ILogicalShiftRight, low, low_shift); | ||
| 66 | Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift); | ||
| 67 | Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high)); | ||
| 68 | |||
| 69 | if (type == ShfType::Bits32) { | ||
| 70 | // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits | ||
| 71 | return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less)); | ||
| 72 | } | ||
| 73 | |||
| 74 | // And these are used when the shift is 32 or larger | ||
| 75 | Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32)); | ||
| 76 | Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced)); | ||
| 77 | |||
| 78 | Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32)); | ||
| 79 | Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0)); | ||
| 80 | |||
| 81 | Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater)); | ||
| 82 | return Operation(OperationCode::Select, move(is_zero), move(high), move(value)); | ||
| 83 | } | ||
| 84 | |||
| 85 | } // Anonymous namespace | ||
| 86 | |||
| 87 | u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) { | ||
| 88 | const Instruction instr = {program_code[pc]}; | ||
| 89 | const auto opcode = OpCode::Decode(instr); | ||
| 90 | |||
| 91 | Node op_a = GetRegister(instr.gpr8); | ||
| 92 | Node op_b = [this, instr] { | ||
| 93 | if (instr.is_b_imm) { | ||
| 94 | return Immediate(instr.alu.GetSignedImm20_20()); | ||
| 95 | } else if (instr.is_b_gpr) { | ||
| 96 | return GetRegister(instr.gpr20); | ||
| 97 | } else { | ||
| 98 | return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); | ||
| 99 | } | ||
| 100 | }(); | ||
| 101 | |||
| 102 | switch (const auto opid = opcode->get().GetId(); opid) { | ||
| 103 | case OpCode::Id::SHR_C: | ||
| 104 | case OpCode::Id::SHR_R: | ||
| 105 | case OpCode::Id::SHR_IMM: { | ||
| 106 | op_b = instr.shr.wrap ? WrapShift(move(op_b)) : ClampShift(move(op_b)); | ||
| 107 | |||
| 108 | Node value = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed, | ||
| 109 | move(op_a), move(op_b)); | ||
| 110 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 111 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 112 | break; | ||
| 113 | } | ||
| 114 | case OpCode::Id::SHL_C: | ||
| 115 | case OpCode::Id::SHL_R: | ||
| 116 | case OpCode::Id::SHL_IMM: { | ||
| 117 | Node value = Operation(OperationCode::ILogicalShiftLeft, op_a, op_b); | ||
| 118 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 119 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | case OpCode::Id::SHF_RIGHT_R: | ||
| 123 | case OpCode::Id::SHF_RIGHT_IMM: | ||
| 124 | case OpCode::Id::SHF_LEFT_R: | ||
| 125 | case OpCode::Id::SHF_LEFT_IMM: { | ||
| 126 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 127 | UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}", | ||
| 128 | instr.shf.xmode.Value()); | ||
| 129 | |||
| 130 | if (instr.is_b_imm) { | ||
| 131 | op_b = Immediate(static_cast<u32>(instr.shf.immediate)); | ||
| 132 | } | ||
| 133 | const s32 size = instr.shf.type == ShfType::Bits32 ? 32 : 64; | ||
| 134 | Node shift = instr.shf.wrap ? WrapShift(move(op_b), size) : ClampShift(move(op_b), size); | ||
| 135 | |||
| 136 | Node negated_shift = Operation(OperationCode::INegate, shift); | ||
| 137 | Node low_shift = Operation(OperationCode::IAdd, move(negated_shift), Immediate(32)); | ||
| 138 | |||
| 139 | const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM; | ||
| 140 | Node value = (is_right ? ShiftRight : ShiftLeft)( | ||
| 141 | move(op_a), GetRegister(instr.gpr39), move(shift), move(low_shift), instr.shf.type); | ||
| 142 | |||
| 143 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 144 | break; | ||
| 145 | } | ||
| 146 | default: | ||
| 147 | UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName()); | ||
| 148 | } | ||
| 149 | |||
| 150 | return pc; | ||
| 151 | } | ||
| 152 | |||
| 153 | } // namespace VideoCommon::Shader | ||
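
The 64-bit SHF right path above selects between three regimes: a zero shift returns the high word unchanged, shifts below 32 funnel high bits into the low word, and shifts of 32 to 64 operate on the high word alone, with a full 32-bit inner shift producing zero. A reference model of that selection, assuming `shift` was already clamped or wrapped into [0, 64] as `DecodeShift` does:

```cpp
#include <cstdint>

uint32_t FunnelShiftRight64(uint32_t low, uint32_t high, uint32_t shift,
                            bool is_signed) {
    if (shift == 0) {
        return high; // matches the final is_zero select above
    }
    if (shift < 32) {
        return (low >> shift) | (high << (32 - shift));
    }
    const uint32_t reduced = shift - 32;
    if (reduced >= 32) {
        return 0; // Shift() yields zero on a full 32-bit inner shift
    }
    if (is_signed) {
        return static_cast<uint32_t>(static_cast<int32_t>(high) >> reduced);
    }
    return high >> reduced;
}
```
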
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp deleted file mode 100644 index c69681e8d..000000000 --- a/src/video_core/shader/decode/texture.cpp +++ /dev/null | |||
| @@ -1,935 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <vector> | ||
| 7 | #include <fmt/format.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/bit_field.h" | ||
| 11 | #include "common/common_types.h" | ||
| 12 | #include "common/logging/log.h" | ||
| 13 | #include "video_core/engines/shader_bytecode.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Instruction; | ||
| 21 | using Tegra::Shader::OpCode; | ||
| 22 | using Tegra::Shader::Register; | ||
| 23 | using Tegra::Shader::TextureMiscMode; | ||
| 24 | using Tegra::Shader::TextureProcessMode; | ||
| 25 | using Tegra::Shader::TextureType; | ||
| 26 | |||
| 27 | static std::size_t GetCoordCount(TextureType texture_type) { | ||
| 28 | switch (texture_type) { | ||
| 29 | case TextureType::Texture1D: | ||
| 30 | return 1; | ||
| 31 | case TextureType::Texture2D: | ||
| 32 | return 2; | ||
| 33 | case TextureType::Texture3D: | ||
| 34 | case TextureType::TextureCube: | ||
| 35 | return 3; | ||
| 36 | default: | ||
| 37 | UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type); | ||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | } | ||
| 41 | |||
| 42 | u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||
| 43 | const Instruction instr = {program_code[pc]}; | ||
| 44 | const auto opcode = OpCode::Decode(instr); | ||
| 45 | bool is_bindless = false; | ||
| 46 | switch (opcode->get().GetId()) { | ||
| 47 | case OpCode::Id::TEX: { | ||
| 48 | const TextureType texture_type{instr.tex.texture_type}; | ||
| 49 | const bool is_array = instr.tex.array != 0; | ||
| 50 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 51 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 52 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 53 | WriteTexInstructionFloat( | ||
| 54 | bb, instr, | ||
| 55 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {})); | ||
| 56 | break; | ||
| 57 | } | ||
| 58 | case OpCode::Id::TEX_B: { | ||
| 59 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 60 | "AOFFI is not implemented"); | ||
| 61 | |||
| 62 | const TextureType texture_type{instr.tex_b.texture_type}; | ||
| 63 | const bool is_array = instr.tex_b.array != 0; | ||
| 64 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 65 | const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC); | ||
| 66 | const auto process_mode = instr.tex_b.GetTextureProcessMode(); | ||
| 67 | WriteTexInstructionFloat(bb, instr, | ||
| 68 | GetTexCode(instr, texture_type, process_mode, depth_compare, | ||
| 69 | is_array, is_aoffi, {instr.gpr20})); | ||
| 70 | break; | ||
| 71 | } | ||
| 72 | case OpCode::Id::TEXS: { | ||
| 73 | const TextureType texture_type{instr.texs.GetTextureType()}; | ||
| 74 | const bool is_array{instr.texs.IsArrayTexture()}; | ||
| 75 | const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC); | ||
| 76 | const auto process_mode = instr.texs.GetTextureProcessMode(); | ||
| 77 | |||
| 78 | const Node4 components = | ||
| 79 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | ||
| 80 | |||
| 81 | if (instr.texs.fp32_flag) { | ||
| 82 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 83 | } else { | ||
| 84 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 85 | } | ||
| 86 | break; | ||
| 87 | } | ||
| 88 | case OpCode::Id::TLD4_B: { | ||
| 89 | is_bindless = true; | ||
| 90 | [[fallthrough]]; | ||
| 91 | } | ||
| 92 | case OpCode::Id::TLD4: { | ||
| 93 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||
| 94 | "NDV is not implemented"); | ||
| 95 | const auto texture_type = instr.tld4.texture_type.Value(); | ||
| 96 | const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC) | ||
| 97 | : instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||
| 98 | const bool is_array = instr.tld4.array != 0; | ||
| 99 | const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI) | ||
| 100 | : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 101 | const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP) | ||
| 102 | : instr.tld4.UsesMiscMode(TextureMiscMode::PTP); | ||
| 103 | WriteTexInstructionFloat(bb, instr, | ||
| 104 | GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, | ||
| 105 | is_ptp, is_bindless)); | ||
| 106 | break; | ||
| 107 | } | ||
| 108 | case OpCode::Id::TLD4S: { | ||
| 109 | constexpr std::size_t num_coords = 2; | ||
| 110 | const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 111 | const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC); | ||
| 112 | const Node op_a = GetRegister(instr.gpr8); | ||
| 113 | const Node op_b = GetRegister(instr.gpr20); | ||
| 114 | |||
| 115 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | ||
| 116 | std::vector<Node> coords; | ||
| 117 | std::vector<Node> aoffi; | ||
| 118 | Node depth_compare; | ||
| 119 | if (is_depth_compare) { | ||
| 120 | // Note: TLD4S coordinate encoding works just like TEXS's | ||
| 121 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | ||
| 122 | coords.push_back(op_a); | ||
| 123 | coords.push_back(op_y); | ||
| 124 | if (is_aoffi) { | ||
| 125 | aoffi = GetAoffiCoordinates(op_b, num_coords, true); | ||
| 126 | depth_compare = GetRegister(instr.gpr20.Value() + 1); | ||
| 127 | } else { | ||
| 128 | depth_compare = op_b; | ||
| 129 | } | ||
| 130 | } else { | ||
| 131 | // There's no depth compare | ||
| 132 | coords.push_back(op_a); | ||
| 133 | if (is_aoffi) { | ||
| 134 | coords.push_back(GetRegister(instr.gpr8.Value() + 1)); | ||
| 135 | aoffi = GetAoffiCoordinates(op_b, num_coords, true); | ||
| 136 | } else { | ||
| 137 | coords.push_back(op_b); | ||
| 138 | } | ||
| 139 | } | ||
| 140 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | ||
| 141 | |||
| 142 | SamplerInfo info; | ||
| 143 | info.is_shadow = is_depth_compare; | ||
| 144 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 145 | |||
| 146 | Node4 values; | ||
| 147 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 148 | MetaTexture meta{*sampler, {}, depth_compare, aoffi, {}, {}, | ||
| 149 | {}, {}, component, element, {}}; | ||
| 150 | values[element] = Operation(OperationCode::TextureGather, meta, coords); | ||
| 151 | } | ||
| 152 | |||
| 153 | if (instr.tld4s.fp16_flag) { | ||
| 154 | WriteTexsInstructionHalfFloat(bb, instr, values, true); | ||
| 155 | } else { | ||
| 156 | WriteTexsInstructionFloat(bb, instr, values, true); | ||
| 157 | } | ||
| 158 | break; | ||
| 159 | } | ||
| 160 | case OpCode::Id::TXD_B: | ||
| 161 | is_bindless = true; | ||
| 162 | [[fallthrough]]; | ||
| 163 | case OpCode::Id::TXD: { | ||
| 164 | UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 165 | "AOFFI is not implemented"); | ||
| 166 | |||
| 167 | const bool is_array = instr.txd.is_array != 0; | ||
| 168 | const auto derivate_reg = instr.gpr20.Value(); | ||
| 169 | const auto texture_type = instr.txd.texture_type.Value(); | ||
| 170 | const auto coord_count = GetCoordCount(texture_type); | ||
| 171 | u64 base_reg = instr.gpr8.Value(); | ||
| 172 | Node index_var; | ||
| 173 | SamplerInfo info; | ||
| 174 | info.type = texture_type; | ||
| 175 | info.is_array = is_array; | ||
| 176 | const std::optional<SamplerEntry> sampler = | ||
| 177 | is_bindless ? GetBindlessSampler(base_reg, info, index_var) | ||
| 178 | : GetSampler(instr.sampler, info); | ||
| 179 | Node4 values; | ||
| 180 | if (!sampler) { | ||
| 181 | std::generate(values.begin(), values.end(), [this] { return Immediate(0); }); | ||
| 182 | WriteTexInstructionFloat(bb, instr, values); | ||
| 183 | break; | ||
| 184 | } | ||
| 185 | |||
| 186 | if (is_bindless) { | ||
| 187 | base_reg++; | ||
| 188 | } | ||
| 189 | |||
| 190 | std::vector<Node> coords; | ||
| 191 | std::vector<Node> derivates; | ||
| 192 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 193 | coords.push_back(GetRegister(base_reg + i)); | ||
| 194 | const std::size_t derivate = i * 2; | ||
| 195 | derivates.push_back(GetRegister(derivate_reg + derivate)); | ||
| 196 | derivates.push_back(GetRegister(derivate_reg + derivate + 1)); | ||
| 197 | } | ||
| 198 | |||
| 199 | Node array_node = {}; | ||
| 200 | if (is_array) { | ||
| 201 | const Node info_reg = GetRegister(base_reg + coord_count); | ||
| 202 | array_node = BitfieldExtract(info_reg, 0, 16); | ||
| 203 | } | ||
| 204 | |||
| 205 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 206 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, | ||
| 207 | {}, {}, {}, element, index_var}; | ||
| 208 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); | ||
| 209 | } | ||
| 210 | |||
| 211 | WriteTexInstructionFloat(bb, instr, values); | ||
| 212 | |||
| 213 | break; | ||
| 214 | } | ||
| 215 | case OpCode::Id::TXQ_B: | ||
| 216 | is_bindless = true; | ||
| 217 | [[fallthrough]]; | ||
| 218 | case OpCode::Id::TXQ: { | ||
| 219 | Node index_var; | ||
| 220 | const std::optional<SamplerEntry> sampler = | ||
| 221 | is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var) | ||
| 222 | : GetSampler(instr.sampler, {}); | ||
| 223 | |||
| 224 | if (!sampler) { | ||
| 225 | u32 indexer = 0; | ||
| 226 | for (u32 element = 0; element < 4; ++element) { | ||
| 227 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 228 | continue; | ||
| 229 | } | ||
| 230 | const Node value = Immediate(0); | ||
| 231 | SetTemporary(bb, indexer++, value); | ||
| 232 | } | ||
| 233 | for (u32 i = 0; i < indexer; ++i) { | ||
| 234 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 235 | } | ||
| 236 | break; | ||
| 237 | } | ||
| 238 | |||
| 239 | u32 indexer = 0; | ||
| 240 | switch (instr.txq.query_type) { | ||
| 241 | case Tegra::Shader::TextureQueryType::Dimension: { | ||
| 242 | for (u32 element = 0; element < 4; ++element) { | ||
| 243 | if (!instr.txq.IsComponentEnabled(element)) { | ||
| 244 | continue; | ||
| 245 | } | ||
| 246 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||
| 247 | const Node value = | ||
| 248 | Operation(OperationCode::TextureQueryDimensions, meta, | ||
| 249 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | ||
| 250 | SetTemporary(bb, indexer++, value); | ||
| 251 | } | ||
| 252 | for (u32 i = 0; i < indexer; ++i) { | ||
| 253 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 254 | } | ||
| 255 | break; | ||
| 256 | } | ||
| 257 | default: | ||
| 258 | UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value()); | ||
| 259 | } | ||
| 260 | break; | ||
| 261 | } | ||
| 262 | case OpCode::Id::TMML_B: | ||
| 263 | is_bindless = true; | ||
| 264 | [[fallthrough]]; | ||
| 265 | case OpCode::Id::TMML: { | ||
| 266 | UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV), | ||
| 267 | "NDV is not implemented"); | ||
| 268 | |||
| 269 | const auto texture_type = instr.tmml.texture_type.Value(); | ||
| 270 | const bool is_array = instr.tmml.array != 0; | ||
| 271 | SamplerInfo info; | ||
| 272 | info.type = texture_type; | ||
| 273 | info.is_array = is_array; | ||
| 274 | Node index_var; | ||
| 275 | const std::optional<SamplerEntry> sampler = | ||
| 276 | is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var) | ||
| 277 | : GetSampler(instr.sampler, info); | ||
| 278 | |||
| 279 | if (!sampler) { | ||
| 280 | u32 indexer = 0; | ||
| 281 | for (u32 element = 0; element < 2; ++element) { | ||
| 282 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 283 | continue; | ||
| 284 | } | ||
| 285 | const Node value = Immediate(0); | ||
| 286 | SetTemporary(bb, indexer++, value); | ||
| 287 | } | ||
| 288 | for (u32 i = 0; i < indexer; ++i) { | ||
| 289 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 290 | } | ||
| 291 | break; | ||
| 292 | } | ||
| 293 | |||
| 294 | const u64 base_index = is_array ? 1 : 0; | ||
| 295 | const u64 num_components = [texture_type] { | ||
| 296 | switch (texture_type) { | ||
| 297 | case TextureType::Texture1D: | ||
| 298 | return 1; | ||
| 299 | case TextureType::Texture2D: | ||
| 300 | return 2; | ||
| 301 | case TextureType::TextureCube: | ||
| 302 | return 3; | ||
| 303 | default: | ||
| 304 | UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type); | ||
| 305 | return 2; | ||
| 306 | } | ||
| 307 | }(); | ||
| 308 | // TODO: What's the array component used for? | ||
| 309 | |||
| 310 | std::vector<Node> coords; | ||
| 311 | coords.reserve(num_components); | ||
| 312 | for (u64 component = 0; component < num_components; ++component) { | ||
| 313 | coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); | ||
| 314 | } | ||
| 315 | |||
| 316 | u32 indexer = 0; | ||
| 317 | for (u32 element = 0; element < 2; ++element) { | ||
| 318 | if (!instr.tmml.IsComponentEnabled(element)) { | ||
| 319 | continue; | ||
| 320 | } | ||
| 321 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; | ||
| 322 | Node value = Operation(OperationCode::TextureQueryLod, meta, coords); | ||
| 323 | SetTemporary(bb, indexer++, std::move(value)); | ||
| 324 | } | ||
| 325 | for (u32 i = 0; i < indexer; ++i) { | ||
| 326 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 327 | } | ||
| 328 | break; | ||
| 329 | } | ||
| 330 | case OpCode::Id::TLD: { | ||
| 331 | UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented"); | ||
| 332 | UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented"); | ||
| 333 | UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented"); | ||
| 334 | |||
| 335 | WriteTexInstructionFloat(bb, instr, GetTldCode(instr)); | ||
| 336 | break; | ||
| 337 | } | ||
| 338 | case OpCode::Id::TLDS: { | ||
| 339 | const TextureType texture_type{instr.tlds.GetTextureType()}; | ||
| 340 | const bool is_array{instr.tlds.IsArrayTexture()}; | ||
| 341 | |||
| 342 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 343 | "AOFFI is not implemented"); | ||
| 344 | UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented"); | ||
| 345 | |||
| 346 | const Node4 components = GetTldsCode(instr, texture_type, is_array); | ||
| 347 | |||
| 348 | if (instr.tlds.fp32_flag) { | ||
| 349 | WriteTexsInstructionFloat(bb, instr, components); | ||
| 350 | } else { | ||
| 351 | WriteTexsInstructionHalfFloat(bb, instr, components); | ||
| 352 | } | ||
| 353 | break; | ||
| 354 | } | ||
| 355 | default: | ||
| 356 | UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); | ||
| 357 | } | ||
| 358 | |||
| 359 | return pc; | ||
| 360 | } | ||
| 361 | |||
| 362 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( | ||
| 363 | SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { | ||
| 364 | if (info.IsComplete()) { | ||
| 365 | return info; | ||
| 366 | } | ||
| 367 | if (!sampler) { | ||
| 368 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | ||
| 369 | info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); | ||
| 370 | info.is_array = info.is_array.value_or(false); | ||
| 371 | info.is_shadow = info.is_shadow.value_or(false); | ||
| 372 | info.is_buffer = info.is_buffer.value_or(false); | ||
| 373 | return info; | ||
| 374 | } | ||
| 375 | info.type = info.type.value_or(sampler->texture_type); | ||
| 376 | info.is_array = info.is_array.value_or(sampler->is_array != 0); | ||
| 377 | info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0); | ||
| 378 | info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0); | ||
| 379 | return info; | ||
| 380 | } | ||
| 381 | |||
| 382 | std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | ||
| 383 | SamplerInfo sampler_info) { | ||
| 384 | const u32 offset = static_cast<u32>(sampler.index.Value()); | ||
| 385 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); | ||
| 386 | |||
| 387 | // If this sampler has already been used, return the existing mapping. | ||
| 388 | const auto it = | ||
| 389 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 390 | [offset](const SamplerEntry& entry) { return entry.offset == offset; }); | ||
| 391 | if (it != used_samplers.end()) { | ||
| 392 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 393 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 394 | return *it; | ||
| 395 | } | ||
| 396 | |||
| 397 | // Otherwise create a new mapping for this sampler | ||
| 398 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 399 | return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array, | ||
| 400 | *info.is_shadow, *info.is_buffer, false); | ||
| 401 | } | ||
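GetSampler is effectively a small interning scheme: entries are keyed by the hardware sampler offset, and a lookup miss appends a new entry whose index is the current entry count, giving backends a stable, dense binding order. A stripped-down sketch of the same pattern (the struct below is illustrative, not the real SamplerEntry):

```cpp
#include <cstdint>
#include <vector>

// Illustrative stand-in for SamplerEntry: a stable index plus the
// hardware offset used as the lookup key.
struct Entry {
    uint32_t index;
    uint32_t offset;
};

// Return the existing entry for `offset`, or intern a new one whose
// index is the number of unique entries seen so far.
Entry& Intern(std::vector<Entry>& entries, uint32_t offset) {
    for (Entry& entry : entries) {
        if (entry.offset == offset) {
            return entry;
        }
    }
    return entries.emplace_back(Entry{static_cast<uint32_t>(entries.size()), offset});
}
```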
| 402 | |||
| 403 | std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | ||
| 404 | SamplerInfo info, Node& index_var) { | ||
| 405 | const Node sampler_register = GetRegister(reg); | ||
| 406 | const auto [base_node, tracked_sampler_info] = | ||
| 407 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | ||
| 408 | if (!base_node) { | ||
| 409 | UNREACHABLE(); | ||
| 410 | return std::nullopt; | ||
| 411 | } | ||
| 412 | |||
| 413 | if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | ||
| 414 | const u32 buffer = sampler_info->index; | ||
| 415 | const u32 offset = sampler_info->offset; | ||
| 416 | info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); | ||
| 417 | |||
| 418 | // If this sampler has already been used, return the existing mapping. | ||
| 419 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 420 | [buffer, offset](const SamplerEntry& entry) { | ||
| 421 | return entry.buffer == buffer && entry.offset == offset; | ||
| 422 | }); | ||
| 423 | if (it != used_samplers.end()) { | ||
| 424 | ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 425 | it->is_shadow == info.is_shadow); | ||
| 426 | return *it; | ||
| 427 | } | ||
| 428 | |||
| 429 | // Otherwise create a new mapping for this sampler | ||
| 430 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 431 | return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, | ||
| 432 | *info.is_shadow, *info.is_buffer, false); | ||
| 433 | } | ||
| 434 | if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { | ||
| 435 | const std::pair indices = sampler_info->indices; | ||
| 436 | const std::pair offsets = sampler_info->offsets; | ||
| 437 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); | ||
| 438 | |||
| 439 | // Try to use an already created sampler if it exists | ||
| 440 | const auto it = | ||
| 441 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 442 | [indices, offsets](const SamplerEntry& entry) { | ||
| 443 | return offsets == std::pair{entry.offset, entry.secondary_offset} && | ||
| 444 | indices == std::pair{entry.buffer, entry.secondary_buffer}; | ||
| 445 | }); | ||
| 446 | if (it != used_samplers.end()) { | ||
| 447 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && | ||
| 448 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 449 | return *it; | ||
| 450 | } | ||
| 451 | |||
| 452 | // Otherwise create a new mapping for this sampler | ||
| 453 | const u32 next_index = static_cast<u32>(used_samplers.size()); | ||
| 454 | return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, | ||
| 455 | *info.is_shadow, *info.is_buffer); | ||
| 456 | } | ||
| 457 | if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { | ||
| 458 | const u32 base_offset = sampler_info->base_offset / 4; | ||
| 459 | index_var = GetCustomVariable(sampler_info->bindless_var); | ||
| 460 | info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); | ||
| 461 | |||
| 462 | // If this sampler has already been used, return the existing mapping. | ||
| 463 | const auto it = std::find_if( | ||
| 464 | used_samplers.begin(), used_samplers.end(), | ||
| 465 | [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; }); | ||
| 466 | if (it != used_samplers.end()) { | ||
| 467 | ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array && | ||
| 468 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer && | ||
| 469 | it->is_indexed); | ||
| 470 | return *it; | ||
| 471 | } | ||
| 472 | |||
| 473 | uses_indexed_samplers = true; | ||
| 474 | // Otherwise create a new mapping for this sampler | ||
| 475 | const auto next_index = static_cast<u32>(used_samplers.size()); | ||
| 476 | return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array, | ||
| 477 | *info.is_shadow, *info.is_buffer, true); | ||
| 478 | } | ||
| 479 | return std::nullopt; | ||
| 480 | } | ||
| 481 | |||
| 482 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | ||
| 483 | u32 dest_elem = 0; | ||
| 484 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 485 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 486 | // Skip disabled components | ||
| 487 | continue; | ||
| 488 | } | ||
| 489 | SetTemporary(bb, dest_elem++, components[elem]); | ||
| 490 | } | ||
| 491 | // After writing the values to temporaries, move them to the real registers | ||
| 492 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 493 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||
| 494 | } | ||
| 495 | } | ||
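These write-back loops always compact enabled components: results are staged in consecutive temporaries and then copied to consecutive destination registers, so a sparse mask such as 0b0101 still fills gpr0+0 and gpr0+1. A self-contained sketch of that compaction, with a plain array standing in for the register file:

```cpp
#include <array>
#include <cstdint>

// Compact the enabled components of `values` into consecutive slots of
// `regs`, mirroring the temporary/register two-step used above.
void CompactWrite(std::array<uint32_t, 4>& regs,
                  const std::array<uint32_t, 4>& values, uint32_t mask) {
    uint32_t dest = 0;
    for (uint32_t elem = 0; elem < 4; ++elem) {
        if ((mask & (1U << elem)) == 0) {
            continue; // Disabled components produce no write at all.
        }
        regs[dest++] = values[elem];
    }
}
```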
| 496 | |||
| 497 | void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components, | ||
| 498 | bool ignore_mask) { | ||
| 499 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | ||
| 500 | // go into gpr0+0 and gpr0+1, and the rest go into gpr28+0 and gpr28+1 | ||
| 501 | |||
| 502 | u32 dest_elem = 0; | ||
| 503 | for (u32 component = 0; component < 4; ++component) { | ||
| 504 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 505 | continue; | ||
| 506 | SetTemporary(bb, dest_elem++, components[component]); | ||
| 507 | } | ||
| 508 | |||
| 509 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 510 | if (i < 2) { | ||
| 511 | // Write the first two swizzle components to gpr0 and gpr0+1 | ||
| 512 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporary(i)); | ||
| 513 | } else { | ||
| 514 | ASSERT(instr.texs.HasTwoDestinations()); | ||
| 515 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | ||
| 516 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporary(i)); | ||
| 517 | } | ||
| 518 | } | ||
| 519 | } | ||
| 520 | |||
| 521 | void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||
| 522 | const Node4& components, bool ignore_mask) { | ||
| 523 | // TEXS.F16 destination registers are packed in pairs within two registers (just like any | ||
| 524 | // half-float instruction). | ||
| 525 | |||
| 526 | Node4 values; | ||
| 527 | u32 dest_elem = 0; | ||
| 528 | for (u32 component = 0; component < 4; ++component) { | ||
| 529 | if (!instr.texs.IsComponentEnabled(component) && !ignore_mask) | ||
| 530 | continue; | ||
| 531 | values[dest_elem++] = components[component]; | ||
| 532 | } | ||
| 533 | if (dest_elem == 0) | ||
| 534 | return; | ||
| 535 | |||
| 536 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); | ||
| 537 | |||
| 538 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 539 | if (dest_elem <= 2) { | ||
| 540 | SetRegister(bb, instr.gpr0, first_value); | ||
| 541 | return; | ||
| 542 | } | ||
| 543 | |||
| 544 | SetTemporary(bb, 0, first_value); | ||
| 545 | SetTemporary(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 546 | |||
| 547 | SetRegister(bb, instr.gpr0, GetTemporary(0)); | ||
| 548 | SetRegister(bb, instr.gpr28, GetTemporary(1)); | ||
| 549 | } | ||
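HPack2 above packs two half-precision values into a single 32-bit register, first operand in the low half. A minimal sketch of that packing on raw f16 bit patterns (the real IR emits a node rather than doing integer arithmetic):

```cpp
#include <cstdint>

// Pack two f16 bit patterns into one 32-bit register image:
// `low` occupies bits 0-15 and `high` bits 16-31.
constexpr uint32_t HPack2(uint16_t low, uint16_t high) {
    return static_cast<uint32_t>(low) | (static_cast<uint32_t>(high) << 16);
}

static_assert(HPack2(0x3C00, 0xC000) == 0xC0003C00); // 1.0f16 and -2.0f16
```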
| 550 | |||
| 551 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||
| 552 | TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 553 | Node array, Node depth_compare, u32 bias_offset, | ||
| 554 | std::vector<Node> aoffi, | ||
| 555 | std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 556 | const bool is_array = array != nullptr; | ||
| 557 | const bool is_shadow = depth_compare != nullptr; | ||
| 558 | const bool is_bindless = bindless_reg.has_value(); | ||
| 559 | |||
| 560 | ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow, | ||
| 561 | "Illegal texture type"); | ||
| 562 | |||
| 563 | SamplerInfo info; | ||
| 564 | info.type = texture_type; | ||
| 565 | info.is_array = is_array; | ||
| 566 | info.is_shadow = is_shadow; | ||
| 567 | info.is_buffer = false; | ||
| 568 | |||
| 569 | Node index_var; | ||
| 570 | const std::optional<SamplerEntry> sampler = | ||
| 571 | is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var) | ||
| 572 | : GetSampler(instr.sampler, info); | ||
| 573 | if (!sampler) { | ||
| 574 | return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)}; | ||
| 575 | } | ||
| 576 | |||
| 577 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||
| 578 | process_mode == TextureProcessMode::LL || | ||
| 579 | process_mode == TextureProcessMode::LLA; | ||
| 580 | const OperationCode opcode = lod_needed ? OperationCode::TextureLod : OperationCode::Texture; | ||
| 581 | |||
| 582 | Node bias; | ||
| 583 | Node lod; | ||
| 584 | switch (process_mode) { | ||
| 585 | case TextureProcessMode::None: | ||
| 586 | break; | ||
| 587 | case TextureProcessMode::LZ: | ||
| 588 | lod = Immediate(0.0f); | ||
| 589 | break; | ||
| 590 | case TextureProcessMode::LB: | ||
| 591 | // If present, the lod or bias is always stored in the register indexed by the gpr20 field, | ||
| 592 | // with an offset depending on the usage of the other registers. | ||
| 593 | bias = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 594 | break; | ||
| 595 | case TextureProcessMode::LL: | ||
| 596 | lod = GetRegister(instr.gpr20.Value() + bias_offset); | ||
| 597 | break; | ||
| 598 | default: | ||
| 599 | UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode); | ||
| 600 | break; | ||
| 601 | } | ||
| 602 | |||
| 603 | Node4 values; | ||
| 604 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 605 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, | ||
| 606 | lod, {}, element, index_var}; | ||
| 607 | values[element] = Operation(opcode, meta, coords); | ||
| 608 | } | ||
| 609 | |||
| 610 | return values; | ||
| 611 | } | ||
| 612 | |||
| 613 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||
| 614 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | ||
| 615 | bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) { | ||
| 616 | const bool lod_bias_enabled{ | ||
| 617 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||
| 618 | |||
| 619 | const bool is_bindless = bindless_reg.has_value(); | ||
| 620 | |||
| 621 | u64 parameter_register = instr.gpr20.Value(); | ||
| 622 | if (is_bindless) { | ||
| 623 | ++parameter_register; | ||
| 624 | } | ||
| 625 | |||
| 626 | const u32 bias_lod_offset = (is_bindless ? 1 : 0); | ||
| 627 | if (lod_bias_enabled) { | ||
| 628 | ++parameter_register; | ||
| 629 | } | ||
| 630 | |||
| 631 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 632 | lod_bias_enabled, 4, 5); | ||
| 633 | const auto coord_count = std::get<0>(coord_counts); | ||
| 634 | // When arrays are enabled, the array index is always stored in the gpr8 field | ||
| 635 | const u64 array_register = instr.gpr8.Value(); | ||
| 636 | // The first coordinate register is gpr8, or gpr8 + 1 when arrays are used | ||
| 637 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 638 | |||
| 639 | std::vector<Node> coords; | ||
| 640 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 641 | coords.push_back(GetRegister(coord_register + i)); | ||
| 642 | } | ||
| 643 | // For 1D.DC in OpenGL, the second component is ignored. | ||
| 644 | if (depth_compare && !is_array && texture_type == TextureType::Texture1D) { | ||
| 645 | coords.push_back(Immediate(0.0f)); | ||
| 646 | } | ||
| 647 | |||
| 648 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 649 | |||
| 650 | std::vector<Node> aoffi; | ||
| 651 | if (is_aoffi) { | ||
| 652 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||
| 653 | } | ||
| 654 | |||
| 655 | Node dc; | ||
| 656 | if (depth_compare) { | ||
| 657 | // Depth is always stored in the register signaled by gpr20, or in the next register when | ||
| 658 | // lod or bias is used | ||
| 659 | dc = GetRegister(parameter_register++); | ||
| 660 | } | ||
| 661 | |||
| 662 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset, | ||
| 663 | aoffi, bindless_reg); | ||
| 664 | } | ||
| 665 | |||
| 666 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||
| 667 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||
| 668 | const bool lod_bias_enabled = | ||
| 669 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||
| 670 | |||
| 671 | const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array, | ||
| 672 | lod_bias_enabled, 4, 4); | ||
| 673 | const auto coord_count = std::get<0>(coord_counts); | ||
| 674 | |||
| 675 | // When arrays are enabled, the array index is always stored in the gpr8 field | ||
| 676 | const u64 array_register = instr.gpr8.Value(); | ||
| 677 | // The first coordinate register is gpr8, or (gpr8 + 1) when arrays are used | ||
| 678 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 679 | const u64 last_coord_register = | ||
| 680 | (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2)) | ||
| 681 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 682 | : coord_register + 1; | ||
| 683 | const u32 bias_offset = coord_count > 2 ? 1 : 0; | ||
| 684 | |||
| 685 | std::vector<Node> coords; | ||
| 686 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 687 | const bool last = (i == (coord_count - 1)) && (coord_count > 1); | ||
| 688 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | ||
| 689 | } | ||
| 690 | |||
| 691 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 692 | |||
| 693 | Node dc; | ||
| 694 | if (depth_compare) { | ||
| 695 | // Depth is always stored in the register signaled by gpr20, or in the next register when | ||
| 696 | // lod or bias is used | ||
| 697 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||
| 698 | dc = GetRegister(depth_register); | ||
| 699 | } | ||
| 700 | |||
| 701 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}, | ||
| 702 | {}); | ||
| 703 | } | ||
| 704 | |||
| 705 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||
| 706 | bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) { | ||
| 707 | ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time"); | ||
| 708 | |||
| 709 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 710 | |||
| 711 | // When arrays are enabled, the array index is always stored in the gpr8 field | ||
| 712 | const u64 array_register = instr.gpr8.Value(); | ||
| 713 | // The first coordinate register is gpr8, or gpr8 + 1 when arrays are used | ||
| 714 | const u64 coord_register = array_register + (is_array ? 1 : 0); | ||
| 715 | |||
| 716 | std::vector<Node> coords; | ||
| 717 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 718 | coords.push_back(GetRegister(coord_register + i)); | ||
| 719 | } | ||
| 720 | |||
| 721 | u64 parameter_register = instr.gpr20.Value(); | ||
| 722 | |||
| 723 | SamplerInfo info; | ||
| 724 | info.type = texture_type; | ||
| 725 | info.is_array = is_array; | ||
| 726 | info.is_shadow = depth_compare; | ||
| 727 | |||
| 728 | Node index_var; | ||
| 729 | const std::optional<SamplerEntry> sampler = | ||
| 730 | is_bindless ? GetBindlessSampler(parameter_register++, info, index_var) | ||
| 731 | : GetSampler(instr.sampler, info); | ||
| 732 | Node4 values; | ||
| 733 | if (!sampler) { | ||
| 734 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 735 | values[element] = Immediate(0); | ||
| 736 | } | ||
| 737 | return values; | ||
| 738 | } | ||
| 739 | |||
| 740 | std::vector<Node> aoffi, ptp; | ||
| 741 | if (is_aoffi) { | ||
| 742 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||
| 743 | } else if (is_ptp) { | ||
| 744 | ptp = GetPtpCoordinates( | ||
| 745 | {GetRegister(parameter_register++), GetRegister(parameter_register++)}); | ||
| 746 | } | ||
| 747 | |||
| 748 | Node dc; | ||
| 749 | if (depth_compare) { | ||
| 750 | dc = GetRegister(parameter_register++); | ||
| 751 | } | ||
| 752 | |||
| 753 | const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component)) | ||
| 754 | : Immediate(static_cast<u32>(instr.tld4.component)); | ||
| 755 | |||
| 756 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 757 | auto coords_copy = coords; | ||
| 758 | MetaTexture meta{ | ||
| 759 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, | ||
| 760 | index_var}; | ||
| 761 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||
| 762 | } | ||
| 763 | |||
| 764 | return values; | ||
| 765 | } | ||
| 766 | |||
| 767 | Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | ||
| 768 | const auto texture_type{instr.tld.texture_type}; | ||
| 769 | const bool is_array{instr.tld.is_array != 0}; | ||
| 770 | const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; | ||
| 771 | const std::size_t coord_count{GetCoordCount(texture_type)}; | ||
| 772 | |||
| 773 | u64 gpr8_cursor{instr.gpr8.Value()}; | ||
| 774 | const Node array_register{is_array ? GetRegister(gpr8_cursor++) : nullptr}; | ||
| 775 | |||
| 776 | std::vector<Node> coords; | ||
| 777 | coords.reserve(coord_count); | ||
| 778 | for (std::size_t i = 0; i < coord_count; ++i) { | ||
| 779 | coords.push_back(GetRegister(gpr8_cursor++)); | ||
| 780 | } | ||
| 781 | |||
| 782 | u64 gpr20_cursor{instr.gpr20.Value()}; | ||
| 783 | // const Node bindless_register{is_bindless ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 784 | const Node lod{lod_enabled ? GetRegister(gpr20_cursor++) : Immediate(0u)}; | ||
| 785 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 786 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||
| 787 | |||
| 788 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {}); | ||
| 789 | |||
| 790 | Node4 values; | ||
| 791 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 792 | auto coords_copy = coords; | ||
| 793 | MetaTexture meta{*sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; | ||
| 794 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 795 | } | ||
| 796 | |||
| 797 | return values; | ||
| 798 | } | ||
| 799 | |||
| 800 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | ||
| 801 | SamplerInfo info; | ||
| 802 | info.type = texture_type; | ||
| 803 | info.is_array = is_array; | ||
| 804 | info.is_shadow = false; | ||
| 805 | const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info); | ||
| 806 | |||
| 807 | const std::size_t type_coord_count = GetCoordCount(texture_type); | ||
| 808 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | ||
| 809 | const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 810 | |||
| 811 | // When arrays are enabled, the array index is always stored in the gpr8 field | ||
| 812 | const u64 array_register = instr.gpr8.Value(); | ||
| 813 | // When arrays are used, the coordinates are stored in gpr20 instead | ||
| 814 | const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value(); | ||
| 815 | |||
| 816 | const u64 last_coord_register = | ||
| 817 | ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array | ||
| 818 | ? static_cast<u64>(instr.gpr20.Value()) | ||
| 819 | : coord_register + 1; | ||
| 820 | |||
| 821 | std::vector<Node> coords; | ||
| 822 | for (std::size_t i = 0; i < type_coord_count; ++i) { | ||
| 823 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | ||
| 824 | coords.push_back( | ||
| 825 | GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i)); | ||
| 826 | } | ||
| 827 | |||
| 828 | const Node array = is_array ? GetRegister(array_register) : nullptr; | ||
| 829 | // When lod is used, it is always stored in gpr20 | ||
| 830 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||
| 831 | |||
| 832 | std::vector<Node> aoffi; | ||
| 833 | if (aoffi_enabled) { | ||
| 834 | aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false); | ||
| 835 | } | ||
| 836 | |||
| 837 | Node4 values; | ||
| 838 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 839 | auto coords_copy = coords; | ||
| 840 | MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}}; | ||
| 841 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||
| 842 | } | ||
| 843 | return values; | ||
| 844 | } | ||
| 845 | |||
| 846 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||
| 847 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | ||
| 848 | std::size_t max_coords, std::size_t max_inputs) { | ||
| 849 | const std::size_t coord_count = GetCoordCount(texture_type); | ||
| 850 | |||
| 851 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | ||
| 852 | const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0); | ||
| 853 | if (total_coord_count > max_coords || total_reg_count > max_inputs) { | ||
| 854 | UNIMPLEMENTED_MSG("Unsupported Texture operation"); | ||
| 855 | total_coord_count = std::min(total_coord_count, max_coords); | ||
| 856 | } | ||
| 857 | // For 1D.DC, OpenGL uses a vec3 coordinate, but the second component is ignored later. | ||
| 858 | total_coord_count += | ||
| 859 | (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0; | ||
| 860 | |||
| 861 | return {coord_count, total_coord_count}; | ||
| 862 | } | ||
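The clamp above reflects the encoding limits passed in by the callers: TEX allows up to 4 coordinate components and 5 total inputs, while TEXS allows only 4 and 4, so for example a 2D array shadow lookup (2 + 1 + 1 coordinates) plus a bias input lands exactly at the TEX ceiling. A tiny sketch of the same accounting:

```cpp
#include <algorithm>
#include <cstddef>

// Count coordinate inputs the way ValidateAndGetCoordinateElement does:
// base coords, plus one slot each for the array index and depth compare.
constexpr std::size_t TotalCoords(std::size_t coords, bool is_array,
                                  bool depth_compare, std::size_t max_coords) {
    const std::size_t total = coords + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
    return std::min(total, max_coords); // Unsupported encodings are clamped.
}

static_assert(TotalCoords(2, true, true, 4) == 4); // 2D array shadow: at the limit
```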
| 863 | |||
| 864 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | ||
| 865 | bool is_tld4) { | ||
| 866 | const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; | ||
| 867 | const u32 size = is_tld4 ? 6 : 4; | ||
| 868 | const s32 wrap_value = is_tld4 ? 32 : 8; | ||
| 869 | const s32 diff_value = is_tld4 ? 64 : 16; | ||
| 870 | const u32 mask = (1U << size) - 1; | ||
| 871 | |||
| 872 | std::vector<Node> aoffi; | ||
| 873 | aoffi.reserve(coord_count); | ||
| 874 | |||
| 875 | const auto aoffi_immediate{ | ||
| 876 | TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||
| 877 | if (!aoffi_immediate) { | ||
| 878 | // Variable access, not supported on AMD. | ||
| 879 | LOG_WARNING(HW_GPU, | ||
| 880 | "AOFFI constant folding failed, some hardware might have graphical issues"); | ||
| 881 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 882 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); | ||
| 883 | const Node condition = | ||
| 884 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||
| 885 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||
| 886 | aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 887 | } | ||
| 888 | return aoffi; | ||
| 889 | } | ||
| 890 | |||
| 891 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 892 | s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; | ||
| 893 | if (value >= wrap_value) { | ||
| 894 | value -= diff_value; | ||
| 895 | } | ||
| 896 | aoffi.push_back(Immediate(value)); | ||
| 897 | } | ||
| 898 | return aoffi; | ||
| 899 | } | ||
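In the immediate path above, each offset is a packed two's-complement field: 4 bits wide at bit positions 0/4/8 for ordinary texture operations, 6 bits wide at 0/8/16 for TLD4, which is why values at or above wrap_value (8 or 32) are folded down by diff_value (16 or 64). A standalone sketch of the non-TLD4 decode, with worked values:

```cpp
#include <array>
#include <cstdint>

// Decode one 4-bit signed AOFFI field from a packed immediate
// (non-TLD4 layout: fields at bits 0, 4 and 8).
constexpr int32_t DecodeAoffi4(uint32_t packed, uint32_t coord) {
    constexpr std::array<uint32_t, 3> shifts{0U, 4U, 8U};
    const int32_t value = static_cast<int32_t>((packed >> shifts[coord]) & 0xF);
    return value >= 8 ? value - 16 : value; // Sign-extend the 4-bit field.
}

// 0xF in the lowest field is -1; 0x7 is the maximum positive offset +7.
static_assert(DecodeAoffi4(0x00F, 0) == -1);
static_assert(DecodeAoffi4(0x070, 1) == 7);
```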
| 900 | |||
| 901 | std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) { | ||
| 902 | static constexpr u32 num_entries = 8; | ||
| 903 | |||
| 904 | std::vector<Node> ptp; | ||
| 905 | ptp.reserve(num_entries); | ||
| 906 | |||
| 907 | const auto global_size = static_cast<s64>(global_code.size()); | ||
| 908 | const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size); | ||
| 909 | const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size); | ||
| 910 | if (!low || !high) { | ||
| 911 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 912 | const u32 reg = entry / 4; | ||
| 913 | const u32 offset = entry % 4; | ||
| 914 | const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6); | ||
| 915 | const Node condition = | ||
| 916 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32)); | ||
| 917 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64)); | ||
| 918 | ptp.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 919 | } | ||
| 920 | return ptp; | ||
| 921 | } | ||
| 922 | |||
| 923 | const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low); | ||
| 924 | for (u32 entry = 0; entry < num_entries; ++entry) { | ||
| 925 | s32 value = (immediate >> (entry * 8)) & 0b111111; | ||
| 926 | if (value >= 32) { | ||
| 927 | value -= 64; | ||
| 928 | } | ||
| 929 | ptp.push_back(Immediate(value)); | ||
| 930 | } | ||
| 931 | |||
| 932 | return ptp; | ||
| 933 | } | ||
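PTP offsets follow the same idea with eight 6-bit signed fields, one per byte across the two source registers; the immediate path above masks with 0b111111 and folds values at or above 32 down by 64. A hedged sketch of the immediate decode:

```cpp
#include <array>
#include <cstdint>

// Decode the eight 6-bit signed PTP offsets packed one per byte of a
// 64-bit immediate (low register in bits 0-31, high in bits 32-63).
constexpr std::array<int32_t, 8> DecodePtp(uint64_t immediate) {
    std::array<int32_t, 8> offsets{};
    for (uint32_t entry = 0; entry < 8; ++entry) {
        int32_t value = static_cast<int32_t>((immediate >> (entry * 8)) & 0x3F);
        if (value >= 32) {
            value -= 64; // Sign-extend the 6-bit field.
        }
        offsets[entry] = value;
    }
    return offsets;
}
```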
| 934 | |||
| 935 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp deleted file mode 100644 index 1c0957277..000000000 --- a/src/video_core/shader/decode/video.cpp +++ /dev/null | |||
| @@ -1,169 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using std::move; | ||
| 14 | using Tegra::Shader::Instruction; | ||
| 15 | using Tegra::Shader::OpCode; | ||
| 16 | using Tegra::Shader::Pred; | ||
| 17 | using Tegra::Shader::VideoType; | ||
| 18 | using Tegra::Shader::VmadShr; | ||
| 19 | using Tegra::Shader::VmnmxOperation; | ||
| 20 | using Tegra::Shader::VmnmxType; | ||
| 21 | |||
| 22 | u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) { | ||
| 23 | const Instruction instr = {program_code[pc]}; | ||
| 24 | const auto opcode = OpCode::Decode(instr); | ||
| 25 | |||
| 26 | if (opcode->get().GetId() == OpCode::Id::VMNMX) { | ||
| 27 | DecodeVMNMX(bb, instr); | ||
| 28 | return pc; | ||
| 29 | } | ||
| 30 | |||
| 31 | const Node op_a = | ||
| 32 | GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a, | ||
| 33 | instr.video.type_a, instr.video.byte_height_a); | ||
| 34 | const Node op_b = [this, instr] { | ||
| 35 | if (instr.video.use_register_b) { | ||
| 36 | return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b, | ||
| 37 | instr.video.signed_b, instr.video.type_b, | ||
| 38 | instr.video.byte_height_b); | ||
| 39 | } | ||
| 40 | if (instr.video.signed_b) { | ||
| 41 | const auto imm = static_cast<s16>(instr.alu.GetImm20_16()); | ||
| 42 | return Immediate(static_cast<u32>(imm)); | ||
| 43 | } else { | ||
| 44 | return Immediate(instr.alu.GetImm20_16()); | ||
| 45 | } | ||
| 46 | }(); | ||
| 47 | |||
| 48 | switch (opcode->get().GetId()) { | ||
| 49 | case OpCode::Id::VMAD: { | ||
| 50 | const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 51 | const Node op_c = GetRegister(instr.gpr39); | ||
| 52 | |||
| 53 | Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b); | ||
| 54 | value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c); | ||
| 55 | |||
| 56 | if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) { | ||
| 57 | const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15); | ||
| 58 | value = | ||
| 59 | SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift); | ||
| 60 | } | ||
| 61 | |||
| 62 | SetInternalFlagsFromInteger(bb, value, instr.generates_cc); | ||
| 63 | SetRegister(bb, instr.gpr0, value); | ||
| 64 | break; | ||
| 65 | } | ||
| 66 | case OpCode::Id::VSETP: { | ||
| 67 | // We can't use the constant predicate as a destination. | ||
| 68 | ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex)); | ||
| 69 | |||
| 70 | const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1; | ||
| 71 | const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b); | ||
| 72 | const Node second_pred = GetPredicate(instr.vsetp.pred39, false); | ||
| 73 | |||
| 74 | const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op); | ||
| 75 | |||
| 76 | // Set the primary predicate to the result of Predicate OP SecondPredicate | ||
| 77 | SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred)); | ||
| 78 | |||
| 79 | if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) { | ||
| 80 | // Set the secondary predicate to the result of !Predicate OP SecondPredicate, | ||
| 81 | // if enabled | ||
| 82 | const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred); | ||
| 83 | SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred)); | ||
| 84 | } | ||
| 85 | break; | ||
| 86 | } | ||
| 87 | default: | ||
| 88 | UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName()); | ||
| 89 | } | ||
| 90 | |||
| 91 | return pc; | ||
| 92 | } | ||
| 93 | |||
| 94 | Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type, | ||
| 95 | u64 byte_height) { | ||
| 96 | if (!is_chunk) { | ||
| 97 | return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8); | ||
| 98 | } | ||
| 99 | |||
| 100 | switch (type) { | ||
| 101 | case VideoType::Size16_Low: | ||
| 102 | return BitfieldExtract(op, 0, 16); | ||
| 103 | case VideoType::Size16_High: | ||
| 104 | return BitfieldExtract(op, 16, 16); | ||
| 105 | case VideoType::Size32: | ||
| 106 | // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used | ||
| 107 | // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort. | ||
| 108 | UNIMPLEMENTED(); | ||
| 109 | return Immediate(0); | ||
| 110 | case VideoType::Invalid: | ||
| 111 | UNREACHABLE_MSG("Invalid instruction encoding"); | ||
| 112 | return Immediate(0); | ||
| 113 | default: | ||
| 114 | UNREACHABLE(); | ||
| 115 | return Immediate(0); | ||
| 116 | } | ||
| 117 | } | ||
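GetVideoOperand is in essence a bitfield extract: one of four bytes when is_chunk is false, or one 16-bit half otherwise. A plain-integer sketch of the byte path, mirroring BitfieldExtract(op, byte_height * 8, 8) in the unsigned case:

```cpp
#include <cstdint>

// Select one byte of a 32-bit video operand by byte index (0 = lowest).
constexpr uint32_t ExtractByte(uint32_t op, uint32_t byte_height) {
    return (op >> (byte_height * 8)) & 0xFFU;
}

static_assert(ExtractByte(0xAABBCCDD, 0) == 0xDD);
static_assert(ExtractByte(0xAABBCCDD, 3) == 0xAA);
```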
| 118 | |||
| 119 | void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) { | ||
| 120 | UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register); | ||
| 121 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32); | ||
| 122 | UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32); | ||
| 123 | UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed); | ||
| 124 | UNIMPLEMENTED_IF(instr.vmnmx.sat); | ||
| 125 | UNIMPLEMENTED_IF(instr.generates_cc); | ||
| 126 | |||
| 127 | Node op_a = GetRegister(instr.gpr8); | ||
| 128 | Node op_b = GetRegister(instr.gpr20); | ||
| 129 | Node op_c = GetRegister(instr.gpr39); | ||
| 130 | |||
| 131 | const bool is_oper1_signed = instr.vmnmx.is_src_a_signed; // Stubbed | ||
| 132 | const bool is_oper2_signed = instr.vmnmx.is_dest_signed; | ||
| 133 | |||
| 134 | const auto operation_a = instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin; | ||
| 135 | Node value = SignedOperation(operation_a, is_oper1_signed, move(op_a), move(op_b)); | ||
| 136 | |||
| 137 | switch (instr.vmnmx.operation) { | ||
| 138 | case VmnmxOperation::Mrg_16H: | ||
| 139 | value = BitfieldInsert(move(op_c), move(value), 16, 16); | ||
| 140 | break; | ||
| 141 | case VmnmxOperation::Mrg_16L: | ||
| 142 | value = BitfieldInsert(move(op_c), move(value), 0, 16); | ||
| 143 | break; | ||
| 144 | case VmnmxOperation::Mrg_8B0: | ||
| 145 | value = BitfieldInsert(move(op_c), move(value), 0, 8); | ||
| 146 | break; | ||
| 147 | case VmnmxOperation::Mrg_8B2: | ||
| 148 | value = BitfieldInsert(move(op_c), move(value), 16, 8); | ||
| 149 | break; | ||
| 150 | case VmnmxOperation::Acc: | ||
| 151 | value = Operation(OperationCode::IAdd, move(value), move(op_c)); | ||
| 152 | break; | ||
| 153 | case VmnmxOperation::Min: | ||
| 154 | value = SignedOperation(OperationCode::IMin, is_oper2_signed, move(value), move(op_c)); | ||
| 155 | break; | ||
| 156 | case VmnmxOperation::Max: | ||
| 157 | value = SignedOperation(OperationCode::IMax, is_oper2_signed, move(value), move(op_c)); | ||
| 158 | break; | ||
| 159 | case VmnmxOperation::Nop: | ||
| 160 | break; | ||
| 161 | default: | ||
| 162 | UNREACHABLE(); | ||
| 163 | break; | ||
| 164 | } | ||
| 165 | |||
| 166 | SetRegister(bb, instr.gpr0, move(value)); | ||
| 167 | } | ||
| 168 | |||
| 169 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp deleted file mode 100644 index 37433d783..000000000 --- a/src/video_core/shader/decode/warp.cpp +++ /dev/null | |||
| @@ -1,117 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | using Tegra::Shader::ShuffleOperation; | ||
| 17 | using Tegra::Shader::VoteOperation; | ||
| 18 | |||
| 19 | namespace { | ||
| 20 | |||
| 21 | OperationCode GetOperationCode(VoteOperation vote_op) { | ||
| 22 | switch (vote_op) { | ||
| 23 | case VoteOperation::All: | ||
| 24 | return OperationCode::VoteAll; | ||
| 25 | case VoteOperation::Any: | ||
| 26 | return OperationCode::VoteAny; | ||
| 27 | case VoteOperation::Eq: | ||
| 28 | return OperationCode::VoteEqual; | ||
| 29 | default: | ||
| 30 | UNREACHABLE_MSG("Invalid vote operation={}", vote_op); | ||
| 31 | return OperationCode::VoteAll; | ||
| 32 | } | ||
| 33 | } | ||
| 34 | |||
| 35 | } // Anonymous namespace | ||
| 36 | |||
| 37 | u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { | ||
| 38 | const Instruction instr = {program_code[pc]}; | ||
| 39 | const auto opcode = OpCode::Decode(instr); | ||
| 40 | |||
| 41 | // Signal the backend that this shader uses warp instructions. | ||
| 42 | uses_warps = true; | ||
| 43 | |||
| 44 | switch (opcode->get().GetId()) { | ||
| 45 | case OpCode::Id::VOTE: { | ||
| 46 | const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0); | ||
| 47 | const Node active = Operation(OperationCode::BallotThread, value); | ||
| 48 | const Node vote = Operation(GetOperationCode(instr.vote.operation), value); | ||
| 49 | SetRegister(bb, instr.gpr0, active); | ||
| 50 | SetPredicate(bb, instr.vote.dest_pred, vote); | ||
| 51 | break; | ||
| 52 | } | ||
| 53 | case OpCode::Id::SHFL: { | ||
| 54 | Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm)) | ||
| 55 | : GetRegister(instr.gpr39); | ||
| 56 | Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm)) | ||
| 57 | : GetRegister(instr.gpr20); | ||
| 58 | |||
| 59 | Node thread_id = Operation(OperationCode::ThreadId); | ||
| 60 | Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU)); | ||
| 61 | Node seg_mask = BitfieldExtract(mask, 8, 16); | ||
| 62 | |||
| 63 | Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask); | ||
| 64 | Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask); | ||
| 65 | Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id, | ||
| 66 | Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask)); | ||
| 67 | |||
| 68 | Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] { | ||
| 69 | switch (instr.shfl.operation) { | ||
| 70 | case ShuffleOperation::Idx: | ||
| 71 | return Operation(OperationCode::IBitwiseOr, | ||
| 72 | Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask), | ||
| 73 | min_thread_id); | ||
| 74 | case ShuffleOperation::Down: | ||
| 75 | return Operation(OperationCode::IAdd, thread_id, index); | ||
| 76 | case ShuffleOperation::Up: | ||
| 77 | return Operation(OperationCode::IAdd, thread_id, | ||
| 78 | Operation(OperationCode::INegate, index)); | ||
| 79 | case ShuffleOperation::Bfly: | ||
| 80 | return Operation(OperationCode::IBitwiseXor, thread_id, index); | ||
| 81 | } | ||
| 82 | UNREACHABLE(); | ||
| 83 | return Immediate(0U); | ||
| 84 | }(); | ||
| 85 | |||
| 86 | Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] { | ||
| 87 | if (instr.shfl.operation == ShuffleOperation::Up) { | ||
| 88 | return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id); | ||
| 89 | } else { | ||
| 90 | return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id); | ||
| 91 | } | ||
| 92 | }(); | ||
| 93 | |||
| 94 | SetPredicate(bb, instr.shfl.pred48, in_bounds); | ||
| 95 | SetRegister( | ||
| 96 | bb, instr.gpr0, | ||
| 97 | Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); | ||
| 98 | break; | ||
| 99 | } | ||
| 100 | case OpCode::Id::FSWZADD: { | ||
| 101 | UNIMPLEMENTED_IF(instr.fswzadd.ndv); | ||
| 102 | |||
| 103 | Node op_a = GetRegister(instr.gpr8); | ||
| 104 | Node op_b = GetRegister(instr.gpr20); | ||
| 105 | Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle)); | ||
| 106 | SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); | ||
| 107 | break; | ||
| 108 | } | ||
| 109 | default: | ||
| 110 | UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); | ||
| 111 | break; | ||
| 112 | } | ||
| 113 | |||
| 114 | return pc; | ||
| 115 | } | ||
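The SHFL lowering above mirrors PTX-style shuffle semantics: the mask register encodes a lane clamp in bits 0-4 and a segment mask in bits 8-23; lanes covered by the segment mask stay pinned to the current thread, the rest come from the index operand or arithmetic on the thread id, and the output predicate reports whether the source lane stayed inside the segment. A scalar sketch of the source-lane computation, following the node graph above rather than any official specification:

```cpp
#include <cstdint>

enum class Shuffle { Idx, Down, Up, Bfly };

struct ShuffleResult {
    uint32_t src_lane;
    bool in_bounds;
};

// Compute the source lane for one thread, mirroring the IR built above.
ShuffleResult SourceLane(Shuffle op, uint32_t thread_id, uint32_t index,
                         uint32_t mask) {
    const uint32_t clamp = mask & 0x1FU;
    const uint32_t seg_mask = (mask >> 8) & 0xFFFFU;
    const uint32_t min_lane = thread_id & seg_mask;
    const uint32_t max_lane = min_lane | (clamp & ~seg_mask);

    int32_t src = 0;
    switch (op) {
    case Shuffle::Idx:
        src = static_cast<int32_t>((index & ~seg_mask) | min_lane);
        break;
    case Shuffle::Down:
        src = static_cast<int32_t>(thread_id + index);
        break;
    case Shuffle::Up:
        // INegate + IAdd above: the result may fall below the segment start.
        src = static_cast<int32_t>(thread_id) - static_cast<int32_t>(index);
        break;
    case Shuffle::Bfly:
        src = static_cast<int32_t>(thread_id ^ index);
        break;
    }
    // Up checks the lower bound, every other mode the upper one.
    const bool in_bounds = op == Shuffle::Up
                               ? src >= static_cast<int32_t>(min_lane)
                               : src <= static_cast<int32_t>(max_lane);
    return {static_cast<uint32_t>(src), in_bounds};
}
```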
| 116 | |||
| 117 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp deleted file mode 100644 index 233b8fa42..000000000 --- a/src/video_core/shader/decode/xmad.cpp +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | #include "common/common_types.h" | ||
| 7 | #include "video_core/engines/shader_bytecode.h" | ||
| 8 | #include "video_core/shader/node_helper.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | ||
| 10 | |||
| 11 | namespace VideoCommon::Shader { | ||
| 12 | |||
| 13 | using Tegra::Shader::Instruction; | ||
| 14 | using Tegra::Shader::OpCode; | ||
| 15 | using Tegra::Shader::PredCondition; | ||
| 16 | |||
| 17 | u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) { | ||
| 18 | const Instruction instr = {program_code[pc]}; | ||
| 19 | const auto opcode = OpCode::Decode(instr); | ||
| 20 | |||
| 21 | UNIMPLEMENTED_IF(instr.xmad.sign_a); | ||
| 22 | UNIMPLEMENTED_IF(instr.xmad.sign_b); | ||
| 23 | UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||
| 24 | "Condition codes generation in XMAD is not implemented"); | ||
| 25 | |||
| 26 | Node op_a = GetRegister(instr.gpr8); | ||
| 27 | |||
| 28 | // TODO(bunnei): Needs to be fixed once op_a or op_b is signed | ||
| 29 | UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b); | ||
| 30 | const bool is_signed_a = instr.xmad.sign_a == 1; | ||
| 31 | const bool is_signed_b = instr.xmad.sign_b == 1; | ||
| 32 | const bool is_signed_c = is_signed_a; | ||
| 33 | |||
| 34 | auto [is_merge, is_psl, is_high_b, mode, op_b_binding, | ||
| 35 | op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> { | ||
| 36 | switch (opcode->get().GetId()) { | ||
| 37 | case OpCode::Id::XMAD_CR: | ||
| 38 | return {instr.xmad.merge_56, | ||
| 39 | instr.xmad.product_shift_left_second, | ||
| 40 | instr.xmad.high_b, | ||
| 41 | instr.xmad.mode_cbf, | ||
| 42 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()), | ||
| 43 | GetRegister(instr.gpr39)}; | ||
| 44 | case OpCode::Id::XMAD_RR: | ||
| 45 | return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr, | ||
| 46 | instr.xmad.mode, GetRegister(instr.gpr20), GetRegister(instr.gpr39)}; | ||
| 47 | case OpCode::Id::XMAD_RC: | ||
| 48 | return {false, | ||
| 49 | false, | ||
| 50 | instr.xmad.high_b, | ||
| 51 | instr.xmad.mode_cbf, | ||
| 52 | GetRegister(instr.gpr39), | ||
| 53 | GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())}; | ||
| 54 | case OpCode::Id::XMAD_IMM: | ||
| 55 | return {instr.xmad.merge_37, | ||
| 56 | instr.xmad.product_shift_left, | ||
| 57 | false, | ||
| 58 | instr.xmad.mode, | ||
| 59 | Immediate(static_cast<u32>(instr.xmad.imm20_16)), | ||
| 60 | GetRegister(instr.gpr39)}; | ||
| 61 | default: | ||
| 62 | UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName()); | ||
| 63 | return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)}; | ||
| 64 | } | ||
| 65 | }(); | ||
| 66 | |||
| 67 | op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a), | ||
| 68 | instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 69 | |||
| 70 | const Node original_b = op_b_binding; | ||
| 71 | const Node op_b = | ||
| 72 | SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding), | ||
| 73 | is_high_b ? Immediate(16) : Immediate(0), Immediate(16)); | ||
| 74 | |||
| 75 | // sign_a and sign_b have already been checked for equality above, so either one works here. | ||
| 76 | Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b); | ||
| 77 | if (is_psl) { | ||
| 78 | product = | ||
| 79 | SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16)); | ||
| 80 | } | ||
| 81 | SetTemporary(bb, 0, product); | ||
| 82 | product = GetTemporary(0); | ||
| 83 | |||
| 84 | Node original_c = op_c; | ||
| 85 | const Tegra::Shader::XmadMode set_mode = mode; // Workaround for a clang compile error | ||
| 86 | op_c = [&] { | ||
| 87 | switch (set_mode) { | ||
| 88 | case Tegra::Shader::XmadMode::None: | ||
| 89 | return original_c; | ||
| 90 | case Tegra::Shader::XmadMode::CLo: | ||
| 91 | return BitfieldExtract(std::move(original_c), 0, 16); | ||
| 92 | case Tegra::Shader::XmadMode::CHi: | ||
| 93 | return BitfieldExtract(std::move(original_c), 16, 16); | ||
| 94 | case Tegra::Shader::XmadMode::CBcc: { | ||
| 95 | Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, | ||
| 96 | original_b, Immediate(16)); | ||
| 97 | return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c), | ||
| 98 | std::move(shifted_b)); | ||
| 99 | } | ||
| 100 | case Tegra::Shader::XmadMode::CSfu: { | ||
| 101 | const Node comp_a = | ||
| 102 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0)); | ||
| 103 | const Node comp_b = | ||
| 104 | GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0)); | ||
| 105 | const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b); | ||
| 106 | |||
| 107 | const Node comp_minus_a = GetPredicateComparisonInteger( | ||
| 108 | PredCondition::NE, is_signed_a, | ||
| 109 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a, | ||
| 110 | Immediate(0x80000000)), | ||
| 111 | Immediate(0)); | ||
| 112 | const Node comp_minus_b = GetPredicateComparisonInteger( | ||
| 113 | PredCondition::NE, is_signed_b, | ||
| 114 | SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b, | ||
| 115 | Immediate(0x80000000)), | ||
| 116 | Immediate(0)); | ||
| 117 | |||
| 118 | Node new_c = Operation( | ||
| 119 | OperationCode::Select, comp_minus_a, | ||
| 120 | SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)), | ||
| 121 | original_c); | ||
| 122 | new_c = Operation( | ||
| 123 | OperationCode::Select, comp_minus_b, | ||
| 124 | SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)), | ||
| 125 | std::move(new_c)); | ||
| 126 | |||
| 127 | return Operation(OperationCode::Select, comp, original_c, std::move(new_c)); | ||
| 128 | } | ||
| 129 | default: | ||
| 130 | UNREACHABLE(); | ||
| 131 | return Immediate(0); | ||
| 132 | } | ||
| 133 | }(); | ||
| 134 | |||
| 135 | SetTemporary(bb, 1, op_c); | ||
| 136 | op_c = GetTemporary(1); | ||
| 137 | |||
| 138 | // TODO(Rodrigo): Use an appropriate sign for this operation | ||
| 139 | Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c)); | ||
| 140 | SetTemporary(bb, 2, sum); | ||
| 141 | sum = GetTemporary(2); | ||
| 142 | if (is_merge) { | ||
| 143 | const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum), | ||
| 144 | Immediate(0), Immediate(16)); | ||
| 145 | const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b, | ||
| 146 | Immediate(16)); | ||
| 147 | sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b); | ||
| 148 | } | ||
| 149 | |||
| 150 | SetInternalFlagsFromInteger(bb, sum, instr.generates_cc); | ||
| 151 | SetRegister(bb, instr.gpr0, std::move(sum)); | ||
| 152 | |||
| 153 | return pc; | ||
| 154 | } | ||
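XMAD multiplies two 16-bit register halves and accumulates a 32-bit operand, which is why compiled Maxwell shaders typically build a full 32x32-bit multiply out of a chain of three XMADs over the operands' halves; the decoded PSL, CBcc and merge modes exist to serve that pattern. A sketch of the arithmetic (not the exact instruction encoding):

```cpp
#include <cstdint>

// One XMAD step: 16-bit halves multiplied, 32-bit addend accumulated.
constexpr uint32_t Xmad(uint16_t a, uint16_t b, uint32_t c) {
    return static_cast<uint32_t>(a) * static_cast<uint32_t>(b) + c;
}

// A 32x32 -> 32 multiply composed from XMADs over the 16-bit halves.
constexpr uint32_t Mul32(uint32_t a, uint32_t b) {
    const auto lo = [](uint32_t v) { return static_cast<uint16_t>(v); };
    const auto hi = [](uint32_t v) { return static_cast<uint16_t>(v >> 16); };
    uint32_t acc = Xmad(lo(a), hi(b), 0);  // a.lo * b.hi
    acc = Xmad(hi(a), lo(b), acc);         // + a.hi * b.lo
    return Xmad(lo(a), lo(b), acc << 16);  // (acc << 16) + a.lo * b.lo
}

static_assert(Mul32(0x12345678, 0x9ABCDEF0) == 0x12345678u * 0x9ABCDEF0u);
```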
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp deleted file mode 100644 index 2647865d4..000000000 --- a/src/video_core/shader/expr.cpp +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <variant> | ||
| 7 | |||
| 8 | #include "video_core/shader/expr.h" | ||
| 9 | |||
| 10 | namespace VideoCommon::Shader { | ||
| 11 | namespace { | ||
| 12 | bool ExprIsBoolean(const Expr& expr) { | ||
| 13 | return std::holds_alternative<ExprBoolean>(*expr); | ||
| 14 | } | ||
| 15 | |||
| 16 | bool ExprBooleanGet(const Expr& expr) { | ||
| 17 | return std::get_if<ExprBoolean>(expr.get())->value; | ||
| 18 | } | ||
| 19 | } // Anonymous namespace | ||
| 20 | |||
| 21 | bool ExprAnd::operator==(const ExprAnd& b) const { | ||
| 22 | return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); | ||
| 23 | } | ||
| 24 | |||
| 25 | bool ExprAnd::operator!=(const ExprAnd& b) const { | ||
| 26 | return !operator==(b); | ||
| 27 | } | ||
| 28 | |||
| 29 | bool ExprOr::operator==(const ExprOr& b) const { | ||
| 30 | return (*operand1 == *b.operand1) && (*operand2 == *b.operand2); | ||
| 31 | } | ||
| 32 | |||
| 33 | bool ExprOr::operator!=(const ExprOr& b) const { | ||
| 34 | return !operator==(b); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool ExprNot::operator==(const ExprNot& b) const { | ||
| 38 | return *operand1 == *b.operand1; | ||
| 39 | } | ||
| 40 | |||
| 41 | bool ExprNot::operator!=(const ExprNot& b) const { | ||
| 42 | return !operator==(b); | ||
| 43 | } | ||
| 44 | |||
| 45 | Expr MakeExprNot(Expr first) { | ||
| 46 | if (std::holds_alternative<ExprNot>(*first)) { | ||
| 47 | return std::get_if<ExprNot>(first.get())->operand1; | ||
| 48 | } | ||
| 49 | return MakeExpr<ExprNot>(std::move(first)); | ||
| 50 | } | ||
| 51 | |||
| 52 | Expr MakeExprAnd(Expr first, Expr second) { | ||
| 53 | if (ExprIsBoolean(first)) { | ||
| 54 | return ExprBooleanGet(first) ? second : first; | ||
| 55 | } | ||
| 56 | if (ExprIsBoolean(second)) { | ||
| 57 | return ExprBooleanGet(second) ? first : second; | ||
| 58 | } | ||
| 59 | return MakeExpr<ExprAnd>(std::move(first), std::move(second)); | ||
| 60 | } | ||
| 61 | |||
| 62 | Expr MakeExprOr(Expr first, Expr second) { | ||
| 63 | if (ExprIsBoolean(first)) { | ||
| 64 | return ExprBooleanGet(first) ? first : second; | ||
| 65 | } | ||
| 66 | if (ExprIsBoolean(second)) { | ||
| 67 | return ExprBooleanGet(second) ? second : first; | ||
| 68 | } | ||
| 69 | return MakeExpr<ExprOr>(std::move(first), std::move(second)); | ||
| 70 | } | ||
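MakeExprAnd and MakeExprOr fold boolean constants at construction time, so control-flow analysis never builds nodes like `x && true`, and MakeExprNot collapses double negation. A usage sketch against the (now-removed) interface declared in expr.h:

```cpp
#include "video_core/shader/expr.h" // Interface removed by this commit.

using VideoCommon::Shader::Expr;
using VideoCommon::Shader::ExprBoolean;
using VideoCommon::Shader::ExprPredicate;
using VideoCommon::Shader::MakeExpr;
using VideoCommon::Shader::MakeExprAnd;
using VideoCommon::Shader::MakeExprNot;

Expr Example() {
    const Expr x = MakeExpr<ExprPredicate>(3u);
    // AND with true folds to x itself; AND with false would return the literal.
    const Expr folded = MakeExprAnd(x, MakeExpr<ExprBoolean>(true));
    // Double negation collapses: MakeExprNot returns the inner operand.
    return MakeExprNot(MakeExprNot(folded));
}
```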
| 71 | |||
| 72 | bool ExprAreEqual(const Expr& first, const Expr& second) { | ||
| 73 | return (*first) == (*second); | ||
| 74 | } | ||
| 75 | |||
| 76 | bool ExprAreOpposite(const Expr& first, const Expr& second) { | ||
| 77 | if (std::holds_alternative<ExprNot>(*first)) { | ||
| 78 | return ExprAreEqual(std::get_if<ExprNot>(first.get())->operand1, second); | ||
| 79 | } | ||
| 80 | if (std::holds_alternative<ExprNot>(*second)) { | ||
| 81 | return ExprAreEqual(std::get_if<ExprNot>(second.get())->operand1, first); | ||
| 82 | } | ||
| 83 | return false; | ||
| 84 | } | ||
| 85 | |||
| 86 | bool ExprIsTrue(const Expr& first) { | ||
| 87 | if (ExprIsBoolean(first)) { | ||
| 88 | return ExprBooleanGet(first); | ||
| 89 | } | ||
| 90 | return false; | ||
| 91 | } | ||
| 92 | |||
| 93 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h deleted file mode 100644 index cda284c72..000000000 --- a/src/video_core/shader/expr.h +++ /dev/null | |||
| @@ -1,156 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <variant> | ||
| 9 | |||
| 10 | #include "video_core/engines/shader_bytecode.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | using Tegra::Shader::ConditionCode; | ||
| 15 | using Tegra::Shader::Pred; | ||
| 16 | |||
| 17 | class ExprAnd; | ||
| 18 | class ExprBoolean; | ||
| 19 | class ExprCondCode; | ||
| 20 | class ExprGprEqual; | ||
| 21 | class ExprNot; | ||
| 22 | class ExprOr; | ||
| 23 | class ExprPredicate; | ||
| 24 | class ExprVar; | ||
| 25 | |||
| 26 | using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, | ||
| 27 | ExprBoolean, ExprGprEqual>; | ||
| 28 | using Expr = std::shared_ptr<ExprData>; | ||
| 29 | |||
| 30 | class ExprAnd final { | ||
| 31 | public: | ||
| 32 | explicit ExprAnd(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} | ||
| 33 | |||
| 34 | bool operator==(const ExprAnd& b) const; | ||
| 35 | bool operator!=(const ExprAnd& b) const; | ||
| 36 | |||
| 37 | Expr operand1; | ||
| 38 | Expr operand2; | ||
| 39 | }; | ||
| 40 | |||
| 41 | class ExprOr final { | ||
| 42 | public: | ||
| 43 | explicit ExprOr(Expr a, Expr b) : operand1{std::move(a)}, operand2{std::move(b)} {} | ||
| 44 | |||
| 45 | bool operator==(const ExprOr& b) const; | ||
| 46 | bool operator!=(const ExprOr& b) const; | ||
| 47 | |||
| 48 | Expr operand1; | ||
| 49 | Expr operand2; | ||
| 50 | }; | ||
| 51 | |||
| 52 | class ExprNot final { | ||
| 53 | public: | ||
| 54 | explicit ExprNot(Expr a) : operand1{std::move(a)} {} | ||
| 55 | |||
| 56 | bool operator==(const ExprNot& b) const; | ||
| 57 | bool operator!=(const ExprNot& b) const; | ||
| 58 | |||
| 59 | Expr operand1; | ||
| 60 | }; | ||
| 61 | |||
| 62 | class ExprVar final { | ||
| 63 | public: | ||
| 64 | explicit ExprVar(u32 index) : var_index{index} {} | ||
| 65 | |||
| 66 | bool operator==(const ExprVar& b) const { | ||
| 67 | return var_index == b.var_index; | ||
| 68 | } | ||
| 69 | |||
| 70 | bool operator!=(const ExprVar& b) const { | ||
| 71 | return !operator==(b); | ||
| 72 | } | ||
| 73 | |||
| 74 | u32 var_index; | ||
| 75 | }; | ||
| 76 | |||
| 77 | class ExprPredicate final { | ||
| 78 | public: | ||
| 79 | explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {} | ||
| 80 | |||
| 81 | bool operator==(const ExprPredicate& b) const { | ||
| 82 | return predicate == b.predicate; | ||
| 83 | } | ||
| 84 | |||
| 85 | bool operator!=(const ExprPredicate& b) const { | ||
| 86 | return !operator==(b); | ||
| 87 | } | ||
| 88 | |||
| 89 | u32 predicate; | ||
| 90 | }; | ||
| 91 | |||
| 92 | class ExprCondCode final { | ||
| 93 | public: | ||
| 94 | explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {} | ||
| 95 | |||
| 96 | bool operator==(const ExprCondCode& b) const { | ||
| 97 | return cc == b.cc; | ||
| 98 | } | ||
| 99 | |||
| 100 | bool operator!=(const ExprCondCode& b) const { | ||
| 101 | return !operator==(b); | ||
| 102 | } | ||
| 103 | |||
| 104 | ConditionCode cc; | ||
| 105 | }; | ||
| 106 | |||
| 107 | class ExprBoolean final { | ||
| 108 | public: | ||
| 109 | explicit ExprBoolean(bool val) : value{val} {} | ||
| 110 | |||
| 111 | bool operator==(const ExprBoolean& b) const { | ||
| 112 | return value == b.value; | ||
| 113 | } | ||
| 114 | |||
| 115 | bool operator!=(const ExprBoolean& b) const { | ||
| 116 | return !operator==(b); | ||
| 117 | } | ||
| 118 | |||
| 119 | bool value; | ||
| 120 | }; | ||
| 121 | |||
| 122 | class ExprGprEqual final { | ||
| 123 | public: | ||
| 124 | explicit ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {} | ||
| 125 | |||
| 126 | bool operator==(const ExprGprEqual& b) const { | ||
| 127 | return gpr == b.gpr && value == b.value; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool operator!=(const ExprGprEqual& b) const { | ||
| 131 | return !operator==(b); | ||
| 132 | } | ||
| 133 | |||
| 134 | u32 gpr; | ||
| 135 | u32 value; | ||
| 136 | }; | ||
| 137 | |||
| 138 | template <typename T, typename... Args> | ||
| 139 | Expr MakeExpr(Args&&... args) { | ||
| 140 | static_assert(std::is_convertible_v<T, ExprData>); | ||
| 141 | return std::make_shared<ExprData>(T(std::forward<Args>(args)...)); | ||
| 142 | } | ||
| 143 | |||
| 144 | bool ExprAreEqual(const Expr& first, const Expr& second); | ||
| 145 | |||
| 146 | bool ExprAreOpposite(const Expr& first, const Expr& second); | ||
| 147 | |||
| 148 | Expr MakeExprNot(Expr first); | ||
| 149 | |||
| 150 | Expr MakeExprAnd(Expr first, Expr second); | ||
| 151 | |||
| 152 | Expr MakeExprOr(Expr first, Expr second); | ||
| 153 | |||
| 154 | bool ExprIsTrue(const Expr& first); | ||
| 155 | |||
| 156 | } // namespace VideoCommon::Shader | ||
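
Editorial sketch: since Expr is a shared_ptr to a std::variant, client code typically dispatches on the active alternative with std::visit or std::holds_alternative. A hypothetical pretty-printer (not part of the original sources) shows the pattern:

    #include <string>
    #include <type_traits>
    #include <variant>

    std::string DumpExpr(const Expr& expr) {
        return std::visit(
            [](const auto& e) -> std::string {
                using T = std::decay_t<decltype(e)>;
                if constexpr (std::is_same_v<T, ExprBoolean>) {
                    return e.value ? "true" : "false";
                } else if constexpr (std::is_same_v<T, ExprPredicate>) {
                    return "P" + std::to_string(e.predicate);
                } else {
                    return "<expr>";  // remaining alternatives elided for brevity
                }
            },
            *expr);
    }

Sharing sub-expressions through shared_ptr keeps them cheap to duplicate during control-flow analysis, at the cost of pointer identity being useless for comparison; hence ExprAreEqual compares the pointed-to variants.
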
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp deleted file mode 100644 index e18ccba8e..000000000 --- a/src/video_core/shader/memory_util.cpp +++ /dev/null | |||
| @@ -1,76 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <cstddef> | ||
| 7 | |||
| 8 | #include <boost/container_hash/hash.hpp> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "core/core.h" | ||
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/memory_manager.h" | ||
| 14 | #include "video_core/shader/memory_util.h" | ||
| 15 | #include "video_core/shader/shader_ir.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 20 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { | ||
| 21 | const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; | ||
| 22 | return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; | ||
| 23 | } | ||
| 24 | |||
| 25 | bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||
| 26 | // Sched instructions appear once every 4 instructions. | ||
| 27 | constexpr std::size_t SchedPeriod = 4; | ||
| 28 | const std::size_t absolute_offset = offset - main_offset; | ||
| 29 | return (absolute_offset % SchedPeriod) == 0; | ||
| 30 | } | ||
| 31 | |||
| 32 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) { | ||
| 33 | // This is the encoded version of BRA that jumps to itself. All Nvidia | ||
| 34 | // shaders end with one. | ||
| 35 | static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL; | ||
| 36 | static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL; | ||
| 37 | |||
| 38 | const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||
| 39 | std::size_t offset = start_offset; | ||
| 40 | while (offset < program.size()) { | ||
| 41 | const u64 instruction = program[offset]; | ||
| 42 | if (!IsSchedInstruction(offset, start_offset)) { | ||
| 43 | if ((instruction & MASK) == SELF_JUMPING_BRANCH) { | ||
| 44 | // End on Maxwell's "nop" instruction | ||
| 45 | break; | ||
| 46 | } | ||
| 47 | if (instruction == 0) { | ||
| 48 | break; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | ++offset; | ||
| 52 | } | ||
| 53 | // The last instruction is included in the program size | ||
| 54 | return std::min(offset + 1, program.size()); | ||
| 55 | } | ||
| 56 | |||
| 57 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, | ||
| 58 | const u8* host_ptr, bool is_compute) { | ||
| 59 | ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | ||
| 60 | ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; }); | ||
| 61 | memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64)); | ||
| 62 | code.resize(CalculateProgramSize(code, is_compute)); | ||
| 63 | return code; | ||
| 64 | } | ||
| 65 | |||
| 66 | u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, | ||
| 67 | const ProgramCode& code_b) { | ||
| 68 | size_t unique_identifier = boost::hash_value(code); | ||
| 69 | if (is_a) { | ||
| 70 | // VertexA programs are made of two program streams, so fold in the second hash | ||
| 71 | boost::hash_combine(unique_identifier, boost::hash_value(code_b)); | ||
| 72 | } | ||
| 73 | return static_cast<u64>(unique_identifier); | ||
| 74 | } | ||
| 75 | |||
| 76 | } // namespace VideoCommon::Shader | ||
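
Editorial sketch: CalculateProgramSize relies on two properties of Maxwell shader streams: every fourth 64-bit word (counting from the main offset) is a scheduling control word rather than an instruction, and compiled programs end with a BRA that branches to itself. The mask clears the one encoding bit that may vary between shaders. A standalone version of the same sentinel test, with the constants copied from the file above:

    #include <cstdint>

    constexpr uint64_t SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
    constexpr uint64_t MASK = 0xFFFFFFFFFF7FFFFFULL;  // clears bit 23 only

    constexpr bool IsSelfBranch(uint64_t instruction) {
        return (instruction & MASK) == SELF_JUMPING_BRANCH;
    }

    static_assert(IsSelfBranch(0xE2400FFFFF07000FULL));  // canonical encoding
    static_assert(IsSelfBranch(0xE2400FFFFF87000FULL));  // same, masked bit set
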
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h deleted file mode 100644 index 4624d38e6..000000000 --- a/src/video_core/shader/memory_util.h +++ /dev/null | |||
| @@ -1,43 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <cstddef> | ||
| 8 | #include <vector> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | |||
| 14 | namespace Tegra { | ||
| 15 | class MemoryManager; | ||
| 16 | } | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using ProgramCode = std::vector<u64>; | ||
| 21 | |||
| 22 | constexpr u32 STAGE_MAIN_OFFSET = 10; | ||
| 23 | constexpr u32 KERNEL_MAIN_OFFSET = 0; | ||
| 24 | |||
| 25 | /// Gets the address for the specified shader stage program | ||
| 26 | GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, | ||
| 27 | Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); | ||
| 28 | |||
| 29 | /// Checks whether the given instruction offset is a scheduler instruction | ||
| 30 | bool IsSchedInstruction(std::size_t offset, std::size_t main_offset); | ||
| 31 | |||
| 32 | /// Calculates the size of a program stream | ||
| 33 | std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute); | ||
| 34 | |||
| 35 | /// Gets the shader program code from memory for the specified address | ||
| 36 | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr, | ||
| 37 | const u8* host_ptr, bool is_compute); | ||
| 38 | |||
| 39 | /// Hashes one (or two) program streams | ||
| 40 | u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code, | ||
| 41 | const ProgramCode& code_b = {}); | ||
| 42 | |||
| 43 | } // namespace VideoCommon::Shader | ||
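
Editorial sketch: a hedged usage example of the two main entry points above; memory_manager, gpu_addr, and host_ptr are assumed to come from the caller's rasterizer state and are not defined here:

    // Fetch a fragment shader's code and derive its disk-cache identifier.
    const ProgramCode code =
        GetShaderCode(memory_manager, gpu_addr, host_ptr, /*is_compute=*/false);
    const u64 id =
        GetUniqueIdentifier(Tegra::Engines::ShaderType::Fragment, /*is_a=*/false, code);

Only VertexA programs pass is_a = true, which folds the hash of the paired VertexB stream (code_b) into the identifier.
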
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h deleted file mode 100644 index b54d33763..000000000 --- a/src/video_core/shader/node.h +++ /dev/null | |||
| @@ -1,701 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <optional> | ||
| 11 | #include <string> | ||
| 12 | #include <tuple> | ||
| 13 | #include <utility> | ||
| 14 | #include <variant> | ||
| 15 | #include <vector> | ||
| 16 | |||
| 17 | #include "common/common_types.h" | ||
| 18 | #include "video_core/engines/shader_bytecode.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | enum class OperationCode { | ||
| 23 | Assign, /// (float& dest, float src) -> void | ||
| 24 | |||
| 25 | Select, /// (MetaArithmetic, bool pred, float a, float b) -> float | ||
| 26 | |||
| 27 | FAdd, /// (MetaArithmetic, float a, float b) -> float | ||
| 28 | FMul, /// (MetaArithmetic, float a, float b) -> float | ||
| 29 | FDiv, /// (MetaArithmetic, float a, float b) -> float | ||
| 30 | FFma, /// (MetaArithmetic, float a, float b, float c) -> float | ||
| 31 | FNegate, /// (MetaArithmetic, float a) -> float | ||
| 32 | FAbsolute, /// (MetaArithmetic, float a) -> float | ||
| 33 | FClamp, /// (MetaArithmetic, float value, float min, float max) -> float | ||
| 34 | FCastHalf0, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 35 | FCastHalf1, /// (MetaArithmetic, f16vec2 a) -> float | ||
| 36 | FMin, /// (MetaArithmetic, float a, float b) -> float | ||
| 37 | FMax, /// (MetaArithmetic, float a, float b) -> float | ||
| 38 | FCos, /// (MetaArithmetic, float a) -> float | ||
| 39 | FSin, /// (MetaArithmetic, float a) -> float | ||
| 40 | FExp2, /// (MetaArithmetic, float a) -> float | ||
| 41 | FLog2, /// (MetaArithmetic, float a) -> float | ||
| 42 | FInverseSqrt, /// (MetaArithmetic, float a) -> float | ||
| 43 | FSqrt, /// (MetaArithmetic, float a) -> float | ||
| 44 | FRoundEven, /// (MetaArithmetic, float a) -> float | ||
| 45 | FFloor, /// (MetaArithmetic, float a) -> float | ||
| 46 | FCeil, /// (MetaArithmetic, float a) -> float | ||
| 47 | FTrunc, /// (MetaArithmetic, float a) -> float | ||
| 48 | FCastInteger, /// (MetaArithmetic, int a) -> float | ||
| 49 | FCastUInteger, /// (MetaArithmetic, uint a) -> float | ||
| 50 | FSwizzleAdd, /// (float a, float b, uint mask) -> float | ||
| 51 | |||
| 52 | IAdd, /// (MetaArithmetic, int a, int b) -> int | ||
| 53 | IMul, /// (MetaArithmetic, int a, int b) -> int | ||
| 54 | IDiv, /// (MetaArithmetic, int a, int b) -> int | ||
| 55 | INegate, /// (MetaArithmetic, int a) -> int | ||
| 56 | IAbsolute, /// (MetaArithmetic, int a) -> int | ||
| 57 | IMin, /// (MetaArithmetic, int a, int b) -> int | ||
| 58 | IMax, /// (MetaArithmetic, int a, int b) -> int | ||
| 59 | ICastFloat, /// (MetaArithmetic, float a) -> int | ||
| 60 | ICastUnsigned, /// (MetaArithmetic, uint a) -> int | ||
| 61 | ILogicalShiftLeft, /// (MetaArithmetic, int a, uint b) -> int | ||
| 62 | ILogicalShiftRight, /// (MetaArithmetic, int a, uint b) -> int | ||
| 63 | IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int | ||
| 64 | IBitwiseAnd, /// (MetaArithmetic, int a, int b) -> int | ||
| 65 | IBitwiseOr, /// (MetaArithmetic, int a, int b) -> int | ||
| 66 | IBitwiseXor, /// (MetaArithmetic, int a, int b) -> int | ||
| 67 | IBitwiseNot, /// (MetaArithmetic, int a) -> int | ||
| 68 | IBitfieldInsert, /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int | ||
| 69 | IBitfieldExtract, /// (MetaArithmetic, int value, int offset, int bits) -> int | ||
| 70 | IBitCount, /// (MetaArithmetic, int) -> int | ||
| 71 | IBitMSB, /// (MetaArithmetic, int) -> int | ||
| 72 | |||
| 73 | UAdd, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 74 | UMul, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 75 | UDiv, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 76 | UMin, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 77 | UMax, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 78 | UCastFloat, /// (MetaArithmetic, float a) -> uint | ||
| 79 | UCastSigned, /// (MetaArithmetic, int a) -> uint | ||
| 80 | ULogicalShiftLeft, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 81 | ULogicalShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 82 | UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 83 | UBitwiseAnd, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 84 | UBitwiseOr, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 85 | UBitwiseXor, /// (MetaArithmetic, uint a, uint b) -> uint | ||
| 86 | UBitwiseNot, /// (MetaArithmetic, uint a) -> uint | ||
| 87 | UBitfieldInsert, /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint | ||
| 88 | UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint | ||
| 89 | UBitCount, /// (MetaArithmetic, uint) -> uint | ||
| 90 | UBitMSB, /// (MetaArithmetic, uint) -> uint | ||
| 91 | |||
| 92 | HAdd, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | ||
| 93 | HMul, /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2 | ||
| 94 | HFma, /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2 | ||
| 95 | HAbsolute, /// (f16vec2 a) -> f16vec2 | ||
| 96 | HNegate, /// (f16vec2 a, bool first, bool second) -> f16vec2 | ||
| 97 | HClamp, /// (f16vec2 src, float min, float max) -> f16vec2 | ||
| 98 | HCastFloat, /// (MetaArithmetic, float a) -> f16vec2 | ||
| 99 | HUnpack, /// (Tegra::Shader::HalfType, T value) -> f16vec2 | ||
| 100 | HMergeF32, /// (f16vec2 src) -> float | ||
| 101 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | ||
| 102 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | ||
| 103 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 104 | |||
| 105 | LogicalAssign, /// (bool& dst, bool src) -> void | ||
| 106 | LogicalAnd, /// (bool a, bool b) -> bool | ||
| 107 | LogicalOr, /// (bool a, bool b) -> bool | ||
| 108 | LogicalXor, /// (bool a, bool b) -> bool | ||
| 109 | LogicalNegate, /// (bool a) -> bool | ||
| 110 | LogicalPick2, /// (bool2 pair, uint index) -> bool | ||
| 111 | LogicalAnd2, /// (bool2 a) -> bool | ||
| 112 | |||
| 113 | LogicalFOrdLessThan, /// (float a, float b) -> bool | ||
| 114 | LogicalFOrdEqual, /// (float a, float b) -> bool | ||
| 115 | LogicalFOrdLessEqual, /// (float a, float b) -> bool | ||
| 116 | LogicalFOrdGreaterThan, /// (float a, float b) -> bool | ||
| 117 | LogicalFOrdNotEqual, /// (float a, float b) -> bool | ||
| 118 | LogicalFOrdGreaterEqual, /// (float a, float b) -> bool | ||
| 119 | LogicalFOrdered, /// (float a, float b) -> bool | ||
| 120 | LogicalFUnordered, /// (float a, float b) -> bool | ||
| 121 | LogicalFUnordLessThan, /// (float a, float b) -> bool | ||
| 122 | LogicalFUnordEqual, /// (float a, float b) -> bool | ||
| 123 | LogicalFUnordLessEqual, /// (float a, float b) -> bool | ||
| 124 | LogicalFUnordGreaterThan, /// (float a, float b) -> bool | ||
| 125 | LogicalFUnordNotEqual, /// (float a, float b) -> bool | ||
| 126 | LogicalFUnordGreaterEqual, /// (float a, float b) -> bool | ||
| 127 | |||
| 128 | LogicalILessThan, /// (int a, int b) -> bool | ||
| 129 | LogicalIEqual, /// (int a, int b) -> bool | ||
| 130 | LogicalILessEqual, /// (int a, int b) -> bool | ||
| 131 | LogicalIGreaterThan, /// (int a, int b) -> bool | ||
| 132 | LogicalINotEqual, /// (int a, int b) -> bool | ||
| 133 | LogicalIGreaterEqual, /// (int a, int b) -> bool | ||
| 134 | |||
| 135 | LogicalULessThan, /// (uint a, uint b) -> bool | ||
| 136 | LogicalUEqual, /// (uint a, uint b) -> bool | ||
| 137 | LogicalULessEqual, /// (uint a, uint b) -> bool | ||
| 138 | LogicalUGreaterThan, /// (uint a, uint b) -> bool | ||
| 139 | LogicalUNotEqual, /// (uint a, uint b) -> bool | ||
| 140 | LogicalUGreaterEqual, /// (uint a, uint b) -> bool | ||
| 141 | |||
| 142 | LogicalAddCarry, /// (uint a, uint b) -> bool | ||
| 143 | |||
| 144 | Logical2HLessThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 145 | Logical2HEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 146 | Logical2HLessEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 147 | Logical2HGreaterThan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 148 | Logical2HNotEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 149 | Logical2HGreaterEqual, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 150 | Logical2HLessThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 151 | Logical2HEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 152 | Logical2HLessEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 153 | Logical2HGreaterThanWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 154 | Logical2HNotEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 155 | Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2 b) -> bool2 | ||
| 156 | |||
| 157 | Texture, /// (MetaTexture, float[N] coords) -> float4 | ||
| 158 | TextureLod, /// (MetaTexture, float[N] coords) -> float4 | ||
| 159 | TextureGather, /// (MetaTexture, float[N] coords) -> float4 | ||
| 160 | TextureQueryDimensions, /// (MetaTexture, float a) -> float4 | ||
| 161 | TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 | ||
| 162 | TexelFetch, /// (MetaTexture, int[N], int) -> float4 | ||
| 163 | TextureGradient, /// (MetaTexture, float[N] coords, float[N*2] derivatives) -> float4 | ||
| 164 | |||
| 165 | ImageLoad, /// (MetaImage, int[N] coords) -> void | ||
| 166 | ImageStore, /// (MetaImage, int[N] coords) -> void | ||
| 167 | |||
| 168 | AtomicImageAdd, /// (MetaImage, int[N] coords) -> void | ||
| 169 | AtomicImageAnd, /// (MetaImage, int[N] coords) -> void | ||
| 170 | AtomicImageOr, /// (MetaImage, int[N] coords) -> void | ||
| 171 | AtomicImageXor, /// (MetaImage, int[N] coords) -> void | ||
| 172 | AtomicImageExchange, /// (MetaImage, int[N] coords) -> void | ||
| 173 | |||
| 174 | AtomicUExchange, /// (memory, uint) -> uint | ||
| 175 | AtomicUAdd, /// (memory, uint) -> uint | ||
| 176 | AtomicUMin, /// (memory, uint) -> uint | ||
| 177 | AtomicUMax, /// (memory, uint) -> uint | ||
| 178 | AtomicUAnd, /// (memory, uint) -> uint | ||
| 179 | AtomicUOr, /// (memory, uint) -> uint | ||
| 180 | AtomicUXor, /// (memory, uint) -> uint | ||
| 181 | |||
| 182 | AtomicIExchange, /// (memory, int) -> int | ||
| 183 | AtomicIAdd, /// (memory, int) -> int | ||
| 184 | AtomicIMin, /// (memory, int) -> int | ||
| 185 | AtomicIMax, /// (memory, int) -> int | ||
| 186 | AtomicIAnd, /// (memory, int) -> int | ||
| 187 | AtomicIOr, /// (memory, int) -> int | ||
| 188 | AtomicIXor, /// (memory, int) -> int | ||
| 189 | |||
| 190 | ReduceUAdd, /// (memory, uint) -> void | ||
| 191 | ReduceUMin, /// (memory, uint) -> void | ||
| 192 | ReduceUMax, /// (memory, uint) -> void | ||
| 193 | ReduceUAnd, /// (memory, uint) -> void | ||
| 194 | ReduceUOr, /// (memory, uint) -> void | ||
| 195 | ReduceUXor, /// (memory, uint) -> void | ||
| 196 | |||
| 197 | ReduceIAdd, /// (memory, int) -> void | ||
| 198 | ReduceIMin, /// (memory, int) -> void | ||
| 199 | ReduceIMax, /// (memory, int) -> void | ||
| 200 | ReduceIAnd, /// (memory, int) -> void | ||
| 201 | ReduceIOr, /// (memory, int) -> void | ||
| 202 | ReduceIXor, /// (memory, int) -> void | ||
| 203 | |||
| 204 | Branch, /// (uint branch_target) -> void | ||
| 205 | BranchIndirect, /// (uint branch_target) -> void | ||
| 206 | PushFlowStack, /// (uint branch_target) -> void | ||
| 207 | PopFlowStack, /// () -> void | ||
| 208 | Exit, /// () -> void | ||
| 209 | Discard, /// () -> void | ||
| 210 | |||
| 211 | EmitVertex, /// () -> void | ||
| 212 | EndPrimitive, /// () -> void | ||
| 213 | |||
| 214 | InvocationId, /// () -> int | ||
| 215 | YNegate, /// () -> float | ||
| 216 | LocalInvocationIdX, /// () -> uint | ||
| 217 | LocalInvocationIdY, /// () -> uint | ||
| 218 | LocalInvocationIdZ, /// () -> uint | ||
| 219 | WorkGroupIdX, /// () -> uint | ||
| 220 | WorkGroupIdY, /// () -> uint | ||
| 221 | WorkGroupIdZ, /// () -> uint | ||
| 222 | |||
| 223 | BallotThread, /// (bool) -> uint | ||
| 224 | VoteAll, /// (bool) -> bool | ||
| 225 | VoteAny, /// (bool) -> bool | ||
| 226 | VoteEqual, /// (bool) -> bool | ||
| 227 | |||
| 228 | ThreadId, /// () -> uint | ||
| 229 | ThreadEqMask, /// () -> uint | ||
| 230 | ThreadGeMask, /// () -> uint | ||
| 231 | ThreadGtMask, /// () -> uint | ||
| 232 | ThreadLeMask, /// () -> uint | ||
| 233 | ThreadLtMask, /// () -> uint | ||
| 234 | ShuffleIndexed, /// (uint value, uint index) -> uint | ||
| 235 | |||
| 236 | Barrier, /// () -> void | ||
| 237 | MemoryBarrierGroup, /// () -> void | ||
| 238 | MemoryBarrierGlobal, /// () -> void | ||
| 239 | |||
| 240 | Amount, | ||
| 241 | }; | ||
| 242 | |||
| 243 | enum class InternalFlag { | ||
| 244 | Zero = 0, | ||
| 245 | Sign = 1, | ||
| 246 | Carry = 2, | ||
| 247 | Overflow = 3, | ||
| 248 | Amount = 4, | ||
| 249 | }; | ||
| 250 | |||
| 251 | enum class MetaStackClass { | ||
| 252 | Ssy, | ||
| 253 | Pbk, | ||
| 254 | }; | ||
| 255 | |||
| 256 | class OperationNode; | ||
| 257 | class ConditionalNode; | ||
| 258 | class GprNode; | ||
| 259 | class CustomVarNode; | ||
| 260 | class ImmediateNode; | ||
| 261 | class InternalFlagNode; | ||
| 262 | class PredicateNode; | ||
| 263 | class AbufNode; | ||
| 264 | class CbufNode; | ||
| 265 | class LmemNode; | ||
| 266 | class PatchNode; | ||
| 267 | class SmemNode; | ||
| 268 | class GmemNode; | ||
| 269 | class CommentNode; | ||
| 270 | |||
| 271 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, | ||
| 272 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, | ||
| 273 | LmemNode, SmemNode, GmemNode, CommentNode>; | ||
| 274 | using Node = std::shared_ptr<NodeData>; | ||
| 275 | using Node4 = std::array<Node, 4>; | ||
| 276 | using NodeBlock = std::vector<Node>; | ||
| 277 | |||
| 278 | struct ArraySamplerNode; | ||
| 279 | struct BindlessSamplerNode; | ||
| 280 | struct SeparateSamplerNode; | ||
| 281 | |||
| 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; | ||
| 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | ||
| 284 | |||
| 285 | struct SamplerEntry { | ||
| 286 | /// Bound samplers constructor | ||
| 287 | explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_, | ||
| 288 | bool is_shadow_, bool is_buffer_, bool is_indexed_) | ||
| 289 | : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_}, | ||
| 290 | is_buffer{is_buffer_}, is_indexed{is_indexed_} {} | ||
| 291 | |||
| 292 | /// Separate sampler constructor | ||
| 293 | explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, | ||
| 294 | Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_, | ||
| 295 | bool is_buffer_) | ||
| 296 | : index{index_}, offset{offsets.first}, secondary_offset{offsets.second}, | ||
| 297 | buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_}, | ||
| 298 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {} | ||
| 299 | |||
| 300 | /// Bindless samplers constructor | ||
| 301 | explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_, | ||
| 302 | bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_) | ||
| 303 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_}, | ||
| 304 | is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} { | ||
| 305 | } | ||
| 306 | |||
| 307 | u32 index = 0; ///< Emulated index given to this sampler. | ||
| 308 | u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. | ||
| 309 | u32 secondary_offset = 0; ///< Secondary offset in the const buffer. | ||
| 310 | u32 buffer = 0; ///< Buffer where the bindless sampler is read. | ||
| 311 | u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. | ||
| 312 | u32 size = 1; ///< Size of the sampler. | ||
| 313 | |||
| 314 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | ||
| 315 | bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. | ||
| 316 | bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. | ||
| 317 | bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. | ||
| 318 | bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. | ||
| 319 | bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. | ||
| 320 | bool is_separated = false; ///< Whether the image and sampler are separate or not. | ||
| 321 | }; | ||
| 322 | |||
| 323 | /// Represents a tracked array of bindless samplers in a direct const buffer | ||
| 324 | struct ArraySamplerNode { | ||
| 325 | u32 index; | ||
| 326 | u32 base_offset; | ||
| 327 | u32 bindless_var; | ||
| 328 | }; | ||
| 329 | |||
| 330 | /// Represents a tracked separate sampler image pair that was folded statically | ||
| 331 | struct SeparateSamplerNode { | ||
| 332 | std::pair<u32, u32> indices; | ||
| 333 | std::pair<u32, u32> offsets; | ||
| 334 | }; | ||
| 335 | |||
| 336 | /// Represents a tracked bindless sampler in a direct const buffer | ||
| 337 | struct BindlessSamplerNode { | ||
| 338 | u32 index; | ||
| 339 | u32 offset; | ||
| 340 | }; | ||
| 341 | |||
| 342 | struct ImageEntry { | ||
| 343 | public: | ||
| 344 | /// Bound images constructor | ||
| 345 | explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_) | ||
| 346 | : index{index_}, offset{offset_}, type{type_} {} | ||
| 347 | |||
| 348 | /// Bindless images constructor | ||
| 349 | explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_) | ||
| 350 | : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {} | ||
| 351 | |||
| 352 | void MarkWrite() { | ||
| 353 | is_written = true; | ||
| 354 | } | ||
| 355 | |||
| 356 | void MarkRead() { | ||
| 357 | is_read = true; | ||
| 358 | } | ||
| 359 | |||
| 360 | void MarkAtomic() { | ||
| 361 | MarkWrite(); | ||
| 362 | MarkRead(); | ||
| 363 | is_atomic = true; | ||
| 364 | } | ||
| 365 | |||
| 366 | u32 index = 0; | ||
| 367 | u32 offset = 0; | ||
| 368 | u32 buffer = 0; | ||
| 369 | |||
| 370 | Tegra::Shader::ImageType type{}; | ||
| 371 | bool is_bindless = false; | ||
| 372 | bool is_written = false; | ||
| 373 | bool is_read = false; | ||
| 374 | bool is_atomic = false; | ||
| 375 | }; | ||
| 376 | |||
| 377 | struct GlobalMemoryBase { | ||
| 378 | u32 cbuf_index = 0; | ||
| 379 | u32 cbuf_offset = 0; | ||
| 380 | |||
| 381 | [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const { | ||
| 382 | return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset); | ||
| 383 | } | ||
| 384 | }; | ||
| 385 | |||
| 386 | /// Parameters describing an arithmetic operation | ||
| 387 | struct MetaArithmetic { | ||
| 388 | bool precise{}; ///< Whether the operation must stay precise (cannot be optimized) or not | ||
| 389 | }; | ||
| 390 | |||
| 391 | /// Parameters describing a texture sampler | ||
| 392 | struct MetaTexture { | ||
| 393 | SamplerEntry sampler; | ||
| 394 | Node array; | ||
| 395 | Node depth_compare; | ||
| 396 | std::vector<Node> aoffi; | ||
| 397 | std::vector<Node> ptp; | ||
| 398 | std::vector<Node> derivates; | ||
| 399 | Node bias; | ||
| 400 | Node lod; | ||
| 401 | Node component; | ||
| 402 | u32 element{}; | ||
| 403 | Node index; | ||
| 404 | }; | ||
| 405 | |||
| 406 | struct MetaImage { | ||
| 407 | const ImageEntry& image; | ||
| 408 | std::vector<Node> values; | ||
| 409 | u32 element{}; | ||
| 410 | }; | ||
| 411 | |||
| 412 | /// Parameters that modify an operation but are not part of any particular operand | ||
| 413 | using Meta = | ||
| 414 | std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>; | ||
| 415 | |||
| 416 | class AmendNode { | ||
| 417 | public: | ||
| 418 | [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const { | ||
| 419 | if (amend_index == amend_null_index) { | ||
| 420 | return std::nullopt; | ||
| 421 | } | ||
| 422 | return {amend_index}; | ||
| 423 | } | ||
| 424 | |||
| 425 | void SetAmendIndex(std::size_t index) { | ||
| 426 | amend_index = index; | ||
| 427 | } | ||
| 428 | |||
| 429 | void ClearAmend() { | ||
| 430 | amend_index = amend_null_index; | ||
| 431 | } | ||
| 432 | |||
| 433 | private: | ||
| 434 | static constexpr std::size_t amend_null_index = 0xFFFFFFFFFFFFFFFFULL; | ||
| 435 | std::size_t amend_index{amend_null_index}; | ||
| 436 | }; | ||
| 437 | |||
| 438 | /// Holds any kind of operation that can be done in the IR | ||
| 439 | class OperationNode final : public AmendNode { | ||
| 440 | public: | ||
| 441 | explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {} | ||
| 442 | |||
| 443 | explicit OperationNode(OperationCode code_, Meta meta_) | ||
| 444 | : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {} | ||
| 445 | |||
| 446 | explicit OperationNode(OperationCode code_, std::vector<Node> operands_) | ||
| 447 | : OperationNode(code_, Meta{}, std::move(operands_)) {} | ||
| 448 | |||
| 449 | explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_) | ||
| 450 | : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {} | ||
| 451 | |||
| 452 | template <typename... Args> | ||
| 453 | explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_) | ||
| 454 | : code{code_}, meta{std::move(meta_)}, operands{operands_...} {} | ||
| 455 | |||
| 456 | [[nodiscard]] OperationCode GetCode() const { | ||
| 457 | return code; | ||
| 458 | } | ||
| 459 | |||
| 460 | [[nodiscard]] const Meta& GetMeta() const { | ||
| 461 | return meta; | ||
| 462 | } | ||
| 463 | |||
| 464 | [[nodiscard]] std::size_t GetOperandsCount() const { | ||
| 465 | return operands.size(); | ||
| 466 | } | ||
| 467 | |||
| 468 | [[nodiscard]] const Node& operator[](std::size_t operand_index) const { | ||
| 469 | return operands.at(operand_index); | ||
| 470 | } | ||
| 471 | |||
| 472 | private: | ||
| 473 | OperationCode code{}; | ||
| 474 | Meta meta{}; | ||
| 475 | std::vector<Node> operands; | ||
| 476 | }; | ||
| 477 | |||
| 478 | /// Encloses conditionally-executed code guarded by any kind of node that returns a boolean | ||
| 479 | class ConditionalNode final : public AmendNode { | ||
| 480 | public: | ||
| 481 | explicit ConditionalNode(Node condition_, std::vector<Node>&& code_) | ||
| 482 | : condition{std::move(condition_)}, code{std::move(code_)} {} | ||
| 483 | |||
| 484 | [[nodiscard]] const Node& GetCondition() const { | ||
| 485 | return condition; | ||
| 486 | } | ||
| 487 | |||
| 488 | [[nodiscard]] const std::vector<Node>& GetCode() const { | ||
| 489 | return code; | ||
| 490 | } | ||
| 491 | |||
| 492 | private: | ||
| 493 | Node condition; ///< Condition to be satisfied | ||
| 494 | std::vector<Node> code; ///< Code to execute | ||
| 495 | }; | ||
| 496 | |||
| 497 | /// A general purpose register | ||
| 498 | class GprNode final { | ||
| 499 | public: | ||
| 500 | explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {} | ||
| 501 | |||
| 502 | [[nodiscard]] constexpr u32 GetIndex() const { | ||
| 503 | return static_cast<u32>(index); | ||
| 504 | } | ||
| 505 | |||
| 506 | private: | ||
| 507 | Tegra::Shader::Register index{}; | ||
| 508 | }; | ||
| 509 | |||
| 510 | /// A custom variable | ||
| 511 | class CustomVarNode final { | ||
| 512 | public: | ||
| 513 | explicit constexpr CustomVarNode(u32 index_) : index{index_} {} | ||
| 514 | |||
| 515 | [[nodiscard]] constexpr u32 GetIndex() const { | ||
| 516 | return index; | ||
| 517 | } | ||
| 518 | |||
| 519 | private: | ||
| 520 | u32 index{}; | ||
| 521 | }; | ||
| 522 | |||
| 523 | /// A 32-bit value that represents an immediate value | ||
| 524 | class ImmediateNode final { | ||
| 525 | public: | ||
| 526 | explicit constexpr ImmediateNode(u32 value_) : value{value_} {} | ||
| 527 | |||
| 528 | [[nodiscard]] constexpr u32 GetValue() const { | ||
| 529 | return value; | ||
| 530 | } | ||
| 531 | |||
| 532 | private: | ||
| 533 | u32 value{}; | ||
| 534 | }; | ||
| 535 | |||
| 536 | /// One of Maxwell's internal flags | ||
| 537 | class InternalFlagNode final { | ||
| 538 | public: | ||
| 539 | explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {} | ||
| 540 | |||
| 541 | [[nodiscard]] constexpr InternalFlag GetFlag() const { | ||
| 542 | return flag; | ||
| 543 | } | ||
| 544 | |||
| 545 | private: | ||
| 546 | InternalFlag flag{}; | ||
| 547 | }; | ||
| 548 | |||
| 549 | /// A predicate register; it can be negated without additional nodes | ||
| 550 | class PredicateNode final { | ||
| 551 | public: | ||
| 552 | explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_) | ||
| 553 | : index{index_}, negated{negated_} {} | ||
| 554 | |||
| 555 | [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const { | ||
| 556 | return index; | ||
| 557 | } | ||
| 558 | |||
| 559 | [[nodiscard]] constexpr bool IsNegated() const { | ||
| 560 | return negated; | ||
| 561 | } | ||
| 562 | |||
| 563 | private: | ||
| 564 | Tegra::Shader::Pred index{}; | ||
| 565 | bool negated{}; | ||
| 566 | }; | ||
| 567 | |||
| 568 | /// Attribute buffer memory (known as attributes or varyings in GLSL terms) | ||
| 569 | class AbufNode final { | ||
| 570 | public: | ||
| 571 | // Initialize for standard attributes (index is explicit). | ||
| 572 | explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {}) | ||
| 573 | : buffer{std::move(buffer_)}, index{index_}, element{element_} {} | ||
| 574 | |||
| 575 | // Initialize for physical attributes (index is a variable value). | ||
| 576 | explicit AbufNode(Node physical_address_, Node buffer_ = {}) | ||
| 577 | : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {} | ||
| 578 | |||
| 579 | [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const { | ||
| 580 | return index; | ||
| 581 | } | ||
| 582 | |||
| 583 | [[nodiscard]] u32 GetElement() const { | ||
| 584 | return element; | ||
| 585 | } | ||
| 586 | |||
| 587 | [[nodiscard]] const Node& GetBuffer() const { | ||
| 588 | return buffer; | ||
| 589 | } | ||
| 590 | |||
| 591 | [[nodiscard]] bool IsPhysicalBuffer() const { | ||
| 592 | return static_cast<bool>(physical_address); | ||
| 593 | } | ||
| 594 | |||
| 595 | [[nodiscard]] const Node& GetPhysicalAddress() const { | ||
| 596 | return physical_address; | ||
| 597 | } | ||
| 598 | |||
| 599 | private: | ||
| 600 | Node physical_address; | ||
| 601 | Node buffer; | ||
| 602 | Tegra::Shader::Attribute::Index index{}; | ||
| 603 | u32 element{}; | ||
| 604 | }; | ||
| 605 | |||
| 606 | /// Patch memory (used to communicate between tessellation stages). | ||
| 607 | class PatchNode final { | ||
| 608 | public: | ||
| 609 | explicit constexpr PatchNode(u32 offset_) : offset{offset_} {} | ||
| 610 | |||
| 611 | [[nodiscard]] constexpr u32 GetOffset() const { | ||
| 612 | return offset; | ||
| 613 | } | ||
| 614 | |||
| 615 | private: | ||
| 616 | u32 offset{}; | ||
| 617 | }; | ||
| 618 | |||
| 619 | /// Constant buffer node, usually mapped to uniform buffers in GLSL | ||
| 620 | class CbufNode final { | ||
| 621 | public: | ||
| 622 | explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {} | ||
| 623 | |||
| 624 | [[nodiscard]] u32 GetIndex() const { | ||
| 625 | return index; | ||
| 626 | } | ||
| 627 | |||
| 628 | [[nodiscard]] const Node& GetOffset() const { | ||
| 629 | return offset; | ||
| 630 | } | ||
| 631 | |||
| 632 | private: | ||
| 633 | u32 index{}; | ||
| 634 | Node offset; | ||
| 635 | }; | ||
| 636 | |||
| 637 | /// Local memory node | ||
| 638 | class LmemNode final { | ||
| 639 | public: | ||
| 640 | explicit LmemNode(Node address_) : address{std::move(address_)} {} | ||
| 641 | |||
| 642 | [[nodiscard]] const Node& GetAddress() const { | ||
| 643 | return address; | ||
| 644 | } | ||
| 645 | |||
| 646 | private: | ||
| 647 | Node address; | ||
| 648 | }; | ||
| 649 | |||
| 650 | /// Shared memory node | ||
| 651 | class SmemNode final { | ||
| 652 | public: | ||
| 653 | explicit SmemNode(Node address_) : address{std::move(address_)} {} | ||
| 654 | |||
| 655 | [[nodiscard]] const Node& GetAddress() const { | ||
| 656 | return address; | ||
| 657 | } | ||
| 658 | |||
| 659 | private: | ||
| 660 | Node address; | ||
| 661 | }; | ||
| 662 | |||
| 663 | /// Global memory node | ||
| 664 | class GmemNode final { | ||
| 665 | public: | ||
| 666 | explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_) | ||
| 667 | : real_address{std::move(real_address_)}, base_address{std::move(base_address_)}, | ||
| 668 | descriptor{descriptor_} {} | ||
| 669 | |||
| 670 | [[nodiscard]] const Node& GetRealAddress() const { | ||
| 671 | return real_address; | ||
| 672 | } | ||
| 673 | |||
| 674 | [[nodiscard]] const Node& GetBaseAddress() const { | ||
| 675 | return base_address; | ||
| 676 | } | ||
| 677 | |||
| 678 | [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const { | ||
| 679 | return descriptor; | ||
| 680 | } | ||
| 681 | |||
| 682 | private: | ||
| 683 | Node real_address; | ||
| 684 | Node base_address; | ||
| 685 | GlobalMemoryBase descriptor; | ||
| 686 | }; | ||
| 687 | |||
| 688 | /// Commentary, can be dropped | ||
| 689 | class CommentNode final { | ||
| 690 | public: | ||
| 691 | explicit CommentNode(std::string text_) : text{std::move(text_)} {} | ||
| 692 | |||
| 693 | [[nodiscard]] const std::string& GetText() const { | ||
| 694 | return text; | ||
| 695 | } | ||
| 696 | |||
| 697 | private: | ||
| 698 | std::string text; | ||
| 699 | }; | ||
| 700 | |||
| 701 | } // namespace VideoCommon::Shader | ||
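
Editorial sketch: all of these node classes are erased into the NodeData variant behind a shared_ptr, so building IR by hand means wrapping each value in std::make_shared. A sketch of "r0 + 1.0f"; the node_helper.h factories in the next file wrap exactly this, and Tegra::Shader::Register is assumed constructible from an integer:

    Node gpr = std::make_shared<NodeData>(GprNode(Tegra::Shader::Register{0}));
    Node one = std::make_shared<NodeData>(ImmediateNode(0x3F800000));  // bit pattern of 1.0f
    Node sum = std::make_shared<NodeData>(OperationNode(
        OperationCode::FAdd, Meta{MetaArithmetic{true}}, std::vector<Node>{gpr, one}));
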
diff --git a/src/video_core/shader/node_helper.cpp b/src/video_core/shader/node_helper.cpp deleted file mode 100644 index 6a5b6940d..000000000 --- a/src/video_core/shader/node_helper.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <cstring> | ||
| 6 | #include <vector> | ||
| 7 | |||
| 8 | #include "common/common_types.h" | ||
| 9 | #include "video_core/shader/node_helper.h" | ||
| 10 | #include "video_core/shader/shader_ir.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | Node Conditional(Node condition, std::vector<Node> code) { | ||
| 15 | return MakeNode<ConditionalNode>(std::move(condition), std::move(code)); | ||
| 16 | } | ||
| 17 | |||
| 18 | Node Comment(std::string text) { | ||
| 19 | return MakeNode<CommentNode>(std::move(text)); | ||
| 20 | } | ||
| 21 | |||
| 22 | Node Immediate(u32 value) { | ||
| 23 | return MakeNode<ImmediateNode>(value); | ||
| 24 | } | ||
| 25 | |||
| 26 | Node Immediate(s32 value) { | ||
| 27 | return Immediate(static_cast<u32>(value)); | ||
| 28 | } | ||
| 29 | |||
| 30 | Node Immediate(f32 value) { | ||
| 31 | u32 integral; | ||
| 32 | std::memcpy(&integral, &value, sizeof(u32)); | ||
| 33 | return Immediate(integral); | ||
| 34 | } | ||
| 35 | |||
| 36 | OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) { | ||
| 37 | if (is_signed) { | ||
| 38 | return operation_code; | ||
| 39 | } | ||
| 40 | switch (operation_code) { | ||
| 41 | case OperationCode::FCastInteger: | ||
| 42 | return OperationCode::FCastUInteger; | ||
| 43 | case OperationCode::IAdd: | ||
| 44 | return OperationCode::UAdd; | ||
| 45 | case OperationCode::IMul: | ||
| 46 | return OperationCode::UMul; | ||
| 47 | case OperationCode::IDiv: | ||
| 48 | return OperationCode::UDiv; | ||
| 49 | case OperationCode::IMin: | ||
| 50 | return OperationCode::UMin; | ||
| 51 | case OperationCode::IMax: | ||
| 52 | return OperationCode::UMax; | ||
| 53 | case OperationCode::ICastFloat: | ||
| 54 | return OperationCode::UCastFloat; | ||
| 55 | case OperationCode::ICastUnsigned: | ||
| 56 | return OperationCode::UCastSigned; | ||
| 57 | case OperationCode::ILogicalShiftLeft: | ||
| 58 | return OperationCode::ULogicalShiftLeft; | ||
| 59 | case OperationCode::ILogicalShiftRight: | ||
| 60 | return OperationCode::ULogicalShiftRight; | ||
| 61 | case OperationCode::IArithmeticShiftRight: | ||
| 62 | return OperationCode::UArithmeticShiftRight; | ||
| 63 | case OperationCode::IBitwiseAnd: | ||
| 64 | return OperationCode::UBitwiseAnd; | ||
| 65 | case OperationCode::IBitwiseOr: | ||
| 66 | return OperationCode::UBitwiseOr; | ||
| 67 | case OperationCode::IBitwiseXor: | ||
| 68 | return OperationCode::UBitwiseXor; | ||
| 69 | case OperationCode::IBitwiseNot: | ||
| 70 | return OperationCode::UBitwiseNot; | ||
| 71 | case OperationCode::IBitfieldExtract: | ||
| 72 | return OperationCode::UBitfieldExtract; | ||
| 73 | case OperationCode::IBitfieldInsert: | ||
| 74 | return OperationCode::UBitfieldInsert; | ||
| 75 | case OperationCode::IBitCount: | ||
| 76 | return OperationCode::UBitCount; | ||
| 77 | case OperationCode::LogicalILessThan: | ||
| 78 | return OperationCode::LogicalULessThan; | ||
| 79 | case OperationCode::LogicalIEqual: | ||
| 80 | return OperationCode::LogicalUEqual; | ||
| 81 | case OperationCode::LogicalILessEqual: | ||
| 82 | return OperationCode::LogicalULessEqual; | ||
| 83 | case OperationCode::LogicalIGreaterThan: | ||
| 84 | return OperationCode::LogicalUGreaterThan; | ||
| 85 | case OperationCode::LogicalINotEqual: | ||
| 86 | return OperationCode::LogicalUNotEqual; | ||
| 87 | case OperationCode::LogicalIGreaterEqual: | ||
| 88 | return OperationCode::LogicalUGreaterEqual; | ||
| 89 | case OperationCode::AtomicIExchange: | ||
| 90 | return OperationCode::AtomicUExchange; | ||
| 91 | case OperationCode::AtomicIAdd: | ||
| 92 | return OperationCode::AtomicUAdd; | ||
| 93 | case OperationCode::AtomicIMin: | ||
| 94 | return OperationCode::AtomicUMin; | ||
| 95 | case OperationCode::AtomicIMax: | ||
| 96 | return OperationCode::AtomicUMax; | ||
| 97 | case OperationCode::AtomicIAnd: | ||
| 98 | return OperationCode::AtomicUAnd; | ||
| 99 | case OperationCode::AtomicIOr: | ||
| 100 | return OperationCode::AtomicUOr; | ||
| 101 | case OperationCode::AtomicIXor: | ||
| 102 | return OperationCode::AtomicUXor; | ||
| 103 | case OperationCode::INegate: | ||
| 104 | UNREACHABLE_MSG("Can't negate an unsigned integer"); | ||
| 105 | return {}; | ||
| 106 | case OperationCode::IAbsolute: | ||
| 107 | UNREACHABLE_MSG("Can't apply absolute to an unsigned integer"); | ||
| 108 | return {}; | ||
| 109 | default: | ||
| 110 | UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code); | ||
| 111 | return {}; | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | } // namespace VideoCommon::Shader | ||
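
Editorial sketch: the switch above maps each signed code to its unsigned twin, flags the codes with no unsigned counterpart (INegate, IAbsolute) as unreachable, and is the identity whenever is_signed is true. A few illustrative checks:

    #include <cassert>

    assert(SignedToUnsignedCode(OperationCode::IAdd, true) == OperationCode::IAdd);
    assert(SignedToUnsignedCode(OperationCode::IAdd, false) == OperationCode::UAdd);
    assert(SignedToUnsignedCode(OperationCode::LogicalILessThan, false) ==
           OperationCode::LogicalULessThan);
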
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h deleted file mode 100644 index 1e0886185..000000000 --- a/src/video_core/shader/node_helper.h +++ /dev/null | |||
| @@ -1,71 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <string> | ||
| 9 | #include <tuple> | ||
| 10 | #include <type_traits> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/shader/node.h" | ||
| 16 | |||
| 17 | namespace VideoCommon::Shader { | ||
| 18 | |||
| 19 | /// This arithmetic operation cannot be optimized away (it must stay precise) | ||
| 20 | inline constexpr MetaArithmetic PRECISE = {true}; | ||
| 21 | /// This arithmetic operation can be optimized away | ||
| 22 | inline constexpr MetaArithmetic NO_PRECISE = {false}; | ||
| 23 | |||
| 24 | /// Creates a conditional node | ||
| 25 | Node Conditional(Node condition, std::vector<Node> code); | ||
| 26 | |||
| 27 | /// Creates a commentary node | ||
| 28 | Node Comment(std::string text); | ||
| 29 | |||
| 30 | /// Creates a u32 immediate | ||
| 31 | Node Immediate(u32 value); | ||
| 32 | |||
| 33 | /// Creates an s32 immediate | ||
| 34 | Node Immediate(s32 value); | ||
| 35 | |||
| 36 | /// Creates a f32 immediate | ||
| 37 | Node Immediate(f32 value); | ||
| 38 | |||
| 39 | /// Converts a signed operation code to its unsigned counterpart | ||
| 40 | OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed); | ||
| 41 | |||
| 42 | template <typename T, typename... Args> | ||
| 43 | Node MakeNode(Args&&... args) { | ||
| 44 | static_assert(std::is_convertible_v<T, NodeData>); | ||
| 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); | ||
| 46 | } | ||
| 47 | |||
| 48 | template <typename T, typename... Args> | ||
| 49 | TrackSampler MakeTrackSampler(Args&&... args) { | ||
| 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); | ||
| 51 | return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); | ||
| 52 | } | ||
| 53 | |||
| 54 | template <typename... Args> | ||
| 55 | Node Operation(OperationCode code, Args&&... args) { | ||
| 56 | if constexpr (sizeof...(args) == 0) { | ||
| 57 | return MakeNode<OperationNode>(code); | ||
| 58 | } else if constexpr (std::is_convertible_v<std::tuple_element_t<0, std::tuple<Args...>>, | ||
| 59 | Meta>) { | ||
| 60 | return MakeNode<OperationNode>(code, std::forward<Args>(args)...); | ||
| 61 | } else { | ||
| 62 | return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...); | ||
| 63 | } | ||
| 64 | } | ||
| 65 | |||
| 66 | template <typename... Args> | ||
| 67 | Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) { | ||
| 68 | return Operation(SignedToUnsignedCode(code, is_signed), std::forward<Args>(args)...); | ||
| 69 | } | ||
| 70 | |||
| 71 | } // namespace VideoCommon::Shader | ||
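
Editorial sketch: Operation() dispatches on its first variadic argument at compile time; when that argument converts to Meta it becomes the node's metadata, otherwise an empty Meta{} is injected. A usage sketch in which op_a and op_b are assumed to be pre-built Node values:

    // Same opcode, with and without explicit metadata.
    Node precise_sum = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
    Node plain_sum = Operation(OperationCode::FAdd, op_a, op_b);  // Meta{} injected

    // SignedOperation selects the unsigned twin when is_signed is false:
    Node unsigned_sum = SignedOperation(OperationCode::IAdd, false, op_a, op_b);
    // unsigned_sum holds an OperationCode::UAdd node.
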
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp deleted file mode 100644 index 148d91fcb..000000000 --- a/src/video_core/shader/registry.cpp +++ /dev/null | |||
| @@ -1,181 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <tuple> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/engines/kepler_compute.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/engines/shader_type.h" | ||
| 13 | #include "video_core/shader/registry.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | using Tegra::Engines::ConstBufferEngineInterface; | ||
| 18 | using Tegra::Engines::SamplerDescriptor; | ||
| 19 | using Tegra::Engines::ShaderType; | ||
| 20 | |||
| 21 | namespace { | ||
| 22 | |||
| 23 | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 24 | if (shader_stage == ShaderType::Compute) { | ||
| 25 | return {}; | ||
| 26 | } | ||
| 27 | |||
| 28 | auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); | ||
| 29 | |||
| 30 | return { | ||
| 31 | .tfb_layouts = graphics.regs.tfb_layouts, | ||
| 32 | .tfb_varying_locs = graphics.regs.tfb_varying_locs, | ||
| 33 | .primitive_topology = graphics.regs.draw.topology, | ||
| 34 | .tessellation_primitive = graphics.regs.tess_mode.prim, | ||
| 35 | .tessellation_spacing = graphics.regs.tess_mode.spacing, | ||
| 36 | .tfb_enabled = graphics.regs.tfb_enabled != 0, | ||
| 37 | .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, | ||
| 38 | }; | ||
| 39 | } | ||
| 40 | |||
| 41 | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||
| 42 | if (shader_stage != ShaderType::Compute) { | ||
| 43 | return {}; | ||
| 44 | } | ||
| 45 | |||
| 46 | auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); | ||
| 47 | const auto& launch = compute.launch_description; | ||
| 48 | |||
| 49 | return { | ||
| 50 | .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, | ||
| 51 | .shared_memory_size_in_words = launch.shared_alloc, | ||
| 52 | .local_memory_size_in_words = launch.local_pos_alloc, | ||
| 53 | }; | ||
| 54 | } | ||
| 55 | |||
| 56 | } // Anonymous namespace | ||
| 57 | |||
| 58 | Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) | ||
| 59 | : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||
| 60 | bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||
| 61 | |||
| 62 | Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) | ||
| 63 | : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, | ||
| 64 | graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( | ||
| 65 | shader_stage, engine_)} {} | ||
| 66 | |||
| 67 | Registry::~Registry() = default; | ||
| 68 | |||
| 69 | std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) { | ||
| 70 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 71 | const auto iter = keys.find(key); | ||
| 72 | if (iter != keys.end()) { | ||
| 73 | return iter->second; | ||
| 74 | } | ||
| 75 | if (!engine) { | ||
| 76 | return std::nullopt; | ||
| 77 | } | ||
| 78 | const u32 value = engine->AccessConstBuffer32(stage, buffer, offset); | ||
| 79 | keys.emplace(key, value); | ||
| 80 | return value; | ||
| 81 | } | ||
| 82 | |||
| 83 | std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | ||
| 84 | const u32 key = offset; | ||
| 85 | const auto iter = bound_samplers.find(key); | ||
| 86 | if (iter != bound_samplers.end()) { | ||
| 87 | return iter->second; | ||
| 88 | } | ||
| 89 | if (!engine) { | ||
| 90 | return std::nullopt; | ||
| 91 | } | ||
| 92 | const SamplerDescriptor value = engine->AccessBoundSampler(stage, offset); | ||
| 93 | bound_samplers.emplace(key, value); | ||
| 94 | return value; | ||
| 95 | } | ||
| 96 | |||
| 97 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( | ||
| 98 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { | ||
| 99 | SeparateSamplerKey key; | ||
| 100 | key.buffers = buffers; | ||
| 101 | key.offsets = offsets; | ||
| 102 | const auto iter = separate_samplers.find(key); | ||
| 103 | if (iter != separate_samplers.end()) { | ||
| 104 | return iter->second; | ||
| 105 | } | ||
| 106 | if (!engine) { | ||
| 107 | return std::nullopt; | ||
| 108 | } | ||
| 109 | |||
| 110 | const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); | ||
| 111 | const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); | ||
| 112 | const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); | ||
| 113 | separate_samplers.emplace(key, value); | ||
| 114 | return value; | ||
| 115 | } | ||
| 116 | |||
| 117 | std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { | ||
| 118 | const std::pair key = {buffer, offset}; | ||
| 119 | const auto iter = bindless_samplers.find(key); | ||
| 120 | if (iter != bindless_samplers.end()) { | ||
| 121 | return iter->second; | ||
| 122 | } | ||
| 123 | if (!engine) { | ||
| 124 | return std::nullopt; | ||
| 125 | } | ||
| 126 | const SamplerDescriptor value = engine->AccessBindlessSampler(stage, buffer, offset); | ||
| 127 | bindless_samplers.emplace(key, value); | ||
| 128 | return value; | ||
| 129 | } | ||
| 130 | |||
| 131 | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||
| 132 | keys.insert_or_assign({buffer, offset}, value); | ||
| 133 | } | ||
| 134 | |||
| 135 | void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||
| 136 | bound_samplers.insert_or_assign(offset, sampler); | ||
| 137 | } | ||
| 138 | |||
| 139 | void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||
| 140 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||
| 141 | } | ||
| 142 | |||
| 143 | bool Registry::IsConsistent() const { | ||
| 144 | if (!engine) { | ||
| 145 | return true; | ||
| 146 | } | ||
| 147 | return std::all_of(keys.begin(), keys.end(), | ||
| 148 | [this](const auto& pair) { | ||
| 149 | const auto [cbuf, offset] = pair.first; | ||
| 150 | const auto value = pair.second; | ||
| 151 | return value == engine->AccessConstBuffer32(stage, cbuf, offset); | ||
| 152 | }) && | ||
| 153 | std::all_of(bound_samplers.begin(), bound_samplers.end(), | ||
| 154 | [this](const auto& sampler) { | ||
| 155 | const auto [key, value] = sampler; | ||
| 156 | return value == engine->AccessBoundSampler(stage, key); | ||
| 157 | }) && | ||
| 158 | std::all_of(bindless_samplers.begin(), bindless_samplers.end(), | ||
| 159 | [this](const auto& sampler) { | ||
| 160 | const auto [cbuf, offset] = sampler.first; | ||
| 161 | const auto value = sampler.second; | ||
| 162 | return value == engine->AccessBindlessSampler(stage, cbuf, offset); | ||
| 163 | }); | ||
| 164 | } | ||
| 165 | |||
| 166 | bool Registry::HasEqualKeys(const Registry& rhs) const { | ||
| 167 | return std::tie(keys, bound_samplers, bindless_samplers) == | ||
| 168 | std::tie(rhs.keys, rhs.bound_samplers, rhs.bindless_samplers); | ||
| 169 | } | ||
| 170 | |||
| 171 | const GraphicsInfo& Registry::GetGraphicsInfo() const { | ||
| 172 | ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||
| 173 | return graphics_info; | ||
| 174 | } | ||
| 175 | |||
| 176 | const ComputeInfo& Registry::GetComputeInfo() const { | ||
| 177 | ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||
| 178 | return compute_info; | ||
| 179 | } | ||
| 180 | |||
| 181 | } // namespace VideoCommon::Shader | ||
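
Editorial sketch: every Obtain* accessor above is read-through. A cached hit is returned directly, a miss consults the engine and memoizes the answer, and a registry restored from the disk cache (which has no engine) returns nullopt on a miss; IsConsistent then lets a disk-shader pass validate the stored values against live GPU state. A hedged sketch of the intended flow, where maxwell3d is the caller's bound graphics engine:

    Registry registry(Tegra::Engines::ShaderType::Vertex, maxwell3d);

    // First call reads const buffer 0 at offset 0x40 from the engine and caches it.
    const std::optional<u32> key = registry.ObtainKey(0, 0x40);

    // Later, check whether the cached state still matches what the engine reports:
    if (!registry.IsConsistent()) {
        // recompile: the guest changed the const buffers or samplers
    }
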
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h deleted file mode 100644 index 4bebefdde..000000000 --- a/src/video_core/shader/registry.h +++ /dev/null | |||
| @@ -1,172 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <optional> | ||
| 9 | #include <type_traits> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | |||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "common/hash.h" | ||
| 15 | #include "video_core/engines/const_buffer_engine_interface.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_type.h" | ||
| 18 | #include "video_core/guest_driver.h" | ||
| 19 | |||
| 20 | namespace VideoCommon::Shader { | ||
| 21 | |||
| 22 | struct SeparateSamplerKey { | ||
| 23 | std::pair<u32, u32> buffers; | ||
| 24 | std::pair<u32, u32> offsets; | ||
| 25 | }; | ||
| 26 | |||
| 27 | } // namespace VideoCommon::Shader | ||
| 28 | |||
| 29 | namespace std { | ||
| 30 | |||
| 31 | template <> | ||
| 32 | struct hash<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 33 | std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { | ||
| 34 | return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ | ||
| 35 | key.offsets.second); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | template <> | ||
| 40 | struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 41 | bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, | ||
| 42 | const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { | ||
| 43 | return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; | ||
| 44 | } | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace std | ||
| 48 | |||
| 49 | namespace VideoCommon::Shader { | ||
| 50 | |||
| 51 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 52 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 53 | using SeparateSamplerMap = | ||
| 54 | std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; | ||
| 55 | using BindlessSamplerMap = | ||
| 56 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 57 | |||
| 58 | struct GraphicsInfo { | ||
| 59 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 60 | |||
| 61 | std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers> | ||
| 62 | tfb_layouts{}; | ||
| 63 | std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{}; | ||
| 64 | Maxwell::PrimitiveTopology primitive_topology{}; | ||
| 65 | Maxwell::TessellationPrimitive tessellation_primitive{}; | ||
| 66 | Maxwell::TessellationSpacing tessellation_spacing{}; | ||
| 67 | bool tfb_enabled = false; | ||
| 68 | bool tessellation_clockwise = false; | ||
| 69 | }; | ||
| 70 | static_assert(std::is_trivially_copyable_v<GraphicsInfo> && | ||
| 71 | std::is_standard_layout_v<GraphicsInfo>); | ||
| 72 | |||
| 73 | struct ComputeInfo { | ||
| 74 | std::array<u32, 3> workgroup_size{}; | ||
| 75 | u32 shared_memory_size_in_words = 0; | ||
| 76 | u32 local_memory_size_in_words = 0; | ||
| 77 | }; | ||
| 78 | static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>); | ||
| 79 | |||
| 80 | struct SerializedRegistryInfo { | ||
| 81 | VideoCore::GuestDriverProfile guest_driver_profile; | ||
| 82 | u32 bound_buffer = 0; | ||
| 83 | GraphicsInfo graphics; | ||
| 84 | ComputeInfo compute; | ||
| 85 | }; | ||
| 86 | |||
| 87 | /** | ||
| 88 | * The Registry is a class used to interface the 3D and compute engines with the shader compiler. | ||
| 89 | * With it, the shader can obtain required data from GPU state and store it for disk shader | ||
| 90 | * compilation. | ||
| 91 | */ | ||
| 92 | class Registry { | ||
| 93 | public: | ||
| 94 | explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||
| 95 | |||
| 96 | explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||
| 97 | Tegra::Engines::ConstBufferEngineInterface& engine_); | ||
| 98 | |||
| 99 | ~Registry(); | ||
| 100 | |||
| 101 | /// Retrieves a key from the registry. If it's registered, the registered value is returned; | ||
| 102 | /// otherwise the value is obtained from maxwell3d and registered. | ||
| 103 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||
| 104 | |||
| 105 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 106 | |||
| 107 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( | ||
| 108 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); | ||
| 109 | |||
| 110 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 111 | |||
| 112 | /// Inserts a key. | ||
| 113 | void InsertKey(u32 buffer, u32 offset, u32 value); | ||
| 114 | |||
| 115 | /// Inserts a bound sampler key. | ||
| 116 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 117 | |||
| 118 | /// Inserts a bindless sampler key. | ||
| 119 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 120 | |||
| 121 | /// Checks keys and samplers against the engine's current const buffers. | ||
| 122 | /// Returns true if they hold the same values, false otherwise. | ||
| 123 | bool IsConsistent() const; | ||
| 124 | |||
| 125 | /// Returns true if the keys are equal to the other ones in the registry. | ||
| 126 | bool HasEqualKeys(const Registry& rhs) const; | ||
| 127 | |||
| 128 | /// Returns graphics information from this shader | ||
| 129 | const GraphicsInfo& GetGraphicsInfo() const; | ||
| 130 | |||
| 131 | /// Returns compute information from this shader | ||
| 132 | const ComputeInfo& GetComputeInfo() const; | ||
| 133 | |||
| 134 | /// Returns the const buffer keys in the database. | ||
| 135 | const KeyMap& GetKeys() const { | ||
| 136 | return keys; | ||
| 137 | } | ||
| 138 | |||
| 139 | /// Gets the bound sampler database. | ||
| 140 | const BoundSamplerMap& GetBoundSamplers() const { | ||
| 141 | return bound_samplers; | ||
| 142 | } | ||
| 143 | |||
| 144 | /// Gets the bindless sampler database. | ||
| 145 | const BindlessSamplerMap& GetBindlessSamplers() const { | ||
| 146 | return bindless_samplers; | ||
| 147 | } | ||
| 148 | |||
| 149 | /// Gets the bound buffer used by this shader | ||
| 150 | u32 GetBoundBuffer() const { | ||
| 151 | return bound_buffer; | ||
| 152 | } | ||
| 153 | |||
| 154 | /// Obtains access to the guest driver's profile. | ||
| 155 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 156 | return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||
| 157 | } | ||
| 158 | |||
| 159 | private: | ||
| 160 | const Tegra::Engines::ShaderType stage; | ||
| 161 | VideoCore::GuestDriverProfile stored_guest_driver_profile; | ||
| 162 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||
| 163 | KeyMap keys; | ||
| 164 | BoundSamplerMap bound_samplers; | ||
| 165 | SeparateSamplerMap separate_samplers; | ||
| 166 | BindlessSamplerMap bindless_samplers; | ||
| 167 | u32 bound_buffer; | ||
| 168 | GraphicsInfo graphics_info; | ||
| 169 | ComputeInfo compute_info; | ||
| 170 | }; | ||
| 171 | |||
| 172 | } // namespace VideoCommon::Shader | ||
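The std::hash specialization above folds the four key fields together with XOR, which is cheap but order-insensitive: swapping the buffer pair with the offset pair, for example, hashes to the same bucket. A hedged alternative, assuming collision quality is worth a few extra operations, is a boost-style hash_combine; the sketch below reproduces the key struct locally so it stands alone, and HashCombine/SeparateSamplerKeyHash are illustrative names rather than anything in the deleted code.

    // hash_combine-style alternative to the XOR fold above. The magic constant
    // is the usual 64-bit golden-ratio value used by boost::hash_combine.
    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <utility>

    struct SeparateSamplerKey {
        std::pair<std::uint32_t, std::uint32_t> buffers;
        std::pair<std::uint32_t, std::uint32_t> offsets;
    };

    inline void HashCombine(std::size_t& seed, std::uint32_t value) {
        seed ^= std::hash<std::uint32_t>{}(value) + 0x9e3779b97f4a7c15ULL + (seed << 6) + (seed >> 2);
    }

    struct SeparateSamplerKeyHash {
        std::size_t operator()(const SeparateSamplerKey& key) const noexcept {
            std::size_t seed = 0;
            HashCombine(seed, key.buffers.first); // Order-sensitive, unlike a plain XOR
            HashCombine(seed, key.buffers.second);
            HashCombine(seed, key.offsets.first);
            HashCombine(seed, key.offsets.second);
            return seed;
        }
    };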
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp deleted file mode 100644 index a4987ffc6..000000000 --- a/src/video_core/shader/shader_ir.cpp +++ /dev/null | |||
| @@ -1,464 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cmath> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "common/logging/log.h" | ||
| 12 | #include "video_core/engines/shader_bytecode.h" | ||
| 13 | #include "video_core/shader/node.h" | ||
| 14 | #include "video_core/shader/node_helper.h" | ||
| 15 | #include "video_core/shader/registry.h" | ||
| 16 | #include "video_core/shader/shader_ir.h" | ||
| 17 | |||
| 18 | namespace VideoCommon::Shader { | ||
| 19 | |||
| 20 | using Tegra::Shader::Attribute; | ||
| 21 | using Tegra::Shader::Instruction; | ||
| 22 | using Tegra::Shader::IpaMode; | ||
| 23 | using Tegra::Shader::Pred; | ||
| 24 | using Tegra::Shader::PredCondition; | ||
| 25 | using Tegra::Shader::PredOperation; | ||
| 26 | using Tegra::Shader::Register; | ||
| 27 | |||
| 28 | ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_, | ||
| 29 | Registry& registry_) | ||
| 30 | : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{ | ||
| 31 | registry_} { | ||
| 32 | Decode(); | ||
| 33 | PostDecode(); | ||
| 34 | } | ||
| 35 | |||
| 36 | ShaderIR::~ShaderIR() = default; | ||
| 37 | |||
| 38 | Node ShaderIR::GetRegister(Register reg) { | ||
| 39 | if (reg != Register::ZeroIndex) { | ||
| 40 | used_registers.insert(static_cast<u32>(reg)); | ||
| 41 | } | ||
| 42 | return MakeNode<GprNode>(reg); | ||
| 43 | } | ||
| 44 | |||
| 45 | Node ShaderIR::GetCustomVariable(u32 id) { | ||
| 46 | return MakeNode<CustomVarNode>(id); | ||
| 47 | } | ||
| 48 | |||
| 49 | Node ShaderIR::GetImmediate19(Instruction instr) { | ||
| 50 | return Immediate(instr.alu.GetImm20_19()); | ||
| 51 | } | ||
| 52 | |||
| 53 | Node ShaderIR::GetImmediate32(Instruction instr) { | ||
| 54 | return Immediate(instr.alu.GetImm20_32()); | ||
| 55 | } | ||
| 56 | |||
| 57 | Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { | ||
| 58 | const auto index = static_cast<u32>(index_); | ||
| 59 | const auto offset = static_cast<u32>(offset_); | ||
| 60 | |||
| 61 | used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); | ||
| 62 | |||
| 63 | return MakeNode<CbufNode>(index, Immediate(offset)); | ||
| 64 | } | ||
| 65 | |||
| 66 | Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { | ||
| 67 | const auto index = static_cast<u32>(index_); | ||
| 68 | const auto offset = static_cast<u32>(offset_); | ||
| 69 | |||
| 70 | used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); | ||
| 71 | |||
| 72 | Node final_offset = [&] { | ||
| 73 | // Attempt to inline the constant buffer read when the offset is not variable. This is | ||
| 74 | // done to allow tracking LDC calls. | ||
| 75 | if (const auto gpr = std::get_if<GprNode>(&*node)) { | ||
| 76 | if (gpr->GetIndex() == Register::ZeroIndex) { | ||
| 77 | return Immediate(offset); | ||
| 78 | } | ||
| 79 | } | ||
| 80 | return Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset)); | ||
| 81 | }(); | ||
| 82 | return MakeNode<CbufNode>(index, std::move(final_offset)); | ||
| 83 | } | ||
| 84 | |||
| 85 | Node ShaderIR::GetPredicate(u64 pred_, bool negated) { | ||
| 86 | const auto pred = static_cast<Pred>(pred_); | ||
| 87 | if (pred != Pred::UnusedIndex && pred != Pred::NeverExecute) { | ||
| 88 | used_predicates.insert(pred); | ||
| 89 | } | ||
| 90 | |||
| 91 | return MakeNode<PredicateNode>(pred, negated); | ||
| 92 | } | ||
| 93 | |||
| 94 | Node ShaderIR::GetPredicate(bool immediate) { | ||
| 95 | return GetPredicate(static_cast<u64>(immediate ? Pred::UnusedIndex : Pred::NeverExecute)); | ||
| 96 | } | ||
| 97 | |||
| 98 | Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 99 | MarkAttributeUsage(index, element); | ||
| 100 | used_input_attributes.emplace(index); | ||
| 101 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | ||
| 102 | } | ||
| 103 | |||
| 104 | Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) { | ||
| 105 | uses_physical_attributes = true; | ||
| 106 | return MakeNode<AbufNode>(GetRegister(physical_address), buffer); | ||
| 107 | } | ||
| 108 | |||
| 109 | Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) { | ||
| 110 | MarkAttributeUsage(index, element); | ||
| 111 | used_output_attributes.insert(index); | ||
| 112 | return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer)); | ||
| 113 | } | ||
| 114 | |||
| 115 | Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const { | ||
| 116 | Node node = MakeNode<InternalFlagNode>(flag); | ||
| 117 | if (negated) { | ||
| 118 | return Operation(OperationCode::LogicalNegate, std::move(node)); | ||
| 119 | } | ||
| 120 | return node; | ||
| 121 | } | ||
| 122 | |||
| 123 | Node ShaderIR::GetLocalMemory(Node address) { | ||
| 124 | return MakeNode<LmemNode>(std::move(address)); | ||
| 125 | } | ||
| 126 | |||
| 127 | Node ShaderIR::GetSharedMemory(Node address) { | ||
| 128 | return MakeNode<SmemNode>(std::move(address)); | ||
| 129 | } | ||
| 130 | |||
| 131 | Node ShaderIR::GetTemporary(u32 id) { | ||
| 132 | return GetRegister(Register::ZeroIndex + 1 + id); | ||
| 133 | } | ||
| 134 | |||
| 135 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | ||
| 136 | if (absolute) { | ||
| 137 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, std::move(value)); | ||
| 138 | } | ||
| 139 | if (negate) { | ||
| 140 | value = Operation(OperationCode::FNegate, NO_PRECISE, std::move(value)); | ||
| 141 | } | ||
| 142 | return value; | ||
| 143 | } | ||
| 144 | |||
| 145 | Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) { | ||
| 146 | if (!saturate) { | ||
| 147 | return value; | ||
| 148 | } | ||
| 149 | |||
| 150 | Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 151 | Node positive_one = Immediate(1.0f); | ||
| 152 | return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 153 | std::move(positive_one)); | ||
| 154 | } | ||
| 155 | |||
| 156 | Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) { | ||
| 157 | switch (size) { | ||
| 158 | case Register::Size::Byte: | ||
| 159 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, | ||
| 160 | std::move(value), Immediate(24)); | ||
| 161 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, | ||
| 162 | std::move(value), Immediate(24)); | ||
| 163 | return value; | ||
| 164 | case Register::Size::Short: | ||
| 165 | value = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE, | ||
| 166 | std::move(value), Immediate(16)); | ||
| 167 | value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, | ||
| 168 | std::move(value), Immediate(16)); | ||
| 169 | return value; | ||
| 170 | case Register::Size::Word: | ||
| 171 | // Default - do nothing | ||
| 172 | return value; | ||
| 173 | default: | ||
| 174 | UNREACHABLE_MSG("Unimplemented conversion size: {}", size); | ||
| 175 | return value; | ||
| 176 | } | ||
| 177 | } | ||
| 178 | |||
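ConvertIntegerSize above narrows a 32-bit register to a byte or short with a shift pair: shifting left and then arithmetic-shifting right by the same amount sign-extends the low bits. A scalar demonstration of the same trick outside the IR, assuming two's-complement arithmetic right shifts on signed integers (implementation-defined before C++20, but universal in practice):

    // Shift-pair narrowing: the left shift pushes the target sign bit into
    // bit 31, the arithmetic right shift then smears it across the high bits.
    #include <cassert>
    #include <cstdint>

    std::int32_t SignExtendLow(std::uint32_t value, int bits) {
        const int shift = 32 - bits; // 24 for Byte, 16 for Short
        return static_cast<std::int32_t>(value << shift) >> shift;
    }

    std::uint32_t ZeroExtendLow(std::uint32_t value, int bits) {
        const int shift = 32 - bits;
        return (value << shift) >> shift; // Logical shifts on unsigned just mask
    }

    int main() {
        assert(SignExtendLow(0x000000FFu, 8) == -1);      // 0xFF as a signed byte
        assert(SignExtendLow(0x00007FFFu, 16) == 0x7FFF); // Positive short unchanged
        assert(ZeroExtendLow(0xFFFFFFFFu, 8) == 0xFFu);   // Unsigned narrowing masks
        return 0;
    }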
| 179 | Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) { | ||
| 180 | if (!is_signed) { | ||
| 181 | // Absolute value or negation of an unsigned value is pointless | ||
| 182 | return value; | ||
| 183 | } | ||
| 184 | if (absolute) { | ||
| 185 | value = Operation(OperationCode::IAbsolute, NO_PRECISE, std::move(value)); | ||
| 186 | } | ||
| 187 | if (negate) { | ||
| 188 | value = Operation(OperationCode::INegate, NO_PRECISE, std::move(value)); | ||
| 189 | } | ||
| 190 | return value; | ||
| 191 | } | ||
| 192 | |||
| 193 | Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) { | ||
| 194 | Node value = Immediate(instr.half_imm.PackImmediates()); | ||
| 195 | if (!has_negation) { | ||
| 196 | return value; | ||
| 197 | } | ||
| 198 | |||
| 199 | Node first_negate = GetPredicate(instr.half_imm.first_negate != 0); | ||
| 200 | Node second_negate = GetPredicate(instr.half_imm.second_negate != 0); | ||
| 201 | |||
| 202 | return Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), std::move(first_negate), | ||
| 203 | std::move(second_negate)); | ||
| 204 | } | ||
| 205 | |||
| 206 | Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) { | ||
| 207 | return Operation(OperationCode::HUnpack, type, std::move(value)); | ||
| 208 | } | ||
| 209 | |||
| 210 | Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) { | ||
| 211 | switch (merge) { | ||
| 212 | case Tegra::Shader::HalfMerge::H0_H1: | ||
| 213 | return src; | ||
| 214 | case Tegra::Shader::HalfMerge::F32: | ||
| 215 | return Operation(OperationCode::HMergeF32, std::move(src)); | ||
| 216 | case Tegra::Shader::HalfMerge::Mrg_H0: | ||
| 217 | return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src)); | ||
| 218 | case Tegra::Shader::HalfMerge::Mrg_H1: | ||
| 219 | return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src)); | ||
| 220 | } | ||
| 221 | UNREACHABLE(); | ||
| 222 | return src; | ||
| 223 | } | ||
| 224 | |||
| 225 | Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) { | ||
| 226 | if (absolute) { | ||
| 227 | value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value)); | ||
| 228 | } | ||
| 229 | if (negate) { | ||
| 230 | value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value), GetPredicate(true), | ||
| 231 | GetPredicate(true)); | ||
| 232 | } | ||
| 233 | return value; | ||
| 234 | } | ||
| 235 | |||
| 236 | Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) { | ||
| 237 | if (!saturate) { | ||
| 238 | return value; | ||
| 239 | } | ||
| 240 | |||
| 241 | Node positive_zero = Immediate(std::copysignf(0, 1)); | ||
| 242 | Node positive_one = Immediate(1.0f); | ||
| 243 | return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value), std::move(positive_zero), | ||
| 244 | std::move(positive_one)); | ||
| 245 | } | ||
| 246 | |||
| 247 | Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) { | ||
| 248 | if (condition == PredCondition::T) { | ||
| 249 | return GetPredicate(true); | ||
| 250 | } else if (condition == PredCondition::F) { | ||
| 251 | return GetPredicate(false); | ||
| 252 | } | ||
| 253 | |||
| 254 | static constexpr std::array comparison_table{ | ||
| 255 | OperationCode(0), | ||
| 256 | OperationCode::LogicalFOrdLessThan, // LT | ||
| 257 | OperationCode::LogicalFOrdEqual, // EQ | ||
| 258 | OperationCode::LogicalFOrdLessEqual, // LE | ||
| 259 | OperationCode::LogicalFOrdGreaterThan, // GT | ||
| 260 | OperationCode::LogicalFOrdNotEqual, // NE | ||
| 261 | OperationCode::LogicalFOrdGreaterEqual, // GE | ||
| 262 | OperationCode::LogicalFOrdered, // NUM | ||
| 263 | OperationCode::LogicalFUnordered, // NAN | ||
| 264 | OperationCode::LogicalFUnordLessThan, // LTU | ||
| 265 | OperationCode::LogicalFUnordEqual, // EQU | ||
| 266 | OperationCode::LogicalFUnordLessEqual, // LEU | ||
| 267 | OperationCode::LogicalFUnordGreaterThan, // GTU | ||
| 268 | OperationCode::LogicalFUnordNotEqual, // NEU | ||
| 269 | OperationCode::LogicalFUnordGreaterEqual, // GEU | ||
| 270 | }; | ||
| 271 | const std::size_t index = static_cast<std::size_t>(condition); | ||
| 272 | ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index); | ||
| 273 | |||
| 274 | return Operation(comparison_table[index], op_a, op_b); | ||
| 275 | } | ||
| 276 | |||
| 277 | Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a, | ||
| 278 | Node op_b) { | ||
| 279 | static constexpr std::array comparison_table{ | ||
| 280 | std::pair{PredCondition::LT, OperationCode::LogicalILessThan}, | ||
| 281 | std::pair{PredCondition::EQ, OperationCode::LogicalIEqual}, | ||
| 282 | std::pair{PredCondition::LE, OperationCode::LogicalILessEqual}, | ||
| 283 | std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan}, | ||
| 284 | std::pair{PredCondition::NE, OperationCode::LogicalINotEqual}, | ||
| 285 | std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual}, | ||
| 286 | }; | ||
| 287 | |||
| 288 | const auto comparison = | ||
| 289 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 290 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 291 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 292 | "Unknown predicate comparison operation"); | ||
| 293 | |||
| 294 | return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a), | ||
| 295 | std::move(op_b)); | ||
| 296 | } | ||
| 297 | |||
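Note the two lookup strategies used by the comparison helpers above: the float helper indexes its table directly, since every condition value in its range has a slot, while the integer (and half) helpers search a sparse pair table with std::find_if because only a subset of conditions is implemented. A minimal sketch of the sparse lookup, using local stand-in enums rather than Tegra's definitions:

    // Sparse pair-table lookup: map an enum to an opcode with std::find_if,
    // reporting unimplemented entries to the caller via std::optional.
    #include <algorithm>
    #include <array>
    #include <optional>
    #include <utility>

    enum class Cond { LT, EQ, LE, GT, NE, GE };
    enum class Op { ILessThan, IEqual, ILessEqual, IGreaterThan, INotEqual, IGreaterEqual };

    std::optional<Op> LookupComparison(Cond condition) {
        static constexpr std::array comparison_table{
            std::pair{Cond::LT, Op::ILessThan},  std::pair{Cond::EQ, Op::IEqual},
            std::pair{Cond::LE, Op::ILessEqual}, std::pair{Cond::GT, Op::IGreaterThan},
            std::pair{Cond::NE, Op::INotEqual},  std::pair{Cond::GE, Op::IGreaterEqual},
        };
        const auto it =
            std::find_if(comparison_table.cbegin(), comparison_table.cend(),
                         [condition](const auto& entry) { return entry.first == condition; });
        if (it == comparison_table.cend()) {
            return std::nullopt; // Caller decides how to report the missing condition
        }
        return it->second;
    }

    int main() {
        return LookupComparison(Cond::NE) == Op::INotEqual ? 0 : 1;
    }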
| 298 | Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, | ||
| 299 | Node op_b) { | ||
| 300 | static constexpr std::array comparison_table{ | ||
| 301 | std::pair{PredCondition::LT, OperationCode::Logical2HLessThan}, | ||
| 302 | std::pair{PredCondition::EQ, OperationCode::Logical2HEqual}, | ||
| 303 | std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual}, | ||
| 304 | std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan}, | ||
| 305 | std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual}, | ||
| 306 | std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual}, | ||
| 307 | std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan}, | ||
| 308 | std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan}, | ||
| 309 | std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan}, | ||
| 310 | std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan}, | ||
| 311 | std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan}, | ||
| 312 | }; | ||
| 313 | |||
| 314 | const auto comparison = | ||
| 315 | std::find_if(comparison_table.cbegin(), comparison_table.cend(), | ||
| 316 | [condition](const auto entry) { return condition == entry.first; }); | ||
| 317 | UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(), | ||
| 318 | "Unknown predicate comparison operation"); | ||
| 319 | |||
| 320 | return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b)); | ||
| 321 | } | ||
| 322 | |||
| 323 | OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) { | ||
| 324 | static constexpr std::array operation_table{ | ||
| 325 | OperationCode::LogicalAnd, | ||
| 326 | OperationCode::LogicalOr, | ||
| 327 | OperationCode::LogicalXor, | ||
| 328 | }; | ||
| 329 | |||
| 330 | const auto index = static_cast<std::size_t>(operation); | ||
| 331 | if (index >= operation_table.size()) { | ||
| 332 | UNIMPLEMENTED_MSG("Unknown predicate operation."); | ||
| 333 | return {}; | ||
| 334 | } | ||
| 335 | |||
| 336 | return operation_table[index]; | ||
| 337 | } | ||
| 338 | |||
| 339 | Node ShaderIR::GetConditionCode(ConditionCode cc) const { | ||
| 340 | switch (cc) { | ||
| 341 | case ConditionCode::NEU: | ||
| 342 | return GetInternalFlag(InternalFlag::Zero, true); | ||
| 343 | case ConditionCode::FCSM_TR: | ||
| 344 | UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented"); | ||
| 345 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 346 | default: | ||
| 347 | UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc); | ||
| 348 | return MakeNode<PredicateNode>(Pred::NeverExecute, false); | ||
| 349 | } | ||
| 350 | } | ||
| 351 | |||
| 352 | void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) { | ||
| 353 | bb.push_back(Operation(OperationCode::Assign, GetRegister(dest), std::move(src))); | ||
| 354 | } | ||
| 355 | |||
| 356 | void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) { | ||
| 357 | bb.push_back(Operation(OperationCode::LogicalAssign, GetPredicate(dest), std::move(src))); | ||
| 358 | } | ||
| 359 | |||
| 360 | void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) { | ||
| 361 | bb.push_back(Operation(OperationCode::LogicalAssign, GetInternalFlag(flag), std::move(value))); | ||
| 362 | } | ||
| 363 | |||
| 364 | void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) { | ||
| 365 | bb.push_back( | ||
| 366 | Operation(OperationCode::Assign, GetLocalMemory(std::move(address)), std::move(value))); | ||
| 367 | } | ||
| 368 | |||
| 369 | void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) { | ||
| 370 | bb.push_back( | ||
| 371 | Operation(OperationCode::Assign, GetSharedMemory(std::move(address)), std::move(value))); | ||
| 372 | } | ||
| 373 | |||
| 374 | void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) { | ||
| 375 | SetRegister(bb, Register::ZeroIndex + 1 + id, std::move(value)); | ||
| 376 | } | ||
| 377 | |||
| 378 | void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) { | ||
| 379 | if (!sets_cc) { | ||
| 380 | return; | ||
| 381 | } | ||
| 382 | Node zerop = Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f)); | ||
| 383 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); | ||
| 384 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 385 | } | ||
| 386 | |||
| 387 | void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) { | ||
| 388 | if (!sets_cc) { | ||
| 389 | return; | ||
| 390 | } | ||
| 391 | Node zerop = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0)); | ||
| 392 | SetInternalFlag(bb, InternalFlag::Zero, std::move(zerop)); | ||
| 393 | LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete"); | ||
| 394 | } | ||
| 395 | |||
| 396 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | ||
| 397 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value), | ||
| 398 | Immediate(offset), Immediate(bits)); | ||
| 399 | } | ||
| 400 | |||
| 401 | Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { | ||
| 402 | return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), | ||
| 403 | Immediate(bits)); | ||
| 404 | } | ||
| 405 | |||
| 406 | void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) { | ||
| 407 | switch (index) { | ||
| 408 | case Attribute::Index::LayerViewportPointSize: | ||
| 409 | switch (element) { | ||
| 410 | case 0: | ||
| 411 | UNIMPLEMENTED(); | ||
| 412 | break; | ||
| 413 | case 1: | ||
| 414 | uses_layer = true; | ||
| 415 | break; | ||
| 416 | case 2: | ||
| 417 | uses_viewport_index = true; | ||
| 418 | break; | ||
| 419 | case 3: | ||
| 420 | uses_point_size = true; | ||
| 421 | break; | ||
| 422 | } | ||
| 423 | break; | ||
| 424 | case Attribute::Index::TessCoordInstanceIDVertexID: | ||
| 425 | switch (element) { | ||
| 426 | case 2: | ||
| 427 | uses_instance_id = true; | ||
| 428 | break; | ||
| 429 | case 3: | ||
| 430 | uses_vertex_id = true; | ||
| 431 | break; | ||
| 432 | } | ||
| 433 | break; | ||
| 434 | case Attribute::Index::ClipDistances0123: | ||
| 435 | case Attribute::Index::ClipDistances4567: { | ||
| 436 | const u64 clip_index = (index == Attribute::Index::ClipDistances4567 ? 4 : 0) + element; | ||
| 437 | used_clip_distances.at(clip_index) = true; | ||
| 438 | break; | ||
| 439 | } | ||
| 440 | case Attribute::Index::FrontColor: | ||
| 441 | case Attribute::Index::FrontSecondaryColor: | ||
| 442 | case Attribute::Index::BackColor: | ||
| 443 | case Attribute::Index::BackSecondaryColor: | ||
| 444 | uses_legacy_varyings = true; | ||
| 445 | break; | ||
| 446 | default: | ||
| 447 | if (index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7) { | ||
| 448 | uses_legacy_varyings = true; | ||
| 449 | } | ||
| 450 | break; | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | std::size_t ShaderIR::DeclareAmend(Node new_amend) { | ||
| 455 | const auto id = amend_code.size(); | ||
| 456 | amend_code.push_back(std::move(new_amend)); | ||
| 457 | return id; | ||
| 458 | } | ||
| 459 | |||
| 460 | u32 ShaderIR::NewCustomVariable() { | ||
| 461 | return num_custom_variables++; | ||
| 462 | } | ||
| 463 | |||
| 464 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h deleted file mode 100644 index 1cd7c14d7..000000000 --- a/src/video_core/shader/shader_ir.h +++ /dev/null | |||
| @@ -1,479 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <list> | ||
| 9 | #include <map> | ||
| 10 | #include <optional> | ||
| 11 | #include <set> | ||
| 12 | #include <tuple> | ||
| 13 | #include <vector> | ||
| 14 | |||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/engines/maxwell_3d.h" | ||
| 17 | #include "video_core/engines/shader_bytecode.h" | ||
| 18 | #include "video_core/engines/shader_header.h" | ||
| 19 | #include "video_core/shader/ast.h" | ||
| 20 | #include "video_core/shader/compiler_settings.h" | ||
| 21 | #include "video_core/shader/memory_util.h" | ||
| 22 | #include "video_core/shader/node.h" | ||
| 23 | #include "video_core/shader/registry.h" | ||
| 24 | |||
| 25 | namespace VideoCommon::Shader { | ||
| 26 | |||
| 27 | struct ShaderBlock; | ||
| 28 | |||
| 29 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | ||
| 30 | |||
| 31 | struct ConstBuffer { | ||
| 32 | constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_) | ||
| 33 | : max_offset{max_offset_}, is_indirect{is_indirect_} {} | ||
| 34 | |||
| 35 | constexpr ConstBuffer() = default; | ||
| 36 | |||
| 37 | void MarkAsUsed(u64 offset) { | ||
| 38 | max_offset = std::max(max_offset, static_cast<u32>(offset)); | ||
| 39 | } | ||
| 40 | |||
| 41 | void MarkAsUsedIndirect() { | ||
| 42 | is_indirect = true; | ||
| 43 | } | ||
| 44 | |||
| 45 | bool IsIndirect() const { | ||
| 46 | return is_indirect; | ||
| 47 | } | ||
| 48 | |||
| 49 | u32 GetSize() const { | ||
| 50 | return max_offset + static_cast<u32>(sizeof(float)); | ||
| 51 | } | ||
| 52 | |||
| 53 | u32 GetMaxOffset() const { | ||
| 54 | return max_offset; | ||
| 55 | } | ||
| 56 | |||
| 57 | private: | ||
| 58 | u32 max_offset = 0; | ||
| 59 | bool is_indirect = false; | ||
| 60 | }; | ||
| 61 | |||
| 62 | struct GlobalMemoryUsage { | ||
| 63 | bool is_read{}; | ||
| 64 | bool is_written{}; | ||
| 65 | }; | ||
| 66 | |||
| 67 | class ShaderIR final { | ||
| 68 | public: | ||
| 69 | explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_, | ||
| 70 | CompilerSettings settings_, Registry& registry_); | ||
| 71 | ~ShaderIR(); | ||
| 72 | |||
| 73 | const std::map<u32, NodeBlock>& GetBasicBlocks() const { | ||
| 74 | return basic_blocks; | ||
| 75 | } | ||
| 76 | |||
| 77 | const std::set<u32>& GetRegisters() const { | ||
| 78 | return used_registers; | ||
| 79 | } | ||
| 80 | |||
| 81 | const std::set<Tegra::Shader::Pred>& GetPredicates() const { | ||
| 82 | return used_predicates; | ||
| 83 | } | ||
| 84 | |||
| 85 | const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const { | ||
| 86 | return used_input_attributes; | ||
| 87 | } | ||
| 88 | |||
| 89 | const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const { | ||
| 90 | return used_output_attributes; | ||
| 91 | } | ||
| 92 | |||
| 93 | const std::map<u32, ConstBuffer>& GetConstantBuffers() const { | ||
| 94 | return used_cbufs; | ||
| 95 | } | ||
| 96 | |||
| 97 | const std::list<SamplerEntry>& GetSamplers() const { | ||
| 98 | return used_samplers; | ||
| 99 | } | ||
| 100 | |||
| 101 | const std::list<ImageEntry>& GetImages() const { | ||
| 102 | return used_images; | ||
| 103 | } | ||
| 104 | |||
| 105 | const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances() | ||
| 106 | const { | ||
| 107 | return used_clip_distances; | ||
| 108 | } | ||
| 109 | |||
| 110 | const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const { | ||
| 111 | return used_global_memory; | ||
| 112 | } | ||
| 113 | |||
| 114 | std::size_t GetLength() const { | ||
| 115 | return static_cast<std::size_t>(coverage_end * sizeof(u64)); | ||
| 116 | } | ||
| 117 | |||
| 118 | bool UsesLayer() const { | ||
| 119 | return uses_layer; | ||
| 120 | } | ||
| 121 | |||
| 122 | bool UsesViewportIndex() const { | ||
| 123 | return uses_viewport_index; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool UsesPointSize() const { | ||
| 127 | return uses_point_size; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool UsesInstanceId() const { | ||
| 131 | return uses_instance_id; | ||
| 132 | } | ||
| 133 | |||
| 134 | bool UsesVertexId() const { | ||
| 135 | return uses_vertex_id; | ||
| 136 | } | ||
| 137 | |||
| 138 | bool UsesLegacyVaryings() const { | ||
| 139 | return uses_legacy_varyings; | ||
| 140 | } | ||
| 141 | |||
| 142 | bool UsesYNegate() const { | ||
| 143 | return uses_y_negate; | ||
| 144 | } | ||
| 145 | |||
| 146 | bool UsesWarps() const { | ||
| 147 | return uses_warps; | ||
| 148 | } | ||
| 149 | |||
| 150 | bool HasPhysicalAttributes() const { | ||
| 151 | return uses_physical_attributes; | ||
| 152 | } | ||
| 153 | |||
| 154 | const Tegra::Shader::Header& GetHeader() const { | ||
| 155 | return header; | ||
| 156 | } | ||
| 157 | |||
| 158 | bool IsFlowStackDisabled() const { | ||
| 159 | return disable_flow_stack; | ||
| 160 | } | ||
| 161 | |||
| 162 | bool IsDecompiled() const { | ||
| 163 | return decompiled; | ||
| 164 | } | ||
| 165 | |||
| 166 | const ASTManager& GetASTManager() const { | ||
| 167 | return program_manager; | ||
| 168 | } | ||
| 169 | |||
| 170 | ASTNode GetASTProgram() const { | ||
| 171 | return program_manager.GetProgram(); | ||
| 172 | } | ||
| 173 | |||
| 174 | u32 GetASTNumVariables() const { | ||
| 175 | return program_manager.GetVariables(); | ||
| 176 | } | ||
| 177 | |||
| 178 | u32 ConvertAddressToNvidiaSpace(u32 address) const { | ||
| 179 | return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction)); | ||
| 180 | } | ||
| 181 | |||
| 182 | /// Returns a condition code evaluated from internal flags | ||
| 183 | Node GetConditionCode(Tegra::Shader::ConditionCode cc) const; | ||
| 184 | |||
| 185 | const Node& GetAmendNode(std::size_t index) const { | ||
| 186 | return amend_code[index]; | ||
| 187 | } | ||
| 188 | |||
| 189 | u32 GetNumCustomVariables() const { | ||
| 190 | return num_custom_variables; | ||
| 191 | } | ||
| 192 | |||
| 193 | private: | ||
| 194 | friend class ASTDecoder; | ||
| 195 | |||
| 196 | struct SamplerInfo { | ||
| 197 | std::optional<Tegra::Shader::TextureType> type; | ||
| 198 | std::optional<bool> is_array; | ||
| 199 | std::optional<bool> is_shadow; | ||
| 200 | std::optional<bool> is_buffer; | ||
| 201 | |||
| 202 | constexpr bool IsComplete() const noexcept { | ||
| 203 | return type && is_array && is_shadow && is_buffer; | ||
| 204 | } | ||
| 205 | }; | ||
| 206 | |||
| 207 | void Decode(); | ||
| 208 | void PostDecode(); | ||
| 209 | |||
| 210 | NodeBlock DecodeRange(u32 begin, u32 end); | ||
| 211 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | ||
| 212 | void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block); | ||
| 213 | |||
| 214 | /** | ||
| 215 | * Decodes a single instruction from Tegra to IR. | ||
| 216 | * @param bb Basic block where the nodes will be written to. | ||
| 217 | * @param pc Program counter. Offset to decode. | ||
| 218 | * @return Next address to decode. | ||
| 219 | */ | ||
| 220 | u32 DecodeInstr(NodeBlock& bb, u32 pc); | ||
| 221 | |||
| 222 | u32 DecodeArithmetic(NodeBlock& bb, u32 pc); | ||
| 223 | u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc); | ||
| 224 | u32 DecodeBfe(NodeBlock& bb, u32 pc); | ||
| 225 | u32 DecodeBfi(NodeBlock& bb, u32 pc); | ||
| 226 | u32 DecodeShift(NodeBlock& bb, u32 pc); | ||
| 227 | u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc); | ||
| 228 | u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc); | ||
| 229 | u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc); | ||
| 230 | u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc); | ||
| 231 | u32 DecodeFfma(NodeBlock& bb, u32 pc); | ||
| 232 | u32 DecodeHfma2(NodeBlock& bb, u32 pc); | ||
| 233 | u32 DecodeConversion(NodeBlock& bb, u32 pc); | ||
| 234 | u32 DecodeWarp(NodeBlock& bb, u32 pc); | ||
| 235 | u32 DecodeMemory(NodeBlock& bb, u32 pc); | ||
| 236 | u32 DecodeTexture(NodeBlock& bb, u32 pc); | ||
| 237 | u32 DecodeImage(NodeBlock& bb, u32 pc); | ||
| 238 | u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); | ||
| 239 | u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); | ||
| 240 | u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); | ||
| 241 | u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc); | ||
| 242 | u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc); | ||
| 243 | u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc); | ||
| 244 | u32 DecodeFloatSet(NodeBlock& bb, u32 pc); | ||
| 245 | u32 DecodeIntegerSet(NodeBlock& bb, u32 pc); | ||
| 246 | u32 DecodeHalfSet(NodeBlock& bb, u32 pc); | ||
| 247 | u32 DecodeVideo(NodeBlock& bb, u32 pc); | ||
| 248 | u32 DecodeXmad(NodeBlock& bb, u32 pc); | ||
| 249 | u32 DecodeOther(NodeBlock& bb, u32 pc); | ||
| 250 | |||
| 251 | /// Generates a node for a passed register. | ||
| 252 | Node GetRegister(Tegra::Shader::Register reg); | ||
| 253 | /// Generates a node for a custom variable | ||
| 254 | Node GetCustomVariable(u32 id); | ||
| 255 | /// Generates a node representing a 19-bit immediate value | ||
| 256 | Node GetImmediate19(Tegra::Shader::Instruction instr); | ||
| 257 | /// Generates a node representing a 32-bit immediate value | ||
| 258 | Node GetImmediate32(Tegra::Shader::Instruction instr); | ||
| 259 | /// Generates a node representing a constant buffer | ||
| 260 | Node GetConstBuffer(u64 index, u64 offset); | ||
| 261 | /// Generates a node representing a constant buffer with a variable offset | ||
| 262 | Node GetConstBufferIndirect(u64 index, u64 offset, Node node); | ||
| 263 | /// Generates a node for a passed predicate. It can be optionally negated | ||
| 264 | Node GetPredicate(u64 pred, bool negated = false); | ||
| 265 | /// Generates a predicate node for an immediate true or false value | ||
| 266 | Node GetPredicate(bool immediate); | ||
| 267 | /// Generates a node representing an input attribute. Keeps track of used attributes. | ||
| 268 | Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {}); | ||
| 269 | /// Generates a node representing a physical input attribute. | ||
| 270 | Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {}); | ||
| 271 | /// Generates a node representing an output attribute. Keeps track of used attributes. | ||
| 272 | Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer); | ||
| 273 | /// Generates a node representing an internal flag | ||
| 274 | Node GetInternalFlag(InternalFlag flag, bool negated = false) const; | ||
| 275 | /// Generates a node representing a local memory address | ||
| 276 | Node GetLocalMemory(Node address); | ||
| 277 | /// Generates a node representing a shared memory address | ||
| 278 | Node GetSharedMemory(Node address); | ||
| 280 | /// Generates a temporary; internally it uses a post-RZ register | ||
| 280 | Node GetTemporary(u32 id); | ||
| 281 | |||
| 282 | /// Sets a register. src value must be a number-evaluated node. | ||
| 283 | void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src); | ||
| 284 | /// Sets a predicate. src value must be a bool-evaluated node | ||
| 285 | void SetPredicate(NodeBlock& bb, u64 dest, Node src); | ||
| 286 | /// Sets an internal flag. src value must be a bool-evaluated node | ||
| 287 | void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value); | ||
| 288 | /// Sets a local memory address with a value. | ||
| 289 | void SetLocalMemory(NodeBlock& bb, Node address, Node value); | ||
| 290 | /// Sets a shared memory address with a value. | ||
| 291 | void SetSharedMemory(NodeBlock& bb, Node address, Node value); | ||
| 292 | /// Sets a temporary. Internally it uses a post-RZ register | ||
| 293 | void SetTemporary(NodeBlock& bb, u32 id, Node value); | ||
| 294 | |||
| 295 | /// Sets internal flags from a float | ||
| 296 | void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true); | ||
| 297 | /// Sets internal flags from an integer | ||
| 298 | void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true); | ||
| 299 | |||
| 300 | /// Conditionally absolute/negated float. Absolute is applied first | ||
| 301 | Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); | ||
| 302 | /// Conditionally saturates a float | ||
| 303 | Node GetSaturatedFloat(Node value, bool saturate = true); | ||
| 304 | |||
| 305 | /// Converts an integer to different sizes. | ||
| 306 | Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed); | ||
| 307 | /// Conditionally absolute/negated integer. Absolute is applied first | ||
| 308 | Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed); | ||
| 309 | |||
| 310 | /// Unpacks a half immediate from an instruction | ||
| 311 | Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation); | ||
| 312 | /// Unpacks a binary value into a half float pair with a type format | ||
| 313 | Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type); | ||
| 314 | /// Merges a half pair into another value | ||
| 315 | Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge); | ||
| 316 | /// Conditionally absolute/negated half float pair. Absolute is applied first | ||
| 317 | Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate); | ||
| 318 | /// Conditionally saturates a half float pair | ||
| 319 | Node GetSaturatedHalfFloat(Node value, bool saturate = true); | ||
| 320 | |||
| 321 | /// Get image component value by type and size | ||
| 322 | std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, | ||
| 323 | u32 component_size, Node original_value); | ||
| 324 | |||
| 325 | /// Returns a predicate comparing two floats | ||
| 326 | Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 327 | /// Returns a predicate comparing two integers | ||
| 328 | Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed, | ||
| 329 | Node op_a, Node op_b); | ||
| 330 | /// Returns a predicate comparing two half floats. The condition determines how both pairs are compared | ||
| 331 | Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||
| 332 | |||
| 333 | /// Returns a predicate combiner operation | ||
| 334 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | ||
| 335 | |||
| 336 | /// Queries the missing sampler info from the execution context. | ||
| 337 | SamplerInfo GetSamplerInfo(SamplerInfo info, | ||
| 338 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); | ||
| 339 | |||
| 340 | /// Accesses a texture sampler. | ||
| 341 | std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); | ||
| 342 | |||
| 343 | /// Accesses a texture sampler for a bindless texture. | ||
| 344 | std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info, | ||
| 345 | Node& index_var); | ||
| 346 | |||
| 347 | /// Accesses an image. | ||
| 348 | ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | ||
| 349 | |||
| 350 | /// Access a bindless image sampler. | ||
| 351 | ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type); | ||
| 352 | |||
| 353 | /// Extracts a sequence of bits from a node | ||
| 354 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | ||
| 355 | |||
| 356 | /// Inserts a sequence of bits from a node | ||
| 357 | Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits); | ||
| 358 | |||
| 359 | /// Marks the usage of an input or output attribute. | ||
| 360 | void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element); | ||
| 361 | |||
| 362 | /// Decodes VMNMX instruction and inserts its code into the passed basic block. | ||
| 363 | void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr); | ||
| 364 | |||
| 365 | void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 366 | const Node4& components); | ||
| 367 | |||
| 368 | void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 369 | const Node4& components, bool ignore_mask = false); | ||
| 370 | void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr, | ||
| 371 | const Node4& components, bool ignore_mask = false); | ||
| 372 | |||
| 373 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 374 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 375 | bool is_array, bool is_aoffi, | ||
| 376 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 377 | |||
| 378 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 379 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||
| 380 | bool is_array); | ||
| 381 | |||
| 382 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 383 | bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp, | ||
| 384 | bool is_bindless); | ||
| 385 | |||
| 386 | Node4 GetTldCode(Tegra::Shader::Instruction instr); | ||
| 387 | |||
| 388 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 389 | bool is_array); | ||
| 390 | |||
| 391 | std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( | ||
| 392 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | ||
| 393 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | ||
| 394 | |||
| 395 | std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||
| 396 | |||
| 397 | std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs); | ||
| 398 | |||
| 399 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 400 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | ||
| 401 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi, | ||
| 402 | std::optional<Tegra::Shader::Register> bindless_reg); | ||
| 403 | |||
| 404 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | ||
| 405 | u64 byte_height); | ||
| 406 | |||
| 407 | void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest, | ||
| 408 | Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b, | ||
| 409 | Tegra::Shader::PredicateResultMode predicate_mode, | ||
| 410 | Tegra::Shader::Pred predicate, bool sets_cc); | ||
| 411 | void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b, | ||
| 412 | Node op_c, Node imm_lut, bool sets_cc); | ||
| 413 | |||
| 414 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | ||
| 415 | |||
| 416 | std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 417 | s64 cursor); | ||
| 418 | |||
| 419 | std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, | ||
| 420 | const OperationNode& operation, | ||
| 421 | Node gpr, Node base_offset, | ||
| 422 | Node tracked, const NodeBlock& code, | ||
| 423 | s64 cursor); | ||
| 424 | |||
| 425 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | ||
| 426 | |||
| 427 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||
| 428 | s64 cursor) const; | ||
| 429 | |||
| 430 | std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, | ||
| 431 | Tegra::Shader::Instruction instr, | ||
| 432 | bool is_read, bool is_write); | ||
| 433 | |||
| 434 | /// Registers new amending code and obtains the reference id. | ||
| 435 | std::size_t DeclareAmend(Node new_amend); | ||
| 436 | |||
| 437 | u32 NewCustomVariable(); | ||
| 438 | |||
| 439 | const ProgramCode& program_code; | ||
| 440 | const u32 main_offset; | ||
| 441 | const CompilerSettings settings; | ||
| 442 | Registry& registry; | ||
| 443 | |||
| 444 | bool decompiled{}; | ||
| 445 | bool disable_flow_stack{}; | ||
| 446 | |||
| 447 | u32 coverage_begin{}; | ||
| 448 | u32 coverage_end{}; | ||
| 449 | |||
| 450 | std::map<u32, NodeBlock> basic_blocks; | ||
| 451 | NodeBlock global_code; | ||
| 452 | ASTManager program_manager{true, true}; | ||
| 453 | std::vector<Node> amend_code; | ||
| 454 | u32 num_custom_variables{}; | ||
| 455 | |||
| 456 | std::set<u32> used_registers; | ||
| 457 | std::set<Tegra::Shader::Pred> used_predicates; | ||
| 458 | std::set<Tegra::Shader::Attribute::Index> used_input_attributes; | ||
| 459 | std::set<Tegra::Shader::Attribute::Index> used_output_attributes; | ||
| 460 | std::map<u32, ConstBuffer> used_cbufs; | ||
| 461 | std::list<SamplerEntry> used_samplers; | ||
| 462 | std::list<ImageEntry> used_images; | ||
| 463 | std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{}; | ||
| 464 | std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory; | ||
| 465 | bool uses_layer{}; | ||
| 466 | bool uses_viewport_index{}; | ||
| 467 | bool uses_point_size{}; | ||
| 468 | bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes | ||
| 469 | bool uses_instance_id{}; | ||
| 470 | bool uses_vertex_id{}; | ||
| 471 | bool uses_legacy_varyings{}; | ||
| 472 | bool uses_y_negate{}; | ||
| 473 | bool uses_warps{}; | ||
| 474 | bool uses_indexed_samplers{}; | ||
| 475 | |||
| 476 | Tegra::Shader::Header header; | ||
| 477 | }; | ||
| 478 | |||
| 479 | } // namespace VideoCommon::Shader | ||
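The ConstBuffer helper above is a usage watermark: every MarkAsUsed call raises max_offset, and GetSize derives the byte count to bind from the highest offset touched plus the width of one element. A standalone sketch of the same pattern, paired with the try_emplace idiom shader_ir.cpp uses to upsert per-index entries (UsageWatermark is an illustrative name):

    // Watermark pattern: track only the maximum offset ever used, then size
    // the buffer as that offset plus one element's width.
    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <map>

    class UsageWatermark {
    public:
        void MarkAsUsed(std::uint32_t offset) {
            max_offset = std::max(max_offset, offset);
        }
        std::uint32_t GetSize() const {
            return max_offset + static_cast<std::uint32_t>(sizeof(float));
        }

    private:
        std::uint32_t max_offset = 0;
    };

    int main() {
        std::map<std::uint32_t, UsageWatermark> used_cbufs; // cbuf index -> watermark
        used_cbufs.try_emplace(3).first->second.MarkAsUsed(0x40);
        used_cbufs.try_emplace(3).first->second.MarkAsUsed(0x10); // Watermark keeps the max
        std::cout << used_cbufs.at(3).GetSize() << '\n';          // Prints 68 (0x40 + 4)
        return 0;
    }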
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp deleted file mode 100644 index 6be3ea92b..000000000 --- a/src/video_core/shader/track.cpp +++ /dev/null | |||
| @@ -1,236 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <utility> | ||
| 7 | #include <variant> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/node.h" | ||
| 11 | #include "video_core/shader/node_helper.h" | ||
| 12 | #include "video_core/shader/shader_ir.h" | ||
| 13 | |||
| 14 | namespace VideoCommon::Shader { | ||
| 15 | |||
| 16 | namespace { | ||
| 17 | |||
| 18 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | ||
| 19 | OperationCode operation_code) { | ||
| 20 | for (; cursor >= 0; --cursor) { | ||
| 21 | Node node = code.at(cursor); | ||
| 22 | |||
| 23 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 24 | if (operation->GetCode() == operation_code) { | ||
| 25 | return {std::move(node), cursor}; | ||
| 26 | } | ||
| 27 | } | ||
| 28 | |||
| 29 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 30 | const auto& conditional_code = conditional->GetCode(); | ||
| 31 | auto result = FindOperation( | ||
| 32 | conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); | ||
| 33 | auto& found = result.first; | ||
| 34 | if (found) { | ||
| 35 | return {std::move(found), cursor}; | ||
| 36 | } | ||
| 37 | } | ||
| 38 | } | ||
| 39 | return {}; | ||
| 40 | } | ||
| 41 | |||
| 42 | std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { | ||
| 43 | if (operation.GetCode() != OperationCode::UAdd) { | ||
| 44 | return std::nullopt; | ||
| 45 | } | ||
| 46 | Node gpr; | ||
| 47 | Node offset; | ||
| 48 | ASSERT(operation.GetOperandsCount() == 2); | ||
| 49 | for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { | ||
| 50 | Node operand = operation[i]; | ||
| 51 | if (std::holds_alternative<ImmediateNode>(*operand)) { | ||
| 52 | offset = operation[i]; | ||
| 53 | } else if (std::holds_alternative<GprNode>(*operand)) { | ||
| 54 | gpr = operation[i]; | ||
| 55 | } | ||
| 56 | } | ||
| 57 | if (offset && gpr) { | ||
| 58 | return std::make_pair(gpr, offset); | ||
| 59 | } | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | |||
| 63 | bool AmendNodeCv(std::size_t amend_index, Node node) { | ||
| 64 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 65 | operation->SetAmendIndex(amend_index); | ||
| 66 | return true; | ||
| 67 | } | ||
| 68 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 69 | conditional->SetAmendIndex(amend_index); | ||
| 70 | return true; | ||
| 71 | } | ||
| 72 | return false; | ||
| 73 | } | ||
| 74 | |||
| 75 | } // Anonymous namespace | ||
| 76 | |||
| 77 | std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 78 | s64 cursor) { | ||
| 79 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 80 | const u32 cbuf_index = cbuf->GetIndex(); | ||
| 81 | |||
| 82 | // Constant buffer found, test if it's an immediate | ||
| 83 | const auto& offset = cbuf->GetOffset(); | ||
| 84 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 85 | auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); | ||
| 86 | return {tracked, track}; | ||
| 87 | } | ||
| 88 | if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||
| 89 | const u32 bound_buffer = registry.GetBoundBuffer(); | ||
| 90 | if (bound_buffer != cbuf_index) { | ||
| 91 | return {}; | ||
| 92 | } | ||
| 93 | if (const std::optional pair = DecoupleIndirectRead(*operation)) { | ||
| 94 | auto [gpr, base_offset] = *pair; | ||
| 95 | return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, | ||
| 96 | code, cursor); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 102 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 103 | return {}; | ||
| 104 | } | ||
| 105 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 106 | // register that it uses as an operand | ||
| 107 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 108 | if (!source) { | ||
| 109 | return {}; | ||
| 110 | } | ||
| 111 | return TrackBindlessSampler(source, code, new_cursor); | ||
| 112 | } | ||
| 113 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 114 | const OperationNode& op = *operation; | ||
| 115 | |||
| 116 | const OperationCode opcode = operation->GetCode(); | ||
| 117 | if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { | ||
| 118 | ASSERT(op.GetOperandsCount() == 2); | ||
| 119 | auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); | ||
| 120 | auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); | ||
| 121 | if (node_a && node_b) { | ||
| 122 | auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, | ||
| 123 | std::pair{offset_a, offset_b}); | ||
| 124 | return {tracked, std::move(track)}; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | std::size_t i = op.GetOperandsCount(); | ||
| 128 | while (i--) { | ||
| 129 | if (auto found = TrackBindlessSampler(op[i], code, cursor); std::get<0>(found)) { | ||
| 130 | // Constant buffer found in operand. | ||
| 131 | return found; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | return {}; | ||
| 135 | } | ||
| 136 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 137 | const auto& conditional_code = conditional->GetCode(); | ||
| 138 | return TrackBindlessSampler(tracked, conditional_code, | ||
| 139 | static_cast<s64>(conditional_code.size())); | ||
| 140 | } | ||
| 141 | return {}; | ||
| 142 | } | ||
| 143 | |||
| 144 | std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( | ||
| 145 | const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, | ||
| 146 | const NodeBlock& code, s64 cursor) { | ||
| 147 | const auto offset_imm = std::get<ImmediateNode>(*base_offset); | ||
| 148 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 149 | const u32 bindless_cv = NewCustomVariable(); | ||
| 150 | const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); | ||
| 151 | Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); | ||
| 152 | |||
| 153 | Node cv_node = GetCustomVariable(bindless_cv); | ||
| 154 | Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); | ||
| 155 | const std::size_t amend_index = DeclareAmend(std::move(amend_op)); | ||
| 156 | AmendNodeCv(amend_index, code[cursor]); | ||
| 157 | |||
| 158 | // TODO: Implement bindless index custom variable | ||
| 159 | auto track = | ||
| 160 | MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); | ||
| 161 | return {tracked, track}; | ||
| 162 | } | ||
| 163 | |||
| 164 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | ||
| 165 | s64 cursor) const { | ||
| 166 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 167 | // Constant buffer found; check whether its offset is an immediate | ||
| 168 | const auto& offset = cbuf->GetOffset(); | ||
| 169 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 170 | return {tracked, cbuf->GetIndex(), immediate->GetValue()}; | ||
| 171 | } | ||
| 172 | return {}; | ||
| 173 | } | ||
| 174 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 175 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 176 | return {}; | ||
| 177 | } | ||
| 178 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the | ||
| 179 | // same register that it uses as an operand | ||
| 180 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 181 | if (!source) { | ||
| 182 | return {}; | ||
| 183 | } | ||
| 184 | return TrackCbuf(source, code, new_cursor); | ||
| 185 | } | ||
| 186 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 187 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | ||
| 188 | if (auto found = TrackCbuf((*operation)[i - 1], code, cursor); std::get<0>(found)) { | ||
| 189 | // Cbuf found in operand. | ||
| 190 | return found; | ||
| 191 | } | ||
| 192 | } | ||
| 193 | return {}; | ||
| 194 | } | ||
| 195 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 196 | const auto& conditional_code = conditional->GetCode(); | ||
| 197 | return TrackCbuf(tracked, conditional_code, static_cast<s64>(conditional_code.size())); | ||
| 198 | } | ||
| 199 | return {}; | ||
| 200 | } | ||
| 201 | |||
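TrackCbuf answers the question of which constant-buffer word a value originates from, yielding the cbuf node, its index, and the immediate offset, or an empty tuple when the offset is dynamic. The sketch below mirrors the copy chase on stand-in IR rather than yuzu's nodes; the important detail is the cursor - 1 restart, which skips the instruction that produced the value so a self-update such as r2 = r2 + c1[0x20] cannot recurse forever:

    // Stand-in IR for the copy chase: each entry assigns either a cbuf word
    // or another register to a destination register.
    #include <cstdint>
    #include <optional>
    #include <utility>
    #include <vector>

    struct Assign {
        std::uint32_t dest;       // register written by this instruction
        bool from_cbuf;           // true: source is c[index][offset]
        std::uint32_t index;      // cbuf index (when from_cbuf)
        std::uint32_t offset;     // cbuf offset (when from_cbuf)
        std::uint32_t source_reg; // source register (when !from_cbuf)
    };

    // Walk backwards from `cursor` to the last write of `reg`, following
    // register copies recursively; restarting at cursor - 1 skips the
    // instruction that produced the value being tracked.
    std::optional<std::pair<std::uint32_t, std::uint32_t>> TrackCbufSketch(
        const std::vector<Assign>& code, std::uint32_t reg, std::int64_t cursor) {
        for (; cursor >= 0; --cursor) {
            const Assign& a = code[static_cast<std::size_t>(cursor)];
            if (a.dest != reg) {
                continue;
            }
            if (a.from_cbuf) {
                return std::pair{a.index, a.offset}; // originating cbuf word
            }
            return TrackCbufSketch(code, a.source_reg, cursor - 1);
        }
        return std::nullopt; // dynamic or untracked source
    }

For example, given the two assignments r2 = c1[0x20] followed by r3 = r2, calling TrackCbufSketch(code, 3, 1) resolves to the pair {1, 0x20}.
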
| 202 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { | ||
| 203 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 204 | // register that it uses as an operand | ||
| 205 | const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); | ||
| 206 | const auto& found = result.first; | ||
| 207 | if (!found) { | ||
| 208 | return std::nullopt; | ||
| 209 | } | ||
| 210 | if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { | ||
| 211 | return immediate->GetValue(); | ||
| 212 | } | ||
| 213 | return std::nullopt; | ||
| 214 | } | ||
| 215 | |||
| 216 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||
| 217 | s64 cursor) const { | ||
| 218 | for (; cursor >= 0; --cursor) { | ||
| 219 | const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign); | ||
| 220 | if (!found_node) { | ||
| 221 | return {}; | ||
| 222 | } | ||
| 223 | const auto operation = std::get_if<OperationNode>(&*found_node); | ||
| 224 | ASSERT(operation); | ||
| 225 | |||
| 226 | const auto& target = (*operation)[0]; | ||
| 227 | if (const auto gpr_target = std::get_if<GprNode>(&*target)) { | ||
| 228 | if (gpr_target->GetIndex() == tracked->GetIndex()) { | ||
| 229 | return {(*operation)[1], new_cursor}; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | } | ||
| 233 | return {}; | ||
| 234 | } | ||
| 235 | |||
| 236 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.cpp b/src/video_core/shader/transform_feedback.cpp deleted file mode 100644 index 22a933761..000000000 --- a/src/video_core/shader/transform_feedback.cpp +++ /dev/null | |||
| @@ -1,115 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | ||
| 12 | #include "video_core/shader/registry.h" | ||
| 13 | #include "video_core/shader/transform_feedback.h" | ||
| 14 | |||
| 15 | namespace VideoCommon::Shader { | ||
| 16 | |||
| 17 | namespace { | ||
| 18 | |||
| 19 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 20 | |||
| 21 | // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20 | ||
| 22 | |||
| 23 | /// Attribute offsets that describe a vector | ||
| 24 | constexpr std::array VECTORS = { | ||
| 25 | 28, // gl_Position | ||
| 26 | 32, // Generic 0 | ||
| 27 | 36, // Generic 1 | ||
| 28 | 40, // Generic 2 | ||
| 29 | 44, // Generic 3 | ||
| 30 | 48, // Generic 4 | ||
| 31 | 52, // Generic 5 | ||
| 32 | 56, // Generic 6 | ||
| 33 | 60, // Generic 7 | ||
| 34 | 64, // Generic 8 | ||
| 35 | 68, // Generic 9 | ||
| 36 | 72, // Generic 10 | ||
| 37 | 76, // Generic 11 | ||
| 38 | 80, // Generic 12 | ||
| 39 | 84, // Generic 13 | ||
| 40 | 88, // Generic 14 | ||
| 41 | 92, // Generic 15 | ||
| 42 | 96, // Generic 16 | ||
| 43 | 100, // Generic 17 | ||
| 44 | 104, // Generic 18 | ||
| 45 | 108, // Generic 19 | ||
| 46 | 112, // Generic 20 | ||
| 47 | 116, // Generic 21 | ||
| 48 | 120, // Generic 22 | ||
| 49 | 124, // Generic 23 | ||
| 50 | 128, // Generic 24 | ||
| 51 | 132, // Generic 25 | ||
| 52 | 136, // Generic 26 | ||
| 53 | 140, // Generic 27 | ||
| 54 | 144, // Generic 28 | ||
| 55 | 148, // Generic 29 | ||
| 56 | 152, // Generic 30 | ||
| 57 | 156, // Generic 31 | ||
| 58 | 160, // gl_FrontColor | ||
| 59 | 164, // gl_FrontSecondaryColor | ||
| 60 | 168, // gl_BackColor | ||
| 61 | 172, // gl_BackSecondaryColor | ||
| 62 | 192, // gl_TexCoord[0] | ||
| 63 | 196, // gl_TexCoord[1] | ||
| 64 | 200, // gl_TexCoord[2] | ||
| 65 | 204, // gl_TexCoord[3] | ||
| 66 | 208, // gl_TexCoord[4] | ||
| 67 | 212, // gl_TexCoord[5] | ||
| 68 | 216, // gl_TexCoord[6] | ||
| 69 | 220, // gl_TexCoord[7] | ||
| 70 | }; | ||
| 71 | } // namespace | ||
| 72 | |||
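The VECTORS offsets index Maxwell's output attribute space in 32-bit words, one entry per vec4-shaped attribute, as the four-word spacing of the generics suggests. BuildTransformFeedback below tests membership with location / 4 * 4, which rounds a per-component location down to the base of its vec4 slot. A minimal sketch of that test, with an abbreviated stand-in table:

    // Sketch of the vec4-base membership test (assumed encoding: one 32-bit
    // word per component, four words per attribute slot).
    #include <algorithm>
    #include <array>

    constexpr std::array kVectorBases = {28, 32, 36}; // abbreviated stand-in table

    bool IsVectorComponent(int location) {
        const int base = location / 4 * 4; // e.g. 34 -> 32, component .z of Generic 0
        return std::find(kVectorBases.begin(), kVectorBases.end(), base) != kVectorBases.end();
    }
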
| 73 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) { | ||
| 74 | |||
| 75 | std::unordered_map<u8, VaryingTFB> tfb; | ||
| 76 | |||
| 77 | for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) { | ||
| 78 | const auto& locations = info.tfb_varying_locs[buffer]; | ||
| 79 | const auto& layout = info.tfb_layouts[buffer]; | ||
| 80 | const std::size_t varying_count = layout.varying_count; | ||
| 81 | |||
| 82 | std::size_t highest = 0; | ||
| 83 | |||
| 84 | for (std::size_t offset = 0; offset < varying_count; ++offset) { | ||
| 85 | const std::size_t base_offset = offset; | ||
| 86 | const u8 location = locations[offset]; | ||
| 87 | |||
| 88 | VaryingTFB varying; | ||
| 89 | varying.buffer = layout.stream; | ||
| 90 | varying.stride = layout.stride; | ||
| 91 | varying.offset = offset * sizeof(u32); | ||
| 92 | varying.components = 1; | ||
| 93 | |||
| 94 | if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) { | ||
| 95 | UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB"); | ||
| 96 | |||
| 97 | const u8 base_index = location / 4; | ||
| 98 | while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) { | ||
| 99 | ++offset; | ||
| 100 | ++varying.components; | ||
| 101 | } | ||
| 102 | } | ||
| 103 | |||
| 104 | [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second; | ||
| 105 | UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored"); | ||
| 106 | |||
| 107 | highest = std::max(highest, (base_offset + varying.components) * sizeof(u32)); | ||
| 108 | } | ||
| 109 | |||
| 110 | UNIMPLEMENTED_IF(highest != layout.stride); | ||
| 111 | } | ||
| 112 | return tfb; | ||
| 113 | } | ||
| 114 | |||
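When consecutive varying locations share a vec4 base, the inner while loop collapses them into a single varying with a widened component count, which is what lets a backend declare one vec2/vec3/vec4 output instead of several scalars. A worked example of the merge with assumed data:

    // Worked example of the merging loop (assumed data): four consecutive
    // component locations sharing a vec4 base collapse into one varying.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
        const std::vector<std::uint8_t> locations{32, 33, 34, 35}; // Generic 0.xyzw
        std::size_t offset = 0;
        std::size_t components = 1;
        const std::uint8_t base_index = locations[offset] / 4;
        while (offset + 1 < locations.size() && base_index == locations[offset + 1] / 4) {
            ++offset;
            ++components;
        }
        assert(components == 4); // one varying spanning the whole vec4
    }
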
| 115 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/transform_feedback.h b/src/video_core/shader/transform_feedback.h deleted file mode 100644 index 77d05f64c..000000000 --- a/src/video_core/shader/transform_feedback.h +++ /dev/null | |||
| @@ -1,23 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <unordered_map> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/shader/registry.h" | ||
| 11 | |||
| 12 | namespace VideoCommon::Shader { | ||
| 13 | |||
| 14 | struct VaryingTFB { | ||
| 15 | std::size_t buffer; | ||
| 16 | std::size_t stride; | ||
| 17 | std::size_t offset; | ||
| 18 | std::size_t components; | ||
| 19 | }; | ||
| 20 | |||
| 21 | std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info); | ||
| 22 | |||
| 23 | } // namespace VideoCommon::Shader | ||