summaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/decode/texture.cpp114
-rw-r--r--src/video_core/shader/shader_ir.h12
-rw-r--r--src/video_core/shader/track.cpp17
3 files changed, 114 insertions, 29 deletions
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a99ae19bf..a775b402b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
7#include <fmt/format.h> 7#include <fmt/format.h>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h"
11#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h" 14#include "video_core/shader/shader_ir.h"
13 15
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
41 43
42 switch (opcode->get().GetId()) { 44 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: { 45 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { 46 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); 47 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 } 48 }
50 49
51 const TextureType texture_type{instr.tex.texture_type}; 50 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0; 51 const bool is_array = instr.tex.array != 0;
52 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); 53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode(); 54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat( 55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); 56 bb, instr,
57 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
57 break; 58 break;
58 } 59 }
59 case OpCode::Id::TEXS: { 60 case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
78 } 79 }
79 case OpCode::Id::TLD4: { 80 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0); 81 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), 82 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented"); 83 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), 84 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
92 const auto texture_type = instr.tld4.texture_type.Value(); 91 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); 92 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0; 93 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr, 94 const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
96 GetTld4Code(instr, texture_type, depth_compare, is_array)); 95 WriteTexInstructionFloat(
96 bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
97 break; 97 break;
98 } 98 }
99 case OpCode::Id::TLD4S: { 99 case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
127 Node4 values; 127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) { 128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords; 129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; 130 MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 } 132 }
133 133
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
152 if (!instr.txq.IsComponentEnabled(element)) { 152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue; 153 continue;
154 } 154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; 155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
156 const Node value = 156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); 157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value); 158 SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
202 202
203 for (u32 element = 0; element < 2; ++element) { 203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords; 204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; 205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value); 207 SetTemporal(bb, element, value);
208 } 208 }
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
325 325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords, 327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) { 328 Node array, Node depth_compare, u32 bias_offset,
329 std::vector<Node> aoffi) {
329 const bool is_array = array; 330 const bool is_array = array;
330 const bool is_shadow = depth_compare; 331 const bool is_shadow = depth_compare;
331 332
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
374 Node4 values; 375 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) { 376 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords; 377 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; 378 MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords)); 379 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 } 380 }
380 381
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
382} 383}
383 384
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, 385Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) { 386 TextureProcessMode process_mode, bool depth_compare, bool is_array,
386 const bool lod_bias_enabled = 387 bool is_aoffi) {
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); 388 const bool lod_bias_enabled{
389 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
390
391 u64 parameter_register = instr.gpr20.Value();
392 if (lod_bias_enabled) {
393 ++parameter_register;
394 }
388 395
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( 396 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); 397 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
404 411
405 const Node array = is_array ? GetRegister(array_register) : nullptr; 412 const Node array = is_array ? GetRegister(array_register) : nullptr;
406 413
414 std::vector<Node> aoffi;
415 if (is_aoffi) {
416 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
417 }
418
407 Node dc{}; 419 Node dc{};
408 if (depth_compare) { 420 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod 421 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used 422 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); 423 dc = GetRegister(parameter_register++);
412 dc = GetRegister(depth_register);
413 } 424 }
414 425
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); 426 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
416} 427}
417 428
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, 429Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
448 dc = GetRegister(depth_register); 459 dc = GetRegister(depth_register);
449 } 460 }
450 461
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); 462 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
452} 463}
453 464
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, 465Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) { 466 bool is_array, bool is_aoffi) {
456 const std::size_t coord_count = GetCoordCount(texture_type); 467 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); 468 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); 469 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
463 const u64 coord_register = array_register + (is_array ? 1 : 0); 474 const u64 coord_register = array_register + (is_array ? 1 : 0);
464 475
465 std::vector<Node> coords; 476 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i) 477 for (std::size_t i = 0; i < coord_count; ++i) {
467 coords.push_back(GetRegister(coord_register + i)); 478 coords.push_back(GetRegister(coord_register + i));
479 }
480
481 u64 parameter_register = instr.gpr20.Value();
482 std::vector<Node> aoffi;
483 if (is_aoffi) {
484 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
485 }
486
487 Node dc{};
488 if (depth_compare) {
489 dc = GetRegister(parameter_register++);
490 }
468 491
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); 492 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470 493
471 Node4 values; 494 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) { 495 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords; 496 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; 497 MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 498 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 } 499 }
477 500
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
507 Node4 values; 530 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) { 531 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords; 532 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; 533 MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); 534 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 } 535 }
513 return values; 536 return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
531 return {coord_count, total_coord_count}; 554 return {coord_count, total_coord_count};
532} 555}
533 556
534} // namespace VideoCommon::Shader \ No newline at end of file 557std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
558 bool is_tld4) {
559 const auto [coord_offsets, size, wrap_value,
560 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
561 if (is_tld4) {
562 return {{0, 8, 16}, 6, 32, 64};
563 } else {
564 return {{0, 4, 8}, 4, 8, 16};
565 }
566 }();
567 const u32 mask = (1U << size) - 1;
568
569 std::vector<Node> aoffi;
570 aoffi.reserve(coord_count);
571
572 const auto aoffi_immediate{
573 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
574 if (!aoffi_immediate) {
575 // Variable access, not supported on AMD.
576 LOG_WARNING(HW_GPU,
577 "AOFFI constant folding failed, some hardware might have graphical issues");
578 for (std::size_t coord = 0; coord < coord_count; ++coord) {
579 const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
580 const Node condition =
581 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
582 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
583 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
584 }
585 return aoffi;
586 }
587
588 for (std::size_t coord = 0; coord < coord_count; ++coord) {
589 s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
590 if (value >= wrap_value) {
591 value -= diff_value;
592 }
593 aoffi.push_back(Immediate(value));
594 }
595 return aoffi;
596}
597
598} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5bc3a3900..4888998d3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <cstring> 8#include <cstring>
9#include <map> 9#include <map>
10#include <optional>
10#include <set> 11#include <set>
11#include <string> 12#include <string>
12#include <tuple> 13#include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
290 const Sampler& sampler; 291 const Sampler& sampler;
291 Node array{}; 292 Node array{};
292 Node depth_compare{}; 293 Node depth_compare{};
294 std::vector<Node> aoffi;
293 Node bias{}; 295 Node bias{};
294 Node lod{}; 296 Node lod{};
295 Node component{}; 297 Node component{};
@@ -741,14 +743,14 @@ private:
741 743
742 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 744 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
743 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 745 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
744 bool is_array); 746 bool is_array, bool is_aoffi);
745 747
746 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 748 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
747 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 749 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
748 bool is_array); 750 bool is_array);
749 751
750 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 752 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
751 bool depth_compare, bool is_array); 753 bool depth_compare, bool is_array, bool is_aoffi);
752 754
753 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 755 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
754 bool is_array); 756 bool is_array);
@@ -757,9 +759,11 @@ private:
757 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, 759 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
758 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); 760 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
759 761
762 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
763
760 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 764 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
761 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, 765 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
762 Node array, Node depth_compare, u32 bias_offset); 766 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
763 767
764 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, 768 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
765 u64 byte_height); 769 u64 byte_height);
@@ -773,6 +777,8 @@ private:
773 777
774 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); 778 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
775 779
780 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
781
776 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); 782 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
777 783
778 template <typename... T> 784 template <typename... T>
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 33b071747..4505667ff 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
6#include <utility> 6#include <utility>
7#include <variant> 7#include <variant>
8 8
9#include "common/common_types.h"
9#include "video_core/shader/shader_ir.h" 10#include "video_core/shader/shader_ir.h"
10 11
11namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
14std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
15 OperationCode operation_code) { 16 OperationCode operation_code) {
16 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
17 const Node node = code[cursor]; 18 const Node node = code.at(cursor);
18 if (const auto operation = std::get_if<OperationNode>(node)) { 19 if (const auto operation = std::get_if<OperationNode>(node)) {
19 if (operation->GetCode() == operation_code) 20 if (operation->GetCode() == operation_code)
20 return {node, cursor}; 21 return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
64 return nullptr; 65 return nullptr;
65} 66}
66 67
68std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
69 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
70 // that it uses as operand
71 const auto [found, found_cursor] =
72 TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
73 if (!found) {
74 return {};
75 }
76 if (const auto immediate = std::get_if<ImmediateNode>(found)) {
77 return immediate->GetValue();
78 }
79 return {};
80}
81
67std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, 82std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
68 s64 cursor) { 83 s64 cursor) {
69 for (; cursor >= 0; --cursor) { 84 for (; cursor >= 0; --cursor) {