summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: bunnei, 2019-04-07 17:52:30 -0400
committer: GitHub, 2019-04-07 17:52:30 -0400
commit: 8aaf418bd6e5d7b3cb67a96055773e10726ab52c (patch)
tree: ed2f2330642501e9498faceca41612c5ad24b37f /src
parent: Merge pull request #2361 from lioncash/pagetable (diff)
parent: gl_shader_decompiler: Hide local definitions inside an anonymous namespace (diff)
download: yuzu-8aaf418bd6e5d7b3cb67a96055773e10726ab52c.tar.gz
yuzu-8aaf418bd6e5d7b3cb67a96055773e10726ab52c.tar.xz
yuzu-8aaf418bd6e5d7b3cb67a96055773e10726ab52c.zip
Merge pull request #2306 from ReinUsesLisp/aoffi
shader_ir: Implement AOFFI for TEX and TLD4
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 133
-rw-r--r-- src/video_core/shader/decode/texture.cpp 114
-rw-r--r-- src/video_core/shader/shader_ir.h 12
-rw-r--r-- src/video_core/shader/track.cpp 17
4 files changed, 205 insertions, 71 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a1a51f226..3ea08ef7b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -21,6 +21,8 @@
21 21
22namespace OpenGL::GLShader { 22namespace OpenGL::GLShader {
23 23
24namespace {
25
24using Tegra::Shader::Attribute; 26using Tegra::Shader::Attribute;
25using Tegra::Shader::AttributeUse; 27using Tegra::Shader::AttributeUse;
26using Tegra::Shader::Header; 28using Tegra::Shader::Header;
@@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
34using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; 36using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
35using Operation = const OperationNode&; 37using Operation = const OperationNode&;
36 38
39enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
40
41struct TextureAoffi {};
42using TextureArgument = std::pair<Type, Node>;
43using TextureIR = std::variant<TextureAoffi, TextureArgument>;
44
37enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; 45enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
38constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 46constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
39 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); 47 static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
40constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = 48constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
41 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); 49 static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
42 50
43enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
44
45class ShaderWriter { 51class ShaderWriter {
46public: 52public:
47 void AddExpression(std::string_view text) { 53 void AddExpression(std::string_view text) {
@@ -91,7 +97,7 @@ private:
91}; 97};
92 98
93/// Generates code to use for a swizzle operation. 99/// Generates code to use for a swizzle operation.
94static std::string GetSwizzle(u32 elem) { 100std::string GetSwizzle(u32 elem) {
95 ASSERT(elem <= 3); 101 ASSERT(elem <= 3);
96 std::string swizzle = "."; 102 std::string swizzle = ".";
97 swizzle += "xyzw"[elem]; 103 swizzle += "xyzw"[elem];
@@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) {
99} 105}
100 106
101/// Translate topology 107/// Translate topology
102static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { 108std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
103 switch (topology) { 109 switch (topology) {
104 case Tegra::Shader::OutputTopology::PointList: 110 case Tegra::Shader::OutputTopology::PointList:
105 return "points"; 111 return "points";
@@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
114} 120}
115 121
116/// Returns true if an object has to be treated as precise 122/// Returns true if an object has to be treated as precise
117static bool IsPrecise(Operation operand) { 123bool IsPrecise(Operation operand) {
118 const auto& meta = operand.GetMeta(); 124 const auto& meta = operand.GetMeta();
119 125
120 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { 126 if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) {
@@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) {
126 return false; 132 return false;
127} 133}
128 134
129static bool IsPrecise(Node node) { 135bool IsPrecise(Node node) {
130 if (const auto operation = std::get_if<OperationNode>(node)) { 136 if (const auto operation = std::get_if<OperationNode>(node)) {
131 return IsPrecise(*operation); 137 return IsPrecise(*operation);
132 } 138 }
@@ -723,8 +729,8 @@ private:
723 result_type)); 729 result_type));
724 } 730 }
725 731
726 std::string GenerateTexture(Operation operation, const std::string& func, 732 std::string GenerateTexture(Operation operation, const std::string& function_suffix,
727 const std::vector<std::pair<Type, Node>>& extras) { 733 const std::vector<TextureIR>& extras) {
728 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; 734 constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
729 735
730 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 736 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -734,11 +740,11 @@ private:
734 const bool has_array = meta->sampler.IsArray(); 740 const bool has_array = meta->sampler.IsArray();
735 const bool has_shadow = meta->sampler.IsShadow(); 741 const bool has_shadow = meta->sampler.IsShadow();
736 742
737 std::string expr = func; 743 std::string expr = "texture" + function_suffix;
738 expr += '('; 744 if (!meta->aoffi.empty()) {
739 expr += GetSampler(meta->sampler); 745 expr += "Offset";
740 expr += ", "; 746 }
741 747 expr += '(' + GetSampler(meta->sampler) + ", ";
742 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); 748 expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
743 expr += '('; 749 expr += '(';
744 for (std::size_t i = 0; i < count; ++i) { 750 for (std::size_t i = 0; i < count; ++i) {
@@ -756,36 +762,74 @@ private:
756 } 762 }
757 expr += ')'; 763 expr += ')';
758 764
759 for (const auto& extra_pair : extras) { 765 for (const auto& variant : extras) {
760 const auto [type, operand] = extra_pair; 766 if (const auto argument = std::get_if<TextureArgument>(&variant)) {
761 if (operand == nullptr) { 767 expr += GenerateTextureArgument(*argument);
762 continue; 768 } else if (std::get_if<TextureAoffi>(&variant)) {
769 expr += GenerateTextureAoffi(meta->aoffi);
770 } else {
771 UNREACHABLE();
763 } 772 }
764 expr += ", "; 773 }
765 774
766 switch (type) { 775 return expr + ')';
767 case Type::Int: 776 }
768 if (const auto immediate = std::get_if<ImmediateNode>(operand)) { 777
769 // Inline the string as an immediate integer in GLSL (some extra arguments are 778 std::string GenerateTextureArgument(TextureArgument argument) {
770 // required to be constant) 779 const auto [type, operand] = argument;
771 expr += std::to_string(static_cast<s32>(immediate->GetValue())); 780 if (operand == nullptr) {
772 } else { 781 return {};
773 expr += "ftoi(" + Visit(operand) + ')'; 782 }
774 } 783
775 break; 784 std::string expr = ", ";
776 case Type::Float: 785 switch (type) {
777 expr += Visit(operand); 786 case Type::Int:
778 break; 787 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
779 default: { 788 // Inline the string as an immediate integer in GLSL (some extra arguments are
780 const auto type_int = static_cast<u32>(type); 789 // required to be constant)
781 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); 790 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
782 expr += '0'; 791 } else {
783 break; 792 expr += "ftoi(" + Visit(operand) + ')';
793 }
794 break;
795 case Type::Float:
796 expr += Visit(operand);
797 break;
798 default: {
799 const auto type_int = static_cast<u32>(type);
800 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int);
801 expr += '0';
802 break;
803 }
804 }
805 return expr;
806 }
807
808 std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
809 if (aoffi.empty()) {
810 return {};
811 }
812 constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
813 std::string expr = ", ";
814 expr += coord_constructors.at(aoffi.size() - 1);
815 expr += '(';
816
817 for (std::size_t index = 0; index < aoffi.size(); ++index) {
818 const auto operand{aoffi.at(index)};
819 if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
820 // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
821 // to be constant by the standard).
822 expr += std::to_string(static_cast<s32>(immediate->GetValue()));
823 } else {
824 expr += "ftoi(" + Visit(operand) + ')';
784 } 825 }
826 if (index + 1 < aoffi.size()) {
827 expr += ", ";
785 } 828 }
786 } 829 }
830 expr += ')';
787 831
788 return expr + ')'; 832 return expr;
789 } 833 }
790 834
791 std::string Assign(Operation operation) { 835 std::string Assign(Operation operation) {
@@ -1164,7 +1208,8 @@ private:
1164 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1208 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1165 ASSERT(meta); 1209 ASSERT(meta);
1166 1210
1167 std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); 1211 std::string expr = GenerateTexture(
1212 operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
1168 if (meta->sampler.IsShadow()) { 1213 if (meta->sampler.IsShadow()) {
1169 expr = "vec4(" + expr + ')'; 1214 expr = "vec4(" + expr + ')';
1170 } 1215 }
@@ -1175,7 +1220,8 @@ private:
1175 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); 1220 const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
1176 ASSERT(meta); 1221 ASSERT(meta);
1177 1222
1178 std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); 1223 std::string expr = GenerateTexture(
1224 operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
1179 if (meta->sampler.IsShadow()) { 1225 if (meta->sampler.IsShadow()) {
1180 expr = "vec4(" + expr + ')'; 1226 expr = "vec4(" + expr + ')';
1181 } 1227 }
@@ -1187,7 +1233,8 @@ private:
1187 ASSERT(meta); 1233 ASSERT(meta);
1188 1234
1189 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; 1235 const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
1190 return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + 1236 return GenerateTexture(operation, "Gather",
1237 {TextureArgument{type, meta->component}, TextureAoffi{}}) +
1191 GetSwizzle(meta->element); 1238 GetSwizzle(meta->element);
1192 } 1239 }
1193 1240
@@ -1217,8 +1264,8 @@ private:
1217 ASSERT(meta); 1264 ASSERT(meta);
1218 1265
1219 if (meta->element < 2) { 1266 if (meta->element < 2) {
1220 return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + 1267 return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" +
1221 " * vec2(256))" + GetSwizzle(meta->element) + "))"; 1268 GetSwizzle(meta->element) + "))";
1222 } 1269 }
1223 return "0"; 1270 return "0";
1224 } 1271 }
@@ -1571,6 +1618,8 @@ private:
1571 ShaderWriter code; 1618 ShaderWriter code;
1572}; 1619};
1573 1620
1621} // Anonymous namespace
1622
1574std::string GetCommonDeclarations() { 1623std::string GetCommonDeclarations() {
1575 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); 1624 const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
1576 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); 1625 const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index a99ae19bf..a775b402b 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -7,7 +7,9 @@
7#include <fmt/format.h> 7#include <fmt/format.h>
8 8
9#include "common/assert.h" 9#include "common/assert.h"
10#include "common/bit_field.h"
10#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h"
11#include "video_core/engines/shader_bytecode.h" 13#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h" 14#include "video_core/shader/shader_ir.h"
13 15
@@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
41 43
42 switch (opcode->get().GetId()) { 44 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: { 45 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { 46 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); 47 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 } 48 }
50 49
51 const TextureType texture_type{instr.tex.texture_type}; 50 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0; 51 const bool is_array = instr.tex.array != 0;
52 const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); 53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode(); 54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat( 55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); 56 bb, instr,
57 GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
57 break; 58 break;
58 } 59 }
59 case OpCode::Id::TEXS: { 60 case OpCode::Id::TEXS: {
@@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
78 } 79 }
79 case OpCode::Id::TLD4: { 80 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0); 81 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), 82 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented"); 83 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), 84 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
@@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
92 const auto texture_type = instr.tld4.texture_type.Value(); 91 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); 92 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0; 93 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr, 94 const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
96 GetTld4Code(instr, texture_type, depth_compare, is_array)); 95 WriteTexInstructionFloat(
96 bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
97 break; 97 break;
98 } 98 }
99 case OpCode::Id::TLD4S: { 99 case OpCode::Id::TLD4S: {
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
127 Node4 values; 127 Node4 values;
128 for (u32 element = 0; element < values.size(); ++element) { 128 for (u32 element = 0; element < values.size(); ++element) {
129 auto coords_copy = coords; 129 auto coords_copy = coords;
130 MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; 130 MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 131 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
132 } 132 }
133 133
@@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
152 if (!instr.txq.IsComponentEnabled(element)) { 152 if (!instr.txq.IsComponentEnabled(element)) {
153 continue; 153 continue;
154 } 154 }
155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; 155 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
156 const Node value = 156 const Node value =
157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); 157 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
158 SetTemporal(bb, indexer++, value); 158 SetTemporal(bb, indexer++, value);
@@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
202 202
203 for (u32 element = 0; element < 2; ++element) { 203 for (u32 element = 0; element < 2; ++element) {
204 auto params = coords; 204 auto params = coords;
205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; 205 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 206 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
207 SetTemporal(bb, element, value); 207 SetTemporal(bb, element, value);
208 } 208 }
@@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
325 325
326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, 326Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
327 TextureProcessMode process_mode, std::vector<Node> coords, 327 TextureProcessMode process_mode, std::vector<Node> coords,
328 Node array, Node depth_compare, u32 bias_offset) { 328 Node array, Node depth_compare, u32 bias_offset,
329 std::vector<Node> aoffi) {
329 const bool is_array = array; 330 const bool is_array = array;
330 const bool is_shadow = depth_compare; 331 const bool is_shadow = depth_compare;
331 332
@@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
374 Node4 values; 375 Node4 values;
375 for (u32 element = 0; element < values.size(); ++element) { 376 for (u32 element = 0; element < values.size(); ++element) {
376 auto copy_coords = coords; 377 auto copy_coords = coords;
377 MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; 378 MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
378 values[element] = Operation(read_method, meta, std::move(copy_coords)); 379 values[element] = Operation(read_method, meta, std::move(copy_coords));
379 } 380 }
380 381
@@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
382} 383}
383 384
384Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, 385Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
385 TextureProcessMode process_mode, bool depth_compare, bool is_array) { 386 TextureProcessMode process_mode, bool depth_compare, bool is_array,
386 const bool lod_bias_enabled = 387 bool is_aoffi) {
387 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); 388 const bool lod_bias_enabled{
389 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
390
391 u64 parameter_register = instr.gpr20.Value();
392 if (lod_bias_enabled) {
393 ++parameter_register;
394 }
388 395
389 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( 396 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
390 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); 397 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
@@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
404 411
405 const Node array = is_array ? GetRegister(array_register) : nullptr; 412 const Node array = is_array ? GetRegister(array_register) : nullptr;
406 413
414 std::vector<Node> aoffi;
415 if (is_aoffi) {
416 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
417 }
418
407 Node dc{}; 419 Node dc{};
408 if (depth_compare) { 420 if (depth_compare) {
409 // Depth is always stored in the register signaled by gpr20 or in the next register if lod 421 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
410 // or bias are used 422 // or bias are used
411 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); 423 dc = GetRegister(parameter_register++);
412 dc = GetRegister(depth_register);
413 } 424 }
414 425
415 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); 426 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
416} 427}
417 428
418Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, 429Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
448 dc = GetRegister(depth_register); 459 dc = GetRegister(depth_register);
449 } 460 }
450 461
451 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); 462 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
452} 463}
453 464
454Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, 465Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
455 bool is_array) { 466 bool is_array, bool is_aoffi) {
456 const std::size_t coord_count = GetCoordCount(texture_type); 467 const std::size_t coord_count = GetCoordCount(texture_type);
457 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); 468 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
458 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); 469 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
@@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
463 const u64 coord_register = array_register + (is_array ? 1 : 0); 474 const u64 coord_register = array_register + (is_array ? 1 : 0);
464 475
465 std::vector<Node> coords; 476 std::vector<Node> coords;
466 for (size_t i = 0; i < coord_count; ++i) 477 for (std::size_t i = 0; i < coord_count; ++i) {
467 coords.push_back(GetRegister(coord_register + i)); 478 coords.push_back(GetRegister(coord_register + i));
479 }
480
481 u64 parameter_register = instr.gpr20.Value();
482 std::vector<Node> aoffi;
483 if (is_aoffi) {
484 aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
485 }
486
487 Node dc{};
488 if (depth_compare) {
489 dc = GetRegister(parameter_register++);
490 }
468 491
469 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); 492 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
470 493
471 Node4 values; 494 Node4 values;
472 for (u32 element = 0; element < values.size(); ++element) { 495 for (u32 element = 0; element < values.size(); ++element) {
473 auto coords_copy = coords; 496 auto coords_copy = coords;
474 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; 497 MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
475 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 498 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
476 } 499 }
477 500
@@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
507 Node4 values; 530 Node4 values;
508 for (u32 element = 0; element < values.size(); ++element) { 531 for (u32 element = 0; element < values.size(); ++element) {
509 auto coords_copy = coords; 532 auto coords_copy = coords;
510 MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; 533 MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
511 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); 534 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
512 } 535 }
513 return values; 536 return values;
@@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
531 return {coord_count, total_coord_count}; 554 return {coord_count, total_coord_count};
532} 555}
533 556
534} // namespace VideoCommon::Shader \ No newline at end of file 557std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
558 bool is_tld4) {
559 const auto [coord_offsets, size, wrap_value,
560 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> {
561 if (is_tld4) {
562 return {{0, 8, 16}, 6, 32, 64};
563 } else {
564 return {{0, 4, 8}, 4, 8, 16};
565 }
566 }();
567 const u32 mask = (1U << size) - 1;
568
569 std::vector<Node> aoffi;
570 aoffi.reserve(coord_count);
571
572 const auto aoffi_immediate{
573 TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))};
574 if (!aoffi_immediate) {
575 // Variable access, not supported on AMD.
576 LOG_WARNING(HW_GPU,
577 "AOFFI constant folding failed, some hardware might have graphical issues");
578 for (std::size_t coord = 0; coord < coord_count; ++coord) {
579 const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size);
580 const Node condition =
581 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value));
582 const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value));
583 aoffi.push_back(Operation(OperationCode::Select, condition, negative, value));
584 }
585 return aoffi;
586 }
587
588 for (std::size_t coord = 0; coord < coord_count; ++coord) {
589 s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask;
590 if (value >= wrap_value) {
591 value -= diff_value;
592 }
593 aoffi.push_back(Immediate(value));
594 }
595 return aoffi;
596}
597
598} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 5bc3a3900..4888998d3 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -7,6 +7,7 @@
7#include <array> 7#include <array>
8#include <cstring> 8#include <cstring>
9#include <map> 9#include <map>
10#include <optional>
10#include <set> 11#include <set>
11#include <string> 12#include <string>
12#include <tuple> 13#include <tuple>
@@ -290,6 +291,7 @@ struct MetaTexture {
290 const Sampler& sampler; 291 const Sampler& sampler;
291 Node array{}; 292 Node array{};
292 Node depth_compare{}; 293 Node depth_compare{};
294 std::vector<Node> aoffi;
293 Node bias{}; 295 Node bias{};
294 Node lod{}; 296 Node lod{};
295 Node component{}; 297 Node component{};
@@ -741,14 +743,14 @@ private:
741 743
742 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 744 Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
743 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 745 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
744 bool is_array); 746 bool is_array, bool is_aoffi);
745 747
746 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 748 Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
747 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, 749 Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
748 bool is_array); 750 bool is_array);
749 751
750 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 752 Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
751 bool depth_compare, bool is_array); 753 bool depth_compare, bool is_array, bool is_aoffi);
752 754
753 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 755 Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
754 bool is_array); 756 bool is_array);
@@ -757,9 +759,11 @@ private:
757 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, 759 Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
758 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); 760 bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
759 761
762 std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
763
760 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, 764 Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
761 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, 765 Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
762 Node array, Node depth_compare, u32 bias_offset); 766 Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
763 767
764 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, 768 Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
765 u64 byte_height); 769 u64 byte_height);
@@ -773,6 +777,8 @@ private:
773 777
774 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); 778 Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor);
775 779
780 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor);
781
776 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); 782 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor);
777 783
778 template <typename... T> 784 template <typename... T>
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 33b071747..4505667ff 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -6,6 +6,7 @@
6#include <utility> 6#include <utility>
7#include <variant> 7#include <variant>
8 8
9#include "common/common_types.h"
9#include "video_core/shader/shader_ir.h" 10#include "video_core/shader/shader_ir.h"
10 11
11namespace VideoCommon::Shader { 12namespace VideoCommon::Shader {
@@ -14,7 +15,7 @@ namespace {
14std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 15std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
15 OperationCode operation_code) { 16 OperationCode operation_code) {
16 for (; cursor >= 0; --cursor) { 17 for (; cursor >= 0; --cursor) {
17 const Node node = code[cursor]; 18 const Node node = code.at(cursor);
18 if (const auto operation = std::get_if<OperationNode>(node)) { 19 if (const auto operation = std::get_if<OperationNode>(node)) {
19 if (operation->GetCode() == operation_code) 20 if (operation->GetCode() == operation_code)
20 return {node, cursor}; 21 return {node, cursor};
@@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) {
64 return nullptr; 65 return nullptr;
65} 66}
66 67
68std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) {
69 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register
70 // that it uses as operand
71 const auto [found, found_cursor] =
72 TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
73 if (!found) {
74 return {};
75 }
76 if (const auto immediate = std::get_if<ImmediateNode>(found)) {
77 return immediate->GetValue();
78 }
79 return {};
80}
81
67std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, 82std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
68 s64 cursor) { 83 s64 cursor) {
69 for (; cursor >= 0; --cursor) { 84 for (; cursor >= 0; --cursor) {