diff options
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 133 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 114 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 12 | ||||
| -rw-r--r-- | src/video_core/shader/track.cpp | 17 |
4 files changed, 205 insertions, 71 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a1a51f226..3ea08ef7b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -21,6 +21,8 @@ | |||
| 21 | 21 | ||
| 22 | namespace OpenGL::GLShader { | 22 | namespace OpenGL::GLShader { |
| 23 | 23 | ||
| 24 | namespace { | ||
| 25 | |||
| 24 | using Tegra::Shader::Attribute; | 26 | using Tegra::Shader::Attribute; |
| 25 | using Tegra::Shader::AttributeUse; | 27 | using Tegra::Shader::AttributeUse; |
| 26 | using Tegra::Shader::Header; | 28 | using Tegra::Shader::Header; |
| @@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 34 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | 36 | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; |
| 35 | using Operation = const OperationNode&; | 37 | using Operation = const OperationNode&; |
| 36 | 38 | ||
| 39 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 40 | |||
| 41 | struct TextureAoffi {}; | ||
| 42 | using TextureArgument = std::pair<Type, Node>; | ||
| 43 | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | ||
| 44 | |||
| 37 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | 45 | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; |
| 38 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | 46 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = |
| 39 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | 47 | static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); |
| 40 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | 48 | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = |
| 41 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | 49 | static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); |
| 42 | 50 | ||
| 43 | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||
| 44 | |||
| 45 | class ShaderWriter { | 51 | class ShaderWriter { |
| 46 | public: | 52 | public: |
| 47 | void AddExpression(std::string_view text) { | 53 | void AddExpression(std::string_view text) { |
| @@ -91,7 +97,7 @@ private: | |||
| 91 | }; | 97 | }; |
| 92 | 98 | ||
| 93 | /// Generates code to use for a swizzle operation. | 99 | /// Generates code to use for a swizzle operation. |
| 94 | static std::string GetSwizzle(u32 elem) { | 100 | std::string GetSwizzle(u32 elem) { |
| 95 | ASSERT(elem <= 3); | 101 | ASSERT(elem <= 3); |
| 96 | std::string swizzle = "."; | 102 | std::string swizzle = "."; |
| 97 | swizzle += "xyzw"[elem]; | 103 | swizzle += "xyzw"[elem]; |
| @@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) { | |||
| 99 | } | 105 | } |
| 100 | 106 | ||
| 101 | /// Translate topology | 107 | /// Translate topology |
| 102 | static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | 108 | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { |
| 103 | switch (topology) { | 109 | switch (topology) { |
| 104 | case Tegra::Shader::OutputTopology::PointList: | 110 | case Tegra::Shader::OutputTopology::PointList: |
| 105 | return "points"; | 111 | return "points"; |
| @@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | |||
| 114 | } | 120 | } |
| 115 | 121 | ||
| 116 | /// Returns true if an object has to be treated as precise | 122 | /// Returns true if an object has to be treated as precise |
| 117 | static bool IsPrecise(Operation operand) { | 123 | bool IsPrecise(Operation operand) { |
| 118 | const auto& meta = operand.GetMeta(); | 124 | const auto& meta = operand.GetMeta(); |
| 119 | 125 | ||
| 120 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | 126 | if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |
| @@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) { | |||
| 126 | return false; | 132 | return false; |
| 127 | } | 133 | } |
| 128 | 134 | ||
| 129 | static bool IsPrecise(Node node) { | 135 | bool IsPrecise(Node node) { |
| 130 | if (const auto operation = std::get_if<OperationNode>(node)) { | 136 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 131 | return IsPrecise(*operation); | 137 | return IsPrecise(*operation); |
| 132 | } | 138 | } |
| @@ -723,8 +729,8 @@ private: | |||
| 723 | result_type)); | 729 | result_type)); |
| 724 | } | 730 | } |
| 725 | 731 | ||
| 726 | std::string GenerateTexture(Operation operation, const std::string& func, | 732 | std::string GenerateTexture(Operation operation, const std::string& function_suffix, |
| 727 | const std::vector<std::pair<Type, Node>>& extras) { | 733 | const std::vector<TextureIR>& extras) { |
| 728 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | 734 | constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |
| 729 | 735 | ||
| 730 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 736 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| @@ -734,11 +740,11 @@ private: | |||
| 734 | const bool has_array = meta->sampler.IsArray(); | 740 | const bool has_array = meta->sampler.IsArray(); |
| 735 | const bool has_shadow = meta->sampler.IsShadow(); | 741 | const bool has_shadow = meta->sampler.IsShadow(); |
| 736 | 742 | ||
| 737 | std::string expr = func; | 743 | std::string expr = "texture" + function_suffix; |
| 738 | expr += '('; | 744 | if (!meta->aoffi.empty()) { |
| 739 | expr += GetSampler(meta->sampler); | 745 | expr += "Offset"; |
| 740 | expr += ", "; | 746 | } |
| 741 | 747 | expr += '(' + GetSampler(meta->sampler) + ", "; | |
| 742 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); | 748 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); |
| 743 | expr += '('; | 749 | expr += '('; |
| 744 | for (std::size_t i = 0; i < count; ++i) { | 750 | for (std::size_t i = 0; i < count; ++i) { |
| @@ -756,36 +762,74 @@ private: | |||
| 756 | } | 762 | } |
| 757 | expr += ')'; | 763 | expr += ')'; |
| 758 | 764 | ||
| 759 | for (const auto& extra_pair : extras) { | 765 | for (const auto& variant : extras) { |
| 760 | const auto [type, operand] = extra_pair; | 766 | if (const auto argument = std::get_if<TextureArgument>(&variant)) { |
| 761 | if (operand == nullptr) { | 767 | expr += GenerateTextureArgument(*argument); |
| 762 | continue; | 768 | } else if (std::get_if<TextureAoffi>(&variant)) { |
| 769 | expr += GenerateTextureAoffi(meta->aoffi); | ||
| 770 | } else { | ||
| 771 | UNREACHABLE(); | ||
| 763 | } | 772 | } |
| 764 | expr += ", "; | 773 | } |
| 765 | 774 | ||
| 766 | switch (type) { | 775 | return expr + ')'; |
| 767 | case Type::Int: | 776 | } |
| 768 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | 777 | |
| 769 | // Inline the string as an immediate integer in GLSL (some extra arguments are | 778 | std::string GenerateTextureArgument(TextureArgument argument) { |
| 770 | // required to be constant) | 779 | const auto [type, operand] = argument; |
| 771 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | 780 | if (operand == nullptr) { |
| 772 | } else { | 781 | return {}; |
| 773 | expr += "ftoi(" + Visit(operand) + ')'; | 782 | } |
| 774 | } | 783 | |
| 775 | break; | 784 | std::string expr = ", "; |
| 776 | case Type::Float: | 785 | switch (type) { |
| 777 | expr += Visit(operand); | 786 | case Type::Int: |
| 778 | break; | 787 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { |
| 779 | default: { | 788 | // Inline the string as an immediate integer in GLSL (some extra arguments are |
| 780 | const auto type_int = static_cast<u32>(type); | 789 | // required to be constant) |
| 781 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | 790 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); |
| 782 | expr += '0'; | 791 | } else { |
| 783 | break; | 792 | expr += "ftoi(" + Visit(operand) + ')'; |
| 793 | } | ||
| 794 | break; | ||
| 795 | case Type::Float: | ||
| 796 | expr += Visit(operand); | ||
| 797 | break; | ||
| 798 | default: { | ||
| 799 | const auto type_int = static_cast<u32>(type); | ||
| 800 | UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||
| 801 | expr += '0'; | ||
| 802 | break; | ||
| 803 | } | ||
| 804 | } | ||
| 805 | return expr; | ||
| 806 | } | ||
| 807 | |||
| 808 | std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||
| 809 | if (aoffi.empty()) { | ||
| 810 | return {}; | ||
| 811 | } | ||
| 812 | constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; | ||
| 813 | std::string expr = ", "; | ||
| 814 | expr += coord_constructors.at(aoffi.size() - 1); | ||
| 815 | expr += '('; | ||
| 816 | |||
| 817 | for (std::size_t index = 0; index < aoffi.size(); ++index) { | ||
| 818 | const auto operand{aoffi.at(index)}; | ||
| 819 | if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||
| 820 | // Inline the string as an immediate integer in GLSL (AOFFI arguments are required | ||
| 821 | // to be constant by the standard). | ||
| 822 | expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||
| 823 | } else { | ||
| 824 | expr += "ftoi(" + Visit(operand) + ')'; | ||
| 784 | } | 825 | } |
| 826 | if (index + 1 < aoffi.size()) { | ||
| 827 | expr += ", "; | ||
| 785 | } | 828 | } |
| 786 | } | 829 | } |
| 830 | expr += ')'; | ||
| 787 | 831 | ||
| 788 | return expr + ')'; | 832 | return expr; |
| 789 | } | 833 | } |
| 790 | 834 | ||
| 791 | std::string Assign(Operation operation) { | 835 | std::string Assign(Operation operation) { |
| @@ -1164,7 +1208,8 @@ private: | |||
| 1164 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1208 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1165 | ASSERT(meta); | 1209 | ASSERT(meta); |
| 1166 | 1210 | ||
| 1167 | std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); | 1211 | std::string expr = GenerateTexture( |
| 1212 | operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); | ||
| 1168 | if (meta->sampler.IsShadow()) { | 1213 | if (meta->sampler.IsShadow()) { |
| 1169 | expr = "vec4(" + expr + ')'; | 1214 | expr = "vec4(" + expr + ')'; |
| 1170 | } | 1215 | } |
| @@ -1175,7 +1220,8 @@ private: | |||
| 1175 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | 1220 | const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |
| 1176 | ASSERT(meta); | 1221 | ASSERT(meta); |
| 1177 | 1222 | ||
| 1178 | std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); | 1223 | std::string expr = GenerateTexture( |
| 1224 | operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); | ||
| 1179 | if (meta->sampler.IsShadow()) { | 1225 | if (meta->sampler.IsShadow()) { |
| 1180 | expr = "vec4(" + expr + ')'; | 1226 | expr = "vec4(" + expr + ')'; |
| 1181 | } | 1227 | } |
| @@ -1187,7 +1233,8 @@ private: | |||
| 1187 | ASSERT(meta); | 1233 | ASSERT(meta); |
| 1188 | 1234 | ||
| 1189 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; | 1235 | const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; |
| 1190 | return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + | 1236 | return GenerateTexture(operation, "Gather", |
| 1237 | {TextureArgument{type, meta->component}, TextureAoffi{}}) + | ||
| 1191 | GetSwizzle(meta->element); | 1238 | GetSwizzle(meta->element); |
| 1192 | } | 1239 | } |
| 1193 | 1240 | ||
| @@ -1217,8 +1264,8 @@ private: | |||
| 1217 | ASSERT(meta); | 1264 | ASSERT(meta); |
| 1218 | 1265 | ||
| 1219 | if (meta->element < 2) { | 1266 | if (meta->element < 2) { |
| 1220 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + | 1267 | return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + |
| 1221 | " * vec2(256))" + GetSwizzle(meta->element) + "))"; | 1268 | GetSwizzle(meta->element) + "))"; |
| 1222 | } | 1269 | } |
| 1223 | return "0"; | 1270 | return "0"; |
| 1224 | } | 1271 | } |
| @@ -1571,6 +1618,8 @@ private: | |||
| 1571 | ShaderWriter code; | 1618 | ShaderWriter code; |
| 1572 | }; | 1619 | }; |
| 1573 | 1620 | ||
| 1621 | } // Anonymous namespace | ||
| 1622 | |||
| 1574 | std::string GetCommonDeclarations() { | 1623 | std::string GetCommonDeclarations() { |
| 1575 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); | 1624 | const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); |
| 1576 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); | 1625 | const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index a99ae19bf..a775b402b 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -7,7 +7,9 @@ | |||
| 7 | #include <fmt/format.h> | 7 | #include <fmt/format.h> |
| 8 | 8 | ||
| 9 | #include "common/assert.h" | 9 | #include "common/assert.h" |
| 10 | #include "common/bit_field.h" | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/logging/log.h" | ||
| 11 | #include "video_core/engines/shader_bytecode.h" | 13 | #include "video_core/engines/shader_bytecode.h" |
| 12 | #include "video_core/shader/shader_ir.h" | 14 | #include "video_core/shader/shader_ir.h" |
| 13 | 15 | ||
| @@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 41 | 43 | ||
| 42 | switch (opcode->get().GetId()) { | 44 | switch (opcode->get().GetId()) { |
| 43 | case OpCode::Id::TEX: { | 45 | case OpCode::Id::TEX: { |
| 44 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 45 | "AOFFI is not implemented"); | ||
| 46 | |||
| 47 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | 46 | if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { |
| 48 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | 47 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); |
| 49 | } | 48 | } |
| 50 | 49 | ||
| 51 | const TextureType texture_type{instr.tex.texture_type}; | 50 | const TextureType texture_type{instr.tex.texture_type}; |
| 52 | const bool is_array = instr.tex.array != 0; | 51 | const bool is_array = instr.tex.array != 0; |
| 52 | const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||
| 53 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | 53 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); |
| 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); | 54 | const auto process_mode = instr.tex.GetTextureProcessMode(); |
| 55 | WriteTexInstructionFloat( | 55 | WriteTexInstructionFloat( |
| 56 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | 56 | bb, instr, |
| 57 | GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); | ||
| 57 | break; | 58 | break; |
| 58 | } | 59 | } |
| 59 | case OpCode::Id::TEXS: { | 60 | case OpCode::Id::TEXS: { |
| @@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 78 | } | 79 | } |
| 79 | case OpCode::Id::TLD4: { | 80 | case OpCode::Id::TLD4: { |
| 80 | ASSERT(instr.tld4.array == 0); | 81 | ASSERT(instr.tld4.array == 0); |
| 81 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||
| 82 | "AOFFI is not implemented"); | ||
| 83 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | 82 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), |
| 84 | "NDV is not implemented"); | 83 | "NDV is not implemented"); |
| 85 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | 84 | UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), |
| @@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 92 | const auto texture_type = instr.tld4.texture_type.Value(); | 91 | const auto texture_type = instr.tld4.texture_type.Value(); |
| 93 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | 92 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); |
| 94 | const bool is_array = instr.tld4.array != 0; | 93 | const bool is_array = instr.tld4.array != 0; |
| 95 | WriteTexInstructionFloat(bb, instr, | 94 | const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); |
| 96 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | 95 | WriteTexInstructionFloat( |
| 96 | bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); | ||
| 97 | break; | 97 | break; |
| 98 | } | 98 | } |
| 99 | case OpCode::Id::TLD4S: { | 99 | case OpCode::Id::TLD4S: { |
| @@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 127 | Node4 values; | 127 | Node4 values; |
| 128 | for (u32 element = 0; element < values.size(); ++element) { | 128 | for (u32 element = 0; element < values.size(); ++element) { |
| 129 | auto coords_copy = coords; | 129 | auto coords_copy = coords; |
| 130 | MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; | 130 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; |
| 131 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 131 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 132 | } | 132 | } |
| 133 | 133 | ||
| @@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 152 | if (!instr.txq.IsComponentEnabled(element)) { | 152 | if (!instr.txq.IsComponentEnabled(element)) { |
| 153 | continue; | 153 | continue; |
| 154 | } | 154 | } |
| 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | 155 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 156 | const Node value = | 156 | const Node value = |
| 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | 157 | Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); |
| 158 | SetTemporal(bb, indexer++, value); | 158 | SetTemporal(bb, indexer++, value); |
| @@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 202 | 202 | ||
| 203 | for (u32 element = 0; element < 2; ++element) { | 203 | for (u32 element = 0; element < 2; ++element) { |
| 204 | auto params = coords; | 204 | auto params = coords; |
| 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | 205 | MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; |
| 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 206 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 207 | SetTemporal(bb, element, value); | 207 | SetTemporal(bb, element, value); |
| 208 | } | 208 | } |
| @@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||
| 325 | 325 | ||
| 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 326 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 327 | TextureProcessMode process_mode, std::vector<Node> coords, | 327 | TextureProcessMode process_mode, std::vector<Node> coords, |
| 328 | Node array, Node depth_compare, u32 bias_offset) { | 328 | Node array, Node depth_compare, u32 bias_offset, |
| 329 | std::vector<Node> aoffi) { | ||
| 329 | const bool is_array = array; | 330 | const bool is_array = array; |
| 330 | const bool is_shadow = depth_compare; | 331 | const bool is_shadow = depth_compare; |
| 331 | 332 | ||
| @@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 374 | Node4 values; | 375 | Node4 values; |
| 375 | for (u32 element = 0; element < values.size(); ++element) { | 376 | for (u32 element = 0; element < values.size(); ++element) { |
| 376 | auto copy_coords = coords; | 377 | auto copy_coords = coords; |
| 377 | MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; | 378 | MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; |
| 378 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | 379 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 379 | } | 380 | } |
| 380 | 381 | ||
| @@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 382 | } | 383 | } |
| 383 | 384 | ||
| 384 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | 385 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, |
| 385 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | 386 | TextureProcessMode process_mode, bool depth_compare, bool is_array, |
| 386 | const bool lod_bias_enabled = | 387 | bool is_aoffi) { |
| 387 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 388 | const bool lod_bias_enabled{ |
| 389 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||
| 390 | |||
| 391 | u64 parameter_register = instr.gpr20.Value(); | ||
| 392 | if (lod_bias_enabled) { | ||
| 393 | ++parameter_register; | ||
| 394 | } | ||
| 388 | 395 | ||
| 389 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | 396 | const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( |
| 390 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | 397 | texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); |
| @@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 404 | 411 | ||
| 405 | const Node array = is_array ? GetRegister(array_register) : nullptr; | 412 | const Node array = is_array ? GetRegister(array_register) : nullptr; |
| 406 | 413 | ||
| 414 | std::vector<Node> aoffi; | ||
| 415 | if (is_aoffi) { | ||
| 416 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||
| 417 | } | ||
| 418 | |||
| 407 | Node dc{}; | 419 | Node dc{}; |
| 408 | if (depth_compare) { | 420 | if (depth_compare) { |
| 409 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod | 421 | // Depth is always stored in the register signaled by gpr20 or in the next register if lod |
| 410 | // or bias are used | 422 | // or bias are used |
| 411 | const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | 423 | dc = GetRegister(parameter_register++); |
| 412 | dc = GetRegister(depth_register); | ||
| 413 | } | 424 | } |
| 414 | 425 | ||
| 415 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | 426 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); |
| 416 | } | 427 | } |
| 417 | 428 | ||
| 418 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 429 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| @@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 448 | dc = GetRegister(depth_register); | 459 | dc = GetRegister(depth_register); |
| 449 | } | 460 | } |
| 450 | 461 | ||
| 451 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | 462 | return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); |
| 452 | } | 463 | } |
| 453 | 464 | ||
| 454 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 465 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| 455 | bool is_array) { | 466 | bool is_array, bool is_aoffi) { |
| 456 | const std::size_t coord_count = GetCoordCount(texture_type); | 467 | const std::size_t coord_count = GetCoordCount(texture_type); |
| 457 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | 468 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); |
| 458 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | 469 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); |
| @@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 463 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 474 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 464 | 475 | ||
| 465 | std::vector<Node> coords; | 476 | std::vector<Node> coords; |
| 466 | for (size_t i = 0; i < coord_count; ++i) | 477 | for (std::size_t i = 0; i < coord_count; ++i) { |
| 467 | coords.push_back(GetRegister(coord_register + i)); | 478 | coords.push_back(GetRegister(coord_register + i)); |
| 479 | } | ||
| 480 | |||
| 481 | u64 parameter_register = instr.gpr20.Value(); | ||
| 482 | std::vector<Node> aoffi; | ||
| 483 | if (is_aoffi) { | ||
| 484 | aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||
| 485 | } | ||
| 486 | |||
| 487 | Node dc{}; | ||
| 488 | if (depth_compare) { | ||
| 489 | dc = GetRegister(parameter_register++); | ||
| 490 | } | ||
| 468 | 491 | ||
| 469 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 492 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 470 | 493 | ||
| 471 | Node4 values; | 494 | Node4 values; |
| 472 | for (u32 element = 0; element < values.size(); ++element) { | 495 | for (u32 element = 0; element < values.size(); ++element) { |
| 473 | auto coords_copy = coords; | 496 | auto coords_copy = coords; |
| 474 | MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; | 497 | MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; |
| 475 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 498 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 476 | } | 499 | } |
| 477 | 500 | ||
| @@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 507 | Node4 values; | 530 | Node4 values; |
| 508 | for (u32 element = 0; element < values.size(); ++element) { | 531 | for (u32 element = 0; element < values.size(); ++element) { |
| 509 | auto coords_copy = coords; | 532 | auto coords_copy = coords; |
| 510 | MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; | 533 | MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; |
| 511 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | 534 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 512 | } | 535 | } |
| 513 | return values; | 536 | return values; |
| @@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | |||
| 531 | return {coord_count, total_coord_count}; | 554 | return {coord_count, total_coord_count}; |
| 532 | } | 555 | } |
| 533 | 556 | ||
| 534 | } // namespace VideoCommon::Shader \ No newline at end of file | 557 | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, |
| 558 | bool is_tld4) { | ||
| 559 | const auto [coord_offsets, size, wrap_value, | ||
| 560 | diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { | ||
| 561 | if (is_tld4) { | ||
| 562 | return {{0, 8, 16}, 6, 32, 64}; | ||
| 563 | } else { | ||
| 564 | return {{0, 4, 8}, 4, 8, 16}; | ||
| 565 | } | ||
| 566 | }(); | ||
| 567 | const u32 mask = (1U << size) - 1; | ||
| 568 | |||
| 569 | std::vector<Node> aoffi; | ||
| 570 | aoffi.reserve(coord_count); | ||
| 571 | |||
| 572 | const auto aoffi_immediate{ | ||
| 573 | TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||
| 574 | if (!aoffi_immediate) { | ||
| 575 | // Variable access, not supported on AMD. | ||
| 576 | LOG_WARNING(HW_GPU, | ||
| 577 | "AOFFI constant folding failed, some hardware might have graphical issues"); | ||
| 578 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 579 | const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); | ||
| 580 | const Node condition = | ||
| 581 | Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||
| 582 | const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||
| 583 | aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||
| 584 | } | ||
| 585 | return aoffi; | ||
| 586 | } | ||
| 587 | |||
| 588 | for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||
| 589 | s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; | ||
| 590 | if (value >= wrap_value) { | ||
| 591 | value -= diff_value; | ||
| 592 | } | ||
| 593 | aoffi.push_back(Immediate(value)); | ||
| 594 | } | ||
| 595 | return aoffi; | ||
| 596 | } | ||
| 597 | |||
| 598 | } // namespace VideoCommon::Shader | ||
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 5bc3a3900..4888998d3 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <cstring> | 8 | #include <cstring> |
| 9 | #include <map> | 9 | #include <map> |
| 10 | #include <optional> | ||
| 10 | #include <set> | 11 | #include <set> |
| 11 | #include <string> | 12 | #include <string> |
| 12 | #include <tuple> | 13 | #include <tuple> |
| @@ -290,6 +291,7 @@ struct MetaTexture { | |||
| 290 | const Sampler& sampler; | 291 | const Sampler& sampler; |
| 291 | Node array{}; | 292 | Node array{}; |
| 292 | Node depth_compare{}; | 293 | Node depth_compare{}; |
| 294 | std::vector<Node> aoffi; | ||
| 293 | Node bias{}; | 295 | Node bias{}; |
| 294 | Node lod{}; | 296 | Node lod{}; |
| 295 | Node component{}; | 297 | Node component{}; |
| @@ -741,14 +743,14 @@ private: | |||
| 741 | 743 | ||
| 742 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 744 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 743 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 745 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 744 | bool is_array); | 746 | bool is_array, bool is_aoffi); |
| 745 | 747 | ||
| 746 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 748 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 747 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 749 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 748 | bool is_array); | 750 | bool is_array); |
| 749 | 751 | ||
| 750 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 752 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 751 | bool depth_compare, bool is_array); | 753 | bool depth_compare, bool is_array, bool is_aoffi); |
| 752 | 754 | ||
| 753 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 755 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 754 | bool is_array); | 756 | bool is_array); |
| @@ -757,9 +759,11 @@ private: | |||
| 757 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | 759 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |
| 758 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | 760 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
| 759 | 761 | ||
| 762 | std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||
| 763 | |||
| 760 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 764 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 761 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | 765 | Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |
| 762 | Node array, Node depth_compare, u32 bias_offset); | 766 | Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); |
| 763 | 767 | ||
| 764 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 768 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 765 | u64 byte_height); | 769 | u64 byte_height); |
| @@ -773,6 +777,8 @@ private: | |||
| 773 | 777 | ||
| 774 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); | 778 | Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); |
| 775 | 779 | ||
| 780 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); | ||
| 781 | |||
| 776 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); | 782 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); |
| 777 | 783 | ||
| 778 | template <typename... T> | 784 | template <typename... T> |
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 33b071747..4505667ff 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <utility> | 6 | #include <utility> |
| 7 | #include <variant> | 7 | #include <variant> |
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | ||
| 9 | #include "video_core/shader/shader_ir.h" | 10 | #include "video_core/shader/shader_ir.h" |
| 10 | 11 | ||
| 11 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| @@ -14,7 +15,7 @@ namespace { | |||
| 14 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 15 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 15 | OperationCode operation_code) { | 16 | OperationCode operation_code) { |
| 16 | for (; cursor >= 0; --cursor) { | 17 | for (; cursor >= 0; --cursor) { |
| 17 | const Node node = code[cursor]; | 18 | const Node node = code.at(cursor); |
| 18 | if (const auto operation = std::get_if<OperationNode>(node)) { | 19 | if (const auto operation = std::get_if<OperationNode>(node)) { |
| 19 | if (operation->GetCode() == operation_code) | 20 | if (operation->GetCode() == operation_code) |
| 20 | return {node, cursor}; | 21 | return {node, cursor}; |
| @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { | |||
| 64 | return nullptr; | 65 | return nullptr; |
| 65 | } | 66 | } |
| 66 | 67 | ||
| 68 | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { | ||
| 69 | // Start tracking at cursor - 1 to avoid infinite loops when the instruction sets the same register | ||
| 70 | // that it uses as an operand. Returns the value only when the tracked node is an ImmediateNode. | ||
| 71 | const auto [found, found_cursor] = | ||
| 72 | TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); | ||
| 73 | if (!found) { | ||
| 74 | return {}; | ||
| 75 | } | ||
| 76 | if (const auto immediate = std::get_if<ImmediateNode>(found)) { | ||
| 77 | return immediate->GetValue(); | ||
| 78 | } | ||
| 79 | return {}; | ||
| 80 | } | ||
| 81 | |||
| 67 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | 82 | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, |
| 68 | s64 cursor) { | 83 | s64 cursor) { |
| 69 | for (; cursor >= 0; --cursor) { | 84 | for (; cursor >= 0; --cursor) { |