diff options
| author | 2018-12-27 01:50:22 -0300 | |
|---|---|---|
| committer | 2019-01-15 17:54:53 -0300 | |
| commit | d911740e5d474ae459f9e05d82a7dba9c7e06340 (patch) | |
| tree | 1f513427747b05f1305949791ddf90d1406c99c7 /src | |
| parent | gl_shader_decompiler: Fixup AssignCompositeHalf (diff) | |
| download | yuzu-d911740e5d474ae459f9e05d82a7dba9c7e06340.tar.gz yuzu-d911740e5d474ae459f9e05d82a7dba9c7e06340.tar.xz yuzu-d911740e5d474ae459f9e05d82a7dba9c7e06340.zip | |
shader_ir: Remove composite primitives and use temporals instead
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 103 | ||||
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 294 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 60 |
4 files changed, 224 insertions, 241 deletions
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 36dc34777..37c4856d2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <array> | ||
| 5 | #include <string> | 6 | #include <string> |
| 6 | #include <string_view> | 7 | #include <string_view> |
| 7 | #include <variant> | 8 | #include <variant> |
| @@ -770,49 +771,6 @@ private: | |||
| 770 | return {}; | 771 | return {}; |
| 771 | } | 772 | } |
| 772 | 773 | ||
| 773 | std::string AssignComposite(Operation operation) { | ||
| 774 | const auto& meta = std::get<MetaComponents>(operation.GetMeta()); | ||
| 775 | |||
| 776 | const std::string composite = code.GenerateTemporal(); | ||
| 777 | code.AddLine("vec4 " + composite + " = " + Visit(operation[0]) + ';'); | ||
| 778 | |||
| 779 | constexpr u32 composite_size = 4; | ||
| 780 | for (u32 i = 0; i < composite_size; ++i) { | ||
| 781 | const auto gpr = std::get<GprNode>(*operation[i + 1]).GetIndex(); | ||
| 782 | if (gpr == Register::ZeroIndex) { | ||
| 783 | continue; | ||
| 784 | } | ||
| 785 | code.AddLine(GetRegister(gpr) + " = " + composite + | ||
| 786 | GetSwizzle(meta.GetSourceComponent(i)) + ';'); | ||
| 787 | } | ||
| 788 | return {}; | ||
| 789 | } | ||
| 790 | |||
| 791 | std::string AssignCompositeHalf(Operation operation) { | ||
| 792 | const auto& meta = std::get<MetaComponents>(operation.GetMeta()); | ||
| 793 | |||
| 794 | const std::string composite = code.GenerateTemporal(); | ||
| 795 | code.AddLine("vec4 " + composite + " = " + Visit(operation[0]) + ';'); | ||
| 796 | |||
| 797 | const auto ReadComponent = [&](u32 component) { | ||
| 798 | if (component < meta.count) { | ||
| 799 | return composite + '[' + std::to_string(meta.GetSourceComponent(component)) + ']'; | ||
| 800 | } | ||
| 801 | return std::string("0"); | ||
| 802 | }; | ||
| 803 | |||
| 804 | const auto dst1 = std::get<GprNode>(*operation[1]).GetIndex(); | ||
| 805 | const std::string src1 = "vec2(" + ReadComponent(0) + ", " + ReadComponent(1) + ')'; | ||
| 806 | code.AddLine(GetRegister(dst1) + " = utof(packHalf2x16(" + src1 + "))"); | ||
| 807 | |||
| 808 | if (meta.count > 2) { | ||
| 809 | const auto dst2 = std::get<GprNode>(*operation[2]).GetIndex(); | ||
| 810 | const std::string src2 = "vec2(" + ReadComponent(2) + ", " + ReadComponent(3) + ')'; | ||
| 811 | code.AddLine(GetRegister(dst2) + " = utof(packHalf2x16(" + src2 + "));"); | ||
| 812 | } | ||
| 813 | return {}; | ||
| 814 | } | ||
| 815 | |||
| 816 | std::string Composite(Operation operation) { | 774 | std::string Composite(Operation operation) { |
| 817 | std::string value = "vec4("; | 775 | std::string value = "vec4("; |
| 818 | for (std::size_t i = 0; i < 4; ++i) { | 776 | for (std::size_t i = 0; i < 4; ++i) { |
| @@ -1018,6 +976,10 @@ private: | |||
| 1018 | Visit(operation[1]) + ")[1]))"; | 976 | Visit(operation[1]) + ")[1]))"; |
| 1019 | } | 977 | } |
| 1020 | 978 | ||
| 979 | std::string HPack2(Operation operation) { | ||
| 980 | return "utof(packHalf2x16(vec2(" + Visit(operation[0]) + ", " + Visit(operation[1]) + ")))"; | ||
| 981 | } | ||
| 982 | |||
| 1021 | template <Type type> | 983 | template <Type type> |
| 1022 | std::string LogicalLessThan(Operation operation) { | 984 | std::string LogicalLessThan(Operation operation) { |
| 1023 | return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); | 985 | return GenerateBinaryInfix(operation, "<", Type::Bool, type, type); |
| @@ -1137,30 +1099,35 @@ private: | |||
| 1137 | } | 1099 | } |
| 1138 | 1100 | ||
| 1139 | std::string F4Texture(Operation operation) { | 1101 | std::string F4Texture(Operation operation) { |
| 1102 | const auto meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1140 | std::string expr = GenerateTexture(operation, "texture"); | 1103 | std::string expr = GenerateTexture(operation, "texture"); |
| 1141 | if (std::get<MetaTexture>(operation.GetMeta()).sampler.IsShadow()) { | 1104 | if (meta.sampler.IsShadow()) { |
| 1142 | expr = "vec4(" + expr + ')'; | 1105 | expr = "vec4(" + expr + ')'; |
| 1143 | } | 1106 | } |
| 1144 | return expr; | 1107 | return expr + GetSwizzle(meta.element); |
| 1145 | } | 1108 | } |
| 1146 | 1109 | ||
| 1147 | std::string F4TextureLod(Operation operation) { | 1110 | std::string F4TextureLod(Operation operation) { |
| 1111 | const auto meta = std::get<MetaTexture>(operation.GetMeta()); | ||
| 1148 | std::string expr = GenerateTexture(operation, "textureLod"); | 1112 | std::string expr = GenerateTexture(operation, "textureLod"); |
| 1149 | if (std::get<MetaTexture>(operation.GetMeta()).sampler.IsShadow()) { | 1113 | if (meta.sampler.IsShadow()) { |
| 1150 | expr = "vec4(" + expr + ')'; | 1114 | expr = "vec4(" + expr + ')'; |
| 1151 | } | 1115 | } |
| 1152 | return expr; | 1116 | return expr + GetSwizzle(meta.element); |
| 1153 | } | 1117 | } |
| 1154 | 1118 | ||
| 1155 | std::string F4TextureGather(Operation operation) { | 1119 | std::string F4TextureGather(Operation operation) { |
| 1156 | const bool is_shadow = std::get<MetaTexture>(operation.GetMeta()).sampler.IsShadow(); | 1120 | const auto meta = std::get<MetaTexture>(operation.GetMeta()); |
| 1157 | if (is_shadow) { | 1121 | |
| 1158 | return GenerateTexture(operation, "textureGather", | 1122 | std::string expr; |
| 1123 | if (meta.sampler.IsShadow()) { | ||
| 1124 | expr = GenerateTexture(operation, "textureGather", | ||
| 1159 | [](std::string ref_z) { return ref_z; }); | 1125 | [](std::string ref_z) { return ref_z; }); |
| 1160 | } else { | 1126 | } else { |
| 1161 | return GenerateTexture(operation, "textureGather", | 1127 | expr = GenerateTexture(operation, "textureGather", |
| 1162 | [](std::string comp) { return "ftoi(" + comp + ')'; }); | 1128 | [](std::string comp) { return "ftoi(" + comp + ')'; }); |
| 1163 | } | 1129 | } |
| 1130 | return expr + GetSwizzle(meta.element); | ||
| 1164 | } | 1131 | } |
| 1165 | 1132 | ||
| 1166 | std::string F4TextureQueryDimensions(Operation operation) { | 1133 | std::string F4TextureQueryDimensions(Operation operation) { |
| @@ -1168,20 +1135,26 @@ private: | |||
| 1168 | const std::string sampler = GetSampler(meta.sampler); | 1135 | const std::string sampler = GetSampler(meta.sampler); |
| 1169 | const std::string lod = VisitOperand(operation, 0, Type::Int); | 1136 | const std::string lod = VisitOperand(operation, 0, Type::Int); |
| 1170 | 1137 | ||
| 1171 | const std::string sizes = code.GenerateTemporal(); | 1138 | switch (meta.element) { |
| 1172 | code.AddLine("ivec2 " + sizes + " = textureSize(" + sampler + ", " + lod + ");"); | 1139 | case 0: |
| 1173 | 1140 | case 1: | |
| 1174 | const std::string mip_level = "textureQueryLevels(" + sampler + ')'; | 1141 | return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta.element); |
| 1175 | 1142 | case 2: | |
| 1176 | return "itof(ivec4(" + sizes + ", 0, " + mip_level + "))"; | 1143 | return "0"; |
| 1144 | case 3: | ||
| 1145 | return "textureQueryLevels(" + sampler + ')'; | ||
| 1146 | } | ||
| 1147 | UNREACHABLE(); | ||
| 1148 | return "0"; | ||
| 1177 | } | 1149 | } |
| 1178 | 1150 | ||
| 1179 | std::string F4TextureQueryLod(Operation operation) { | 1151 | std::string F4TextureQueryLod(Operation operation) { |
| 1180 | const std::string tmp = code.GenerateTemporal(); | 1152 | const auto& meta = std::get<MetaTexture>(operation.GetMeta()); |
| 1181 | code.AddLine("vec2 " + tmp + " = " + GenerateTexture(operation, "textureQueryLod") + | 1153 | if (meta.element < 2) { |
| 1182 | " * vec2(256);"); | 1154 | return "itof(int((" + GenerateTexture(operation, "textureQueryLod") + " * vec2(256))" + |
| 1183 | 1155 | GetSwizzle(meta.element) + "))"; | |
| 1184 | return "vec4(itof(int(" + tmp + ".y)), utof(uint(" + tmp + ".x)), 0, 0)"; | 1156 | } |
| 1157 | return "0"; | ||
| 1185 | } | 1158 | } |
| 1186 | 1159 | ||
| 1187 | std::string F4TexelFetch(Operation operation) { | 1160 | std::string F4TexelFetch(Operation operation) { |
| @@ -1206,7 +1179,7 @@ private: | |||
| 1206 | } | 1179 | } |
| 1207 | } | 1180 | } |
| 1208 | expr += ')'; | 1181 | expr += ')'; |
| 1209 | return expr; | 1182 | return expr + GetSwizzle(meta.element); |
| 1210 | } | 1183 | } |
| 1211 | 1184 | ||
| 1212 | std::string Branch(Operation operation) { | 1185 | std::string Branch(Operation operation) { |
| @@ -1328,10 +1301,7 @@ private: | |||
| 1328 | 1301 | ||
| 1329 | static constexpr OperationDecompilersArray operation_decompilers = { | 1302 | static constexpr OperationDecompilersArray operation_decompilers = { |
| 1330 | &GLSLDecompiler::Assign, | 1303 | &GLSLDecompiler::Assign, |
| 1331 | &GLSLDecompiler::AssignComposite, | ||
| 1332 | &GLSLDecompiler::AssignCompositeHalf, | ||
| 1333 | 1304 | ||
| 1334 | &GLSLDecompiler::Composite, | ||
| 1335 | &GLSLDecompiler::Select, | 1305 | &GLSLDecompiler::Select, |
| 1336 | 1306 | ||
| 1337 | &GLSLDecompiler::Add<Type::Float>, | 1307 | &GLSLDecompiler::Add<Type::Float>, |
| @@ -1403,6 +1373,7 @@ private: | |||
| 1403 | &GLSLDecompiler::HMergeF32, | 1373 | &GLSLDecompiler::HMergeF32, |
| 1404 | &GLSLDecompiler::HMergeH0, | 1374 | &GLSLDecompiler::HMergeH0, |
| 1405 | &GLSLDecompiler::HMergeH1, | 1375 | &GLSLDecompiler::HMergeH1, |
| 1376 | &GLSLDecompiler::HPack2, | ||
| 1406 | 1377 | ||
| 1407 | &GLSLDecompiler::LogicalAssign, | 1378 | &GLSLDecompiler::LogicalAssign, |
| 1408 | &GLSLDecompiler::LogicalAnd, | 1379 | &GLSLDecompiler::LogicalAnd, |
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index f3f78a662..5ae3f344d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -90,15 +90,10 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 90 | const Node op_b = | 90 | const Node op_b = |
| 91 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); | 91 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); |
| 92 | 92 | ||
| 93 | const Node composite = | 93 | SetTemporal(bb, 0, op_a); |
| 94 | Operation(OperationCode::Composite, op_a, op_b, GetRegister(Register::ZeroIndex), | 94 | SetTemporal(bb, 1, op_b); |
| 95 | GetRegister(Register::ZeroIndex)); | 95 | SetRegister(bb, instr.gpr0, GetTemporal(0)); |
| 96 | 96 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | |
| 97 | MetaComponents meta{{0, 1, 2, 3}}; | ||
| 98 | bb.push_back(Operation(OperationCode::AssignComposite, meta, composite, | ||
| 99 | GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), | ||
| 100 | GetRegister(Register::ZeroIndex), | ||
| 101 | GetRegister(Register::ZeroIndex))); | ||
| 102 | break; | 97 | break; |
| 103 | } | 98 | } |
| 104 | default: | 99 | default: |
| @@ -172,10 +167,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 172 | break; | 167 | break; |
| 173 | } | 168 | } |
| 174 | case OpCode::Id::TEX: { | 169 | case OpCode::Id::TEX: { |
| 175 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | ||
| 176 | const bool is_array = instr.tex.array != 0; | ||
| 177 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 178 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 179 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | 170 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), |
| 180 | "AOFFI is not implemented"); | 171 | "AOFFI is not implemented"); |
| 181 | 172 | ||
| @@ -183,27 +174,12 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 183 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | 174 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); |
| 184 | } | 175 | } |
| 185 | 176 | ||
| 186 | const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array); | 177 | const TextureType texture_type{instr.tex.texture_type}; |
| 187 | 178 | const bool is_array = instr.tex.array != 0; | |
| 188 | MetaComponents meta; | 179 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); |
| 189 | std::array<Node, 4> dest; | 180 | const auto process_mode = instr.tex.GetTextureProcessMode(); |
| 190 | 181 | WriteTexInstructionFloat( | |
| 191 | std::size_t dest_elem = 0; | 182 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); |
| 192 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 193 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 194 | // Skip disabled components | ||
| 195 | continue; | ||
| 196 | } | ||
| 197 | meta.components_map[dest_elem] = static_cast<u32>(elem); | ||
| 198 | dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); | ||
| 199 | |||
| 200 | ++dest_elem; | ||
| 201 | } | ||
| 202 | std::generate(dest.begin() + dest_elem, dest.end(), | ||
| 203 | [&]() { return GetRegister(Register::ZeroIndex); }); | ||
| 204 | |||
| 205 | bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0], | ||
| 206 | dest[1], dest[2], dest[3])); | ||
| 207 | break; | 183 | break; |
| 208 | } | 184 | } |
| 209 | case OpCode::Id::TEXS: { | 185 | case OpCode::Id::TEXS: { |
| @@ -216,13 +192,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 216 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | 192 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); |
| 217 | } | 193 | } |
| 218 | 194 | ||
| 219 | const Node texture = | 195 | const Node4 components = |
| 220 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | 196 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); |
| 221 | 197 | ||
| 222 | if (instr.texs.fp32_flag) { | 198 | if (instr.texs.fp32_flag) { |
| 223 | WriteTexsInstructionFloat(bb, instr, texture); | 199 | WriteTexsInstructionFloat(bb, instr, components); |
| 224 | } else { | 200 | } else { |
| 225 | WriteTexsInstructionHalfFloat(bb, instr, texture); | 201 | WriteTexsInstructionHalfFloat(bb, instr, components); |
| 226 | } | 202 | } |
| 227 | break; | 203 | break; |
| 228 | } | 204 | } |
| @@ -242,27 +218,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 242 | const auto texture_type = instr.tld4.texture_type.Value(); | 218 | const auto texture_type = instr.tld4.texture_type.Value(); |
| 243 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | 219 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); |
| 244 | const bool is_array = instr.tld4.array != 0; | 220 | const bool is_array = instr.tld4.array != 0; |
| 245 | const Node texture = GetTld4Code(instr, texture_type, depth_compare, is_array); | 221 | WriteTexInstructionFloat(bb, instr, |
| 246 | 222 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | |
| 247 | MetaComponents meta_components; | ||
| 248 | std::array<Node, 4> dest; | ||
| 249 | |||
| 250 | std::size_t dest_elem = 0; | ||
| 251 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 252 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 253 | // Skip disabled components | ||
| 254 | continue; | ||
| 255 | } | ||
| 256 | meta_components.components_map[dest_elem] = static_cast<u32>(elem); | ||
| 257 | dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); | ||
| 258 | |||
| 259 | ++dest_elem; | ||
| 260 | } | ||
| 261 | std::generate(dest.begin() + dest_elem, dest.end(), | ||
| 262 | [&]() { return GetRegister(Register::ZeroIndex); }); | ||
| 263 | |||
| 264 | bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture, | ||
| 265 | dest[0], dest[1], dest[2], dest[3])); | ||
| 266 | break; | 223 | break; |
| 267 | } | 224 | } |
| 268 | case OpCode::Id::TLD4S: { | 225 | case OpCode::Id::TLD4S: { |
| @@ -277,28 +234,34 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 277 | const Node op_a = GetRegister(instr.gpr8); | 234 | const Node op_a = GetRegister(instr.gpr8); |
| 278 | const Node op_b = GetRegister(instr.gpr20); | 235 | const Node op_b = GetRegister(instr.gpr20); |
| 279 | 236 | ||
| 280 | std::vector<Node> params; | 237 | std::vector<Node> coords; |
| 281 | 238 | ||
| 282 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | 239 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. |
| 283 | if (depth_compare) { | 240 | if (depth_compare) { |
| 284 | // Note: TLD4S coordinate encoding works just like TEXS's | 241 | // Note: TLD4S coordinate encoding works just like TEXS's |
| 285 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | 242 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); |
| 286 | params.push_back(op_a); | 243 | coords.push_back(op_a); |
| 287 | params.push_back(op_y); | 244 | coords.push_back(op_y); |
| 288 | params.push_back(op_b); | 245 | coords.push_back(op_b); |
| 289 | } else { | 246 | } else { |
| 290 | params.push_back(op_a); | 247 | coords.push_back(op_a); |
| 291 | params.push_back(op_b); | 248 | coords.push_back(op_b); |
| 292 | } | 249 | } |
| 293 | const auto num_coords = static_cast<u32>(params.size()); | 250 | const auto num_coords = static_cast<u32>(coords.size()); |
| 294 | params.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | 251 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); |
| 295 | 252 | ||
| 296 | const auto& sampler = | 253 | const auto& sampler = |
| 297 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 254 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); |
| 298 | MetaTexture meta{sampler, num_coords}; | ||
| 299 | 255 | ||
| 300 | WriteTexsInstructionFloat( | 256 | Node4 values; |
| 301 | bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params))); | 257 | for (u32 element = 0; element < values.size(); ++element) { |
| 258 | auto params = coords; | ||
| 259 | MetaTexture meta{sampler, element, num_coords}; | ||
| 260 | values[element] = | ||
| 261 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 262 | } | ||
| 263 | |||
| 264 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 302 | break; | 265 | break; |
| 303 | } | 266 | } |
| 304 | case OpCode::Id::TXQ: { | 267 | case OpCode::Id::TXQ: { |
| @@ -314,18 +277,15 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 314 | 277 | ||
| 315 | switch (instr.txq.query_type) { | 278 | switch (instr.txq.query_type) { |
| 316 | case Tegra::Shader::TextureQueryType::Dimension: { | 279 | case Tegra::Shader::TextureQueryType::Dimension: { |
| 317 | MetaTexture meta_texture{sampler}; | 280 | for (u32 element = 0; element < 4; ++element) { |
| 318 | const MetaComponents meta_components{{0, 1, 2, 3}}; | 281 | MetaTexture meta{sampler, element}; |
| 319 | 282 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | |
| 320 | const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture, | 283 | std::move(meta), GetRegister(instr.gpr8)); |
| 321 | GetRegister(instr.gpr8)); | 284 | SetTemporal(bb, element, value); |
| 322 | std::array<Node, 4> dest; | 285 | } |
| 323 | for (std::size_t i = 0; i < dest.size(); ++i) { | 286 | for (u32 i = 0; i < 4; ++i) { |
| 324 | dest[i] = GetRegister(instr.gpr0.Value() + i); | 287 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); |
| 325 | } | 288 | } |
| 326 | |||
| 327 | bb.push_back(Operation(OperationCode::AssignComposite, meta_components, texture, | ||
| 328 | dest[0], dest[1], dest[2], dest[3])); | ||
| 329 | break; | 289 | break; |
| 330 | } | 290 | } |
| 331 | default: | 291 | default: |
| @@ -366,14 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 366 | texture_type = TextureType::Texture2D; | 326 | texture_type = TextureType::Texture2D; |
| 367 | } | 327 | } |
| 368 | 328 | ||
| 369 | MetaTexture meta_texture{sampler, static_cast<u32>(coords.size())}; | 329 | for (u32 element = 0; element < 2; ++element) { |
| 370 | const Node texture = | 330 | auto params = coords; |
| 371 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords)); | 331 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; |
| 332 | const Node value = | ||
| 333 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 334 | SetTemporal(bb, element, value); | ||
| 335 | } | ||
| 336 | for (u32 element = 0; element < 2; ++element) { | ||
| 337 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 338 | } | ||
| 372 | 339 | ||
| 373 | const MetaComponents meta_composite{{0, 1, 2, 3}}; | ||
| 374 | bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture, | ||
| 375 | GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), | ||
| 376 | GetRegister(Register::ZeroIndex), GetRegister(Register::ZeroIndex))); | ||
| 377 | break; | 340 | break; |
| 378 | } | 341 | } |
| 379 | case OpCode::Id::TLDS: { | 342 | case OpCode::Id::TLDS: { |
| @@ -388,8 +351,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 388 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | 351 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); |
| 389 | } | 352 | } |
| 390 | 353 | ||
| 391 | const Node texture = GetTldsCode(instr, texture_type, is_array); | 354 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); |
| 392 | WriteTexsInstructionFloat(bb, instr, texture); | ||
| 393 | break; | 355 | break; |
| 394 | } | 356 | } |
| 395 | default: | 357 | default: |
| @@ -419,57 +381,80 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 419 | return *used_samplers.emplace(entry).first; | 381 | return *used_samplers.emplace(entry).first; |
| 420 | } | 382 | } |
| 421 | 383 | ||
| 422 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) { | 384 | void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, |
| 385 | const Node4& components) { | ||
| 386 | u32 dest_elem = 0; | ||
| 387 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 388 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 389 | // Skip disabled components | ||
| 390 | continue; | ||
| 391 | } | ||
| 392 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 393 | } | ||
| 394 | // After writing values in temporals, move them to the real registers | ||
| 395 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 396 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | |||
| 400 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 401 | const Node4& components) { | ||
| 423 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 402 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle |
| 424 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 403 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 |
| 425 | 404 | ||
| 426 | MetaComponents meta; | 405 | u32 dest_elem = 0; |
| 427 | std::array<Node, 4> dest; | ||
| 428 | for (u32 component = 0; component < 4; ++component) { | 406 | for (u32 component = 0; component < 4; ++component) { |
| 429 | if (!instr.texs.IsComponentEnabled(component)) { | 407 | if (!instr.texs.IsComponentEnabled(component)) |
| 430 | continue; | 408 | continue; |
| 431 | } | 409 | SetTemporal(bb, dest_elem++, components[component]); |
| 432 | meta.components_map[meta.count] = component; | 410 | } |
| 433 | 411 | ||
| 434 | if (meta.count < 2) { | 412 | for (u32 i = 0; i < dest_elem; ++i) { |
| 413 | if (i < 2) { | ||
| 435 | // Write the first two swizzle components to gpr0 and gpr0+1 | 414 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 436 | dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2); | 415 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); |
| 437 | } else { | 416 | } else { |
| 438 | ASSERT(instr.texs.HasTwoDestinations()); | 417 | ASSERT(instr.texs.HasTwoDestinations()); |
| 439 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 418 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 440 | dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2); | 419 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); |
| 441 | } | 420 | } |
| 442 | ++meta.count; | ||
| 443 | } | 421 | } |
| 444 | |||
| 445 | std::generate(dest.begin() + meta.count, dest.end(), | ||
| 446 | [&]() { return GetRegister(Register::ZeroIndex); }); | ||
| 447 | |||
| 448 | bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], | ||
| 449 | dest[3])); | ||
| 450 | } | 422 | } |
| 451 | 423 | ||
| 452 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) { | 424 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, |
| 425 | const Node4& components) { | ||
| 453 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half | 426 | // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half |
| 454 | // float instruction). | 427 | // float instruction). |
| 455 | 428 | ||
| 456 | MetaComponents meta; | 429 | Node4 values; |
| 430 | u32 dest_elem = 0; | ||
| 457 | for (u32 component = 0; component < 4; ++component) { | 431 | for (u32 component = 0; component < 4; ++component) { |
| 458 | if (!instr.texs.IsComponentEnabled(component)) | 432 | if (!instr.texs.IsComponentEnabled(component)) |
| 459 | continue; | 433 | continue; |
| 460 | meta.components_map[meta.count++] = component; | 434 | values[dest_elem++] = components[component]; |
| 461 | } | 435 | } |
| 462 | if (meta.count == 0) | 436 | if (dest_elem == 0) |
| 463 | return; | 437 | return; |
| 464 | 438 | ||
| 465 | bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture, | 439 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); |
| 466 | GetRegister(instr.gpr0), GetRegister(instr.gpr28))); | 440 | |
| 441 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 442 | if (dest_elem <= 2) { | ||
| 443 | SetRegister(bb, instr.gpr0, first_value); | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | SetTemporal(bb, 0, first_value); | ||
| 448 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 449 | |||
| 450 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 451 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 467 | } | 452 | } |
| 468 | 453 | ||
| 469 | Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 454 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 470 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | 455 | TextureProcessMode process_mode, bool depth_compare, bool is_array, |
| 471 | std::size_t array_offset, std::size_t bias_offset, | 456 | std::size_t array_offset, std::size_t bias_offset, |
| 472 | std::vector<Node>&& coords) { | 457 | std::vector<Node>&& coords) { |
| 473 | UNIMPLEMENTED_IF_MSG( | 458 | UNIMPLEMENTED_IF_MSG( |
| 474 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || | 459 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || |
| 475 | (texture_type == TextureType::TextureCube && is_array && depth_compare), | 460 | (texture_type == TextureType::TextureCube && is_array && depth_compare), |
| @@ -495,24 +480,31 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 495 | std::optional<u32> array_offset_value; | 480 | std::optional<u32> array_offset_value; |
| 496 | if (is_array) | 481 | if (is_array) |
| 497 | array_offset_value = static_cast<u32>(array_offset); | 482 | array_offset_value = static_cast<u32>(array_offset); |
| 498 | MetaTexture meta{sampler, static_cast<u32>(coords.size()), array_offset_value}; | 483 | |
| 499 | std::vector<Node> params = std::move(coords); | 484 | const auto coords_count = static_cast<u32>(coords.size()); |
| 500 | 485 | ||
| 501 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | 486 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { |
| 502 | if (process_mode == TextureProcessMode::LZ) { | 487 | if (process_mode == TextureProcessMode::LZ) { |
| 503 | params.push_back(Immediate(0.0f)); | 488 | coords.push_back(Immediate(0.0f)); |
| 504 | } else { | 489 | } else { |
| 505 | // If present, lod or bias are always stored in the register indexed by the gpr20 field | 490 | // If present, lod or bias are always stored in the register indexed by the gpr20 |
| 506 | // with an offset depending on the usage of the other registers | 491 | // field with an offset depending on the usage of the other registers |
| 507 | params.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | 492 | coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); |
| 508 | } | 493 | } |
| 509 | } | 494 | } |
| 510 | 495 | ||
| 511 | return Operation(read_method, meta, std::move(params)); | 496 | Node4 values; |
| 497 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 498 | auto params = coords; | ||
| 499 | MetaTexture meta{sampler, element, coords_count, array_offset_value}; | ||
| 500 | values[element] = Operation(read_method, std::move(meta), std::move(params)); | ||
| 501 | } | ||
| 502 | |||
| 503 | return values; | ||
| 512 | } | 504 | } |
| 513 | 505 | ||
| 514 | Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | 506 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, |
| 515 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | 507 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { |
| 516 | const bool lod_bias_enabled = | 508 | const bool lod_bias_enabled = |
| 517 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 509 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |
| 518 | 510 | ||
| @@ -551,8 +543,8 @@ Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 551 | 0, std::move(coords)); | 543 | 0, std::move(coords)); |
| 552 | } | 544 | } |
| 553 | 545 | ||
| 554 | Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 546 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| 555 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | 547 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { |
| 556 | const bool lod_bias_enabled = | 548 | const bool lod_bias_enabled = |
| 557 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 549 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |
| 558 | 550 | ||
| @@ -593,8 +585,8 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 593 | (coord_count > 2 ? 1 : 0), std::move(coords)); | 585 | (coord_count > 2 ? 1 : 0), std::move(coords)); |
| 594 | } | 586 | } |
| 595 | 587 | ||
| 596 | Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 588 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| 597 | bool is_array) { | 589 | bool is_array) { |
| 598 | const std::size_t coord_count = GetCoordCount(texture_type); | 590 | const std::size_t coord_count = GetCoordCount(texture_type); |
| 599 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | 591 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); |
| 600 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | 592 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); |
| @@ -604,24 +596,31 @@ Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool dep | |||
| 604 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | 596 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used |
| 605 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 597 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 606 | 598 | ||
| 607 | std::vector<Node> params; | 599 | std::vector<Node> coords; |
| 608 | 600 | ||
| 609 | for (size_t i = 0; i < coord_count; ++i) { | 601 | for (size_t i = 0; i < coord_count; ++i) { |
| 610 | params.push_back(GetRegister(coord_register + i)); | 602 | coords.push_back(GetRegister(coord_register + i)); |
| 611 | } | 603 | } |
| 612 | std::optional<u32> array_offset; | 604 | std::optional<u32> array_offset; |
| 613 | if (is_array) { | 605 | if (is_array) { |
| 614 | array_offset = static_cast<u32>(params.size()); | 606 | array_offset = static_cast<u32>(coords.size()); |
| 615 | params.push_back(GetRegister(array_register)); | 607 | coords.push_back(GetRegister(array_register)); |
| 616 | } | 608 | } |
| 617 | 609 | ||
| 618 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 610 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 619 | MetaTexture meta{sampler, static_cast<u32>(params.size()), array_offset}; | ||
| 620 | 611 | ||
| 621 | return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | 612 | Node4 values; |
| 613 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 614 | auto params = coords; | ||
| 615 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | ||
| 616 | values[element] = | ||
| 617 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 618 | } | ||
| 619 | |||
| 620 | return values; | ||
| 622 | } | 621 | } |
| 623 | 622 | ||
| 624 | Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 623 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 625 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 624 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 626 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | 625 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); |
| 627 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 626 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
| @@ -636,36 +635,41 @@ Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_ | |||
| 636 | ? static_cast<u64>(instr.gpr20.Value()) | 635 | ? static_cast<u64>(instr.gpr20.Value()) |
| 637 | : coord_register + 1; | 636 | : coord_register + 1; |
| 638 | 637 | ||
| 639 | std::vector<Node> params; | 638 | std::vector<Node> coords; |
| 640 | 639 | ||
| 641 | for (std::size_t i = 0; i < type_coord_count; ++i) { | 640 | for (std::size_t i = 0; i < type_coord_count; ++i) { |
| 642 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | 641 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); |
| 643 | params.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 642 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 644 | } | 643 | } |
| 645 | std::optional<u32> array_offset; | 644 | std::optional<u32> array_offset; |
| 646 | if (is_array) { | 645 | if (is_array) { |
| 647 | array_offset = static_cast<u32>(params.size()); | 646 | array_offset = static_cast<u32>(coords.size()); |
| 648 | params.push_back(GetRegister(array_register)); | 647 | coords.push_back(GetRegister(array_register)); |
| 649 | } | 648 | } |
| 650 | const auto coords_count = static_cast<u32>(params.size()); | 649 | const auto coords_count = static_cast<u32>(coords.size()); |
| 651 | 650 | ||
| 652 | if (lod_enabled) { | 651 | if (lod_enabled) { |
| 653 | // When lod is used, it is always in gpr20 | 652 | // When lod is used, it is always in gpr20 |
| 654 | params.push_back(GetRegister(instr.gpr20)); | 653 | coords.push_back(GetRegister(instr.gpr20)); |
| 655 | } else { | 654 | } else { |
| 656 | params.push_back(Immediate(0)); | 655 | coords.push_back(Immediate(0)); |
| 657 | } | 656 | } |
| 658 | 657 | ||
| 659 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 658 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |
| 660 | MetaTexture meta{sampler, coords_count, array_offset}; | ||
| 661 | 659 | ||
| 662 | return Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | 660 | Node4 values; |
| 661 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 662 | auto params = coords; | ||
| 663 | MetaTexture meta{sampler, element, coords_count, array_offset}; | ||
| 664 | values[element] = | ||
| 665 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 666 | } | ||
| 667 | return values; | ||
| 663 | } | 668 | } |
| 664 | 669 | ||
| 665 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | 670 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( |
| 666 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | 671 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, |
| 667 | std::size_t max_coords, std::size_t max_inputs) { | 672 | std::size_t max_coords, std::size_t max_inputs) { |
| 668 | |||
| 669 | const std::size_t coord_count = GetCoordCount(texture_type); | 673 | const std::size_t coord_count = GetCoordCount(texture_type); |
| 670 | 674 | ||
| 671 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | 675 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); |
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index d4e304b4e..4474af7c4 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -121,6 +121,10 @@ Node ShaderIR::GetLocalMemory(Node address) { | |||
| 121 | return StoreNode(LmemNode(address)); | 121 | return StoreNode(LmemNode(address)); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | Node ShaderIR::GetTemporal(u32 id) { | ||
| 125 | return GetRegister(Register::ZeroIndex + 1 + id); | ||
| 126 | } | ||
| 127 | |||
| 124 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { | 128 | Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) { |
| 125 | if (absolute) { | 129 | if (absolute) { |
| 126 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); | 130 | value = Operation(OperationCode::FAbsolute, NO_PRECISE, value); |
| @@ -348,6 +352,10 @@ void ShaderIR::SetLocalMemory(BasicBlock& bb, Node address, Node value) { | |||
| 348 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); | 352 | bb.push_back(Operation(OperationCode::Assign, GetLocalMemory(address), value)); |
| 349 | } | 353 | } |
| 350 | 354 | ||
| 355 | void ShaderIR::SetTemporal(BasicBlock& bb, u32 id, Node value) { | ||
| 356 | SetRegister(bb, Register::ZeroIndex + 1 + id, value); | ||
| 357 | } | ||
| 358 | |||
| 351 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { | 359 | Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) { |
| 352 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), | 360 | return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, value, Immediate(offset), |
| 353 | Immediate(bits)); | 361 | Immediate(bits)); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index b8bec0d9e..0c8f4a265 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 7 | #include <cstring> | 8 | #include <cstring> |
| 8 | #include <map> | 9 | #include <map> |
| 9 | #include <set> | 10 | #include <set> |
| @@ -37,17 +38,15 @@ using NodeData = | |||
| 37 | std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, | 38 | std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode, |
| 38 | PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; | 39 | PredicateNode, AbufNode, CbufNode, LmemNode, GmemNode, CommentNode>; |
| 39 | using Node = const NodeData*; | 40 | using Node = const NodeData*; |
| 41 | using Node4 = std::array<Node, 4>; | ||
| 40 | using BasicBlock = std::vector<Node>; | 42 | using BasicBlock = std::vector<Node>; |
| 41 | 43 | ||
| 42 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; | 44 | constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; |
| 43 | 45 | ||
| 44 | enum class OperationCode { | 46 | enum class OperationCode { |
| 45 | Assign, /// (float& dest, float src) -> void | 47 | Assign, /// (float& dest, float src) -> void |
| 46 | AssignComposite, /// (MetaComponents, float4 src, float&[4] dst) -> void | ||
| 47 | AssignCompositeHalf, /// (MetaComponents, float4 src, float&[2] dst) -> void | ||
| 48 | 48 | ||
| 49 | Composite, /// (float[4] values) -> float4 | 49 | Select, /// (MetaArithmetic, bool pred, float a, float b) -> float |
| 50 | Select, /// (MetaArithmetic, bool pred, float a, float b) -> float | ||
| 51 | 50 | ||
| 52 | FAdd, /// (MetaArithmetic, float a, float b) -> float | 51 | FAdd, /// (MetaArithmetic, float a, float b) -> float |
| 53 | FMul, /// (MetaArithmetic, float a, float b) -> float | 52 | FMul, /// (MetaArithmetic, float a, float b) -> float |
| @@ -117,6 +116,7 @@ enum class OperationCode { | |||
| 117 | HMergeF32, /// (f16vec2 src) -> float | 116 | HMergeF32, /// (f16vec2 src) -> float |
| 118 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 117 | HMergeH0, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 119 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 | 118 | HMergeH1, /// (f16vec2 dest, f16vec2 src) -> f16vec2 |
| 119 | HPack2, /// (float a, float b) -> f16vec2 | ||
| 120 | 120 | ||
| 121 | LogicalAssign, /// (bool& dst, bool src) -> void | 121 | LogicalAssign, /// (bool& dst, bool src) -> void |
| 122 | LogicalAnd, /// (bool a, bool b) -> bool | 122 | LogicalAnd, /// (bool a, bool b) -> bool |
| @@ -270,24 +270,16 @@ struct MetaHalfArithmetic { | |||
| 270 | 270 | ||
| 271 | struct MetaTexture { | 271 | struct MetaTexture { |
| 272 | const Sampler& sampler; | 272 | const Sampler& sampler; |
| 273 | u32 element{}; | ||
| 273 | u32 coords_count{}; | 274 | u32 coords_count{}; |
| 274 | std::optional<u32> array_index; | 275 | std::optional<u32> array_index; |
| 275 | }; | 276 | }; |
| 276 | 277 | ||
| 277 | struct MetaComponents { | ||
| 278 | std::array<u32, 4> components_map{}; | ||
| 279 | u32 count{}; | ||
| 280 | |||
| 281 | u32 GetSourceComponent(u32 dest_index) const { | ||
| 282 | return components_map[dest_index]; | ||
| 283 | } | ||
| 284 | }; | ||
| 285 | |||
| 286 | constexpr MetaArithmetic PRECISE = {true}; | 278 | constexpr MetaArithmetic PRECISE = {true}; |
| 287 | constexpr MetaArithmetic NO_PRECISE = {false}; | 279 | constexpr MetaArithmetic NO_PRECISE = {false}; |
| 288 | constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; | 280 | constexpr MetaHalfArithmetic HALF_NO_PRECISE = {false}; |
| 289 | 281 | ||
| 290 | using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture, MetaComponents>; | 282 | using Meta = std::variant<MetaArithmetic, MetaHalfArithmetic, MetaTexture>; |
| 291 | 283 | ||
| 292 | /// Holds any kind of operation that can be done in the IR | 284 | /// Holds any kind of operation that can be done in the IR |
| 293 | class OperationNode final { | 285 | class OperationNode final { |
| @@ -643,6 +635,8 @@ private: | |||
| 643 | Node GetInternalFlag(InternalFlag flag, bool negated = false); | 635 | Node GetInternalFlag(InternalFlag flag, bool negated = false); |
| 644 | /// Generates a node representing a local memory address | 636 | /// Generates a node representing a local memory address |
| 645 | Node GetLocalMemory(Node address); | 637 | Node GetLocalMemory(Node address); |
| 638 | /// Generates a temporal. Internally it uses a post-RZ register | ||
| 639 | Node GetTemporal(u32 id); | ||
| 646 | 640 | ||
| 647 | /// Sets a register. src value must be a number-evaluated node. | 641 | /// Sets a register. src value must be a number-evaluated node. |
| 648 | void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); | 642 | void SetRegister(BasicBlock& bb, Tegra::Shader::Register dest, Node src); |
| @@ -652,6 +646,8 @@ private: | |||
| 652 | void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); | 646 | void SetInternalFlag(BasicBlock& bb, InternalFlag flag, Node value); |
| 653 | /// Sets a local memory address. address and value must be number-evaluated nodes | 647 | /// Sets a local memory address. address and value must be number-evaluated nodes |
| 654 | void SetLocalMemory(BasicBlock& bb, Node address, Node value); | 648 | void SetLocalMemory(BasicBlock& bb, Node address, Node value); |
| 649 | /// Sets a temporal. Internally it uses a post-RZ register | ||
| 650 | void SetTemporal(BasicBlock& bb, u32 id, Node value); | ||
| 655 | 651 | ||
| 656 | /// Conditionally absolute/negated float. Absolute is applied first | 652 | /// Conditionally absolute/negated float. Absolute is applied first |
| 657 | Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); | 653 | Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate); |
| @@ -692,32 +688,36 @@ private: | |||
| 692 | /// Extracts a sequence of bits from a node | 688 | /// Extracts a sequence of bits from a node |
| 693 | Node BitfieldExtract(Node value, u32 offset, u32 bits); | 689 | Node BitfieldExtract(Node value, u32 offset, u32 bits); |
| 694 | 690 | ||
| 695 | void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, Node texture); | 691 | void WriteTexInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
| 696 | void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, | 692 | const Node4& components); |
| 697 | Node texture); | ||
| 698 | 693 | ||
| 699 | Node GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 694 | void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
| 700 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 695 | const Node4& components); |
| 701 | bool is_array); | 696 | void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, |
| 697 | const Node4& components); | ||
| 702 | 698 | ||
| 703 | Node GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 699 | Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 704 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 700 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 705 | bool is_array); | 701 | bool is_array); |
| 706 | 702 | ||
| 707 | Node GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 703 | Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 708 | bool depth_compare, bool is_array); | 704 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 705 | bool is_array); | ||
| 709 | 706 | ||
| 710 | Node GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 707 | Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 711 | bool is_array); | 708 | bool depth_compare, bool is_array); |
| 709 | |||
| 710 | Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||
| 711 | bool is_array); | ||
| 712 | 712 | ||
| 713 | std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( | 713 | std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement( |
| 714 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | 714 | Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |
| 715 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | 715 | bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |
| 716 | 716 | ||
| 717 | Node GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | 717 | Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |
| 718 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | 718 | Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |
| 719 | bool is_array, std::size_t array_offset, std::size_t bias_offset, | 719 | bool is_array, std::size_t array_offset, std::size_t bias_offset, |
| 720 | std::vector<Node>&& coords); | 720 | std::vector<Node>&& coords); |
| 721 | 721 | ||
| 722 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | 722 | Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |
| 723 | u64 byte_height); | 723 | u64 byte_height); |