diff options
| author | 2018-12-27 01:50:22 -0300 | |
|---|---|---|
| committer | 2019-01-15 17:54:53 -0300 | |
| commit | d911740e5d474ae459f9e05d82a7dba9c7e06340 (patch) | |
| tree | 1f513427747b05f1305949791ddf90d1406c99c7 /src/video_core/shader/decode | |
| parent | gl_shader_decompiler: Fixup AssignCompositeHalf (diff) | |
| download | yuzu-d911740e5d474ae459f9e05d82a7dba9c7e06340.tar.gz yuzu-d911740e5d474ae459f9e05d82a7dba9c7e06340.tar.xz yuzu-d911740e5d474ae459f9e05d82a7dba9c7e06340.zip | |
shader_ir: Remove composite primitives and use temporals instead
Diffstat (limited to 'src/video_core/shader/decode')
| -rw-r--r-- | src/video_core/shader/decode/memory.cpp | 294 |
1 files changed, 149 insertions, 145 deletions
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index f3f78a662..5ae3f344d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp | |||
| @@ -90,15 +90,10 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 90 | const Node op_b = | 90 | const Node op_b = |
| 91 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); | 91 | GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4, index); |
| 92 | 92 | ||
| 93 | const Node composite = | 93 | SetTemporal(bb, 0, op_a); |
| 94 | Operation(OperationCode::Composite, op_a, op_b, GetRegister(Register::ZeroIndex), | 94 | SetTemporal(bb, 1, op_b); |
| 95 | GetRegister(Register::ZeroIndex)); | 95 | SetRegister(bb, instr.gpr0, GetTemporal(0)); |
| 96 | 96 | SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1)); | |
| 97 | MetaComponents meta{{0, 1, 2, 3}}; | ||
| 98 | bb.push_back(Operation(OperationCode::AssignComposite, meta, composite, | ||
| 99 | GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), | ||
| 100 | GetRegister(Register::ZeroIndex), | ||
| 101 | GetRegister(Register::ZeroIndex))); | ||
| 102 | break; | 97 | break; |
| 103 | } | 98 | } |
| 104 | default: | 99 | default: |
| @@ -172,10 +167,6 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 172 | break; | 167 | break; |
| 173 | } | 168 | } |
| 174 | case OpCode::Id::TEX: { | 169 | case OpCode::Id::TEX: { |
| 175 | Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; | ||
| 176 | const bool is_array = instr.tex.array != 0; | ||
| 177 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||
| 178 | const auto process_mode = instr.tex.GetTextureProcessMode(); | ||
| 179 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | 170 | UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), |
| 180 | "AOFFI is not implemented"); | 171 | "AOFFI is not implemented"); |
| 181 | 172 | ||
| @@ -183,27 +174,12 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 183 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | 174 | LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); |
| 184 | } | 175 | } |
| 185 | 176 | ||
| 186 | const Node texture = GetTexCode(instr, texture_type, process_mode, depth_compare, is_array); | 177 | const TextureType texture_type{instr.tex.texture_type}; |
| 187 | 178 | const bool is_array = instr.tex.array != 0; | |
| 188 | MetaComponents meta; | 179 | const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); |
| 189 | std::array<Node, 4> dest; | 180 | const auto process_mode = instr.tex.GetTextureProcessMode(); |
| 190 | 181 | WriteTexInstructionFloat( | |
| 191 | std::size_t dest_elem = 0; | 182 | bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); |
| 192 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 193 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 194 | // Skip disabled components | ||
| 195 | continue; | ||
| 196 | } | ||
| 197 | meta.components_map[dest_elem] = static_cast<u32>(elem); | ||
| 198 | dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); | ||
| 199 | |||
| 200 | ++dest_elem; | ||
| 201 | } | ||
| 202 | std::generate(dest.begin() + dest_elem, dest.end(), | ||
| 203 | [&]() { return GetRegister(Register::ZeroIndex); }); | ||
| 204 | |||
| 205 | bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta), texture, dest[0], | ||
| 206 | dest[1], dest[2], dest[3])); | ||
| 207 | break; | 183 | break; |
| 208 | } | 184 | } |
| 209 | case OpCode::Id::TEXS: { | 185 | case OpCode::Id::TEXS: { |
| @@ -216,13 +192,13 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 216 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); | 192 | LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete"); |
| 217 | } | 193 | } |
| 218 | 194 | ||
| 219 | const Node texture = | 195 | const Node4 components = |
| 220 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); | 196 | GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array); |
| 221 | 197 | ||
| 222 | if (instr.texs.fp32_flag) { | 198 | if (instr.texs.fp32_flag) { |
| 223 | WriteTexsInstructionFloat(bb, instr, texture); | 199 | WriteTexsInstructionFloat(bb, instr, components); |
| 224 | } else { | 200 | } else { |
| 225 | WriteTexsInstructionHalfFloat(bb, instr, texture); | 201 | WriteTexsInstructionHalfFloat(bb, instr, components); |
| 226 | } | 202 | } |
| 227 | break; | 203 | break; |
| 228 | } | 204 | } |
| @@ -242,27 +218,8 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 242 | const auto texture_type = instr.tld4.texture_type.Value(); | 218 | const auto texture_type = instr.tld4.texture_type.Value(); |
| 243 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | 219 | const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); |
| 244 | const bool is_array = instr.tld4.array != 0; | 220 | const bool is_array = instr.tld4.array != 0; |
| 245 | const Node texture = GetTld4Code(instr, texture_type, depth_compare, is_array); | 221 | WriteTexInstructionFloat(bb, instr, |
| 246 | 222 | GetTld4Code(instr, texture_type, depth_compare, is_array)); | |
| 247 | MetaComponents meta_components; | ||
| 248 | std::array<Node, 4> dest; | ||
| 249 | |||
| 250 | std::size_t dest_elem = 0; | ||
| 251 | for (std::size_t elem = 0; elem < 4; ++elem) { | ||
| 252 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 253 | // Skip disabled components | ||
| 254 | continue; | ||
| 255 | } | ||
| 256 | meta_components.components_map[dest_elem] = static_cast<u32>(elem); | ||
| 257 | dest[dest_elem] = GetRegister(instr.gpr0.Value() + dest_elem); | ||
| 258 | |||
| 259 | ++dest_elem; | ||
| 260 | } | ||
| 261 | std::generate(dest.begin() + dest_elem, dest.end(), | ||
| 262 | [&]() { return GetRegister(Register::ZeroIndex); }); | ||
| 263 | |||
| 264 | bb.push_back(Operation(OperationCode::AssignComposite, std::move(meta_components), texture, | ||
| 265 | dest[0], dest[1], dest[2], dest[3])); | ||
| 266 | break; | 223 | break; |
| 267 | } | 224 | } |
| 268 | case OpCode::Id::TLD4S: { | 225 | case OpCode::Id::TLD4S: { |
| @@ -277,28 +234,34 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 277 | const Node op_a = GetRegister(instr.gpr8); | 234 | const Node op_a = GetRegister(instr.gpr8); |
| 278 | const Node op_b = GetRegister(instr.gpr20); | 235 | const Node op_b = GetRegister(instr.gpr20); |
| 279 | 236 | ||
| 280 | std::vector<Node> params; | 237 | std::vector<Node> coords; |
| 281 | 238 | ||
| 282 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. | 239 | // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction. |
| 283 | if (depth_compare) { | 240 | if (depth_compare) { |
| 284 | // Note: TLD4S coordinate encoding works just like TEXS's | 241 | // Note: TLD4S coordinate encoding works just like TEXS's |
| 285 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); | 242 | const Node op_y = GetRegister(instr.gpr8.Value() + 1); |
| 286 | params.push_back(op_a); | 243 | coords.push_back(op_a); |
| 287 | params.push_back(op_y); | 244 | coords.push_back(op_y); |
| 288 | params.push_back(op_b); | 245 | coords.push_back(op_b); |
| 289 | } else { | 246 | } else { |
| 290 | params.push_back(op_a); | 247 | coords.push_back(op_a); |
| 291 | params.push_back(op_b); | 248 | coords.push_back(op_b); |
| 292 | } | 249 | } |
| 293 | const auto num_coords = static_cast<u32>(params.size()); | 250 | const auto num_coords = static_cast<u32>(coords.size()); |
| 294 | params.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); | 251 | coords.push_back(Immediate(static_cast<u32>(instr.tld4s.component))); |
| 295 | 252 | ||
| 296 | const auto& sampler = | 253 | const auto& sampler = |
| 297 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 254 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); |
| 298 | MetaTexture meta{sampler, num_coords}; | ||
| 299 | 255 | ||
| 300 | WriteTexsInstructionFloat( | 256 | Node4 values; |
| 301 | bb, instr, Operation(OperationCode::F4TextureGather, meta, std::move(params))); | 257 | for (u32 element = 0; element < values.size(); ++element) { |
| 258 | auto params = coords; | ||
| 259 | MetaTexture meta{sampler, element, num_coords}; | ||
| 260 | values[element] = | ||
| 261 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 262 | } | ||
| 263 | |||
| 264 | WriteTexsInstructionFloat(bb, instr, values); | ||
| 302 | break; | 265 | break; |
| 303 | } | 266 | } |
| 304 | case OpCode::Id::TXQ: { | 267 | case OpCode::Id::TXQ: { |
| @@ -314,18 +277,15 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 314 | 277 | ||
| 315 | switch (instr.txq.query_type) { | 278 | switch (instr.txq.query_type) { |
| 316 | case Tegra::Shader::TextureQueryType::Dimension: { | 279 | case Tegra::Shader::TextureQueryType::Dimension: { |
| 317 | MetaTexture meta_texture{sampler}; | 280 | for (u32 element = 0; element < 4; ++element) { |
| 318 | const MetaComponents meta_components{{0, 1, 2, 3}}; | 281 | MetaTexture meta{sampler, element}; |
| 319 | 282 | const Node value = Operation(OperationCode::F4TextureQueryDimensions, | |
| 320 | const Node texture = Operation(OperationCode::F4TextureQueryDimensions, meta_texture, | 283 | std::move(meta), GetRegister(instr.gpr8)); |
| 321 | GetRegister(instr.gpr8)); | 284 | SetTemporal(bb, element, value); |
| 322 | std::array<Node, 4> dest; | 285 | } |
| 323 | for (std::size_t i = 0; i < dest.size(); ++i) { | 286 | for (u32 i = 0; i < 4; ++i) { |
| 324 | dest[i] = GetRegister(instr.gpr0.Value() + i); | 287 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); |
| 325 | } | 288 | } |
| 326 | |||
| 327 | bb.push_back(Operation(OperationCode::AssignComposite, meta_components, texture, | ||
| 328 | dest[0], dest[1], dest[2], dest[3])); | ||
| 329 | break; | 289 | break; |
| 330 | } | 290 | } |
| 331 | default: | 291 | default: |
| @@ -366,14 +326,17 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 366 | texture_type = TextureType::Texture2D; | 326 | texture_type = TextureType::Texture2D; |
| 367 | } | 327 | } |
| 368 | 328 | ||
| 369 | MetaTexture meta_texture{sampler, static_cast<u32>(coords.size())}; | 329 | for (u32 element = 0; element < 2; ++element) { |
| 370 | const Node texture = | 330 | auto params = coords; |
| 371 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(coords)); | 331 | MetaTexture meta_texture{sampler, element, static_cast<u32>(coords.size())}; |
| 332 | const Node value = | ||
| 333 | Operation(OperationCode::F4TextureQueryLod, meta_texture, std::move(params)); | ||
| 334 | SetTemporal(bb, element, value); | ||
| 335 | } | ||
| 336 | for (u32 element = 0; element < 2; ++element) { | ||
| 337 | SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element)); | ||
| 338 | } | ||
| 372 | 339 | ||
| 373 | const MetaComponents meta_composite{{0, 1, 2, 3}}; | ||
| 374 | bb.push_back(Operation(OperationCode::AssignComposite, meta_composite, texture, | ||
| 375 | GetRegister(instr.gpr0), GetRegister(instr.gpr0.Value() + 1), | ||
| 376 | GetRegister(Register::ZeroIndex), GetRegister(Register::ZeroIndex))); | ||
| 377 | break; | 340 | break; |
| 378 | } | 341 | } |
| 379 | case OpCode::Id::TLDS: { | 342 | case OpCode::Id::TLDS: { |
| @@ -388,8 +351,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { | |||
| 388 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); | 351 | LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete"); |
| 389 | } | 352 | } |
| 390 | 353 | ||
| 391 | const Node texture = GetTldsCode(instr, texture_type, is_array); | 354 | WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array)); |
| 392 | WriteTexsInstructionFloat(bb, instr, texture); | ||
| 393 | break; | 355 | break; |
| 394 | } | 356 | } |
| 395 | default: | 357 | default: |
| @@ -419,57 +381,80 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 419 | return *used_samplers.emplace(entry).first; | 381 | return *used_samplers.emplace(entry).first; |
| 420 | } | 382 | } |
| 421 | 383 | ||
| 422 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) { | 384 | void ShaderIR::WriteTexInstructionFloat(BasicBlock& bb, Instruction instr, |
| 385 | const Node4& components) { | ||
| 386 | u32 dest_elem = 0; | ||
| 387 | for (u32 elem = 0; elem < 4; ++elem) { | ||
| 388 | if (!instr.tex.IsComponentEnabled(elem)) { | ||
| 389 | // Skip disabled components | ||
| 390 | continue; | ||
| 391 | } | ||
| 392 | SetTemporal(bb, dest_elem++, components[elem]); | ||
| 393 | } | ||
| 394 | // After writing values in temporals, move them to the real registers | ||
| 395 | for (u32 i = 0; i < dest_elem; ++i) { | ||
| 396 | SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i)); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | |||
| 400 | void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, | ||
| 401 | const Node4& components) { | ||
| 423 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle | 402 | // TEXS has two destination registers and a swizzle. The first two elements in the swizzle |
| 424 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 | 403 | // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 |
| 425 | 404 | ||
| 426 | MetaComponents meta; | 405 | u32 dest_elem = 0; |
| 427 | std::array<Node, 4> dest; | ||
| 428 | for (u32 component = 0; component < 4; ++component) { | 406 | for (u32 component = 0; component < 4; ++component) { |
| 429 | if (!instr.texs.IsComponentEnabled(component)) { | 407 | if (!instr.texs.IsComponentEnabled(component)) |
| 430 | continue; | 408 | continue; |
| 431 | } | 409 | SetTemporal(bb, dest_elem++, components[component]); |
| 432 | meta.components_map[meta.count] = component; | 410 | } |
| 433 | 411 | ||
| 434 | if (meta.count < 2) { | 412 | for (u32 i = 0; i < dest_elem; ++i) { |
| 413 | if (i < 2) { | ||
| 435 | // Write the first two swizzle components to gpr0 and gpr0+1 | 414 | // Write the first two swizzle components to gpr0 and gpr0+1 |
| 436 | dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2); | 415 | SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i)); |
| 437 | } else { | 416 | } else { |
| 438 | ASSERT(instr.texs.HasTwoDestinations()); | 417 | ASSERT(instr.texs.HasTwoDestinations()); |
| 439 | // Write the rest of the swizzle components to gpr28 and gpr28+1 | 418 | // Write the rest of the swizzle components to gpr28 and gpr28+1 |
| 440 | dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2); | 419 | SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i)); |
| 441 | } | 420 | } |
| 442 | ++meta.count; | ||
| 443 | } | 421 | } |
| 444 | |||
| 445 | std::generate(dest.begin() + meta.count, dest.end(), | ||
| 446 | [&]() { return GetRegister(Register::ZeroIndex); }); | ||
| 447 | |||
| 448 | bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], | ||
| 449 | dest[3])); | ||
| 450 | } | 422 | } |
| 451 | 423 | ||
| 452 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) { | 424 | void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, |
| 425 | const Node4& components) { | ||
| 453 | // TEXS.F16 destination registers are packed in two registers in pairs (just like any half | 426 | // TEXS.F16 destination registers are packed in two registers in pairs (just like any half |
| 454 | // float instruction). | 427 | // float instruction). |
| 455 | 428 | ||
| 456 | MetaComponents meta; | 429 | Node4 values; |
| 430 | u32 dest_elem = 0; | ||
| 457 | for (u32 component = 0; component < 4; ++component) { | 431 | for (u32 component = 0; component < 4; ++component) { |
| 458 | if (!instr.texs.IsComponentEnabled(component)) | 432 | if (!instr.texs.IsComponentEnabled(component)) |
| 459 | continue; | 433 | continue; |
| 460 | meta.components_map[meta.count++] = component; | 434 | values[dest_elem++] = components[component]; |
| 461 | } | 435 | } |
| 462 | if (meta.count == 0) | 436 | if (dest_elem == 0) |
| 463 | return; | 437 | return; |
| 464 | 438 | ||
| 465 | bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture, | 439 | std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); }); |
| 466 | GetRegister(instr.gpr0), GetRegister(instr.gpr28))); | 440 | |
| 441 | const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]); | ||
| 442 | if (dest_elem <= 2) { | ||
| 443 | SetRegister(bb, instr.gpr0, first_value); | ||
| 444 | return; | ||
| 445 | } | ||
| 446 | |||
| 447 | SetTemporal(bb, 0, first_value); | ||
| 448 | SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3])); | ||
| 449 | |||
| 450 | SetRegister(bb, instr.gpr0, GetTemporal(0)); | ||
| 451 | SetRegister(bb, instr.gpr28, GetTemporal(1)); | ||
| 467 | } | 452 | } |
| 468 | 453 | ||
| 469 | Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | 454 | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, |
| 470 | TextureProcessMode process_mode, bool depth_compare, bool is_array, | 455 | TextureProcessMode process_mode, bool depth_compare, bool is_array, |
| 471 | std::size_t array_offset, std::size_t bias_offset, | 456 | std::size_t array_offset, std::size_t bias_offset, |
| 472 | std::vector<Node>&& coords) { | 457 | std::vector<Node>&& coords) { |
| 473 | UNIMPLEMENTED_IF_MSG( | 458 | UNIMPLEMENTED_IF_MSG( |
| 474 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || | 459 | (texture_type == TextureType::Texture3D && (is_array || depth_compare)) || |
| 475 | (texture_type == TextureType::TextureCube && is_array && depth_compare), | 460 | (texture_type == TextureType::TextureCube && is_array && depth_compare), |
| @@ -495,24 +480,31 @@ Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 495 | std::optional<u32> array_offset_value; | 480 | std::optional<u32> array_offset_value; |
| 496 | if (is_array) | 481 | if (is_array) |
| 497 | array_offset_value = static_cast<u32>(array_offset); | 482 | array_offset_value = static_cast<u32>(array_offset); |
| 498 | MetaTexture meta{sampler, static_cast<u32>(coords.size()), array_offset_value}; | 483 | |
| 499 | std::vector<Node> params = std::move(coords); | 484 | const auto coords_count = static_cast<u32>(coords.size()); |
| 500 | 485 | ||
| 501 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { | 486 | if (process_mode != TextureProcessMode::None && gl_lod_supported) { |
| 502 | if (process_mode == TextureProcessMode::LZ) { | 487 | if (process_mode == TextureProcessMode::LZ) { |
| 503 | params.push_back(Immediate(0.0f)); | 488 | coords.push_back(Immediate(0.0f)); |
| 504 | } else { | 489 | } else { |
| 505 | // If present, lod or bias are always stored in the register indexed by the gpr20 field | 490 | // If present, lod or bias are always stored in the register indexed by the gpr20 |
| 506 | // with an offset depending on the usage of the other registers | 491 | // field with an offset depending on the usage of the other registers |
| 507 | params.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); | 492 | coords.push_back(GetRegister(instr.gpr20.Value() + bias_offset)); |
| 508 | } | 493 | } |
| 509 | } | 494 | } |
| 510 | 495 | ||
| 511 | return Operation(read_method, meta, std::move(params)); | 496 | Node4 values; |
| 497 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 498 | auto params = coords; | ||
| 499 | MetaTexture meta{sampler, element, coords_count, array_offset_value}; | ||
| 500 | values[element] = Operation(read_method, std::move(meta), std::move(params)); | ||
| 501 | } | ||
| 502 | |||
| 503 | return values; | ||
| 512 | } | 504 | } |
| 513 | 505 | ||
| 514 | Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | 506 | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, |
| 515 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | 507 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { |
| 516 | const bool lod_bias_enabled = | 508 | const bool lod_bias_enabled = |
| 517 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 509 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |
| 518 | 510 | ||
| @@ -551,8 +543,8 @@ Node ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||
| 551 | 0, std::move(coords)); | 543 | 0, std::move(coords)); |
| 552 | } | 544 | } |
| 553 | 545 | ||
| 554 | Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | 546 | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, |
| 555 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { | 547 | TextureProcessMode process_mode, bool depth_compare, bool is_array) { |
| 556 | const bool lod_bias_enabled = | 548 | const bool lod_bias_enabled = |
| 557 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | 549 | (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |
| 558 | 550 | ||
| @@ -593,8 +585,8 @@ Node ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||
| 593 | (coord_count > 2 ? 1 : 0), std::move(coords)); | 585 | (coord_count > 2 ? 1 : 0), std::move(coords)); |
| 594 | } | 586 | } |
| 595 | 587 | ||
| 596 | Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | 588 | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, |
| 597 | bool is_array) { | 589 | bool is_array) { |
| 598 | const std::size_t coord_count = GetCoordCount(texture_type); | 590 | const std::size_t coord_count = GetCoordCount(texture_type); |
| 599 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | 591 | const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); |
| 600 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | 592 | const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); |
| @@ -604,24 +596,31 @@ Node ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool dep | |||
| 604 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used | 596 | // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used |
| 605 | const u64 coord_register = array_register + (is_array ? 1 : 0); | 597 | const u64 coord_register = array_register + (is_array ? 1 : 0); |
| 606 | 598 | ||
| 607 | std::vector<Node> params; | 599 | std::vector<Node> coords; |
| 608 | 600 | ||
| 609 | for (size_t i = 0; i < coord_count; ++i) { | 601 | for (size_t i = 0; i < coord_count; ++i) { |
| 610 | params.push_back(GetRegister(coord_register + i)); | 602 | coords.push_back(GetRegister(coord_register + i)); |
| 611 | } | 603 | } |
| 612 | std::optional<u32> array_offset; | 604 | std::optional<u32> array_offset; |
| 613 | if (is_array) { | 605 | if (is_array) { |
| 614 | array_offset = static_cast<u32>(params.size()); | 606 | array_offset = static_cast<u32>(coords.size()); |
| 615 | params.push_back(GetRegister(array_register)); | 607 | coords.push_back(GetRegister(array_register)); |
| 616 | } | 608 | } |
| 617 | 609 | ||
| 618 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 610 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |
| 619 | MetaTexture meta{sampler, static_cast<u32>(params.size()), array_offset}; | ||
| 620 | 611 | ||
| 621 | return Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | 612 | Node4 values; |
| 613 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 614 | auto params = coords; | ||
| 615 | MetaTexture meta{sampler, element, static_cast<u32>(coords.size()), array_offset}; | ||
| 616 | values[element] = | ||
| 617 | Operation(OperationCode::F4TextureGather, std::move(meta), std::move(params)); | ||
| 618 | } | ||
| 619 | |||
| 620 | return values; | ||
| 622 | } | 621 | } |
| 623 | 622 | ||
| 624 | Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { | 623 | Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) { |
| 625 | const std::size_t type_coord_count = GetCoordCount(texture_type); | 624 | const std::size_t type_coord_count = GetCoordCount(texture_type); |
| 626 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); | 625 | const std::size_t total_coord_count = type_coord_count + (is_array ? 1 : 0); |
| 627 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; | 626 | const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL; |
| @@ -636,36 +635,41 @@ Node ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_ | |||
| 636 | ? static_cast<u64>(instr.gpr20.Value()) | 635 | ? static_cast<u64>(instr.gpr20.Value()) |
| 637 | : coord_register + 1; | 636 | : coord_register + 1; |
| 638 | 637 | ||
| 639 | std::vector<Node> params; | 638 | std::vector<Node> coords; |
| 640 | 639 | ||
| 641 | for (std::size_t i = 0; i < type_coord_count; ++i) { | 640 | for (std::size_t i = 0; i < type_coord_count; ++i) { |
| 642 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); | 641 | const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1); |
| 643 | params.push_back(GetRegister(last ? last_coord_register : coord_register + i)); | 642 | coords.push_back(GetRegister(last ? last_coord_register : coord_register + i)); |
| 644 | } | 643 | } |
| 645 | std::optional<u32> array_offset; | 644 | std::optional<u32> array_offset; |
| 646 | if (is_array) { | 645 | if (is_array) { |
| 647 | array_offset = static_cast<u32>(params.size()); | 646 | array_offset = static_cast<u32>(coords.size()); |
| 648 | params.push_back(GetRegister(array_register)); | 647 | coords.push_back(GetRegister(array_register)); |
| 649 | } | 648 | } |
| 650 | const auto coords_count = static_cast<u32>(params.size()); | 649 | const auto coords_count = static_cast<u32>(coords.size()); |
| 651 | 650 | ||
| 652 | if (lod_enabled) { | 651 | if (lod_enabled) { |
| 653 | // When lod is used, it is always in gpr20 | 652 | // When lod is used, it is always in gpr20 |
| 654 | params.push_back(GetRegister(instr.gpr20)); | 653 | coords.push_back(GetRegister(instr.gpr20)); |
| 655 | } else { | 654 | } else { |
| 656 | params.push_back(Immediate(0)); | 655 | coords.push_back(Immediate(0)); |
| 657 | } | 656 | } |
| 658 | 657 | ||
| 659 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 658 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |
| 660 | MetaTexture meta{sampler, coords_count, array_offset}; | ||
| 661 | 659 | ||
| 662 | return Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | 660 | Node4 values; |
| 661 | for (u32 element = 0; element < values.size(); ++element) { | ||
| 662 | auto params = coords; | ||
| 663 | MetaTexture meta{sampler, element, coords_count, array_offset}; | ||
| 664 | values[element] = | ||
| 665 | Operation(OperationCode::F4TexelFetch, std::move(meta), std::move(params)); | ||
| 666 | } | ||
| 667 | return values; | ||
| 663 | } | 668 | } |
| 664 | 669 | ||
| 665 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | 670 | std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( |
| 666 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, | 671 | TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled, |
| 667 | std::size_t max_coords, std::size_t max_inputs) { | 672 | std::size_t max_coords, std::size_t max_inputs) { |
| 668 | |||
| 669 | const std::size_t coord_count = GetCoordCount(texture_type); | 673 | const std::size_t coord_count = GetCoordCount(texture_type); |
| 670 | 674 | ||
| 671 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); | 675 | std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0); |