summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeModules/GenerateSCMRev.cmake1
-rw-r--r--src/common/CMakeLists.txt1
-rw-r--r--src/video_core/CMakeLists.txt1
-rw-r--r--src/video_core/engines/shader_bytecode.h17
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/memory.cpp493
-rw-r--r--src/video_core/shader/decode/texture.cpp525
-rw-r--r--src/video_core/shader/shader_ir.h1
8 files changed, 539 insertions, 501 deletions
diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake
index 78728e08b..08315a1f1 100644
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@@ -73,6 +73,7 @@ set(HASH_FILES
73 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 73 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 74 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
75 "${VIDEO_CORE}/shader/decode/memory.cpp" 75 "${VIDEO_CORE}/shader/decode/memory.cpp"
76 "${VIDEO_CORE}/shader/decode/texture.cpp"
76 "${VIDEO_CORE}/shader/decode/other.cpp" 77 "${VIDEO_CORE}/shader/decode/other.cpp"
77 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 78 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
78 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 79 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..3d30f0e3e 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -47,6 +47,7 @@ add_custom_command(OUTPUT scm_rev.cpp
47 "${VIDEO_CORE}/shader/decode/integer_set.cpp" 47 "${VIDEO_CORE}/shader/decode/integer_set.cpp"
48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp" 48 "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
49 "${VIDEO_CORE}/shader/decode/memory.cpp" 49 "${VIDEO_CORE}/shader/decode/memory.cpp"
50 "${VIDEO_CORE}/shader/decode/texture.cpp"
50 "${VIDEO_CORE}/shader/decode/other.cpp" 51 "${VIDEO_CORE}/shader/decode/other.cpp"
51 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp" 52 "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
52 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp" 53 "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 6036d6ed3..661a113fb 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -74,6 +74,7 @@ add_library(video_core STATIC
74 shader/decode/hfma2.cpp 74 shader/decode/hfma2.cpp
75 shader/decode/conversion.cpp 75 shader/decode/conversion.cpp
76 shader/decode/memory.cpp 76 shader/decode/memory.cpp
77 shader/decode/texture.cpp
77 shader/decode/float_set_predicate.cpp 78 shader/decode/float_set_predicate.cpp
78 shader/decode/integer_set_predicate.cpp 79 shader/decode/integer_set_predicate.cpp
79 shader/decode/half_set_predicate.cpp 80 shader/decode/half_set_predicate.cpp
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 252592edd..d14cd5f20 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1446,6 +1446,7 @@ public:
1446 Flow, 1446 Flow,
1447 Synch, 1447 Synch,
1448 Memory, 1448 Memory,
1449 Texture,
1449 FloatSet, 1450 FloatSet,
1450 FloatSetPredicate, 1451 FloatSetPredicate,
1451 IntegerSet, 1452 IntegerSet,
@@ -1576,14 +1577,14 @@ private:
1576 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), 1577 INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
1577 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), 1578 INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
1578 INST("1110111011011---", Id::STG, Type::Memory, "STG"), 1579 INST("1110111011011---", Id::STG, Type::Memory, "STG"),
1579 INST("110000----111---", Id::TEX, Type::Memory, "TEX"), 1580 INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
1580 INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"), 1581 INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
1581 INST("1101-00---------", Id::TEXS, Type::Memory, "TEXS"), 1582 INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
1582 INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"), 1583 INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
1583 INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"), 1584 INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
1584 INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"), 1585 INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
1585 INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"), 1586 INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
1586 INST("1101111101011---", Id::TMML, Type::Memory, "TMML"), 1587 INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
1587 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"), 1588 INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
1588 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"), 1589 INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
1589 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"), 1590 INST("1111101111100---", Id::OUT_R, Type::Trivial, "OUT_R"),
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 740ac3118..e4c438792 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -165,6 +165,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2}, 165 {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion}, 166 {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory}, 167 {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
168 {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
168 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate}, 169 {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
169 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate}, 170 {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
170 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate}, 171 {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 38f01ca50..ea3c71eed 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -17,24 +17,6 @@ using Tegra::Shader::Attribute;
17using Tegra::Shader::Instruction; 17using Tegra::Shader::Instruction;
18using Tegra::Shader::OpCode; 18using Tegra::Shader::OpCode;
19using Tegra::Shader::Register; 19using Tegra::Shader::Register;
20using Tegra::Shader::TextureMiscMode;
21using Tegra::Shader::TextureProcessMode;
22using Tegra::Shader::TextureType;
23
24static std::size_t GetCoordCount(TextureType texture_type) {
25 switch (texture_type) {
26 case TextureType::Texture1D:
27 return 1;
28 case TextureType::Texture2D:
29 return 2;
30 case TextureType::Texture3D:
31 case TextureType::TextureCube:
32 return 3;
33 default:
34 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
35 return 0;
36 }
37}
38 20
39u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { 21u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
40 const Instruction instr = {program_code[pc]}; 22 const Instruction instr = {program_code[pc]};
@@ -247,194 +229,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
247 } 229 }
248 break; 230 break;
249 } 231 }
250 case OpCode::Id::TEX: {
251 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
252 "AOFFI is not implemented");
253
254 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
255 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
256 }
257
258 const TextureType texture_type{instr.tex.texture_type};
259 const bool is_array = instr.tex.array != 0;
260 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
261 const auto process_mode = instr.tex.GetTextureProcessMode();
262 WriteTexInstructionFloat(
263 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
264 break;
265 }
266 case OpCode::Id::TEXS: {
267 const TextureType texture_type{instr.texs.GetTextureType()};
268 const bool is_array{instr.texs.IsArrayTexture()};
269 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
270 const auto process_mode = instr.texs.GetTextureProcessMode();
271
272 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
273 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
274 }
275
276 const Node4 components =
277 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
278
279 if (instr.texs.fp32_flag) {
280 WriteTexsInstructionFloat(bb, instr, components);
281 } else {
282 WriteTexsInstructionHalfFloat(bb, instr, components);
283 }
284 break;
285 }
286 case OpCode::Id::TLD4: {
287 ASSERT(instr.tld4.array == 0);
288 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
289 "AOFFI is not implemented");
290 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
291 "NDV is not implemented");
292 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
293 "PTP is not implemented");
294
295 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
296 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
297 }
298
299 const auto texture_type = instr.tld4.texture_type.Value();
300 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
301 const bool is_array = instr.tld4.array != 0;
302 WriteTexInstructionFloat(bb, instr,
303 GetTld4Code(instr, texture_type, depth_compare, is_array));
304 break;
305 }
306 case OpCode::Id::TLD4S: {
307 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
308 "AOFFI is not implemented");
309 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
310 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
311 }
312
313 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
314 const Node op_a = GetRegister(instr.gpr8);
315 const Node op_b = GetRegister(instr.gpr20);
316
317 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
318 std::vector<Node> coords;
319 if (depth_compare) {
320 // Note: TLD4S coordinate encoding works just like TEXS's
321 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
322 coords.push_back(op_a);
323 coords.push_back(op_y);
324 coords.push_back(op_b);
325 } else {
326 coords.push_back(op_a);
327 coords.push_back(op_b);
328 }
329 std::vector<Node> extras;
330 extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
331
332 const auto& sampler =
333 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
334
335 Node4 values;
336 for (u32 element = 0; element < values.size(); ++element) {
337 auto coords_copy = coords;
338 MetaTexture meta{sampler, {}, {}, extras, element};
339 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
340 }
341
342 WriteTexsInstructionFloat(bb, instr, values);
343 break;
344 }
345 case OpCode::Id::TXQ: {
346 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
347 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
348 }
349
350 // TODO: The new commits on the texture refactor, change the way samplers work.
351 // Sadly, not all texture instructions specify the type of texture their sampler
352 // uses. This must be fixed at a later instance.
353 const auto& sampler =
354 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
355
356 u32 indexer = 0;
357 switch (instr.txq.query_type) {
358 case Tegra::Shader::TextureQueryType::Dimension: {
359 for (u32 element = 0; element < 4; ++element) {
360 if (!instr.txq.IsComponentEnabled(element)) {
361 continue;
362 }
363 MetaTexture meta{sampler, {}, {}, {}, element};
364 const Node value =
365 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
366 SetTemporal(bb, indexer++, value);
367 }
368 for (u32 i = 0; i < indexer; ++i) {
369 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
370 }
371 break;
372 }
373 default:
374 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
375 static_cast<u32>(instr.txq.query_type.Value()));
376 }
377 break;
378 }
379 case OpCode::Id::TMML: {
380 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
381 "NDV is not implemented");
382
383 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
384 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
385 }
386
387 auto texture_type = instr.tmml.texture_type.Value();
388 const bool is_array = instr.tmml.array != 0;
389 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
390
391 std::vector<Node> coords;
392
393 // TODO: Add coordinates for different samplers once other texture types are implemented.
394 switch (texture_type) {
395 case TextureType::Texture1D:
396 coords.push_back(GetRegister(instr.gpr8));
397 break;
398 case TextureType::Texture2D:
399 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
400 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
401 break;
402 default:
403 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
404
405 // Fallback to interpreting as a 2D texture for now
406 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
407 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
408 texture_type = TextureType::Texture2D;
409 }
410
411 for (u32 element = 0; element < 2; ++element) {
412 auto params = coords;
413 MetaTexture meta{sampler, {}, {}, {}, element};
414 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
415 SetTemporal(bb, element, value);
416 }
417 for (u32 element = 0; element < 2; ++element) {
418 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
419 }
420
421 break;
422 }
423 case OpCode::Id::TLDS: {
424 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
425 const bool is_array{instr.tlds.IsArrayTexture()};
426
427 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
428 "AOFFI is not implemented");
429 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
430
431 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
432 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
433 }
434
435 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
436 break;
437 }
438 default: 232 default:
439 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName()); 233 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
440 } 234 }
@@ -442,291 +236,4 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
442 return pc; 236 return pc;
443} 237}
444 238
445const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
446 bool is_array, bool is_shadow) {
447 const auto offset = static_cast<std::size_t>(sampler.index.Value());
448
449 // If this sampler has already been used, return the existing mapping.
450 const auto itr =
451 std::find_if(used_samplers.begin(), used_samplers.end(),
452 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
453 if (itr != used_samplers.end()) {
454 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
455 itr->IsShadow() == is_shadow);
456 return *itr;
457 }
458
459 // Otherwise create a new mapping for this sampler
460 const std::size_t next_index = used_samplers.size();
461 const Sampler entry{offset, next_index, type, is_array, is_shadow};
462 return *used_samplers.emplace(entry).first;
463}
464
465void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
466 u32 dest_elem = 0;
467 for (u32 elem = 0; elem < 4; ++elem) {
468 if (!instr.tex.IsComponentEnabled(elem)) {
469 // Skip disabled components
470 continue;
471 }
472 SetTemporal(bb, dest_elem++, components[elem]);
473 }
474 // After writing values in temporals, move them to the real registers
475 for (u32 i = 0; i < dest_elem; ++i) {
476 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
477 }
478}
479
480void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
481 const Node4& components) {
482 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
483 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
484
485 u32 dest_elem = 0;
486 for (u32 component = 0; component < 4; ++component) {
487 if (!instr.texs.IsComponentEnabled(component))
488 continue;
489 SetTemporal(bb, dest_elem++, components[component]);
490 }
491
492 for (u32 i = 0; i < dest_elem; ++i) {
493 if (i < 2) {
494 // Write the first two swizzle components to gpr0 and gpr0+1
495 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
496 } else {
497 ASSERT(instr.texs.HasTwoDestinations());
498 // Write the rest of the swizzle components to gpr28 and gpr28+1
499 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
500 }
501 }
502}
503
504void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
505 const Node4& components) {
506 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
507 // float instruction).
508
509 Node4 values;
510 u32 dest_elem = 0;
511 for (u32 component = 0; component < 4; ++component) {
512 if (!instr.texs.IsComponentEnabled(component))
513 continue;
514 values[dest_elem++] = components[component];
515 }
516 if (dest_elem == 0)
517 return;
518
519 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
520
521 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
522 if (dest_elem <= 2) {
523 SetRegister(bb, instr.gpr0, first_value);
524 return;
525 }
526
527 SetTemporal(bb, 0, first_value);
528 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
529
530 SetRegister(bb, instr.gpr0, GetTemporal(0));
531 SetRegister(bb, instr.gpr28, GetTemporal(1));
532}
533
534Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
535 TextureProcessMode process_mode, std::vector<Node> coords,
536 Node array, Node depth_compare, u32 bias_offset) {
537 const bool is_array = array;
538 const bool is_shadow = depth_compare;
539
540 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
541 (texture_type == TextureType::TextureCube && is_array && is_shadow),
542 "This method is not supported.");
543
544 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
545
546 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
547 process_mode == TextureProcessMode::LL ||
548 process_mode == TextureProcessMode::LLA;
549
550 // LOD selection (either via bias or explicit textureLod) not supported in GL for
551 // sampler2DArrayShadow and samplerCubeArrayShadow.
552 const bool gl_lod_supported =
553 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
554 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
555
556 const OperationCode read_method =
557 lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
558
559 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
560
561 std::vector<Node> extras;
562 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
563 if (process_mode == TextureProcessMode::LZ) {
564 extras.push_back(Immediate(0.0f));
565 } else {
566 // If present, lod or bias are always stored in the register indexed by the gpr20
567 // field with an offset depending on the usage of the other registers
568 extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
569 }
570 }
571
572 Node4 values;
573 for (u32 element = 0; element < values.size(); ++element) {
574 auto copy_coords = coords;
575 MetaTexture meta{sampler, array, depth_compare, extras, element};
576 values[element] = Operation(read_method, meta, std::move(copy_coords));
577 }
578
579 return values;
580}
581
582Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
583 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
584 const bool lod_bias_enabled =
585 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
586
587 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
588 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
589 // If enabled arrays index is always stored in the gpr8 field
590 const u64 array_register = instr.gpr8.Value();
591 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
592 const u64 coord_register = array_register + (is_array ? 1 : 0);
593
594 std::vector<Node> coords;
595 for (std::size_t i = 0; i < coord_count; ++i) {
596 coords.push_back(GetRegister(coord_register + i));
597 }
598 // 1D.DC in OpenGL the 2nd component is ignored.
599 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
600 coords.push_back(Immediate(0.0f));
601 }
602
603 const Node array = is_array ? GetRegister(array_register) : nullptr;
604
605 Node dc{};
606 if (depth_compare) {
607 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
608 // or bias are used
609 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
610 dc = GetRegister(depth_register);
611 }
612
613 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
614}
615
616Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
617 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
618 const bool lod_bias_enabled =
619 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
620
621 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
622 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
623 // If enabled arrays index is always stored in the gpr8 field
624 const u64 array_register = instr.gpr8.Value();
625 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
626 const u64 coord_register = array_register + (is_array ? 1 : 0);
627 const u64 last_coord_register =
628 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
629 ? static_cast<u64>(instr.gpr20.Value())
630 : coord_register + 1;
631 const u32 bias_offset = coord_count > 2 ? 1 : 0;
632
633 std::vector<Node> coords;
634 for (std::size_t i = 0; i < coord_count; ++i) {
635 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
636 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
637 }
638
639 const Node array = is_array ? GetRegister(array_register) : nullptr;
640
641 Node dc{};
642 if (depth_compare) {
643 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
644 // or bias are used
645 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
646 dc = GetRegister(depth_register);
647 }
648
649 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
650}
651
652Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
653 bool is_array) {
654 const std::size_t coord_count = GetCoordCount(texture_type);
655 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
656 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
657
658 // If enabled arrays index is always stored in the gpr8 field
659 const u64 array_register = instr.gpr8.Value();
660 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
661 const u64 coord_register = array_register + (is_array ? 1 : 0);
662
663 std::vector<Node> coords;
664 for (size_t i = 0; i < coord_count; ++i)
665 coords.push_back(GetRegister(coord_register + i));
666
667 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
668
669 Node4 values;
670 for (u32 element = 0; element < values.size(); ++element) {
671 auto coords_copy = coords;
672 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
673 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
674 }
675
676 return values;
677}
678
679Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
680 const std::size_t type_coord_count = GetCoordCount(texture_type);
681 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
682
683 // If enabled arrays index is always stored in the gpr8 field
684 const u64 array_register = instr.gpr8.Value();
685 // if is array gpr20 is used
686 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
687
688 const u64 last_coord_register =
689 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
690 ? static_cast<u64>(instr.gpr20.Value())
691 : coord_register + 1;
692
693 std::vector<Node> coords;
694 for (std::size_t i = 0; i < type_coord_count; ++i) {
695 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
696 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
697 }
698
699 const Node array = is_array ? GetRegister(array_register) : nullptr;
700 // When lod is used always is in gpr20
701 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
702
703 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
704
705 Node4 values;
706 for (u32 element = 0; element < values.size(); ++element) {
707 auto coords_copy = coords;
708 MetaTexture meta{sampler, array, {}, {lod}, element};
709 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
710 }
711 return values;
712}
713
714std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
715 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
716 std::size_t max_coords, std::size_t max_inputs) {
717 const std::size_t coord_count = GetCoordCount(texture_type);
718
719 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
720 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
721 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
722 UNIMPLEMENTED_MSG("Unsupported Texture operation");
723 total_coord_count = std::min(total_coord_count, max_coords);
724 }
725 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
726 total_coord_count +=
727 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
728
729 return {coord_count, total_coord_count};
730}
731
732} // namespace VideoCommon::Shader 239} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
new file mode 100644
index 000000000..50e2d0584
--- /dev/null
+++ b/src/video_core/shader/decode/texture.cpp
@@ -0,0 +1,525 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <vector>
7#include <fmt/format.h>
8
9#include "common/assert.h"
10#include "common/common_types.h"
11#include "video_core/engines/shader_bytecode.h"
12#include "video_core/shader/shader_ir.h"
13
14namespace VideoCommon::Shader {
15
16using Tegra::Shader::Instruction;
17using Tegra::Shader::OpCode;
18using Tegra::Shader::Register;
19using Tegra::Shader::TextureMiscMode;
20using Tegra::Shader::TextureProcessMode;
21using Tegra::Shader::TextureType;
22
23static std::size_t GetCoordCount(TextureType texture_type) {
24 switch (texture_type) {
25 case TextureType::Texture1D:
26 return 1;
27 case TextureType::Texture2D:
28 return 2;
29 case TextureType::Texture3D:
30 case TextureType::TextureCube:
31 return 3;
32 default:
33 UNIMPLEMENTED_MSG("Unhandled texture type: {}", static_cast<u32>(texture_type));
34 return 0;
35 }
36}
37
38u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
39 const Instruction instr = {program_code[pc]};
40 const auto opcode = OpCode::Decode(instr);
41
42 switch (opcode->get().GetId()) {
43 case OpCode::Id::TEX: {
44 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
45 "AOFFI is not implemented");
46
47 if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
48 LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
49 }
50
51 const TextureType texture_type{instr.tex.texture_type};
52 const bool is_array = instr.tex.array != 0;
53 const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
54 const auto process_mode = instr.tex.GetTextureProcessMode();
55 WriteTexInstructionFloat(
56 bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
57 break;
58 }
59 case OpCode::Id::TEXS: {
60 const TextureType texture_type{instr.texs.GetTextureType()};
61 const bool is_array{instr.texs.IsArrayTexture()};
62 const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
63 const auto process_mode = instr.texs.GetTextureProcessMode();
64
65 if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
66 LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
67 }
68
69 const Node4 components =
70 GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
71
72 if (instr.texs.fp32_flag) {
73 WriteTexsInstructionFloat(bb, instr, components);
74 } else {
75 WriteTexsInstructionHalfFloat(bb, instr, components);
76 }
77 break;
78 }
79 case OpCode::Id::TLD4: {
80 ASSERT(instr.tld4.array == 0);
81 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
82 "AOFFI is not implemented");
83 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
84 "NDV is not implemented");
85 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
86 "PTP is not implemented");
87
88 if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
89 LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
90 }
91
92 const auto texture_type = instr.tld4.texture_type.Value();
93 const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
94 const bool is_array = instr.tld4.array != 0;
95 WriteTexInstructionFloat(bb, instr,
96 GetTld4Code(instr, texture_type, depth_compare, is_array));
97 break;
98 }
99 case OpCode::Id::TLD4S: {
100 UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
101 "AOFFI is not implemented");
102 if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
103 LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
104 }
105
106 const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
107 const Node op_a = GetRegister(instr.gpr8);
108 const Node op_b = GetRegister(instr.gpr20);
109
110 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
111 std::vector<Node> coords;
112 if (depth_compare) {
113 // Note: TLD4S coordinate encoding works just like TEXS's
114 const Node op_y = GetRegister(instr.gpr8.Value() + 1);
115 coords.push_back(op_a);
116 coords.push_back(op_y);
117 coords.push_back(op_b);
118 } else {
119 coords.push_back(op_a);
120 coords.push_back(op_b);
121 }
122 std::vector<Node> extras;
123 extras.push_back(Immediate(static_cast<u32>(instr.tld4s.component)));
124
125 const auto& sampler =
126 GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
127
128 Node4 values;
129 for (u32 element = 0; element < values.size(); ++element) {
130 auto coords_copy = coords;
131 MetaTexture meta{sampler, {}, {}, extras, element};
132 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
133 }
134
135 WriteTexsInstructionFloat(bb, instr, values);
136 break;
137 }
138 case OpCode::Id::TXQ: {
139 if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
140 LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
141 }
142
143 // TODO: The new commits on the texture refactor, change the way samplers work.
144 // Sadly, not all texture instructions specify the type of texture their sampler
145 // uses. This must be fixed at a later instance.
146 const auto& sampler =
147 GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
148
149 u32 indexer = 0;
150 switch (instr.txq.query_type) {
151 case Tegra::Shader::TextureQueryType::Dimension: {
152 for (u32 element = 0; element < 4; ++element) {
153 if (!instr.txq.IsComponentEnabled(element)) {
154 continue;
155 }
156 MetaTexture meta{sampler, {}, {}, {}, element};
157 const Node value =
158 Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
159 SetTemporal(bb, indexer++, value);
160 }
161 for (u32 i = 0; i < indexer; ++i) {
162 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
163 }
164 break;
165 }
166 default:
167 UNIMPLEMENTED_MSG("Unhandled texture query type: {}",
168 static_cast<u32>(instr.txq.query_type.Value()));
169 }
170 break;
171 }
172 case OpCode::Id::TMML: {
173 UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
174 "NDV is not implemented");
175
176 if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
177 LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
178 }
179
180 auto texture_type = instr.tmml.texture_type.Value();
181 const bool is_array = instr.tmml.array != 0;
182 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
183
184 std::vector<Node> coords;
185
186 // TODO: Add coordinates for different samplers once other texture types are implemented.
187 switch (texture_type) {
188 case TextureType::Texture1D:
189 coords.push_back(GetRegister(instr.gpr8));
190 break;
191 case TextureType::Texture2D:
192 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
193 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
194 break;
195 default:
196 UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
197
198 // Fallback to interpreting as a 2D texture for now
199 coords.push_back(GetRegister(instr.gpr8.Value() + 0));
200 coords.push_back(GetRegister(instr.gpr8.Value() + 1));
201 texture_type = TextureType::Texture2D;
202 }
203
204 for (u32 element = 0; element < 2; ++element) {
205 auto params = coords;
206 MetaTexture meta{sampler, {}, {}, {}, element};
207 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
208 SetTemporal(bb, element, value);
209 }
210 for (u32 element = 0; element < 2; ++element) {
211 SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
212 }
213
214 break;
215 }
216 case OpCode::Id::TLDS: {
217 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
218 const bool is_array{instr.tlds.IsArrayTexture()};
219
220 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
221 "AOFFI is not implemented");
222 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
223
224 if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
225 LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
226 }
227
228 WriteTexsInstructionFloat(bb, instr, GetTldsCode(instr, texture_type, is_array));
229 break;
230 }
231 default:
232 UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
233 }
234
235 return pc;
236}
237
238const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
239 bool is_array, bool is_shadow) {
240 const auto offset = static_cast<std::size_t>(sampler.index.Value());
241
242 // If this sampler has already been used, return the existing mapping.
243 const auto itr =
244 std::find_if(used_samplers.begin(), used_samplers.end(),
245 [&](const Sampler& entry) { return entry.GetOffset() == offset; });
246 if (itr != used_samplers.end()) {
247 ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
248 itr->IsShadow() == is_shadow);
249 return *itr;
250 }
251
252 // Otherwise create a new mapping for this sampler
253 const std::size_t next_index = used_samplers.size();
254 const Sampler entry{offset, next_index, type, is_array, is_shadow};
255 return *used_samplers.emplace(entry).first;
256}
257
258void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
259 u32 dest_elem = 0;
260 for (u32 elem = 0; elem < 4; ++elem) {
261 if (!instr.tex.IsComponentEnabled(elem)) {
262 // Skip disabled components
263 continue;
264 }
265 SetTemporal(bb, dest_elem++, components[elem]);
266 }
267 // After writing values in temporals, move them to the real registers
268 for (u32 i = 0; i < dest_elem; ++i) {
269 SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
270 }
271}
272
273void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
274 const Node4& components) {
275 // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
276 // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
277
278 u32 dest_elem = 0;
279 for (u32 component = 0; component < 4; ++component) {
280 if (!instr.texs.IsComponentEnabled(component))
281 continue;
282 SetTemporal(bb, dest_elem++, components[component]);
283 }
284
285 for (u32 i = 0; i < dest_elem; ++i) {
286 if (i < 2) {
287 // Write the first two swizzle components to gpr0 and gpr0+1
288 SetRegister(bb, instr.gpr0.Value() + i % 2, GetTemporal(i));
289 } else {
290 ASSERT(instr.texs.HasTwoDestinations());
291 // Write the rest of the swizzle components to gpr28 and gpr28+1
292 SetRegister(bb, instr.gpr28.Value() + i % 2, GetTemporal(i));
293 }
294 }
295}
296
297void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
298 const Node4& components) {
299 // TEXS.F16 destionation registers are packed in two registers in pairs (just like any half
300 // float instruction).
301
302 Node4 values;
303 u32 dest_elem = 0;
304 for (u32 component = 0; component < 4; ++component) {
305 if (!instr.texs.IsComponentEnabled(component))
306 continue;
307 values[dest_elem++] = components[component];
308 }
309 if (dest_elem == 0)
310 return;
311
312 std::generate(values.begin() + dest_elem, values.end(), [&]() { return Immediate(0); });
313
314 const Node first_value = Operation(OperationCode::HPack2, values[0], values[1]);
315 if (dest_elem <= 2) {
316 SetRegister(bb, instr.gpr0, first_value);
317 return;
318 }
319
320 SetTemporal(bb, 0, first_value);
321 SetTemporal(bb, 1, Operation(OperationCode::HPack2, values[2], values[3]));
322
323 SetRegister(bb, instr.gpr0, GetTemporal(0));
324 SetRegister(bb, instr.gpr28, GetTemporal(1));
325}
326
327Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
328 TextureProcessMode process_mode, std::vector<Node> coords,
329 Node array, Node depth_compare, u32 bias_offset) {
330 const bool is_array = array;
331 const bool is_shadow = depth_compare;
332
333 UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
334 (texture_type == TextureType::TextureCube && is_array && is_shadow),
335 "This method is not supported.");
336
337 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
338
339 const bool lod_needed = process_mode == TextureProcessMode::LZ ||
340 process_mode == TextureProcessMode::LL ||
341 process_mode == TextureProcessMode::LLA;
342
343 // LOD selection (either via bias or explicit textureLod) not supported in GL for
344 // sampler2DArrayShadow and samplerCubeArrayShadow.
345 const bool gl_lod_supported =
346 !((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
347 (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
348
349 const OperationCode read_method =
350 lod_needed && gl_lod_supported ? OperationCode::TextureLod : OperationCode::Texture;
351
352 UNIMPLEMENTED_IF(process_mode != TextureProcessMode::None && !gl_lod_supported);
353
354 std::vector<Node> extras;
355 if (process_mode != TextureProcessMode::None && gl_lod_supported) {
356 if (process_mode == TextureProcessMode::LZ) {
357 extras.push_back(Immediate(0.0f));
358 } else {
359 // If present, lod or bias are always stored in the register indexed by the gpr20
360 // field with an offset depending on the usage of the other registers
361 extras.push_back(GetRegister(instr.gpr20.Value() + bias_offset));
362 }
363 }
364
365 Node4 values;
366 for (u32 element = 0; element < values.size(); ++element) {
367 auto copy_coords = coords;
368 MetaTexture meta{sampler, array, depth_compare, extras, element};
369 values[element] = Operation(read_method, meta, std::move(copy_coords));
370 }
371
372 return values;
373}
374
375Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
376 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
377 const bool lod_bias_enabled =
378 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
379
380 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
381 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
382 // If enabled arrays index is always stored in the gpr8 field
383 const u64 array_register = instr.gpr8.Value();
384 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
385 const u64 coord_register = array_register + (is_array ? 1 : 0);
386
387 std::vector<Node> coords;
388 for (std::size_t i = 0; i < coord_count; ++i) {
389 coords.push_back(GetRegister(coord_register + i));
390 }
391 // 1D.DC in OpenGL the 2nd component is ignored.
392 if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
393 coords.push_back(Immediate(0.0f));
394 }
395
396 const Node array = is_array ? GetRegister(array_register) : nullptr;
397
398 Node dc{};
399 if (depth_compare) {
400 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
401 // or bias are used
402 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
403 dc = GetRegister(depth_register);
404 }
405
406 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
407}
408
409Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
410 TextureProcessMode process_mode, bool depth_compare, bool is_array) {
411 const bool lod_bias_enabled =
412 (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
413
414 const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
415 texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
416 // If enabled arrays index is always stored in the gpr8 field
417 const u64 array_register = instr.gpr8.Value();
418 // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
419 const u64 coord_register = array_register + (is_array ? 1 : 0);
420 const u64 last_coord_register =
421 (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
422 ? static_cast<u64>(instr.gpr20.Value())
423 : coord_register + 1;
424 const u32 bias_offset = coord_count > 2 ? 1 : 0;
425
426 std::vector<Node> coords;
427 for (std::size_t i = 0; i < coord_count; ++i) {
428 const bool last = (i == (coord_count - 1)) && (coord_count > 1);
429 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
430 }
431
432 const Node array = is_array ? GetRegister(array_register) : nullptr;
433
434 Node dc{};
435 if (depth_compare) {
436 // Depth is always stored in the register signaled by gpr20 or in the next register if lod
437 // or bias are used
438 const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
439 dc = GetRegister(depth_register);
440 }
441
442 return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
443}
444
445Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
446 bool is_array) {
447 const std::size_t coord_count = GetCoordCount(texture_type);
448 const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
449 const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
450
451 // If enabled arrays index is always stored in the gpr8 field
452 const u64 array_register = instr.gpr8.Value();
453 // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
454 const u64 coord_register = array_register + (is_array ? 1 : 0);
455
456 std::vector<Node> coords;
457 for (size_t i = 0; i < coord_count; ++i)
458 coords.push_back(GetRegister(coord_register + i));
459
460 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
461
462 Node4 values;
463 for (u32 element = 0; element < values.size(); ++element) {
464 auto coords_copy = coords;
465 MetaTexture meta{sampler, GetRegister(array_register), {}, {}, element};
466 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
467 }
468
469 return values;
470}
471
472Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
473 const std::size_t type_coord_count = GetCoordCount(texture_type);
474 const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
475
476 // If enabled arrays index is always stored in the gpr8 field
477 const u64 array_register = instr.gpr8.Value();
478 // if is array gpr20 is used
479 const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
480
481 const u64 last_coord_register =
482 ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
483 ? static_cast<u64>(instr.gpr20.Value())
484 : coord_register + 1;
485
486 std::vector<Node> coords;
487 for (std::size_t i = 0; i < type_coord_count; ++i) {
488 const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
489 coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
490 }
491
492 const Node array = is_array ? GetRegister(array_register) : nullptr;
493 // When lod is used always is in gpr20
494 const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
495
496 const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
497
498 Node4 values;
499 for (u32 element = 0; element < values.size(); ++element) {
500 auto coords_copy = coords;
501 MetaTexture meta{sampler, array, {}, {lod}, element};
502 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
503 }
504 return values;
505}
506
507std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
508 TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
509 std::size_t max_coords, std::size_t max_inputs) {
510 const std::size_t coord_count = GetCoordCount(texture_type);
511
512 std::size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
513 const std::size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
514 if (total_coord_count > max_coords || total_reg_count > max_inputs) {
515 UNIMPLEMENTED_MSG("Unsupported Texture operation");
516 total_coord_count = std::min(total_coord_count, max_coords);
517 }
518 // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
519 total_coord_count +=
520 (depth_compare && !is_array && texture_type == TextureType::Texture1D) ? 1 : 0;
521
522 return {coord_count, total_coord_count};
523}
524
525} // namespace VideoCommon::Shader \ No newline at end of file
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 52c7f2c4e..0548c46f0 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -614,6 +614,7 @@ private:
614 u32 DecodeHfma2(NodeBlock& bb, u32 pc); 614 u32 DecodeHfma2(NodeBlock& bb, u32 pc);
615 u32 DecodeConversion(NodeBlock& bb, u32 pc); 615 u32 DecodeConversion(NodeBlock& bb, u32 pc);
616 u32 DecodeMemory(NodeBlock& bb, u32 pc); 616 u32 DecodeMemory(NodeBlock& bb, u32 pc);
617 u32 DecodeTexture(NodeBlock& bb, u32 pc);
617 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc); 618 u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
618 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc); 619 u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
619 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc); 620 u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);