summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar bunnei2020-02-01 20:41:40 -0500
committerGravatar GitHub2020-02-01 20:41:40 -0500
commitb5bbe7e752d5d36839a86638bfaa4b4c348497cd (patch)
treeb16b3f8ce5ec6233f9f822ad56418d74f0cd47ae
parentMerge pull request #3268 from CJBok/deadzone (diff)
parentShader_IR: Address feedback. (diff)
downloadyuzu-b5bbe7e752d5d36839a86638bfaa4b4c348497cd.tar.gz
yuzu-b5bbe7e752d5d36839a86638bfaa4b4c348497cd.tar.xz
yuzu-b5bbe7e752d5d36839a86638bfaa4b4c348497cd.zip
Merge pull request #3282 from FernandoS27/indexed-samplers
Partially implement Indexed samplers in general and specific code in GLSL
-rw-r--r--src/video_core/CMakeLists.txt2
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h4
-rw-r--r--src/video_core/engines/kepler_compute.cpp8
-rw-r--r--src/video_core/engines/kepler_compute.h4
-rw-r--r--src/video_core/engines/maxwell_3d.cpp8
-rw-r--r--src/video_core/engines/maxwell_3d.h4
-rw-r--r--src/video_core/guest_driver.cpp36
-rw-r--r--src/video_core/guest_driver.h41
-rw-r--r--src/video_core/rasterizer_interface.h14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp34
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp19
-rw-r--r--src/video_core/shader/const_buffer_locker.cpp17
-rw-r--r--src/video_core/shader/const_buffer_locker.h21
-rw-r--r--src/video_core/shader/decode.cpp68
-rw-r--r--src/video_core/shader/decode/texture.cpp110
-rw-r--r--src/video_core/shader/node.h87
-rw-r--r--src/video_core/shader/node_helper.h6
-rw-r--r--src/video_core/shader/shader_ir.cpp9
-rw-r--r--src/video_core/shader/shader_ir.h16
-rw-r--r--src/video_core/shader/track.cpp106
24 files changed, 610 insertions, 58 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ccfed4f2e..04a25da4f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(video_core STATIC
29 gpu_synch.h 29 gpu_synch.h
30 gpu_thread.cpp 30 gpu_thread.cpp
31 gpu_thread.h 31 gpu_thread.h
32 guest_driver.cpp
33 guest_driver.h
32 macro_interpreter.cpp 34 macro_interpreter.cpp
33 macro_interpreter.h 35 macro_interpreter.h
34 memory_manager.cpp 36 memory_manager.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index 44b8b8d22..d56a47710 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/shader_bytecode.h" 10#include "video_core/engines/shader_bytecode.h"
11#include "video_core/engines/shader_type.h" 11#include "video_core/engines/shader_type.h"
12#include "video_core/guest_driver.h"
12#include "video_core/textures/texture.h" 13#include "video_core/textures/texture.h"
13 14
14namespace Tegra::Engines { 15namespace Tegra::Engines {
@@ -106,6 +107,9 @@ public:
106 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 107 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
107 u64 offset) const = 0; 108 u64 offset) const = 0;
108 virtual u32 GetBoundBuffer() const = 0; 109 virtual u32 GetBoundBuffer() const = 0;
110
111 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
112 virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
109}; 113};
110 114
111} // namespace Tegra::Engines 115} // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 110406f2f..4b824aa4e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
94 return result; 94 return result;
95} 95}
96 96
97VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
98 return rasterizer.AccessGuestDriverProfile();
99}
100
101const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
102 return rasterizer.AccessGuestDriverProfile();
103}
104
97void KeplerCompute::ProcessLaunch() { 105void KeplerCompute::ProcessLaunch() {
98 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); 106 const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
99 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, 107 memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 4ef3e0613..eeb79c56f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -218,6 +218,10 @@ public:
218 return regs.tex_cb_index; 218 return regs.tex_cb_index;
219 } 219 }
220 220
221 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
222
223 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
224
221private: 225private:
222 Core::System& system; 226 Core::System& system;
223 VideoCore::RasterizerInterface& rasterizer; 227 VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 58dfa8033..7cea146f0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
784 return result; 784 return result;
785} 785}
786 786
787VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
788 return rasterizer.AccessGuestDriverProfile();
789}
790
791const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
792 return rasterizer.AccessGuestDriverProfile();
793}
794
787} // namespace Tegra::Engines 795} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ee79260fc..8808bbf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1306,6 +1306,10 @@ public:
1306 return regs.tex_cb_index; 1306 return regs.tex_cb_index;
1307 } 1307 }
1308 1308
1309 VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
1310
1311 const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
1312
1309 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than 1313 /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
1310 /// we've seen used. 1314 /// we've seen used.
1311 using MacroMemory = std::array<u32, 0x40000>; 1315 using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
new file mode 100644
index 000000000..6adef459e
--- /dev/null
+++ b/src/video_core/guest_driver.cpp
@@ -0,0 +1,36 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <algorithm>
6#include <limits>
7
8#include "video_core/guest_driver.h"
9
10namespace VideoCore {
11
12void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
13 if (texture_handler_size_deduced) {
14 return;
15 }
16 const std::size_t size = bound_offsets.size();
17 if (size < 2) {
18 return;
19 }
20 std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
21 u32 min_val = std::numeric_limits<u32>::max();
22 for (std::size_t i = 1; i < size; ++i) {
23 if (bound_offsets[i] == bound_offsets[i - 1]) {
24 continue;
25 }
26 const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
27 min_val = std::min(min_val, new_min);
28 }
29 if (min_val > 2) {
30 return;
31 }
32 texture_handler_size_deduced = true;
33 texture_handler_size = min_texture_handler_size * min_val;
34}
35
36} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
new file mode 100644
index 000000000..fc1917347
--- /dev/null
+++ b/src/video_core/guest_driver.h
@@ -0,0 +1,41 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace VideoCore {
12
13/**
14 * The GuestDriverProfile class is used to learn about the GPU driver's behavior and collect
15 * information necessary for unavoidable HLE methods, such as shader tracking, as they are
16 * Entscheidungsproblems.
17 */
18class GuestDriverProfile {
19public:
20 void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
21
22 u32 GetTextureHandlerSize() const {
23 return texture_handler_size;
24 }
25
26 bool TextureHandlerSizeKnown() const {
27 return texture_handler_size_deduced;
28 }
29
30private:
31 // Minimum size of texture handler any driver can use.
32 static constexpr u32 min_texture_handler_size = 4;
33 // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
34 // use 4 bytes instead. Thus, certain drivers may squish the size.
35 static constexpr u32 default_texture_handler_size = 8;
36
37 u32 texture_handler_size = default_texture_handler_size;
38 bool texture_handler_size_deduced = false;
39};
40
41} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5b0eca9e2..c586cd6fe 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/engines/fermi_2d.h" 10#include "video_core/engines/fermi_2d.h"
11#include "video_core/gpu.h" 11#include "video_core/gpu.h"
12#include "video_core/guest_driver.h"
12 13
13namespace Tegra { 14namespace Tegra {
14class MemoryManager; 15class MemoryManager;
@@ -78,5 +79,18 @@ public:
78 /// Initialize disk cached resources for the game being emulated 79 /// Initialize disk cached resources for the game being emulated
79 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, 80 virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
80 const DiskResourceLoadCallback& callback = {}) {} 81 const DiskResourceLoadCallback& callback = {}) {}
82
83 /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
84 GuestDriverProfile& AccessGuestDriverProfile() {
85 return guest_driver_profile;
86 }
87
88 /// Grant read-only access to the Guest Driver Profile for obtaining info on the guest driver.
89 const GuestDriverProfile& AccessGuestDriverProfile() const {
90 return guest_driver_profile;
91 }
92
93private:
94 GuestDriverProfile guest_driver_profile{};
81}; 95};
82} // namespace VideoCore 96} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c428f06e4..362942e09 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -55,16 +55,20 @@ namespace {
55 55
56template <typename Engine, typename Entry> 56template <typename Engine, typename Entry>
57Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 57Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
58 Tegra::Engines::ShaderType shader_type) { 58 Tegra::Engines::ShaderType shader_type,
59 std::size_t index = 0) {
59 if (entry.IsBindless()) { 60 if (entry.IsBindless()) {
60 const Tegra::Texture::TextureHandle tex_handle = 61 const Tegra::Texture::TextureHandle tex_handle =
61 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); 62 engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
62 return engine.GetTextureInfo(tex_handle); 63 return engine.GetTextureInfo(tex_handle);
63 } 64 }
65 const auto& gpu_profile = engine.AccessGuestDriverProfile();
66 const u32 offset =
67 entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
64 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 68 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
65 return engine.GetStageTexture(shader_type, entry.GetOffset()); 69 return engine.GetStageTexture(shader_type, offset);
66 } else { 70 } else {
67 return engine.GetTexture(entry.GetOffset()); 71 return engine.GetTexture(offset);
68 } 72 }
69} 73}
70 74
@@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
942 u32 binding = device.GetBaseBindings(stage_index).sampler; 946 u32 binding = device.GetBaseBindings(stage_index).sampler;
943 for (const auto& entry : shader->GetShaderEntries().samplers) { 947 for (const auto& entry : shader->GetShaderEntries().samplers) {
944 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); 948 const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
945 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); 949 if (!entry.IsIndexed()) {
946 SetupTexture(binding++, texture, entry); 950 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
951 SetupTexture(binding++, texture, entry);
952 } else {
953 for (std::size_t i = 0; i < entry.Size(); ++i) {
954 const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
955 SetupTexture(binding++, texture, entry);
956 }
957 }
947 } 958 }
948} 959}
949 960
@@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
952 const auto& compute = system.GPU().KeplerCompute(); 963 const auto& compute = system.GPU().KeplerCompute();
953 u32 binding = 0; 964 u32 binding = 0;
954 for (const auto& entry : kernel->GetShaderEntries().samplers) { 965 for (const auto& entry : kernel->GetShaderEntries().samplers) {
955 const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); 966 if (!entry.IsIndexed()) {
956 SetupTexture(binding++, texture, entry); 967 const auto texture =
968 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
969 SetupTexture(binding++, texture, entry);
970 } else {
971 for (std::size_t i = 0; i < entry.Size(); ++i) {
972 const auto texture =
973 GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
974 SetupTexture(binding++, texture, entry);
975 }
976 }
957 } 977 }
958} 978}
959 979
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3c5bdd377..489eb143c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s
214} 214}
215 215
216void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { 216void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
217 locker.SetBoundBuffer(usage.bound_buffer);
217 for (const auto& key : usage.keys) { 218 for (const auto& key : usage.keys) {
218 const auto [buffer, offset] = key.first; 219 const auto [buffer, offset] = key.first;
219 locker.InsertKey(buffer, offset, key.second); 220 locker.InsertKey(buffer, offset, key.second);
@@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() {
418 419
419ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, 420ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
420 const ConstBufferLocker& locker) const { 421 const ConstBufferLocker& locker) const {
421 return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), 422 return ShaderDiskCacheUsage{unique_identifier, variant,
423 locker.GetBoundBuffer(), locker.GetKeys(),
422 locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; 424 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
423} 425}
424 426
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a1ac3d7a9..4735000b5 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -391,6 +391,7 @@ public:
391 DeclareVertex(); 391 DeclareVertex();
392 DeclareGeometry(); 392 DeclareGeometry();
393 DeclareRegisters(); 393 DeclareRegisters();
394 DeclareCustomVariables();
394 DeclarePredicates(); 395 DeclarePredicates();
395 DeclareLocalMemory(); 396 DeclareLocalMemory();
396 DeclareInternalFlags(); 397 DeclareInternalFlags();
@@ -503,6 +504,16 @@ private:
503 } 504 }
504 } 505 }
505 506
507 void DeclareCustomVariables() {
508 const u32 num_custom_variables = ir.GetNumCustomVariables();
509 for (u32 i = 0; i < num_custom_variables; ++i) {
510 code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
511 }
512 if (num_custom_variables > 0) {
513 code.AddNewLine();
514 }
515 }
516
506 void DeclarePredicates() { 517 void DeclarePredicates() {
507 const auto& predicates = ir.GetPredicates(); 518 const auto& predicates = ir.GetPredicates();
508 for (const auto pred : predicates) { 519 for (const auto pred : predicates) {
@@ -655,7 +666,8 @@ private:
655 u32 binding = device.GetBaseBindings(stage).sampler; 666 u32 binding = device.GetBaseBindings(stage).sampler;
656 for (const auto& sampler : ir.GetSamplers()) { 667 for (const auto& sampler : ir.GetSamplers()) {
657 const std::string name = GetSampler(sampler); 668 const std::string name = GetSampler(sampler);
658 const std::string description = fmt::format("layout (binding = {}) uniform", binding++); 669 const std::string description = fmt::format("layout (binding = {}) uniform", binding);
670 binding += sampler.IsIndexed() ? sampler.Size() : 1;
659 671
660 std::string sampler_type = [&]() { 672 std::string sampler_type = [&]() {
661 if (sampler.IsBuffer()) { 673 if (sampler.IsBuffer()) {
@@ -682,7 +694,11 @@ private:
682 sampler_type += "Shadow"; 694 sampler_type += "Shadow";
683 } 695 }
684 696
685 code.AddLine("{} {} {};", description, sampler_type, name); 697 if (!sampler.IsIndexed()) {
698 code.AddLine("{} {} {};", description, sampler_type, name);
699 } else {
700 code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size());
701 }
686 } 702 }
687 if (!ir.GetSamplers().empty()) { 703 if (!ir.GetSamplers().empty()) {
688 code.AddNewLine(); 704 code.AddNewLine();
@@ -775,6 +791,11 @@ private:
775 return {GetRegister(index), Type::Float}; 791 return {GetRegister(index), Type::Float};
776 } 792 }
777 793
794 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
795 const u32 index = cv->GetIndex();
796 return {GetCustomVariable(index), Type::Float};
797 }
798
778 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { 799 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
779 const u32 value = immediate->GetValue(); 800 const u32 value = immediate->GetValue();
780 if (value < 10) { 801 if (value < 10) {
@@ -1098,7 +1119,11 @@ private:
1098 } else if (!meta->ptp.empty()) { 1119 } else if (!meta->ptp.empty()) {
1099 expr += "Offsets"; 1120 expr += "Offsets";
1100 } 1121 }
1101 expr += '(' + GetSampler(meta->sampler) + ", "; 1122 if (!meta->sampler.IsIndexed()) {
1123 expr += '(' + GetSampler(meta->sampler) + ", ";
1124 } else {
1125 expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
1126 }
1102 expr += coord_constructors.at(count + (has_array ? 1 : 0) + 1127 expr += coord_constructors.at(count + (has_array ? 1 : 0) +
1103 (has_shadow && !separate_dc ? 1 : 0) - 1); 1128 (has_shadow && !separate_dc ? 1 : 0) - 1);
1104 expr += '('; 1129 expr += '(';
@@ -1310,6 +1335,8 @@ private:
1310 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); 1335 const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
1311 target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), 1336 target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
1312 Type::Uint}; 1337 Type::Uint};
1338 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1339 target = {GetCustomVariable(cv->GetIndex()), Type::Float};
1313 } else { 1340 } else {
1314 UNREACHABLE_MSG("Assign called without a proper target"); 1341 UNREACHABLE_MSG("Assign called without a proper target");
1315 } 1342 }
@@ -2237,6 +2264,10 @@ private:
2237 return GetDeclarationWithSuffix(index, "gpr"); 2264 return GetDeclarationWithSuffix(index, "gpr");
2238 } 2265 }
2239 2266
2267 std::string GetCustomVariable(u32 index) const {
2268 return GetDeclarationWithSuffix(index, "custom_var");
2269 }
2270
2240 std::string GetPredicate(Tegra::Shader::Pred pred) const { 2271 std::string GetPredicate(Tegra::Shader::Pred pred) const {
2241 return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); 2272 return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
2242 } 2273 }
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index cf874a09a..1fc204f6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -53,7 +53,7 @@ struct BindlessSamplerKey {
53 Tegra::Engines::SamplerDescriptor sampler{}; 53 Tegra::Engines::SamplerDescriptor sampler{};
54}; 54};
55 55
56constexpr u32 NativeVersion = 11; 56constexpr u32 NativeVersion = 12;
57 57
58// Making sure sizes don't change by accident 58// Making sure sizes don't change by accident
59static_assert(sizeof(ProgramVariant) == 20); 59static_assert(sizeof(ProgramVariant) == 20);
@@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
186 u32 num_bound_samplers{}; 186 u32 num_bound_samplers{};
187 u32 num_bindless_samplers{}; 187 u32 num_bindless_samplers{};
188 if (file.ReadArray(&usage.unique_identifier, 1) != 1 || 188 if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
189 file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || 189 file.ReadArray(&usage.variant, 1) != 1 ||
190 file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
190 file.ReadArray(&num_bound_samplers, 1) != 1 || 191 file.ReadArray(&num_bound_samplers, 1) != 1 ||
191 file.ReadArray(&num_bindless_samplers, 1) != 1) { 192 file.ReadArray(&num_bindless_samplers, 1) != 1) {
192 LOG_ERROR(Render_OpenGL, error_loading); 193 LOG_ERROR(Render_OpenGL, error_loading);
@@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
281 u32 num_bindless_samplers{}; 282 u32 num_bindless_samplers{};
282 ShaderDiskCacheUsage usage; 283 ShaderDiskCacheUsage usage;
283 if (!LoadObjectFromPrecompiled(usage.unique_identifier) || 284 if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
284 !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || 285 !LoadObjectFromPrecompiled(usage.variant) ||
286 !LoadObjectFromPrecompiled(usage.bound_buffer) ||
287 !LoadObjectFromPrecompiled(num_keys) ||
285 !LoadObjectFromPrecompiled(num_bound_samplers) || 288 !LoadObjectFromPrecompiled(num_bound_samplers) ||
286 !LoadObjectFromPrecompiled(num_bindless_samplers)) { 289 !LoadObjectFromPrecompiled(num_bindless_samplers)) {
287 return {}; 290 return {};
@@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
393 396
394 if (file.WriteObject(TransferableEntryKind::Usage) != 1 || 397 if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
395 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || 398 file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
399 file.WriteObject(usage.bound_buffer) != 1 ||
396 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || 400 file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
397 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || 401 file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
398 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { 402 file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
@@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
447 }; 451 };
448 452
449 if (!SaveObjectToPrecompiled(usage.unique_identifier) || 453 if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
450 !SaveObjectToPrecompiled(usage.variant) || 454 !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
451 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || 455 !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
452 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || 456 !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
453 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { 457 !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 69a2fbdda..ef2371f6d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>);
79struct ShaderDiskCacheUsage { 79struct ShaderDiskCacheUsage {
80 u64 unique_identifier{}; 80 u64 unique_identifier{};
81 ProgramVariant variant; 81 ProgramVariant variant;
82 u32 bound_buffer{};
82 VideoCommon::Shader::KeyMap keys; 83 VideoCommon::Shader::KeyMap keys;
83 VideoCommon::Shader::BoundSamplerMap bound_samplers; 84 VideoCommon::Shader::BoundSamplerMap bound_samplers;
84 VideoCommon::Shader::BindlessSamplerMap bindless_samplers; 85 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 1ab22251e..24a658dce 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -353,6 +353,7 @@ private:
353 DeclareFragment(); 353 DeclareFragment();
354 DeclareCompute(); 354 DeclareCompute();
355 DeclareRegisters(); 355 DeclareRegisters();
356 DeclareCustomVariables();
356 DeclarePredicates(); 357 DeclarePredicates();
357 DeclareLocalMemory(); 358 DeclareLocalMemory();
358 DeclareSharedMemory(); 359 DeclareSharedMemory();
@@ -586,6 +587,15 @@ private:
586 } 587 }
587 } 588 }
588 589
590 void DeclareCustomVariables() {
591 const u32 num_custom_variables = ir.GetNumCustomVariables();
592 for (u32 i = 0; i < num_custom_variables; ++i) {
593 const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
594 Name(id, fmt::format("custom_var_{}", i));
595 custom_variables.emplace(i, AddGlobalVariable(id));
596 }
597 }
598
589 void DeclarePredicates() { 599 void DeclarePredicates() {
590 for (const auto pred : ir.GetPredicates()) { 600 for (const auto pred : ir.GetPredicates()) {
591 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); 601 const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
@@ -982,6 +992,11 @@ private:
982 return {OpLoad(t_float, registers.at(index)), Type::Float}; 992 return {OpLoad(t_float, registers.at(index)), Type::Float};
983 } 993 }
984 994
995 if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
996 const u32 index = cv->GetIndex();
997 return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
998 }
999
985 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { 1000 if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
986 return {Constant(t_uint, immediate->GetValue()), Type::Uint}; 1001 return {Constant(t_uint, immediate->GetValue()), Type::Uint};
987 } 1002 }
@@ -1333,6 +1348,9 @@ private:
1333 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { 1348 } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
1334 target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; 1349 target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
1335 1350
1351 } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
1352 target = {custom_variables.at(cv->GetIndex()), Type::Float};
1353
1336 } else { 1354 } else {
1337 UNIMPLEMENTED(); 1355 UNIMPLEMENTED();
1338 } 1356 }
@@ -2508,6 +2526,7 @@ private:
2508 Id out_vertex{}; 2526 Id out_vertex{};
2509 Id in_vertex{}; 2527 Id in_vertex{};
2510 std::map<u32, Id> registers; 2528 std::map<u32, Id> registers;
2529 std::map<u32, Id> custom_variables;
2511 std::map<Tegra::Shader::Pred, Id> predicates; 2530 std::map<Tegra::Shader::Pred, Id> predicates;
2512 std::map<u32, Id> flow_variables; 2531 std::map<u32, Id> flow_variables;
2513 Id local_memory{}; 2532 Id local_memory{};
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
index a4a0319eb..0638be8cb 100644
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle
66 return value; 66 return value;
67} 67}
68 68
69std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
70 if (bound_buffer_saved) {
71 return bound_buffer;
72 }
73 if (!engine) {
74 return std::nullopt;
75 }
76 bound_buffer_saved = true;
77 bound_buffer = engine->GetBoundBuffer();
78 return bound_buffer;
79}
80
69void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { 81void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
70 keys.insert_or_assign({buffer, offset}, value); 82 keys.insert_or_assign({buffer, offset}, value);
71} 83}
@@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes
78 bindless_samplers.insert_or_assign({buffer, offset}, sampler); 90 bindless_samplers.insert_or_assign({buffer, offset}, sampler);
79} 91}
80 92
93void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
94 bound_buffer_saved = true;
95 bound_buffer = buffer;
96}
97
81bool ConstBufferLocker::IsConsistent() const { 98bool ConstBufferLocker::IsConsistent() const {
82 if (!engine) { 99 if (!engine) {
83 return false; 100 return false;
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index d32e2d657..d3ea11087 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -10,6 +10,7 @@
10#include "common/hash.h" 10#include "common/hash.h"
11#include "video_core/engines/const_buffer_engine_interface.h" 11#include "video_core/engines/const_buffer_engine_interface.h"
12#include "video_core/engines/shader_type.h" 12#include "video_core/engines/shader_type.h"
13#include "video_core/guest_driver.h"
13 14
14namespace VideoCommon::Shader { 15namespace VideoCommon::Shader {
15 16
@@ -40,6 +41,8 @@ public:
40 41
41 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); 42 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
42 43
44 std::optional<u32> ObtainBoundBuffer();
45
43 /// Inserts a key. 46 /// Inserts a key.
44 void InsertKey(u32 buffer, u32 offset, u32 value); 47 void InsertKey(u32 buffer, u32 offset, u32 value);
45 48
@@ -49,6 +52,9 @@ public:
49 /// Inserts a bindless sampler key. 52 /// Inserts a bindless sampler key.
50 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); 53 void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
51 54
55 /// Set the bound buffer for this locker.
56 void SetBoundBuffer(u32 buffer);
57
52 /// Checks keys and samplers against engine's current const buffers. Returns true if they are 58 /// Checks keys and samplers against engine's current const buffers. Returns true if they are
53 /// the same value, false otherwise; 59 /// the same value, false otherwise;
54 bool IsConsistent() const; 60 bool IsConsistent() const;
@@ -71,12 +77,27 @@ public:
71 return bindless_samplers; 77 return bindless_samplers;
72 } 78 }
73 79
80 /// Gets bound buffer used on this shader
/// Returns the stored bound_buffer member as-is; it does not check bound_buffer_saved.
/// The member is value-initialized, so this yields 0 until a bound buffer has been stored.
81 u32 GetBoundBuffer() const {
82 return bound_buffer;
83 }
84
85 /// Obtains access to the guest driver's profile.
/// @return Pointer to the profile exposed by the attached engine, or nullptr when no engine
///         is attached. Callers must handle the nullptr case.
86 VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
87 if (engine) {
88 return &engine->AccessGuestDriverProfile();
89 }
90 return nullptr;
91 }
92
74private: 93private:
75 const Tegra::Engines::ShaderType stage; 94 const Tegra::Engines::ShaderType stage;
76 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; 95 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
77 KeyMap keys; 96 KeyMap keys;
78 BoundSamplerMap bound_samplers; 97 BoundSamplerMap bound_samplers;
79 BindlessSamplerMap bindless_samplers; 98 BindlessSamplerMap bindless_samplers;
99 bool bound_buffer_saved{};
100 u32 bound_buffer{};
80}; 101};
81 102
82} // namespace VideoCommon::Shader 103} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 22c3e5120..6b697ed5d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -3,6 +3,7 @@
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include <cstring> 5#include <cstring>
6#include <limits>
6#include <set> 7#include <set>
7 8
8#include <fmt/format.h> 9#include <fmt/format.h>
@@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
33 return (absolute_offset % SchedPeriod) == 0; 34 return (absolute_offset % SchedPeriod) == 0;
34} 35}
35 36
/// Feeds the offsets of the bound (non-bindless) samplers to the guest driver profile so it
/// can deduce the texture handler size.
/// Does nothing when the profile is missing (logged as critical), when the handler size is
/// already known, or when fewer than two bound samplers are available.
/// @param gpu_driver    Guest driver profile to update; may be nullptr.
/// @param used_samplers Samplers collected so far; bindless entries are ignored.
37void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
38 const std::list<Sampler>& used_samplers) {
39 if (gpu_driver == nullptr) {
40 LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
41 return;
42 }
43 if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
44 return;
45 }
// `count` tracks how many bound samplers were collected; it always equals
// bound_offsets.size() once the loop finishes.
46 u32 count{};
47 std::vector<u32> bound_offsets;
48 for (const auto& sampler : used_samplers) {
49 if (sampler.IsBindless()) {
50 continue;
51 }
52 ++count;
53 bound_offsets.emplace_back(sampler.GetOffset());
54 }
// The deduction is only requested when at least two bound offsets were collected.
55 if (count > 1) {
56 gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
57 }
58}
59
/// Estimates how many texture handlers an indexed sampler spans by measuring the distance to
/// the next bound sampler.
/// Finds the smallest offset among the non-bindless samplers that is strictly greater than
/// the offset of `sampler_to_deduce`, then returns
/// ((next_offset - base_offset) * 4) / texture handler size.
/// @return std::nullopt when the driver profile is missing (logged as critical) or when no
///         bound sampler lies after `sampler_to_deduce`.
60std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
61 VideoCore::GuestDriverProfile* gpu_driver,
62 const std::list<Sampler>& used_samplers) {
63 if (gpu_driver == nullptr) {
64 LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
65 return std::nullopt;
66 }
67 const u32 base_offset = sampler_to_deduce.GetOffset();
// u32 max doubles as the "no following sampler found" sentinel.
68 u32 max_offset{std::numeric_limits<u32>::max()};
69 for (const auto& sampler : used_samplers) {
70 if (sampler.IsBindless()) {
71 continue;
72 }
73 if (sampler.GetOffset() > base_offset) {
74 max_offset = std::min(sampler.GetOffset(), max_offset);
75 }
76 }
77 if (max_offset == std::numeric_limits<u32>::max()) {
78 return std::nullopt;
79 }
// Presumably offsets are counted in 32-bit words and the * 4 converts the distance to
// bytes before dividing by the handler size -- TODO confirm.
// NOTE(review): assumes GetTextureHandlerSize() never returns 0 -- confirm.
80 return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
81}
82
36} // Anonymous namespace 83} // Anonymous namespace
37 84
38class ASTDecoder { 85class ASTDecoder {
@@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
315 return pc + 1; 362 return pc + 1;
316} 363}
317 364
/// Post-processing pass over the samplers gathered while decoding.
/// First lets the guest driver profile deduce the texture handler size from the bound
/// samplers. Then, only if indexed samplers were used, assigns a size to every indexed
/// sampler; when the size cannot be deduced a critical message is logged and the size
/// falls back to 1.
365void ShaderIR::PostDecode() {
366 // Deduce texture handler size if needed
// gpu_driver may be nullptr; both helpers below check for that themselves.
367 auto gpu_driver = locker.AccessGuestDriverProfile();
368 DeduceTextureHandlerSize(gpu_driver, used_samplers);
369 // Deduce Indexed Samplers
370 if (!uses_indexed_samplers) {
371 return;
372 }
373 for (auto& sampler : used_samplers) {
374 if (!sampler.IsIndexed()) {
375 continue;
376 }
377 if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
378 sampler.SetSize(*size);
379 } else {
380 LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
381 sampler.SetSize(1);
382 }
383 }
384}
385
318} // namespace VideoCommon::Shader 386} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b567e39d..d980535b1 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
144 Node4 values; 144 Node4 values;
145 for (u32 element = 0; element < values.size(); ++element) { 145 for (u32 element = 0; element < values.size(); ++element) {
146 auto coords_copy = coords; 146 auto coords_copy = coords;
147 MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; 147 MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {},
148 {}, {}, component, element, {}};
148 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 149 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
149 } 150 }
150 151
@@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
167 const auto derivate_reg = instr.gpr20.Value(); 168 const auto derivate_reg = instr.gpr20.Value();
168 const auto texture_type = instr.txd.texture_type.Value(); 169 const auto texture_type = instr.txd.texture_type.Value();
169 const auto coord_count = GetCoordCount(texture_type); 170 const auto coord_count = GetCoordCount(texture_type);
170 171 Node index_var{};
171 const Sampler* sampler = 172 const Sampler* sampler =
172 is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) 173 is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
173 : GetSampler(instr.sampler, {{texture_type, is_array, false}}); 174 : GetSampler(instr.sampler, {{texture_type, is_array, false}});
174 Node4 values; 175 Node4 values;
175 if (sampler == nullptr) { 176 if (sampler == nullptr) {
@@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
200 } 201 }
201 202
202 for (u32 element = 0; element < values.size(); ++element) { 203 for (u32 element = 0; element < values.size(); ++element) {
203 MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; 204 MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates,
205 {}, {}, {}, element, index_var};
204 values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); 206 values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
205 } 207 }
206 208
@@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
215 // TODO: The new commits on the texture refactor, change the way samplers work. 217 // TODO: The new commits on the texture refactor, change the way samplers work.
216 // Sadly, not all texture instructions specify the type of texture their sampler 218 // Sadly, not all texture instructions specify the type of texture their sampler
217 // uses. This must be fixed at a later instance. 219 // uses. This must be fixed at a later instance.
220 Node index_var{};
218 const Sampler* sampler = 221 const Sampler* sampler =
219 is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); 222 is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);
220 223
221 if (sampler == nullptr) { 224 if (sampler == nullptr) {
222 u32 indexer = 0; 225 u32 indexer = 0;
@@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
240 if (!instr.txq.IsComponentEnabled(element)) { 243 if (!instr.txq.IsComponentEnabled(element)) {
241 continue; 244 continue;
242 } 245 }
243 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; 246 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
244 const Node value = 247 const Node value =
245 Operation(OperationCode::TextureQueryDimensions, meta, 248 Operation(OperationCode::TextureQueryDimensions, meta,
246 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); 249 GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
266 269
267 auto texture_type = instr.tmml.texture_type.Value(); 270 auto texture_type = instr.tmml.texture_type.Value();
268 const bool is_array = instr.tmml.array != 0; 271 const bool is_array = instr.tmml.array != 0;
272 Node index_var{};
269 const Sampler* sampler = 273 const Sampler* sampler =
270 is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); 274 is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
271 275
272 if (sampler == nullptr) { 276 if (sampler == nullptr) {
273 u32 indexer = 0; 277 u32 indexer = 0;
@@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
309 continue; 313 continue;
310 } 314 }
311 auto params = coords; 315 auto params = coords;
312 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; 316 MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
313 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); 317 const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
314 SetTemporary(bb, indexer++, value); 318 SetTemporary(bb, indexer++, value);
315 } 319 }
@@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
383 // Otherwise create a new mapping for this sampler 387 // Otherwise create a new mapping for this sampler
384 const auto next_index = static_cast<u32>(used_samplers.size()); 388 const auto next_index = static_cast<u32>(used_samplers.size());
385 return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, 389 return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
386 info.is_buffer); 390 info.is_buffer, false);
387} 391}
388 392
/// Resolves the sampler referenced through register `reg`. This summary describes the
/// updated implementation, i.e. the rows carrying the higher line numbers.
/// The register is tracked with TrackBindlessSampler; if no base node is found the ASSERT
/// fires and nullptr is returned. The tracked data is then handled per alternative:
///  - BindlessSamplerNode: returns the existing Sampler with the same buffer and offset, or
///    appends a new bindless, non-indexed Sampler.
///  - ArraySamplerNode: writes the custom variable node holding the index into `index_var`,
///    then returns the existing Sampler at base offset / 4 or appends a new bound, indexed
///    Sampler; uses_indexed_samplers is set only when a new entry is appended.
/// Falls through to nullptr if neither alternative matches.
389const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, 393const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
390 std::optional<SamplerInfo> sampler_info) { 394 std::optional<SamplerInfo> sampler_info) {
391 const Node sampler_register = GetRegister(reg); 395 const Node sampler_register = GetRegister(reg);
392 const auto [base_sampler, buffer, offset] = 396 const auto [base_node, tracked_sampler_info] =
393 TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); 397 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
394 ASSERT(base_sampler != nullptr); 398 ASSERT(base_node != nullptr);
395 if (base_sampler == nullptr) { 399 if (base_node == nullptr) {
396 return nullptr; 400 return nullptr;
397 } 401 }
398 402
399 const auto info = GetSamplerInfo(sampler_info, offset, buffer); 403 if (const auto bindless_sampler_info =
404 std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
405 const u32 buffer = bindless_sampler_info->GetIndex();
406 const u32 offset = bindless_sampler_info->GetOffset();
407 const auto info = GetSamplerInfo(sampler_info, offset, buffer);
408
409 // If this sampler has already been used, return the existing mapping.
410 const auto it =
411 std::find_if(used_samplers.begin(), used_samplers.end(),
412 [buffer = buffer, offset = offset](const Sampler& entry) {
413 return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
414 });
415 if (it != used_samplers.end()) {
416 ASSERT(it->IsBindless() && it->GetType() == info.type &&
417 it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow);
418 return &*it;
419 }
400 420
401 // If this sampler has already been used, return the existing mapping. 421 // Otherwise create a new mapping for this sampler
402 const auto it = 422 const auto next_index = static_cast<u32>(used_samplers.size());
403 std::find_if(used_samplers.begin(), used_samplers.end(), 423 return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
404 [buffer = buffer, offset = offset](const Sampler& entry) { 424 info.is_shadow, info.is_buffer, false);
405 return entry.GetBuffer() == buffer && entry.GetOffset() == offset; 425 } else if (const auto array_sampler_info =
406 }); 426 std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
407 if (it != used_samplers.end()) { 427 const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
408 ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && 428 index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
409 it->IsShadow() == info.is_shadow); 429 const auto info = GetSamplerInfo(sampler_info, base_offset);
410 return &*it; 430
411 } 431 // If this sampler has already been used, return the existing mapping.
432 const auto it = std::find_if(
433 used_samplers.begin(), used_samplers.end(),
434 [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; });
435 if (it != used_samplers.end()) {
436 ASSERT(!it->IsBindless() && it->GetType() == info.type &&
437 it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
438 it->IsBuffer() == info.is_buffer && it->IsIndexed());
439 return &*it;
440 }
412 441
413 // Otherwise create a new mapping for this sampler 442 uses_indexed_samplers = true;
414 const auto next_index = static_cast<u32>(used_samplers.size()); 443 // Otherwise create a new mapping for this sampler
415 return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, 444 const auto next_index = static_cast<u32>(used_samplers.size());
416 info.is_shadow, info.is_buffer); 445 return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
446 info.is_shadow, info.is_buffer, true);
447 }
448 return nullptr;
417} 449}
418 450
419void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { 451void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
499 "This method is not supported."); 531 "This method is not supported.");
500 532
501 const SamplerInfo info{texture_type, is_array, is_shadow, false}; 533 const SamplerInfo info{texture_type, is_array, is_shadow, false};
502 const Sampler* sampler = 534 Node index_var{};
503 is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); 535 const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
536 : GetSampler(instr.sampler, info);
504 Node4 values; 537 Node4 values;
505 if (sampler == nullptr) { 538 if (sampler == nullptr) {
506 for (u32 element = 0; element < values.size(); ++element) { 539 for (u32 element = 0; element < values.size(); ++element) {
@@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
548 581
549 for (u32 element = 0; element < values.size(); ++element) { 582 for (u32 element = 0; element < values.size(); ++element) {
550 auto copy_coords = coords; 583 auto copy_coords = coords;
551 MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; 584 MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias,
585 lod, {}, element, index_var};
552 values[element] = Operation(read_method, meta, std::move(copy_coords)); 586 values[element] = Operation(read_method, meta, std::move(copy_coords));
553 } 587 }
554 588
@@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
663 u64 parameter_register = instr.gpr20.Value(); 697 u64 parameter_register = instr.gpr20.Value();
664 698
665 const SamplerInfo info{texture_type, is_array, depth_compare, false}; 699 const SamplerInfo info{texture_type, is_array, depth_compare, false};
666 const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) 700 Node index_var{};
701 const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)
667 : GetSampler(instr.sampler, info); 702 : GetSampler(instr.sampler, info);
668 Node4 values; 703 Node4 values;
669 if (sampler == nullptr) { 704 if (sampler == nullptr) {
@@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
692 for (u32 element = 0; element < values.size(); ++element) { 727 for (u32 element = 0; element < values.size(); ++element) {
693 auto coords_copy = coords; 728 auto coords_copy = coords;
694 MetaTexture meta{ 729 MetaTexture meta{
695 *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; 730 *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
731 index_var};
696 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); 732 values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
697 } 733 }
698 734
@@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
725 Node4 values; 761 Node4 values;
726 for (u32 element = 0; element < values.size(); ++element) { 762 for (u32 element = 0; element < values.size(); ++element) {
727 auto coords_copy = coords; 763 auto coords_copy = coords;
728 MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; 764 MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
729 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); 765 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
730 } 766 }
731 767
@@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
775 Node4 values; 811 Node4 values;
776 for (u32 element = 0; element < values.size(); ++element) { 812 for (u32 element = 0; element < values.size(); ++element) {
777 auto coords_copy = coords; 813 auto coords_copy = coords;
778 MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; 814 MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
779 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); 815 values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
780 } 816 }
781 return values; 817 return values;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 9af1f0228..5f83403db 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -212,6 +212,7 @@ enum class MetaStackClass {
212class OperationNode; 212class OperationNode;
213class ConditionalNode; 213class ConditionalNode;
214class GprNode; 214class GprNode;
215class CustomVarNode;
215class ImmediateNode; 216class ImmediateNode;
216class InternalFlagNode; 217class InternalFlagNode;
217class PredicateNode; 218class PredicateNode;
@@ -223,26 +224,32 @@ class SmemNode;
223class GmemNode; 224class GmemNode;
224class CommentNode; 225class CommentNode;
225 226
226using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, 227using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
227 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, 228 InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
228 LmemNode, SmemNode, GmemNode, CommentNode>; 229 LmemNode, SmemNode, GmemNode, CommentNode>;
229using Node = std::shared_ptr<NodeData>; 230using Node = std::shared_ptr<NodeData>;
230using Node4 = std::array<Node, 4>; 231using Node4 = std::array<Node, 4>;
231using NodeBlock = std::vector<Node>; 232using NodeBlock = std::vector<Node>;
232 233
234class BindlessSamplerNode;
235class ArraySamplerNode;
236
237using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
238using TrackSampler = std::shared_ptr<TrackSamplerData>;
239
233class Sampler { 240class Sampler {
234public: 241public:
235 /// This constructor is for bound samplers 242 /// This constructor is for bound samplers
236 constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, 243 constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
237 bool is_array, bool is_shadow, bool is_buffer) 244 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
238 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, 245 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
239 is_buffer{is_buffer} {} 246 is_buffer{is_buffer}, is_indexed{is_indexed} {}
240 247
241 /// This constructor is for bindless samplers 248 /// This constructor is for bindless samplers
242 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, 249 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
243 bool is_array, bool is_shadow, bool is_buffer) 250 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
244 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, 251 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
245 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} 252 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
246 253
247 constexpr u32 GetIndex() const { 254 constexpr u32 GetIndex() const {
248 return index; 255 return index;
@@ -276,16 +283,72 @@ public:
276 return is_bindless; 283 return is_bindless;
277 } 284 }
278 285
286 constexpr bool IsIndexed() const {
287 return is_indexed;
288 }
289
290 constexpr u32 Size() const {
291 return size;
292 }
293
294 constexpr void SetSize(u32 new_size) {
295 size = new_size;
296 }
297
279private: 298private:
280 u32 index{}; ///< Emulated index given for the this sampler. 299 u32 index{}; ///< Emulated index given for the this sampler.
281 u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. 300 u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
282 u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). 301 u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
302 u32 size{}; ///< Size of the sampler if indexed.
283 303
284 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 304 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
285 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. 305 bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
286 bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. 306 bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
287 bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. 307 bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler.
288 bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. 308 bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
309 bool is_indexed{}; ///< Whether this sampler is an indexed array of textures.
310};
311
312/// Represents a tracked indexed (array) sampler: a base offset plus an index variable
/// The value returned by GetIndexVar() is the id of the custom variable that selects the
/// element. `index` is presumably the const buffer index, mirroring BindlessSamplerNode --
/// TODO confirm against the tracking code.
313class ArraySamplerNode final {
314public:
315 explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
316 : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
317
318 constexpr u32 GetIndex() const {
319 return index;
320 }
321
322 constexpr u32 GetBaseOffset() const {
323 return base_offset;
324 }
325
// Returns the stored bindless_var, i.e. the id of the index variable.
326 constexpr u32 GetIndexVar() const {
327 return bindless_var;
328 }
329
330private:
331 u32 index;
332 u32 base_offset;
333 u32 bindless_var;
334};
335
336/// Represents a tracked bindless sampler into a direct const buffer
/// GetIndex() yields the const buffer the handle is read from and GetOffset() the offset
/// inside that buffer; both values are fixed at construction.
337class BindlessSamplerNode final {
338public:
339 explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
340
341 constexpr u32 GetIndex() const {
342 return index;
343 }
344
345 constexpr u32 GetOffset() const {
346 return offset;
347 }
348
349private:
350 u32 index;
351 u32 offset;
289}; 352};
290 353
291class Image final { 354class Image final {
@@ -382,6 +445,7 @@ struct MetaTexture {
382 Node lod; 445 Node lod;
383 Node component{}; 446 Node component{};
384 u32 element{}; 447 u32 element{};
448 Node index{};
385}; 449};
386 450
387struct MetaImage { 451struct MetaImage {
@@ -488,6 +552,19 @@ private:
488 Tegra::Shader::Register index{}; 552 Tegra::Shader::Register index{};
489}; 553};
490 554
555/// A custom variable
/// Identified only by a numeric index supplied at construction; the node stores no value.
556class CustomVarNode final {
557public:
558 explicit constexpr CustomVarNode(u32 index) : index{index} {}
559
// Returns the index this node was constructed with.
560 constexpr u32 GetIndex() const {
561 return index;
562 }
563
564private:
565 u32 index{};
566};
567
491/// A 32-bits value that represents an immediate value 568/// A 32-bits value that represents an immediate value
492class ImmediateNode final { 569class ImmediateNode final {
493public: 570public:
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 0c2aa749b..11231bbea 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) {
45 return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); 45 return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
46} 46}
47 47
/// Builds a shared TrackSamplerData holding a T constructed from the forwarded arguments.
/// The static_assert rejects any T that is not convertible to TrackSamplerData.
48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
52}
53
48template <typename... Args> 54template <typename... Args>
49Node Operation(OperationCode code, Args&&... args) { 55Node Operation(OperationCode code, Args&&... args) {
50 if constexpr (sizeof...(args) == 0) { 56 if constexpr (sizeof...(args) == 0) {
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 31eecb3f4..3a5d280a9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
27 ConstBufferLocker& locker) 27 ConstBufferLocker& locker)
28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { 28 : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
29 Decode(); 29 Decode();
30 PostDecode();
30} 31}
31 32
32ShaderIR::~ShaderIR() = default; 33ShaderIR::~ShaderIR() = default;
@@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) {
38 return MakeNode<GprNode>(reg); 39 return MakeNode<GprNode>(reg);
39} 40}
40 41
/// Creates a new CustomVarNode for the custom variable `id`. Every call allocates a fresh
/// node and the id is not checked against the number of variables handed out so far.
42Node ShaderIR::GetCustomVariable(u32 id) {
43 return MakeNode<CustomVarNode>(id);
44}
45
41Node ShaderIR::GetImmediate19(Instruction instr) { 46Node ShaderIR::GetImmediate19(Instruction instr) {
42 return Immediate(instr.alu.GetImm20_19()); 47 return Immediate(instr.alu.GetImm20_19());
43} 48}
@@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {
452 return id; 457 return id;
453} 458}
454 459
/// Hands out the next custom variable id: returns the current counter value and then
/// increments it, so ids start at 0 and increase by one per call.
460u32 ShaderIR::NewCustomVariable() {
461 return num_custom_variables++;
462}
463
455} // namespace VideoCommon::Shader 464} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ba1db4c11..b0851c3be 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -180,6 +180,10 @@ public:
180 return amend_code[index]; 180 return amend_code[index];
181 } 181 }
182 182
/// Returns the current value of the custom variable counter (num_custom_variables).
183 u32 GetNumCustomVariables() const {
184 return num_custom_variables;
185 }
186
183private: 187private:
184 friend class ASTDecoder; 188 friend class ASTDecoder;
185 189
@@ -191,6 +195,7 @@ private:
191 }; 195 };
192 196
193 void Decode(); 197 void Decode();
198 void PostDecode();
194 199
195 NodeBlock DecodeRange(u32 begin, u32 end); 200 NodeBlock DecodeRange(u32 begin, u32 end);
196 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); 201 void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
@@ -235,6 +240,8 @@ private:
235 240
236 /// Generates a node for a passed register. 241 /// Generates a node for a passed register.
237 Node GetRegister(Tegra::Shader::Register reg); 242 Node GetRegister(Tegra::Shader::Register reg);
243 /// Generates a node for a custom variable
244 Node GetCustomVariable(u32 id);
238 /// Generates a node representing a 19-bit immediate value 245 /// Generates a node representing a 19-bit immediate value
239 Node GetImmediate19(Tegra::Shader::Instruction instr); 246 Node GetImmediate19(Tegra::Shader::Instruction instr);
240 /// Generates a node representing a 32-bit immediate value 247 /// Generates a node representing a 32-bit immediate value
@@ -321,7 +328,7 @@ private:
321 std::optional<SamplerInfo> sampler_info = std::nullopt); 328 std::optional<SamplerInfo> sampler_info = std::nullopt);
322 329
323 /// Accesses a texture sampler for a bindless texture. 330 /// Accesses a texture sampler for a bindless texture.
324 const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, 331 const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
325 std::optional<SamplerInfo> sampler_info = std::nullopt); 332 std::optional<SamplerInfo> sampler_info = std::nullopt);
326 333
327 /// Accesses an image. 334 /// Accesses an image.
@@ -387,6 +394,9 @@ private:
387 394
388 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 395 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
389 396
397 std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
398 s64 cursor);
399
390 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 400 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
391 401
392 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, 402 std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
@@ -399,6 +409,8 @@ private:
399 /// Register new amending code and obtain the reference id. 409 /// Register new amending code and obtain the reference id.
400 std::size_t DeclareAmend(Node new_amend); 410 std::size_t DeclareAmend(Node new_amend);
401 411
412 u32 NewCustomVariable();
413
402 const ProgramCode& program_code; 414 const ProgramCode& program_code;
403 const u32 main_offset; 415 const u32 main_offset;
404 const CompilerSettings settings; 416 const CompilerSettings settings;
@@ -414,6 +426,7 @@ private:
414 NodeBlock global_code; 426 NodeBlock global_code;
415 ASTManager program_manager{true, true}; 427 ASTManager program_manager{true, true};
416 std::vector<Node> amend_code; 428 std::vector<Node> amend_code;
429 u32 num_custom_variables{};
417 430
418 std::set<u32> used_registers; 431 std::set<u32> used_registers;
419 std::set<Tegra::Shader::Pred> used_predicates; 432 std::set<Tegra::Shader::Pred> used_predicates;
@@ -431,6 +444,7 @@ private:
431 bool uses_instance_id{}; 444 bool uses_instance_id{};
432 bool uses_vertex_id{}; 445 bool uses_vertex_id{};
433 bool uses_warps{}; 446 bool uses_warps{};
447 bool uses_indexed_samplers{};
434 448
435 Tegra::Shader::Header header; 449 Tegra::Shader::Header header;
436}; 450};
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 165c79330..ea39bca54 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -8,6 +8,7 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/shader/node.h" 10#include "video_core/shader/node.h"
11#include "video_core/shader/node_helper.h"
11#include "video_core/shader/shader_ir.h" 12#include "video_core/shader/shader_ir.h"
12 13
13namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
@@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
35 } 36 }
36 return {}; 37 return {};
37} 38}
39
40std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
41 if (operation.GetCode() != OperationCode::UAdd) {
42 return std::nullopt;
43 }
44 Node gpr{};
45 Node offset{};
46 ASSERT(operation.GetOperandsCount() == 2);
47 for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
48 Node operand = operation[i];
49 if (std::holds_alternative<ImmediateNode>(*operand)) {
50 offset = operation[i];
51 } else if (std::holds_alternative<GprNode>(*operand)) {
52 gpr = operation[i];
53 }
54 }
55 if (offset && gpr) {
56 return std::make_pair(gpr, offset);
57 }
58 return std::nullopt;
59}
60
61bool AmendNodeCv(std::size_t amend_index, Node node) {
62 if (const auto operation = std::get_if<OperationNode>(&*node)) {
63 operation->SetAmendIndex(amend_index);
64 return true;
65 } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
66 conditional->SetAmendIndex(amend_index);
67 return true;
68 }
69 return false;
70}
71
38} // Anonymous namespace 72} // Anonymous namespace
39 73
74std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
75 s64 cursor) {
76 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
77 // Constant buffer found, test if it's an immediate
78 const auto offset = cbuf->GetOffset();
79 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
80 auto track =
81 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
82 return {tracked, track};
83 } else if (const auto operation = std::get_if<OperationNode>(&*offset)) {
84 auto bound_buffer = locker.ObtainBoundBuffer();
85 if (!bound_buffer) {
86 return {};
87 }
88 if (*bound_buffer != cbuf->GetIndex()) {
89 return {};
90 }
91 auto pair = DecoupleIndirectRead(*operation);
92 if (!pair) {
93 return {};
94 }
95 auto [gpr, base_offset] = *pair;
96 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
97 auto gpu_driver = locker.AccessGuestDriverProfile();
98 if (gpu_driver == nullptr) {
99 return {};
100 }
101 const u32 bindless_cv = NewCustomVariable();
102 const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
103 Immediate(gpu_driver->GetTextureHandlerSize()));
104
105 const Node cv_node = GetCustomVariable(bindless_cv);
106 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
107 const std::size_t amend_index = DeclareAmend(amend_op);
108 AmendNodeCv(amend_index, code[cursor]);
109 // TODO Implement Bindless Index custom variable
110 auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
111 offset_inm->GetValue(), bindless_cv);
112 return {tracked, track};
113 }
114 return {};
115 }
116 if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
117 if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
118 return {};
119 }
120 // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
121 // register that it uses as operand
122 const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
123 if (!source) {
124 return {};
125 }
126 return TrackBindlessSampler(source, code, new_cursor);
127 }
128 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
129 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
130 if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
131 std::get<0>(found)) {
132 // Cbuf found in operand.
133 return found;
134 }
135 }
136 return {};
137 }
138 if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
139 const auto& conditional_code = conditional->GetCode();
140 return TrackBindlessSampler(tracked, conditional_code,
141 static_cast<s64>(conditional_code.size()));
142 }
143 return {};
144}
145
40std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, 146std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
41 s64 cursor) const { 147 s64 cursor) const {
42 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 148 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {