| author | 2020-02-01 20:41:40 -0500 |
|---|---|
| committer | 2020-02-01 20:41:40 -0500 |
| commit | b5bbe7e752d5d36839a86638bfaa4b4c348497cd (patch) |
| tree | b16b3f8ce5ec6233f9f822ad56418d74f0cd47ae /src |
| parent | Merge pull request #3268 from CJBok/deadzone (diff) |
| parent | Shader_IR: Address feedback. (diff) |
Merge pull request #3282 from FernandoS27/indexed-samplers
Partially implement indexed samplers in the generic shader code and in the GLSL-specific code
Diffstat (limited to 'src')
24 files changed, 610 insertions, 58 deletions
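For orientation before the per-file hunks: with this change, a sampler that a shader addresses through a runtime index is modelled in the shader IR as a single entry flagged as indexed, and the GLSL decompiler declares it as a sampler array indexed at run time (roughly `layout (binding = 8) uniform sampler2D tex[4];` accessed as `texture(tex[index], coords)`; these identifiers and sizes are illustrative, not taken from a real shader). A minimal sketch of the IR-side surface the hunks below add, with invented values:

```cpp
#include "video_core/shader/node.h"

// Invented values; the constructor's new trailing is_indexed flag and SetSize()
// come from the node.h and decode.cpp hunks below. The element count is filled
// in by ShaderIR::PostDecode() once neighbouring sampler offsets and the guest
// driver's texture handler size are known.
void Sketch() {
    VideoCommon::Shader::Sampler sampler(
        /*index=*/0, /*offset=*/0x48, Tegra::Shader::TextureType::Texture2D,
        /*is_array=*/false, /*is_shadow=*/false, /*is_buffer=*/false, /*is_indexed=*/true);
    sampler.SetSize(4); // the GLSL side then declares a 4-element sampler array
}
```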
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ccfed4f2e..04a25da4f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -29,6 +29,8 @@ add_library(video_core STATIC | |||
| 29 | gpu_synch.h | 29 | gpu_synch.h |
| 30 | gpu_thread.cpp | 30 | gpu_thread.cpp |
| 31 | gpu_thread.h | 31 | gpu_thread.h |
| 32 | guest_driver.cpp | ||
| 33 | guest_driver.h | ||
| 32 | macro_interpreter.cpp | 34 | macro_interpreter.cpp |
| 33 | macro_interpreter.h | 35 | macro_interpreter.h |
| 34 | memory_manager.cpp | 36 | memory_manager.cpp |
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index 44b8b8d22..d56a47710 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/shader_bytecode.h" | 10 | #include "video_core/engines/shader_bytecode.h" |
| 11 | #include "video_core/engines/shader_type.h" | 11 | #include "video_core/engines/shader_type.h" |
| 12 | #include "video_core/guest_driver.h" | ||
| 12 | #include "video_core/textures/texture.h" | 13 | #include "video_core/textures/texture.h" |
| 13 | 14 | ||
| 14 | namespace Tegra::Engines { | 15 | namespace Tegra::Engines { |
| @@ -106,6 +107,9 @@ public: | |||
| 106 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 107 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 107 | u64 offset) const = 0; | 108 | u64 offset) const = 0; |
| 108 | virtual u32 GetBoundBuffer() const = 0; | 109 | virtual u32 GetBoundBuffer() const = 0; |
| 110 | |||
| 111 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; | ||
| 112 | virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0; | ||
| 109 | }; | 113 | }; |
| 110 | 114 | ||
| 111 | } // namespace Tegra::Engines | 115 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index 110406f2f..4b824aa4e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -94,6 +94,14 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 94 | return result; | 94 | return result; |
| 95 | } | 95 | } |
| 96 | 96 | ||
| 97 | VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { | ||
| 98 | return rasterizer.AccessGuestDriverProfile(); | ||
| 99 | } | ||
| 100 | |||
| 101 | const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { | ||
| 102 | return rasterizer.AccessGuestDriverProfile(); | ||
| 103 | } | ||
| 104 | |||
| 97 | void KeplerCompute::ProcessLaunch() { | 105 | void KeplerCompute::ProcessLaunch() { |
| 98 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 106 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 99 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 107 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 4ef3e0613..eeb79c56f 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -218,6 +218,10 @@ public: | |||
| 218 | return regs.tex_cb_index; | 218 | return regs.tex_cb_index; |
| 219 | } | 219 | } |
| 220 | 220 | ||
| 221 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||
| 222 | |||
| 223 | const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | ||
| 224 | |||
| 221 | private: | 225 | private: |
| 222 | Core::System& system; | 226 | Core::System& system; |
| 223 | VideoCore::RasterizerInterface& rasterizer; | 227 | VideoCore::RasterizerInterface& rasterizer; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 58dfa8033..7cea146f0 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -784,4 +784,12 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 784 | return result; | 784 | return result; |
| 785 | } | 785 | } |
| 786 | 786 | ||
| 787 | VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { | ||
| 788 | return rasterizer.AccessGuestDriverProfile(); | ||
| 789 | } | ||
| 790 | |||
| 791 | const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { | ||
| 792 | return rasterizer.AccessGuestDriverProfile(); | ||
| 793 | } | ||
| 794 | |||
| 787 | } // namespace Tegra::Engines | 795 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ee79260fc..8808bbf76 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1306,6 +1306,10 @@ public: | |||
| 1306 | return regs.tex_cb_index; | 1306 | return regs.tex_cb_index; |
| 1307 | } | 1307 | } |
| 1308 | 1308 | ||
| 1309 | VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override; | ||
| 1310 | |||
| 1311 | const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; | ||
| 1312 | |||
| 1309 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | 1313 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than |
| 1310 | /// we've seen used. | 1314 | /// we've seen used. |
| 1311 | using MacroMemory = std::array<u32, 0x40000>; | 1315 | using MacroMemory = std::array<u32, 0x40000>; |
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp new file mode 100644 index 000000000..6adef459e --- /dev/null +++ b/src/video_core/guest_driver.cpp | |||
| @@ -0,0 +1,36 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <limits> | ||
| 7 | |||
| 8 | #include "video_core/guest_driver.h" | ||
| 9 | |||
| 10 | namespace VideoCore { | ||
| 11 | |||
| 12 | void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) { | ||
| 13 | if (texture_handler_size_deduced) { | ||
| 14 | return; | ||
| 15 | } | ||
| 16 | const std::size_t size = bound_offsets.size(); | ||
| 17 | if (size < 2) { | ||
| 18 | return; | ||
| 19 | } | ||
| 20 | std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{}); | ||
| 21 | u32 min_val = std::numeric_limits<u32>::max(); | ||
| 22 | for (std::size_t i = 1; i < size; ++i) { | ||
| 23 | if (bound_offsets[i] == bound_offsets[i - 1]) { | ||
| 24 | continue; | ||
| 25 | } | ||
| 26 | const u32 new_min = bound_offsets[i] - bound_offsets[i - 1]; | ||
| 27 | min_val = std::min(min_val, new_min); | ||
| 28 | } | ||
| 29 | if (min_val > 2) { | ||
| 30 | return; | ||
| 31 | } | ||
| 32 | texture_handler_size_deduced = true; | ||
| 33 | texture_handler_size = min_texture_handler_size * min_val; | ||
| 34 | } | ||
| 35 | |||
| 36 | } // namespace VideoCore | ||
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h new file mode 100644 index 000000000..fc1917347 --- /dev/null +++ b/src/video_core/guest_driver.h | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace VideoCore { | ||
| 12 | |||
| 13 | /** | ||
| 14 | * The GuestDriverProfile class is used to learn about the guest GPU driver's behavior and | ||
| 15 | * to collect information needed by HLE methods that cannot be avoided, such as shader | ||
| 16 | * tracking, since those problems cannot be decided statically. | ||
| 17 | */ | ||
| 18 | class GuestDriverProfile { | ||
| 19 | public: | ||
| 20 | void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets); | ||
| 21 | |||
| 22 | u32 GetTextureHandlerSize() const { | ||
| 23 | return texture_handler_size; | ||
| 24 | } | ||
| 25 | |||
| 26 | bool TextureHandlerSizeKnown() const { | ||
| 27 | return texture_handler_size_deduced; | ||
| 28 | } | ||
| 29 | |||
| 30 | private: | ||
| 31 | // Minimum size of texture handler any driver can use. | ||
| 32 | static constexpr u32 min_texture_handler_size = 4; | ||
| 33 | // The 8-byte default follows the Vulkan and OpenGL standards, but Nvidia GPUs can | ||
| 34 | // easily use 4 bytes instead, so certain drivers may shrink the handler size. | ||
| 35 | static constexpr u32 default_texture_handler_size = 8; | ||
| 36 | |||
| 37 | u32 texture_handler_size = default_texture_handler_size; | ||
| 38 | bool texture_handler_size_deduced = false; | ||
| 39 | }; | ||
| 40 | |||
| 41 | } // namespace VideoCore | ||
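Taken together with the header above, a worked example of the deduction (the include path assumes a yuzu source tree; the offsets are invented and, consistent with the decode.cpp hunk later in this diff, are treated as 32-bit word offsets while the handler size is in bytes):

```cpp
#include "video_core/guest_driver.h"

void Example() {
    VideoCore::GuestDriverProfile profile;
    // Offsets of three bound samplers collected from one shader. After sorting,
    // the smallest gap between distinct offsets is 2 (0x4A - 0x48), so the size
    // becomes min_texture_handler_size * 2 = 8 bytes. A smallest gap of 1 would
    // yield 4 bytes, and a gap larger than 2 keeps the 8-byte default and leaves
    // the size marked as not deduced.
    profile.DeduceTextureHandlerSize({0x48, 0x4A, 0x4E});
    // profile.TextureHandlerSizeKnown() == true
    // profile.GetTextureHandlerSize()  == 8
}
```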
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5b0eca9e2..c586cd6fe 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 10 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 12 | #include "video_core/guest_driver.h" | ||
| 12 | 13 | ||
| 13 | namespace Tegra { | 14 | namespace Tegra { |
| 14 | class MemoryManager; | 15 | class MemoryManager; |
| @@ -78,5 +79,18 @@ public: | |||
| 78 | /// Initialize disk cached resources for the game being emulated | 79 | /// Initialize disk cached resources for the game being emulated |
| 79 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, | 80 | virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, |
| 80 | const DiskResourceLoadCallback& callback = {}) {} | 81 | const DiskResourceLoadCallback& callback = {}) {} |
| 82 | |||
| 83 | /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. | ||
| 84 | GuestDriverProfile& AccessGuestDriverProfile() { | ||
| 85 | return guest_driver_profile; | ||
| 86 | } | ||
| 87 | |||
| 88 | /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. | ||
| 89 | const GuestDriverProfile& AccessGuestDriverProfile() const { | ||
| 90 | return guest_driver_profile; | ||
| 91 | } | ||
| 92 | |||
| 93 | private: | ||
| 94 | GuestDriverProfile guest_driver_profile{}; | ||
| 81 | }; | 95 | }; |
| 82 | } // namespace VideoCore | 96 | } // namespace VideoCore |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index c428f06e4..362942e09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -55,16 +55,20 @@ namespace { | |||
| 55 | 55 | ||
| 56 | template <typename Engine, typename Entry> | 56 | template <typename Engine, typename Entry> |
| 57 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 57 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 58 | Tegra::Engines::ShaderType shader_type) { | 58 | Tegra::Engines::ShaderType shader_type, |
| 59 | std::size_t index = 0) { | ||
| 59 | if (entry.IsBindless()) { | 60 | if (entry.IsBindless()) { |
| 60 | const Tegra::Texture::TextureHandle tex_handle = | 61 | const Tegra::Texture::TextureHandle tex_handle = |
| 61 | engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); | 62 | engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); |
| 62 | return engine.GetTextureInfo(tex_handle); | 63 | return engine.GetTextureInfo(tex_handle); |
| 63 | } | 64 | } |
| 65 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | ||
| 66 | const u32 offset = | ||
| 67 | entry.GetOffset() + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | ||
| 64 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | 68 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { |
| 65 | return engine.GetStageTexture(shader_type, entry.GetOffset()); | 69 | return engine.GetStageTexture(shader_type, offset); |
| 66 | } else { | 70 | } else { |
| 67 | return engine.GetTexture(entry.GetOffset()); | 71 | return engine.GetTexture(offset); |
| 68 | } | 72 | } |
| 69 | } | 73 | } |
| 70 | 74 | ||
| @@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | |||
| 942 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 946 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| 943 | for (const auto& entry : shader->GetShaderEntries().samplers) { | 947 | for (const auto& entry : shader->GetShaderEntries().samplers) { |
| 944 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); | 948 | const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); |
| 945 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); | 949 | if (!entry.IsIndexed()) { |
| 946 | SetupTexture(binding++, texture, entry); | 950 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); |
| 951 | SetupTexture(binding++, texture, entry); | ||
| 952 | } else { | ||
| 953 | for (std::size_t i = 0; i < entry.Size(); ++i) { | ||
| 954 | const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); | ||
| 955 | SetupTexture(binding++, texture, entry); | ||
| 956 | } | ||
| 957 | } | ||
| 947 | } | 958 | } |
| 948 | } | 959 | } |
| 949 | 960 | ||
| @@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | |||
| 952 | const auto& compute = system.GPU().KeplerCompute(); | 963 | const auto& compute = system.GPU().KeplerCompute(); |
| 953 | u32 binding = 0; | 964 | u32 binding = 0; |
| 954 | for (const auto& entry : kernel->GetShaderEntries().samplers) { | 965 | for (const auto& entry : kernel->GetShaderEntries().samplers) { |
| 955 | const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); | 966 | if (!entry.IsIndexed()) { |
| 956 | SetupTexture(binding++, texture, entry); | 967 | const auto texture = |
| 968 | GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); | ||
| 969 | SetupTexture(binding++, texture, entry); | ||
| 970 | } else { | ||
| 971 | for (std::size_t i = 0; i < entry.Size(); ++i) { | ||
| 972 | const auto texture = | ||
| 973 | GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); | ||
| 974 | SetupTexture(binding++, texture, entry); | ||
| 975 | } | ||
| 976 | } | ||
| 957 | } | 977 | } |
| 958 | } | 978 | } |
| 959 | 979 | ||
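A small worked example of the offset arithmetic the new `GetTextureInfo()` overload performs for one element of an indexed entry (numbers invented). Each element is bound to the next consecutive binding slot, which is what the decompiler's `binding += sampler.IsIndexed() ? sampler.Size() : 1;` bookkeeping below accounts for.

```cpp
#include <cstdint>
using u32 = std::uint32_t; // stand-in for yuzu's u32 from common/common_types.h

constexpr u32 entry_offset = 0x120;     // entry.GetOffset()
constexpr u32 handler_size = 8;         // gpu_profile.GetTextureHandlerSize()
constexpr u32 index = 2;                // third element of the sampler array
constexpr u32 handle_offset = entry_offset + index * handler_size;
static_assert(handle_offset == 0x130);  // handle read for element 2
```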
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3c5bdd377..489eb143c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s | |||
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | 216 | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { |
| 217 | locker.SetBoundBuffer(usage.bound_buffer); | ||
| 217 | for (const auto& key : usage.keys) { | 218 | for (const auto& key : usage.keys) { |
| 218 | const auto [buffer, offset] = key.first; | 219 | const auto [buffer, offset] = key.first; |
| 219 | locker.InsertKey(buffer, offset, key.second); | 220 | locker.InsertKey(buffer, offset, key.second); |
| @@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() { | |||
| 418 | 419 | ||
| 419 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | 420 | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, |
| 420 | const ConstBufferLocker& locker) const { | 421 | const ConstBufferLocker& locker) const { |
| 421 | return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(), | 422 | return ShaderDiskCacheUsage{unique_identifier, variant, |
| 423 | locker.GetBoundBuffer(), locker.GetKeys(), | ||
| 422 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; | 424 | locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; |
| 423 | } | 425 | } |
| 424 | 426 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a1ac3d7a9..4735000b5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -391,6 +391,7 @@ public: | |||
| 391 | DeclareVertex(); | 391 | DeclareVertex(); |
| 392 | DeclareGeometry(); | 392 | DeclareGeometry(); |
| 393 | DeclareRegisters(); | 393 | DeclareRegisters(); |
| 394 | DeclareCustomVariables(); | ||
| 394 | DeclarePredicates(); | 395 | DeclarePredicates(); |
| 395 | DeclareLocalMemory(); | 396 | DeclareLocalMemory(); |
| 396 | DeclareInternalFlags(); | 397 | DeclareInternalFlags(); |
| @@ -503,6 +504,16 @@ private: | |||
| 503 | } | 504 | } |
| 504 | } | 505 | } |
| 505 | 506 | ||
| 507 | void DeclareCustomVariables() { | ||
| 508 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 509 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 510 | code.AddLine("float {} = 0.0f;", GetCustomVariable(i)); | ||
| 511 | } | ||
| 512 | if (num_custom_variables > 0) { | ||
| 513 | code.AddNewLine(); | ||
| 514 | } | ||
| 515 | } | ||
| 516 | |||
| 506 | void DeclarePredicates() { | 517 | void DeclarePredicates() { |
| 507 | const auto& predicates = ir.GetPredicates(); | 518 | const auto& predicates = ir.GetPredicates(); |
| 508 | for (const auto pred : predicates) { | 519 | for (const auto pred : predicates) { |
| @@ -655,7 +666,8 @@ private: | |||
| 655 | u32 binding = device.GetBaseBindings(stage).sampler; | 666 | u32 binding = device.GetBaseBindings(stage).sampler; |
| 656 | for (const auto& sampler : ir.GetSamplers()) { | 667 | for (const auto& sampler : ir.GetSamplers()) { |
| 657 | const std::string name = GetSampler(sampler); | 668 | const std::string name = GetSampler(sampler); |
| 658 | const std::string description = fmt::format("layout (binding = {}) uniform", binding++); | 669 | const std::string description = fmt::format("layout (binding = {}) uniform", binding); |
| 670 | binding += sampler.IsIndexed() ? sampler.Size() : 1; | ||
| 659 | 671 | ||
| 660 | std::string sampler_type = [&]() { | 672 | std::string sampler_type = [&]() { |
| 661 | if (sampler.IsBuffer()) { | 673 | if (sampler.IsBuffer()) { |
| @@ -682,7 +694,11 @@ private: | |||
| 682 | sampler_type += "Shadow"; | 694 | sampler_type += "Shadow"; |
| 683 | } | 695 | } |
| 684 | 696 | ||
| 685 | code.AddLine("{} {} {};", description, sampler_type, name); | 697 | if (!sampler.IsIndexed()) { |
| 698 | code.AddLine("{} {} {};", description, sampler_type, name); | ||
| 699 | } else { | ||
| 700 | code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size()); | ||
| 701 | } | ||
| 686 | } | 702 | } |
| 687 | if (!ir.GetSamplers().empty()) { | 703 | if (!ir.GetSamplers().empty()) { |
| 688 | code.AddNewLine(); | 704 | code.AddNewLine(); |
| @@ -775,6 +791,11 @@ private: | |||
| 775 | return {GetRegister(index), Type::Float}; | 791 | return {GetRegister(index), Type::Float}; |
| 776 | } | 792 | } |
| 777 | 793 | ||
| 794 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 795 | const u32 index = cv->GetIndex(); | ||
| 796 | return {GetCustomVariable(index), Type::Float}; | ||
| 797 | } | ||
| 798 | |||
| 778 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | 799 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { |
| 779 | const u32 value = immediate->GetValue(); | 800 | const u32 value = immediate->GetValue(); |
| 780 | if (value < 10) { | 801 | if (value < 10) { |
| @@ -1098,7 +1119,11 @@ private: | |||
| 1098 | } else if (!meta->ptp.empty()) { | 1119 | } else if (!meta->ptp.empty()) { |
| 1099 | expr += "Offsets"; | 1120 | expr += "Offsets"; |
| 1100 | } | 1121 | } |
| 1101 | expr += '(' + GetSampler(meta->sampler) + ", "; | 1122 | if (!meta->sampler.IsIndexed()) { |
| 1123 | expr += '(' + GetSampler(meta->sampler) + ", "; | ||
| 1124 | } else { | ||
| 1125 | expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], "; | ||
| 1126 | } | ||
| 1102 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + | 1127 | expr += coord_constructors.at(count + (has_array ? 1 : 0) + |
| 1103 | (has_shadow && !separate_dc ? 1 : 0) - 1); | 1128 | (has_shadow && !separate_dc ? 1 : 0) - 1); |
| 1104 | expr += '('; | 1129 | expr += '('; |
| @@ -1310,6 +1335,8 @@ private: | |||
| 1310 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); | 1335 | const std::string final_offset = fmt::format("({} - {}) >> 2", real, base); |
| 1311 | target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), | 1336 | target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset), |
| 1312 | Type::Uint}; | 1337 | Type::Uint}; |
| 1338 | } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||
| 1339 | target = {GetCustomVariable(cv->GetIndex()), Type::Float}; | ||
| 1313 | } else { | 1340 | } else { |
| 1314 | UNREACHABLE_MSG("Assign called without a proper target"); | 1341 | UNREACHABLE_MSG("Assign called without a proper target"); |
| 1315 | } | 1342 | } |
| @@ -2237,6 +2264,10 @@ private: | |||
| 2237 | return GetDeclarationWithSuffix(index, "gpr"); | 2264 | return GetDeclarationWithSuffix(index, "gpr"); |
| 2238 | } | 2265 | } |
| 2239 | 2266 | ||
| 2267 | std::string GetCustomVariable(u32 index) const { | ||
| 2268 | return GetDeclarationWithSuffix(index, "custom_var"); | ||
| 2269 | } | ||
| 2270 | |||
| 2240 | std::string GetPredicate(Tegra::Shader::Pred pred) const { | 2271 | std::string GetPredicate(Tegra::Shader::Pred pred) const { |
| 2241 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); | 2272 | return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); |
| 2242 | } | 2273 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index cf874a09a..1fc204f6f 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -53,7 +53,7 @@ struct BindlessSamplerKey { | |||
| 53 | Tegra::Engines::SamplerDescriptor sampler{}; | 53 | Tegra::Engines::SamplerDescriptor sampler{}; |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | constexpr u32 NativeVersion = 11; | 56 | constexpr u32 NativeVersion = 12; |
| 57 | 57 | ||
| 58 | // Making sure sizes doesn't change by accident | 58 | // Making sure sizes doesn't change by accident |
| 59 | static_assert(sizeof(ProgramVariant) == 20); | 59 | static_assert(sizeof(ProgramVariant) == 20); |
| @@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||
| 186 | u32 num_bound_samplers{}; | 186 | u32 num_bound_samplers{}; |
| 187 | u32 num_bindless_samplers{}; | 187 | u32 num_bindless_samplers{}; |
| 188 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | 188 | if (file.ReadArray(&usage.unique_identifier, 1) != 1 || |
| 189 | file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | 189 | file.ReadArray(&usage.variant, 1) != 1 || |
| 190 | file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||
| 190 | file.ReadArray(&num_bound_samplers, 1) != 1 || | 191 | file.ReadArray(&num_bound_samplers, 1) != 1 || |
| 191 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | 192 | file.ReadArray(&num_bindless_samplers, 1) != 1) { |
| 192 | LOG_ERROR(Render_OpenGL, error_loading); | 193 | LOG_ERROR(Render_OpenGL, error_loading); |
| @@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||
| 281 | u32 num_bindless_samplers{}; | 282 | u32 num_bindless_samplers{}; |
| 282 | ShaderDiskCacheUsage usage; | 283 | ShaderDiskCacheUsage usage; |
| 283 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | 284 | if (!LoadObjectFromPrecompiled(usage.unique_identifier) || |
| 284 | !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || | 285 | !LoadObjectFromPrecompiled(usage.variant) || |
| 286 | !LoadObjectFromPrecompiled(usage.bound_buffer) || | ||
| 287 | !LoadObjectFromPrecompiled(num_keys) || | ||
| 285 | !LoadObjectFromPrecompiled(num_bound_samplers) || | 288 | !LoadObjectFromPrecompiled(num_bound_samplers) || |
| 286 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { | 289 | !LoadObjectFromPrecompiled(num_bindless_samplers)) { |
| 287 | return {}; | 290 | return {}; |
| @@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | |||
| 393 | 396 | ||
| 394 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | 397 | if (file.WriteObject(TransferableEntryKind::Usage) != 1 || |
| 395 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | 398 | file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || |
| 399 | file.WriteObject(usage.bound_buffer) != 1 || | ||
| 396 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | 400 | file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || |
| 397 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | 401 | file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || |
| 398 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | 402 | file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { |
| @@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||
| 447 | }; | 451 | }; |
| 448 | 452 | ||
| 449 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || | 453 | if (!SaveObjectToPrecompiled(usage.unique_identifier) || |
| 450 | !SaveObjectToPrecompiled(usage.variant) || | 454 | !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || |
| 451 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | 455 | !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || |
| 452 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | 456 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || |
| 453 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | 457 | !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 69a2fbdda..ef2371f6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>); | |||
| 79 | struct ShaderDiskCacheUsage { | 79 | struct ShaderDiskCacheUsage { |
| 80 | u64 unique_identifier{}; | 80 | u64 unique_identifier{}; |
| 81 | ProgramVariant variant; | 81 | ProgramVariant variant; |
| 82 | u32 bound_buffer{}; | ||
| 82 | VideoCommon::Shader::KeyMap keys; | 83 | VideoCommon::Shader::KeyMap keys; |
| 83 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | 84 | VideoCommon::Shader::BoundSamplerMap bound_samplers; |
| 84 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | 85 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 1ab22251e..24a658dce 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -353,6 +353,7 @@ private: | |||
| 353 | DeclareFragment(); | 353 | DeclareFragment(); |
| 354 | DeclareCompute(); | 354 | DeclareCompute(); |
| 355 | DeclareRegisters(); | 355 | DeclareRegisters(); |
| 356 | DeclareCustomVariables(); | ||
| 356 | DeclarePredicates(); | 357 | DeclarePredicates(); |
| 357 | DeclareLocalMemory(); | 358 | DeclareLocalMemory(); |
| 358 | DeclareSharedMemory(); | 359 | DeclareSharedMemory(); |
| @@ -586,6 +587,15 @@ private: | |||
| 586 | } | 587 | } |
| 587 | } | 588 | } |
| 588 | 589 | ||
| 590 | void DeclareCustomVariables() { | ||
| 591 | const u32 num_custom_variables = ir.GetNumCustomVariables(); | ||
| 592 | for (u32 i = 0; i < num_custom_variables; ++i) { | ||
| 593 | const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero); | ||
| 594 | Name(id, fmt::format("custom_var_{}", i)); | ||
| 595 | custom_variables.emplace(i, AddGlobalVariable(id)); | ||
| 596 | } | ||
| 597 | } | ||
| 598 | |||
| 589 | void DeclarePredicates() { | 599 | void DeclarePredicates() { |
| 590 | for (const auto pred : ir.GetPredicates()) { | 600 | for (const auto pred : ir.GetPredicates()) { |
| 591 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); | 601 | const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); |
| @@ -982,6 +992,11 @@ private: | |||
| 982 | return {OpLoad(t_float, registers.at(index)), Type::Float}; | 992 | return {OpLoad(t_float, registers.at(index)), Type::Float}; |
| 983 | } | 993 | } |
| 984 | 994 | ||
| 995 | if (const auto cv = std::get_if<CustomVarNode>(&*node)) { | ||
| 996 | const u32 index = cv->GetIndex(); | ||
| 997 | return {OpLoad(t_float, custom_variables.at(index)), Type::Float}; | ||
| 998 | } | ||
| 999 | |||
| 985 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { | 1000 | if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { |
| 986 | return {Constant(t_uint, immediate->GetValue()), Type::Uint}; | 1001 | return {Constant(t_uint, immediate->GetValue()), Type::Uint}; |
| 987 | } | 1002 | } |
| @@ -1333,6 +1348,9 @@ private: | |||
| 1333 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | 1348 | } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |
| 1334 | target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; | 1349 | target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; |
| 1335 | 1350 | ||
| 1351 | } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) { | ||
| 1352 | target = {custom_variables.at(cv->GetIndex()), Type::Float}; | ||
| 1353 | |||
| 1336 | } else { | 1354 | } else { |
| 1337 | UNIMPLEMENTED(); | 1355 | UNIMPLEMENTED(); |
| 1338 | } | 1356 | } |
| @@ -2508,6 +2526,7 @@ private: | |||
| 2508 | Id out_vertex{}; | 2526 | Id out_vertex{}; |
| 2509 | Id in_vertex{}; | 2527 | Id in_vertex{}; |
| 2510 | std::map<u32, Id> registers; | 2528 | std::map<u32, Id> registers; |
| 2529 | std::map<u32, Id> custom_variables; | ||
| 2511 | std::map<Tegra::Shader::Pred, Id> predicates; | 2530 | std::map<Tegra::Shader::Pred, Id> predicates; |
| 2512 | std::map<u32, Id> flow_variables; | 2531 | std::map<u32, Id> flow_variables; |
| 2513 | Id local_memory{}; | 2532 | Id local_memory{}; |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index a4a0319eb..0638be8cb 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle | |||
| 66 | return value; | 66 | return value; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() { | ||
| 70 | if (bound_buffer_saved) { | ||
| 71 | return bound_buffer; | ||
| 72 | } | ||
| 73 | if (!engine) { | ||
| 74 | return std::nullopt; | ||
| 75 | } | ||
| 76 | bound_buffer_saved = true; | ||
| 77 | bound_buffer = engine->GetBoundBuffer(); | ||
| 78 | return bound_buffer; | ||
| 79 | } | ||
| 80 | |||
| 69 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | 81 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { |
| 70 | keys.insert_or_assign({buffer, offset}, value); | 82 | keys.insert_or_assign({buffer, offset}, value); |
| 71 | } | 83 | } |
| @@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes | |||
| 78 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); | 90 | bindless_samplers.insert_or_assign({buffer, offset}, sampler); |
| 79 | } | 91 | } |
| 80 | 92 | ||
| 93 | void ConstBufferLocker::SetBoundBuffer(u32 buffer) { | ||
| 94 | bound_buffer_saved = true; | ||
| 95 | bound_buffer = buffer; | ||
| 96 | } | ||
| 97 | |||
| 81 | bool ConstBufferLocker::IsConsistent() const { | 98 | bool ConstBufferLocker::IsConsistent() const { |
| 82 | if (!engine) { | 99 | if (!engine) { |
| 83 | return false; | 100 | return false; |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index d32e2d657..d3ea11087 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/hash.h" | 10 | #include "common/hash.h" |
| 11 | #include "video_core/engines/const_buffer_engine_interface.h" | 11 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 12 | #include "video_core/engines/shader_type.h" | 12 | #include "video_core/engines/shader_type.h" |
| 13 | #include "video_core/guest_driver.h" | ||
| 13 | 14 | ||
| 14 | namespace VideoCommon::Shader { | 15 | namespace VideoCommon::Shader { |
| 15 | 16 | ||
| @@ -40,6 +41,8 @@ public: | |||
| 40 | 41 | ||
| 41 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | 42 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |
| 42 | 43 | ||
| 44 | std::optional<u32> ObtainBoundBuffer(); | ||
| 45 | |||
| 43 | /// Inserts a key. | 46 | /// Inserts a key. |
| 44 | void InsertKey(u32 buffer, u32 offset, u32 value); | 47 | void InsertKey(u32 buffer, u32 offset, u32 value); |
| 45 | 48 | ||
| @@ -49,6 +52,9 @@ public: | |||
| 49 | /// Inserts a bindless sampler key. | 52 | /// Inserts a bindless sampler key. |
| 50 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | 53 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); |
| 51 | 54 | ||
| 55 | /// Set the bound buffer for this locker. | ||
| 56 | void SetBoundBuffer(u32 buffer); | ||
| 57 | |||
| 52 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are | 58 | /// Checks keys and samplers against engine's current const buffers. Returns true if they are |
| 53 | /// the same value, false otherwise; | 59 | /// the same value, false otherwise; |
| 54 | bool IsConsistent() const; | 60 | bool IsConsistent() const; |
| @@ -71,12 +77,27 @@ public: | |||
| 71 | return bindless_samplers; | 77 | return bindless_samplers; |
| 72 | } | 78 | } |
| 73 | 79 | ||
| 80 | /// Gets the bound buffer used by this shader. | ||
| 81 | u32 GetBoundBuffer() const { | ||
| 82 | return bound_buffer; | ||
| 83 | } | ||
| 84 | |||
| 85 | /// Obtains access to the guest driver's profile. | ||
| 86 | VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const { | ||
| 87 | if (engine) { | ||
| 88 | return &engine->AccessGuestDriverProfile(); | ||
| 89 | } | ||
| 90 | return nullptr; | ||
| 91 | } | ||
| 92 | |||
| 74 | private: | 93 | private: |
| 75 | const Tegra::Engines::ShaderType stage; | 94 | const Tegra::Engines::ShaderType stage; |
| 76 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | 95 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |
| 77 | KeyMap keys; | 96 | KeyMap keys; |
| 78 | BoundSamplerMap bound_samplers; | 97 | BoundSamplerMap bound_samplers; |
| 79 | BindlessSamplerMap bindless_samplers; | 98 | BindlessSamplerMap bindless_samplers; |
| 99 | bool bound_buffer_saved{}; | ||
| 100 | u32 bound_buffer{}; | ||
| 80 | }; | 101 | }; |
| 81 | 102 | ||
| 82 | } // namespace VideoCommon::Shader | 103 | } // namespace VideoCommon::Shader |
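The reason the locker now tracks the bound buffer: when a shader is first built, `ObtainBoundBuffer()` queries the engine once and caches the value, but when the same shader is rebuilt from the disk cache there is no engine attached, so `FillLocker()` (in the gl_shader_cache.cpp hunk earlier in this diff) restores it with `SetBoundBuffer()` from the serialized usage entry, which is also why `NativeVersion` is bumped from 11 to 12. A minimal sketch of the two paths; the one- and two-argument `ConstBufferLocker` constructors are assumptions inferred from how `MakeLocker()` and `FillLocker()` use the class, not part of this diff.

```cpp
#include <optional>
#include "video_core/shader/const_buffer_locker.h"

using VideoCommon::Shader::ConstBufferLocker;

// Building directly from the running game: the bound buffer comes from the engine.
void BuildFromGame(Tegra::Engines::ConstBufferEngineInterface& engine) {
    ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment, engine};
    const std::optional<u32> bound = locker.ObtainBoundBuffer(); // queried once, then cached
}

// Rebuilding from the disk cache: no engine, so the saved value is injected.
void BuildFromDiskCache(u32 saved_bound_buffer) {
    ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment};
    locker.SetBoundBuffer(saved_bound_buffer); // what FillLocker() does with usage.bound_buffer
    // locker.GetBoundBuffer() is now valid even with no engine attached.
}
```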
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 22c3e5120..6b697ed5d 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <cstring> | 5 | #include <cstring> |
| 6 | #include <limits> | ||
| 6 | #include <set> | 7 | #include <set> |
| 7 | 8 | ||
| 8 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| @@ -33,6 +34,52 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||
| 33 | return (absolute_offset % SchedPeriod) == 0; | 34 | return (absolute_offset % SchedPeriod) == 0; |
| 34 | } | 35 | } |
| 35 | 36 | ||
| 37 | void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver, | ||
| 38 | const std::list<Sampler>& used_samplers) { | ||
| 39 | if (gpu_driver == nullptr) { | ||
| 40 | LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet"); | ||
| 41 | return; | ||
| 42 | } | ||
| 43 | if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) { | ||
| 44 | return; | ||
| 45 | } | ||
| 46 | u32 count{}; | ||
| 47 | std::vector<u32> bound_offsets; | ||
| 48 | for (const auto& sampler : used_samplers) { | ||
| 49 | if (sampler.IsBindless()) { | ||
| 50 | continue; | ||
| 51 | } | ||
| 52 | ++count; | ||
| 53 | bound_offsets.emplace_back(sampler.GetOffset()); | ||
| 54 | } | ||
| 55 | if (count > 1) { | ||
| 56 | gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets)); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | |||
| 60 | std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce, | ||
| 61 | VideoCore::GuestDriverProfile* gpu_driver, | ||
| 62 | const std::list<Sampler>& used_samplers) { | ||
| 63 | if (gpu_driver == nullptr) { | ||
| 64 | LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet"); | ||
| 65 | return std::nullopt; | ||
| 66 | } | ||
| 67 | const u32 base_offset = sampler_to_deduce.GetOffset(); | ||
| 68 | u32 max_offset{std::numeric_limits<u32>::max()}; | ||
| 69 | for (const auto& sampler : used_samplers) { | ||
| 70 | if (sampler.IsBindless()) { | ||
| 71 | continue; | ||
| 72 | } | ||
| 73 | if (sampler.GetOffset() > base_offset) { | ||
| 74 | max_offset = std::min(sampler.GetOffset(), max_offset); | ||
| 75 | } | ||
| 76 | } | ||
| 77 | if (max_offset == std::numeric_limits<u32>::max()) { | ||
| 78 | return std::nullopt; | ||
| 79 | } | ||
| 80 | return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize(); | ||
| 81 | } | ||
| 82 | |||
| 36 | } // Anonymous namespace | 83 | } // Anonymous namespace |
| 37 | 84 | ||
| 38 | class ASTDecoder { | 85 | class ASTDecoder { |
| @@ -315,4 +362,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | |||
| 315 | return pc + 1; | 362 | return pc + 1; |
| 316 | } | 363 | } |
| 317 | 364 | ||
| 365 | void ShaderIR::PostDecode() { | ||
| 366 | // Deduce texture handler size if needed | ||
| 367 | auto gpu_driver = locker.AccessGuestDriverProfile(); | ||
| 368 | DeduceTextureHandlerSize(gpu_driver, used_samplers); | ||
| 369 | // Deduce Indexed Samplers | ||
| 370 | if (!uses_indexed_samplers) { | ||
| 371 | return; | ||
| 372 | } | ||
| 373 | for (auto& sampler : used_samplers) { | ||
| 374 | if (!sampler.IsIndexed()) { | ||
| 375 | continue; | ||
| 376 | } | ||
| 377 | if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) { | ||
| 378 | sampler.SetSize(*size); | ||
| 379 | } else { | ||
| 380 | LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler"); | ||
| 381 | sampler.SetSize(1); | ||
| 382 | } | ||
| 383 | } | ||
| 384 | } | ||
| 385 | |||
| 318 | } // namespace VideoCommon::Shader | 386 | } // namespace VideoCommon::Shader |
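A worked example of the arithmetic in `TryDeduceSamplerSize()` above, with invented numbers; the `* 4` converts what appear to be 32-bit word offsets into bytes before dividing by the byte-sized texture handler.

```cpp
#include <cstdint>
using u32 = std::uint32_t; // stand-in for yuzu's u32 from common/common_types.h

constexpr u32 base_offset = 0x48;          // sampler_to_deduce.GetOffset()
constexpr u32 max_offset = 0x50;           // nearest larger bound sampler offset
constexpr u32 handler_size = 8;            // gpu_driver->GetTextureHandlerSize()
constexpr u32 deduced = ((max_offset - base_offset) * 4) / handler_size;
static_assert(deduced == 4);               // the indexed sampler becomes a 4-element array
```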
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b567e39d..d980535b1 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 144 | Node4 values; | 144 | Node4 values; |
| 145 | for (u32 element = 0; element < values.size(); ++element) { | 145 | for (u32 element = 0; element < values.size(); ++element) { |
| 146 | auto coords_copy = coords; | 146 | auto coords_copy = coords; |
| 147 | MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element}; | 147 | MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, |
| 148 | {}, {}, component, element, {}}; | ||
| 148 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 149 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 149 | } | 150 | } |
| 150 | 151 | ||
| @@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 167 | const auto derivate_reg = instr.gpr20.Value(); | 168 | const auto derivate_reg = instr.gpr20.Value(); |
| 168 | const auto texture_type = instr.txd.texture_type.Value(); | 169 | const auto texture_type = instr.txd.texture_type.Value(); |
| 169 | const auto coord_count = GetCoordCount(texture_type); | 170 | const auto coord_count = GetCoordCount(texture_type); |
| 170 | 171 | Node index_var{}; | |
| 171 | const Sampler* sampler = | 172 | const Sampler* sampler = |
| 172 | is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) | 173 | is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}}) |
| 173 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); | 174 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 174 | Node4 values; | 175 | Node4 values; |
| 175 | if (sampler == nullptr) { | 176 | if (sampler == nullptr) { |
| @@ -200,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 200 | } | 201 | } |
| 201 | 202 | ||
| 202 | for (u32 element = 0; element < values.size(); ++element) { | 203 | for (u32 element = 0; element < values.size(); ++element) { |
| 203 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; | 204 | MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, |
| 205 | {}, {}, {}, element, index_var}; | ||
| 204 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); | 206 | values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); |
| 205 | } | 207 | } |
| 206 | 208 | ||
| @@ -215,8 +217,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 215 | // TODO: The new commits on the texture refactor, change the way samplers work. | 217 | // TODO: The new commits on the texture refactor, change the way samplers work. |
| 216 | // Sadly, not all texture instructions specify the type of texture their sampler | 218 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 217 | // uses. This must be fixed at a later instance. | 219 | // uses. This must be fixed at a later instance. |
| 220 | Node index_var{}; | ||
| 218 | const Sampler* sampler = | 221 | const Sampler* sampler = |
| 219 | is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler); | 222 | is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler); |
| 220 | 223 | ||
| 221 | if (sampler == nullptr) { | 224 | if (sampler == nullptr) { |
| 222 | u32 indexer = 0; | 225 | u32 indexer = 0; |
| @@ -240,7 +243,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 240 | if (!instr.txq.IsComponentEnabled(element)) { | 243 | if (!instr.txq.IsComponentEnabled(element)) { |
| 241 | continue; | 244 | continue; |
| 242 | } | 245 | } |
| 243 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; | 246 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; |
| 244 | const Node value = | 247 | const Node value = |
| 245 | Operation(OperationCode::TextureQueryDimensions, meta, | 248 | Operation(OperationCode::TextureQueryDimensions, meta, |
| 246 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); | 249 | GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0))); |
| @@ -266,8 +269,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 266 | 269 | ||
| 267 | auto texture_type = instr.tmml.texture_type.Value(); | 270 | auto texture_type = instr.tmml.texture_type.Value(); |
| 268 | const bool is_array = instr.tmml.array != 0; | 271 | const bool is_array = instr.tmml.array != 0; |
| 272 | Node index_var{}; | ||
| 269 | const Sampler* sampler = | 273 | const Sampler* sampler = |
| 270 | is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler); | 274 | is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler); |
| 271 | 275 | ||
| 272 | if (sampler == nullptr) { | 276 | if (sampler == nullptr) { |
| 273 | u32 indexer = 0; | 277 | u32 indexer = 0; |
| @@ -309,7 +313,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 309 | continue; | 313 | continue; |
| 310 | } | 314 | } |
| 311 | auto params = coords; | 315 | auto params = coords; |
| 312 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element}; | 316 | MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; |
| 313 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | 317 | const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |
| 314 | SetTemporary(bb, indexer++, value); | 318 | SetTemporary(bb, indexer++, value); |
| 315 | } | 319 | } |
| @@ -383,37 +387,65 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, | |||
| 383 | // Otherwise create a new mapping for this sampler | 387 | // Otherwise create a new mapping for this sampler |
| 384 | const auto next_index = static_cast<u32>(used_samplers.size()); | 388 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 385 | return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, | 389 | return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow, |
| 386 | info.is_buffer); | 390 | info.is_buffer, false); |
| 387 | } | 391 | } |
| 388 | 392 | ||
| 389 | const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | 393 | const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, |
| 390 | std::optional<SamplerInfo> sampler_info) { | 394 | std::optional<SamplerInfo> sampler_info) { |
| 391 | const Node sampler_register = GetRegister(reg); | 395 | const Node sampler_register = GetRegister(reg); |
| 392 | const auto [base_sampler, buffer, offset] = | 396 | const auto [base_node, tracked_sampler_info] = |
| 393 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 397 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 394 | ASSERT(base_sampler != nullptr); | 398 | ASSERT(base_node != nullptr); |
| 395 | if (base_sampler == nullptr) { | 399 | if (base_node == nullptr) { |
| 396 | return nullptr; | 400 | return nullptr; |
| 397 | } | 401 | } |
| 398 | 402 | ||
| 399 | const auto info = GetSamplerInfo(sampler_info, offset, buffer); | 403 | if (const auto bindless_sampler_info = |
| 404 | std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | ||
| 405 | const u32 buffer = bindless_sampler_info->GetIndex(); | ||
| 406 | const u32 offset = bindless_sampler_info->GetOffset(); | ||
| 407 | const auto info = GetSamplerInfo(sampler_info, offset, buffer); | ||
| 408 | |||
| 409 | // If this sampler has already been used, return the existing mapping. | ||
| 410 | const auto it = | ||
| 411 | std::find_if(used_samplers.begin(), used_samplers.end(), | ||
| 412 | [buffer = buffer, offset = offset](const Sampler& entry) { | ||
| 413 | return entry.GetBuffer() == buffer && entry.GetOffset() == offset; | ||
| 414 | }); | ||
| 415 | if (it != used_samplers.end()) { | ||
| 416 | ASSERT(it->IsBindless() && it->GetType() == info.type && | ||
| 417 | it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow); | ||
| 418 | return &*it; | ||
| 419 | } | ||
| 400 | 420 | ||
| 401 | // If this sampler has already been used, return the existing mapping. | 421 | // Otherwise create a new mapping for this sampler |
| 402 | const auto it = | 422 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 403 | std::find_if(used_samplers.begin(), used_samplers.end(), | 423 | return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, |
| 404 | [buffer = buffer, offset = offset](const Sampler& entry) { | 424 | info.is_shadow, info.is_buffer, false); |
| 405 | return entry.GetBuffer() == buffer && entry.GetOffset() == offset; | 425 | } else if (const auto array_sampler_info = |
| 406 | }); | 426 | std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { |
| 407 | if (it != used_samplers.end()) { | 427 | const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; |
| 408 | ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array && | 428 | index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); |
| 409 | it->IsShadow() == info.is_shadow); | 429 | const auto info = GetSamplerInfo(sampler_info, base_offset); |
| 410 | return &*it; | 430 | |
| 411 | } | 431 | // If this sampler has already been used, return the existing mapping. |
| 432 | const auto it = std::find_if( | ||
| 433 | used_samplers.begin(), used_samplers.end(), | ||
| 434 | [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; }); | ||
| 435 | if (it != used_samplers.end()) { | ||
| 436 | ASSERT(!it->IsBindless() && it->GetType() == info.type && | ||
| 437 | it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow && | ||
| 438 | it->IsBuffer() == info.is_buffer && it->IsIndexed()); | ||
| 439 | return &*it; | ||
| 440 | } | ||
| 412 | 441 | ||
| 413 | // Otherwise create a new mapping for this sampler | 442 | uses_indexed_samplers = true; |
| 414 | const auto next_index = static_cast<u32>(used_samplers.size()); | 443 | // Otherwise create a new mapping for this sampler |
| 415 | return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array, | 444 | const auto next_index = static_cast<u32>(used_samplers.size()); |
| 416 | info.is_shadow, info.is_buffer); | 445 | return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array, |
| 446 | info.is_shadow, info.is_buffer, true); | ||
| 447 | } | ||
| 448 | return nullptr; | ||
| 417 | } | 449 | } |
| 418 | 450 | ||
| 419 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { | 451 | void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) { |
| @@ -499,8 +531,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 499 | "This method is not supported."); | 531 | "This method is not supported."); |
| 500 | 532 | ||
| 501 | const SamplerInfo info{texture_type, is_array, is_shadow, false}; | 533 | const SamplerInfo info{texture_type, is_array, is_shadow, false}; |
| 502 | const Sampler* sampler = | 534 | Node index_var{}; |
| 503 | is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info); | 535 | const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info) |
| 536 | : GetSampler(instr.sampler, info); | ||
| 504 | Node4 values; | 537 | Node4 values; |
| 505 | if (sampler == nullptr) { | 538 | if (sampler == nullptr) { |
| 506 | for (u32 element = 0; element < values.size(); ++element) { | 539 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -548,7 +581,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 548 | 581 | ||
| 549 | for (u32 element = 0; element < values.size(); ++element) { | 582 | for (u32 element = 0; element < values.size(); ++element) { |
| 550 | auto copy_coords = coords; | 583 | auto copy_coords = coords; |
| 551 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element}; | 584 | MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, |
| 585 | lod, {}, element, index_var}; | ||
| 552 | values[element] = Operation(read_method, meta, std::move(copy_coords)); | 586 | values[element] = Operation(read_method, meta, std::move(copy_coords)); |
| 553 | } | 587 | } |
| 554 | 588 | ||
| @@ -663,7 +697,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 663 | u64 parameter_register = instr.gpr20.Value(); | 697 | u64 parameter_register = instr.gpr20.Value(); |
| 664 | 698 | ||
| 665 | const SamplerInfo info{texture_type, is_array, depth_compare, false}; | 699 | const SamplerInfo info{texture_type, is_array, depth_compare, false}; |
| 666 | const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info) | 700 | Node index_var{}; |
| 701 | const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info) | ||
| 667 | : GetSampler(instr.sampler, info); | 702 | : GetSampler(instr.sampler, info); |
| 668 | Node4 values; | 703 | Node4 values; |
| 669 | if (sampler == nullptr) { | 704 | if (sampler == nullptr) { |
| @@ -692,7 +727,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 692 | for (u32 element = 0; element < values.size(); ++element) { | 727 | for (u32 element = 0; element < values.size(); ++element) { |
| 693 | auto coords_copy = coords; | 728 | auto coords_copy = coords; |
| 694 | MetaTexture meta{ | 729 | MetaTexture meta{ |
| 695 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element}; | 730 | *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element, |
| 731 | index_var}; | ||
| 696 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | 732 | values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |
| 697 | } | 733 | } |
| 698 | 734 | ||
| @@ -725,7 +761,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 725 | Node4 values; | 761 | Node4 values; |
| 726 | for (u32 element = 0; element < values.size(); ++element) { | 762 | for (u32 element = 0; element < values.size(); ++element) { |
| 727 | auto coords_copy = coords; | 763 | auto coords_copy = coords; |
| 728 | MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element}; | 764 | MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}}; |
| 729 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | 765 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 730 | } | 766 | } |
| 731 | 767 | ||
| @@ -775,7 +811,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 775 | Node4 values; | 811 | Node4 values; |
| 776 | for (u32 element = 0; element < values.size(); ++element) { | 812 | for (u32 element = 0; element < values.size(); ++element) { |
| 777 | auto coords_copy = coords; | 813 | auto coords_copy = coords; |
| 778 | MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element}; | 814 | MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}}; |
| 779 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | 815 | values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |
| 780 | } | 816 | } |
| 781 | return values; | 817 | return values; |
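The texture decoder changes above thread one extra node through MetaTexture: bindless/indexed paths receive the tracked index_var, while TLD and TLDS pass an empty node. Below is a minimal standalone sketch (not part of the patch and not the yuzu API; NodeData, Node and MetaTextureSketch are simplified stand-ins) of how an empty std::shared_ptr can encode "no dynamic index" for a texture operation.

#include <iostream>
#include <memory>
#include <vector>

// Simplified stand-in for the IR node payload; the real NodeData is a std::variant.
struct NodeData {
    explicit NodeData(int value) : value{value} {}
    int value;
};
using Node = std::shared_ptr<NodeData>;  // same shared-ownership model as the IR

struct MetaTextureSketch {
    unsigned element{};
    Node index{};  // empty pointer => sampler is addressed statically
};

int main() {
    std::vector<MetaTextureSketch> metas;
    metas.push_back({0, {}});                             // plain fetch, like the TLD/TLDS paths
    metas.push_back({1, std::make_shared<NodeData>(3)});  // indexed-sampler path with an index node
    for (const auto& meta : metas) {
        if (meta.index) {
            std::cout << "element " << meta.element << ": dynamic index node "
                      << meta.index->value << '\n';
        } else {
            std::cout << "element " << meta.element << ": statically addressed\n";
        }
    }
    return 0;
}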
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 9af1f0228..5f83403db 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -212,6 +212,7 @@ enum class MetaStackClass { | |||
| 212 | class OperationNode; | 212 | class OperationNode; |
| 213 | class ConditionalNode; | 213 | class ConditionalNode; |
| 214 | class GprNode; | 214 | class GprNode; |
| 215 | class CustomVarNode; | ||
| 215 | class ImmediateNode; | 216 | class ImmediateNode; |
| 216 | class InternalFlagNode; | 217 | class InternalFlagNode; |
| 217 | class PredicateNode; | 218 | class PredicateNode; |
| @@ -223,26 +224,32 @@ class SmemNode; | |||
| 223 | class GmemNode; | 224 | class GmemNode; |
| 224 | class CommentNode; | 225 | class CommentNode; |
| 225 | 226 | ||
| 226 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, | 227 | using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode, |
| 227 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, | 228 | InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode, |
| 228 | LmemNode, SmemNode, GmemNode, CommentNode>; | 229 | LmemNode, SmemNode, GmemNode, CommentNode>; |
| 229 | using Node = std::shared_ptr<NodeData>; | 230 | using Node = std::shared_ptr<NodeData>; |
| 230 | using Node4 = std::array<Node, 4>; | 231 | using Node4 = std::array<Node, 4>; |
| 231 | using NodeBlock = std::vector<Node>; | 232 | using NodeBlock = std::vector<Node>; |
| 232 | 233 | ||
| 234 | class BindlessSamplerNode; | ||
| 235 | class ArraySamplerNode; | ||
| 236 | |||
| 237 | using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; | ||
| 238 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | ||
| 239 | |||
| 233 | class Sampler { | 240 | class Sampler { |
| 234 | public: | 241 | public: |
| 235 | /// This constructor is for bound samplers | 242 | /// This constructor is for bound samplers |
| 236 | constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, | 243 | constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type, |
| 237 | bool is_array, bool is_shadow, bool is_buffer) | 244 | bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) |
| 238 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, | 245 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, |
| 239 | is_buffer{is_buffer} {} | 246 | is_buffer{is_buffer}, is_indexed{is_indexed} {} |
| 240 | 247 | ||
| 241 | /// This constructor is for bindless samplers | 248 | /// This constructor is for bindless samplers |
| 242 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, | 249 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, |
| 243 | bool is_array, bool is_shadow, bool is_buffer) | 250 | bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) |
| 244 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, | 251 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, |
| 245 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {} | 252 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} |
| 246 | 253 | ||
| 247 | constexpr u32 GetIndex() const { | 254 | constexpr u32 GetIndex() const { |
| 248 | return index; | 255 | return index; |
| @@ -276,16 +283,72 @@ public: | |||
| 276 | return is_bindless; | 283 | return is_bindless; |
| 277 | } | 284 | } |
| 278 | 285 | ||
| 286 | constexpr bool IsIndexed() const { | ||
| 287 | return is_indexed; | ||
| 288 | } | ||
| 289 | |||
| 290 | constexpr u32 Size() const { | ||
| 291 | return size; | ||
| 292 | } | ||
| 293 | |||
| 294 | constexpr void SetSize(u32 new_size) { | ||
| 295 | size = new_size; | ||
| 296 | } | ||
| 297 | |||
| 279 | private: | 298 | private: |
| 280 | u32 index{}; ///< Emulated index given for this sampler. | 299 | u32 index{}; ///< Emulated index given for this sampler. |
| 281 | u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. | 300 | u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. |
| 282 | u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). | 301 | u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). |
| 302 | u32 size{}; ///< Size of the sampler if indexed. | ||
| 283 | 303 | ||
| 284 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | 304 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
| 285 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. | 305 | bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. |
| 286 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. | 306 | bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not. |
| 287 | bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. | 307 | bool is_buffer{}; ///< Whether the texture is a texture buffer without sampler. |
| 288 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. | 308 | bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not. |
| 309 | bool is_indexed{}; ///< Whether this sampler is an indexed array of textures. | ||
| 310 | }; | ||
| 311 | |||
| 312 | /// Represents a tracked indexed sampler into a direct const buffer | ||
| 313 | class ArraySamplerNode final { | ||
| 314 | public: | ||
| 315 | explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) | ||
| 316 | : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} | ||
| 317 | |||
| 318 | constexpr u32 GetIndex() const { | ||
| 319 | return index; | ||
| 320 | } | ||
| 321 | |||
| 322 | constexpr u32 GetBaseOffset() const { | ||
| 323 | return base_offset; | ||
| 324 | } | ||
| 325 | |||
| 326 | constexpr u32 GetIndexVar() const { | ||
| 327 | return bindless_var; | ||
| 328 | } | ||
| 329 | |||
| 330 | private: | ||
| 331 | u32 index; | ||
| 332 | u32 base_offset; | ||
| 333 | u32 bindless_var; | ||
| 334 | }; | ||
| 335 | |||
| 336 | /// Represents a tracked bindless sampler into a direct const buffer | ||
| 337 | class BindlessSamplerNode final { | ||
| 338 | public: | ||
| 339 | explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} | ||
| 340 | |||
| 341 | constexpr u32 GetIndex() const { | ||
| 342 | return index; | ||
| 343 | } | ||
| 344 | |||
| 345 | constexpr u32 GetOffset() const { | ||
| 346 | return offset; | ||
| 347 | } | ||
| 348 | |||
| 349 | private: | ||
| 350 | u32 index; | ||
| 351 | u32 offset; | ||
| 289 | }; | 352 | }; |
| 290 | 353 | ||
| 291 | class Image final { | 354 | class Image final { |
| @@ -382,6 +445,7 @@ struct MetaTexture { | |||
| 382 | Node lod; | 445 | Node lod; |
| 383 | Node component{}; | 446 | Node component{}; |
| 384 | u32 element{}; | 447 | u32 element{}; |
| 448 | Node index{}; | ||
| 385 | }; | 449 | }; |
| 386 | 450 | ||
| 387 | struct MetaImage { | 451 | struct MetaImage { |
| @@ -488,6 +552,19 @@ private: | |||
| 488 | Tegra::Shader::Register index{}; | 552 | Tegra::Shader::Register index{}; |
| 489 | }; | 553 | }; |
| 490 | 554 | ||
| 555 | /// A custom variable | ||
| 556 | class CustomVarNode final { | ||
| 557 | public: | ||
| 558 | explicit constexpr CustomVarNode(u32 index) : index{index} {} | ||
| 559 | |||
| 560 | constexpr u32 GetIndex() const { | ||
| 561 | return index; | ||
| 562 | } | ||
| 563 | |||
| 564 | private: | ||
| 565 | u32 index{}; | ||
| 566 | }; | ||
| 567 | |||
| 491 | /// A 32-bits value that represents an immediate value | 568 | /// A 32-bits value that represents an immediate value |
| 492 | class ImmediateNode final { | 569 | class ImmediateNode final { |
| 493 | public: | 570 | public: |
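node.h above introduces a second small hierarchy, TrackSamplerData = std::variant&lt;BindlessSamplerNode, ArraySamplerNode&gt;, so the tracking pass can report either a plain bindless handle (cbuf index plus offset) or an indexed array (cbuf index, base offset and a custom variable). The sketch below is a self-contained model of that dispatch pattern; the *Sketch classes mirror the getters shown in the diff but are illustrative, not the real nodes.

#include <iostream>
#include <memory>
#include <variant>

class BindlessSamplerSketch {
public:
    BindlessSamplerSketch(unsigned index, unsigned offset) : index{index}, offset{offset} {}
    unsigned GetIndex() const { return index; }
    unsigned GetOffset() const { return offset; }
private:
    unsigned index;
    unsigned offset;
};

class ArraySamplerSketch {
public:
    ArraySamplerSketch(unsigned index, unsigned base_offset, unsigned index_var)
        : index{index}, base_offset{base_offset}, index_var{index_var} {}
    unsigned GetIndex() const { return index; }
    unsigned GetBaseOffset() const { return base_offset; }
    unsigned GetIndexVar() const { return index_var; }
private:
    unsigned index;
    unsigned base_offset;
    unsigned index_var;
};

using TrackSamplerData = std::variant<BindlessSamplerSketch, ArraySamplerSketch>;
using TrackSampler = std::shared_ptr<TrackSamplerData>;

// A consumer dispatches on whichever alternative the tracking pass produced.
void Describe(const TrackSampler& tracked) {
    if (const auto* bindless = std::get_if<BindlessSamplerSketch>(tracked.get())) {
        std::cout << "bindless sampler: cbuf " << bindless->GetIndex() << " offset "
                  << bindless->GetOffset() << '\n';
    } else if (const auto* array = std::get_if<ArraySamplerSketch>(tracked.get())) {
        std::cout << "indexed sampler: cbuf " << array->GetIndex() << " base "
                  << array->GetBaseOffset() << " index variable " << array->GetIndexVar() << '\n';
    }
}

int main() {
    Describe(std::make_shared<TrackSamplerData>(BindlessSamplerSketch{1, 0x20}));
    Describe(std::make_shared<TrackSamplerData>(ArraySamplerSketch{1, 0x40, 0}));
    return 0;
}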
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 0c2aa749b..11231bbea 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h | |||
| @@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) { | |||
| 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); | 45 | return std::make_shared<NodeData>(T(std::forward<Args>(args)...)); |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | template <typename T, typename... Args> | ||
| 49 | TrackSampler MakeTrackSampler(Args&&... args) { | ||
| 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); | ||
| 51 | return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); | ||
| 52 | } | ||
| 53 | |||
| 48 | template <typename... Args> | 54 | template <typename... Args> |
| 49 | Node Operation(OperationCode code, Args&&... args) { | 55 | Node Operation(OperationCode code, Args&&... args) { |
| 50 | if constexpr (sizeof...(args) == 0) { | 56 | if constexpr (sizeof...(args) == 0) { |
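MakeTrackSampler mirrors the existing MakeNode helper: construct the concrete alternative, then wrap it in a shared pointer to the variant. A minimal standalone sketch of that factory shape follows; the names (MakePayload, AlternativeA/B) are illustrative, not the yuzu helpers.

#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <variant>

struct AlternativeA { int value; };
struct AlternativeB { std::string name; };

using Payload = std::variant<AlternativeA, AlternativeB>;
using PayloadPtr = std::shared_ptr<Payload>;

// Same shape as MakeTrackSampler: forward the arguments into the concrete
// alternative T, then move it into a heap-allocated variant.
template <typename T, typename... Args>
PayloadPtr MakePayload(Args&&... args) {
    static_assert(std::is_convertible_v<T, Payload>);
    return std::make_shared<Payload>(T{std::forward<Args>(args)...});
}

int main() {
    const PayloadPtr a = MakePayload<AlternativeA>(42);
    const PayloadPtr b = MakePayload<AlternativeB>("indexed");
    return std::holds_alternative<AlternativeA>(*a) && std::holds_alternative<AlternativeB>(*b) ? 0 : 1;
}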
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 31eecb3f4..3a5d280a9 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp | |||
| @@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet | |||
| 27 | ConstBufferLocker& locker) | 27 | ConstBufferLocker& locker) |
| 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | 28 | : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { |
| 29 | Decode(); | 29 | Decode(); |
| 30 | PostDecode(); | ||
| 30 | } | 31 | } |
| 31 | 32 | ||
| 32 | ShaderIR::~ShaderIR() = default; | 33 | ShaderIR::~ShaderIR() = default; |
| @@ -38,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) { | |||
| 38 | return MakeNode<GprNode>(reg); | 39 | return MakeNode<GprNode>(reg); |
| 39 | } | 40 | } |
| 40 | 41 | ||
| 42 | Node ShaderIR::GetCustomVariable(u32 id) { | ||
| 43 | return MakeNode<CustomVarNode>(id); | ||
| 44 | } | ||
| 45 | |||
| 41 | Node ShaderIR::GetImmediate19(Instruction instr) { | 46 | Node ShaderIR::GetImmediate19(Instruction instr) { |
| 42 | return Immediate(instr.alu.GetImm20_19()); | 47 | return Immediate(instr.alu.GetImm20_19()); |
| 43 | } | 48 | } |
| @@ -452,4 +457,8 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) { | |||
| 452 | return id; | 457 | return id; |
| 453 | } | 458 | } |
| 454 | 459 | ||
| 460 | u32 ShaderIR::NewCustomVariable() { | ||
| 461 | return num_custom_variables++; | ||
| 462 | } | ||
| 463 | |||
| 455 | } // namespace VideoCommon::Shader | 464 | } // namespace VideoCommon::Shader |
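The custom-variable support added above is a monotonically increasing id: NewCustomVariable() hands out the next index and GetCustomVariable(id) wraps it in a CustomVarNode, so a backend can later declare one temporary per id. A minimal sketch of that bookkeeping, with hypothetical names:

#include <cassert>
#include <cstdint>

// Minimal model of the counter: every call mints a fresh custom-variable id.
class CustomVariablePool {
public:
    std::uint32_t NewCustomVariable() { return num_custom_variables++; }
    std::uint32_t GetNumCustomVariables() const { return num_custom_variables; }
private:
    std::uint32_t num_custom_variables{};
};

int main() {
    CustomVariablePool pool;
    const std::uint32_t first = pool.NewCustomVariable();   // 0
    const std::uint32_t second = pool.NewCustomVariable();  // 1
    assert(first == 0 && second == 1);
    assert(pool.GetNumCustomVariables() == 2);  // one backend declaration per id
    return 0;
}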
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index ba1db4c11..b0851c3be 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -180,6 +180,10 @@ public: | |||
| 180 | return amend_code[index]; | 180 | return amend_code[index]; |
| 181 | } | 181 | } |
| 182 | 182 | ||
| 183 | u32 GetNumCustomVariables() const { | ||
| 184 | return num_custom_variables; | ||
| 185 | } | ||
| 186 | |||
| 183 | private: | 187 | private: |
| 184 | friend class ASTDecoder; | 188 | friend class ASTDecoder; |
| 185 | 189 | ||
| @@ -191,6 +195,7 @@ private: | |||
| 191 | }; | 195 | }; |
| 192 | 196 | ||
| 193 | void Decode(); | 197 | void Decode(); |
| 198 | void PostDecode(); | ||
| 194 | 199 | ||
| 195 | NodeBlock DecodeRange(u32 begin, u32 end); | 200 | NodeBlock DecodeRange(u32 begin, u32 end); |
| 196 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); | 201 | void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end); |
| @@ -235,6 +240,8 @@ private: | |||
| 235 | 240 | ||
| 236 | /// Generates a node for a passed register. | 241 | /// Generates a node for a passed register. |
| 237 | Node GetRegister(Tegra::Shader::Register reg); | 242 | Node GetRegister(Tegra::Shader::Register reg); |
| 243 | /// Generates a node for a custom variable | ||
| 244 | Node GetCustomVariable(u32 id); | ||
| 238 | /// Generates a node representing a 19-bit immediate value | 245 | /// Generates a node representing a 19-bit immediate value |
| 239 | Node GetImmediate19(Tegra::Shader::Instruction instr); | 246 | Node GetImmediate19(Tegra::Shader::Instruction instr); |
| 240 | /// Generates a node representing a 32-bit immediate value | 247 | /// Generates a node representing a 32-bit immediate value |
| @@ -321,7 +328,7 @@ private: | |||
| 321 | std::optional<SamplerInfo> sampler_info = std::nullopt); | 328 | std::optional<SamplerInfo> sampler_info = std::nullopt); |
| 322 | 329 | ||
| 323 | /// Accesses a texture sampler for a bindless texture. | 330 | /// Accesses a texture sampler for a bindless texture. |
| 324 | const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, | 331 | const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var, |
| 325 | std::optional<SamplerInfo> sampler_info = std::nullopt); | 332 | std::optional<SamplerInfo> sampler_info = std::nullopt); |
| 326 | 333 | ||
| 327 | /// Accesses an image. | 334 | /// Accesses an image. |
| @@ -387,6 +394,9 @@ private: | |||
| 387 | 394 | ||
| 388 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 395 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 389 | 396 | ||
| 397 | std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 398 | s64 cursor); | ||
| 399 | |||
| 390 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 400 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 391 | 401 | ||
| 392 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, | 402 | std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, |
| @@ -399,6 +409,8 @@ private: | |||
| 399 | /// Register new amending code and obtain the reference id. | 409 | /// Register new amending code and obtain the reference id. |
| 400 | std::size_t DeclareAmend(Node new_amend); | 410 | std::size_t DeclareAmend(Node new_amend); |
| 401 | 411 | ||
| 412 | u32 NewCustomVariable(); | ||
| 413 | |||
| 402 | const ProgramCode& program_code; | 414 | const ProgramCode& program_code; |
| 403 | const u32 main_offset; | 415 | const u32 main_offset; |
| 404 | const CompilerSettings settings; | 416 | const CompilerSettings settings; |
| @@ -414,6 +426,7 @@ private: | |||
| 414 | NodeBlock global_code; | 426 | NodeBlock global_code; |
| 415 | ASTManager program_manager{true, true}; | 427 | ASTManager program_manager{true, true}; |
| 416 | std::vector<Node> amend_code; | 428 | std::vector<Node> amend_code; |
| 429 | u32 num_custom_variables{}; | ||
| 417 | 430 | ||
| 418 | std::set<u32> used_registers; | 431 | std::set<u32> used_registers; |
| 419 | std::set<Tegra::Shader::Pred> used_predicates; | 432 | std::set<Tegra::Shader::Pred> used_predicates; |
| @@ -431,6 +444,7 @@ private: | |||
| 431 | bool uses_instance_id{}; | 444 | bool uses_instance_id{}; |
| 432 | bool uses_vertex_id{}; | 445 | bool uses_vertex_id{}; |
| 433 | bool uses_warps{}; | 446 | bool uses_warps{}; |
| 447 | bool uses_indexed_samplers{}; | ||
| 434 | 448 | ||
| 435 | Tegra::Shader::Header header; | 449 | Tegra::Shader::Header header; |
| 436 | }; | 450 | }; |
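With GetNumCustomVariables() exposed on ShaderIR, a code generator can declare one temporary per custom variable before emitting the amended assignments. The sketch below shows that consumption under an assumed custom_varN naming scheme; it is not what the GLSL decompiler in this patch actually emits.

#include <cstdint>
#include <iostream>
#include <sstream>
#include <string>

// Emits one declaration per custom variable; the name pattern is illustrative only.
std::string DeclareCustomVariables(std::uint32_t count) {
    std::ostringstream out;
    for (std::uint32_t i = 0; i < count; ++i) {
        out << "uint custom_var" << i << " = 0u;\n";
    }
    return out.str();
}

int main() {
    // Suppose two custom variables were minted while decoding this shader.
    std::cout << DeclareCustomVariables(2);
    return 0;
}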
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 165c79330..ea39bca54 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/shader/node.h" | 10 | #include "video_core/shader/node.h" |
| 11 | #include "video_core/shader/node_helper.h" | ||
| 11 | #include "video_core/shader/shader_ir.h" | 12 | #include "video_core/shader/shader_ir.h" |
| 12 | 13 | ||
| 13 | namespace VideoCommon::Shader { | 14 | namespace VideoCommon::Shader { |
| @@ -35,8 +36,113 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | |||
| 35 | } | 36 | } |
| 36 | return {}; | 37 | return {}; |
| 37 | } | 38 | } |
| 39 | |||
| 40 | std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) { | ||
| 41 | if (operation.GetCode() != OperationCode::UAdd) { | ||
| 42 | return std::nullopt; | ||
| 43 | } | ||
| 44 | Node gpr{}; | ||
| 45 | Node offset{}; | ||
| 46 | ASSERT(operation.GetOperandsCount() == 2); | ||
| 47 | for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) { | ||
| 48 | Node operand = operation[i]; | ||
| 49 | if (std::holds_alternative<ImmediateNode>(*operand)) { | ||
| 50 | offset = operation[i]; | ||
| 51 | } else if (std::holds_alternative<GprNode>(*operand)) { | ||
| 52 | gpr = operation[i]; | ||
| 53 | } | ||
| 54 | } | ||
| 55 | if (offset && gpr) { | ||
| 56 | return std::make_pair(gpr, offset); | ||
| 57 | } | ||
| 58 | return std::nullopt; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool AmendNodeCv(std::size_t amend_index, Node node) { | ||
| 62 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | ||
| 63 | operation->SetAmendIndex(amend_index); | ||
| 64 | return true; | ||
| 65 | } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 66 | conditional->SetAmendIndex(amend_index); | ||
| 67 | return true; | ||
| 68 | } | ||
| 69 | return false; | ||
| 70 | } | ||
| 71 | |||
| 38 | } // Anonymous namespace | 72 | } // Anonymous namespace |
| 39 | 73 | ||
| 74 | std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | ||
| 75 | s64 cursor) { | ||
| 76 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | ||
| 77 | // Constant buffer found, test if it's an immediate | ||
| 78 | const auto offset = cbuf->GetOffset(); | ||
| 79 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | ||
| 80 | auto track = | ||
| 81 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | ||
| 82 | return {tracked, track}; | ||
| 83 | } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||
| 84 | auto bound_buffer = locker.ObtainBoundBuffer(); | ||
| 85 | if (!bound_buffer) { | ||
| 86 | return {}; | ||
| 87 | } | ||
| 88 | if (*bound_buffer != cbuf->GetIndex()) { | ||
| 89 | return {}; | ||
| 90 | } | ||
| 91 | auto pair = DecoupleIndirectRead(*operation); | ||
| 92 | if (!pair) { | ||
| 93 | return {}; | ||
| 94 | } | ||
| 95 | auto [gpr, base_offset] = *pair; | ||
| 96 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | ||
| 97 | auto gpu_driver = locker.AccessGuestDriverProfile(); | ||
| 98 | if (gpu_driver == nullptr) { | ||
| 99 | return {}; | ||
| 100 | } | ||
| 101 | const u32 bindless_cv = NewCustomVariable(); | ||
| 102 | const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr, | ||
| 103 | Immediate(gpu_driver->GetTextureHandlerSize())); | ||
| 104 | |||
| 105 | const Node cv_node = GetCustomVariable(bindless_cv); | ||
| 106 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | ||
| 107 | const std::size_t amend_index = DeclareAmend(amend_op); | ||
| 108 | AmendNodeCv(amend_index, code[cursor]); | ||
| 109 | // TODO Implement Bindless Index custom variable | ||
| 110 | auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), | ||
| 111 | offset_inm->GetValue(), bindless_cv); | ||
| 112 | return {tracked, track}; | ||
| 113 | } | ||
| 114 | return {}; | ||
| 115 | } | ||
| 116 | if (const auto gpr = std::get_if<GprNode>(&*tracked)) { | ||
| 117 | if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) { | ||
| 118 | return {}; | ||
| 119 | } | ||
| 120 | // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||
| 121 | // register that it uses as an operand | ||
| 122 | const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1); | ||
| 123 | if (!source) { | ||
| 124 | return {}; | ||
| 125 | } | ||
| 126 | return TrackBindlessSampler(source, code, new_cursor); | ||
| 127 | } | ||
| 128 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | ||
| 129 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | ||
| 130 | if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); | ||
| 131 | std::get<0>(found)) { | ||
| 132 | // Cbuf found in operand. | ||
| 133 | return found; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | return {}; | ||
| 137 | } | ||
| 138 | if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) { | ||
| 139 | const auto& conditional_code = conditional->GetCode(); | ||
| 140 | return TrackBindlessSampler(tracked, conditional_code, | ||
| 141 | static_cast<s64>(conditional_code.size())); | ||
| 142 | } | ||
| 143 | return {}; | ||
| 144 | } | ||
| 145 | |||
| 40 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | 146 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 41 | s64 cursor) const { | 147 | s64 cursor) const { |
| 42 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 148 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
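DecoupleIndirectRead above splits a UAdd of a register and an immediate into its two parts; TrackBindlessSampler then divides the register by the guest driver's texture handler size to recover the array index as a custom variable. The following is a self-contained sketch of just the decoupling step over a tiny stand-in expression type; Gpr, Immediate, Decouple and the handler size are illustrative assumptions, not the yuzu types.

#include <cstdint>
#include <iostream>
#include <optional>
#include <utility>
#include <variant>
#include <vector>

// Tiny stand-in expression: either a register reference or an immediate value.
struct Gpr { std::uint32_t index; };
struct Immediate { std::uint32_t value; };
using Operand = std::variant<Gpr, Immediate>;

// Mirrors the idea of DecoupleIndirectRead: given the two operands of an
// unsigned add, return {register, constant offset} only when exactly one of
// each is present.
std::optional<std::pair<Gpr, Immediate>> Decouple(const std::vector<Operand>& operands) {
    if (operands.size() != 2) {
        return std::nullopt;
    }
    std::optional<Gpr> gpr;
    std::optional<Immediate> offset;
    for (const Operand& operand : operands) {
        if (const auto* reg = std::get_if<Gpr>(&operand)) {
            gpr = *reg;
        } else if (const auto* imm = std::get_if<Immediate>(&operand)) {
            offset = *imm;
        }
    }
    if (gpr && offset) {
        return std::make_pair(*gpr, *offset);
    }
    return std::nullopt;
}

int main() {
    // handle = cbuf[base_offset + r5]: the add decouples into r5 and base_offset;
    // r5 divided by the texture handler size later becomes the custom index variable.
    const std::vector<Operand> operands{Gpr{5}, Immediate{0x40}};
    if (const auto pair = Decouple(operands)) {
        constexpr std::uint32_t texture_handler_size = 4;  // assumed per-driver value
        std::cout << "base offset 0x" << std::hex << pair->second.value
                  << ", index = r" << std::dec << pair->first.index
                  << " / " << texture_handler_size << '\n';
    }
    return 0;
}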