diff options
| author | 2019-09-25 09:53:18 -0400 | |
|---|---|---|
| committer | 2019-10-25 09:01:30 -0400 | |
| commit | 33fcec3502f5dd5a99b7a8337128b7c99bfba908 (patch) | |
| tree | 4f41d09678600fc3e12708f8a4f8ae2f05c37ad1 /src | |
| parent | Shader_IR: Implement Fast BRX and allow multi-branches in the CFG. (diff) | |
| download | yuzu-33fcec3502f5dd5a99b7a8337128b7c99bfba908.tar.gz yuzu-33fcec3502f5dd5a99b7a8337128b7c99bfba908.tar.xz yuzu-33fcec3502f5dd5a99b7a8337128b7c99bfba908.zip | |
Shader_IR: allow lookup of texture samplers within the shader_ir for instructions that don't provide it
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/const_buffer_engine_interface.h | 95 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/engines/kepler_compute.h | 13 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 18 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 11 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.cpp | 110 | ||||
| -rw-r--r-- | src/video_core/shader/const_buffer_locker.h | 60 | ||||
| -rw-r--r-- | src/video_core/shader/decode/texture.cpp | 72 | ||||
| -rw-r--r-- | src/video_core/shader/shader_ir.h | 12 |
9 files changed, 363 insertions, 46 deletions
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index cc41a9cac..c0e3a3a17 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -4,7 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include "common/bit_field.h" | ||
| 7 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/engines/shader_bytecode.h" | ||
| 10 | #include "video_core/textures/texture.h" | ||
| 8 | 11 | ||
| 9 | namespace Tegra::Engines { | 12 | namespace Tegra::Engines { |
| 10 | 13 | ||
| @@ -17,10 +20,100 @@ enum class ShaderType : u32 { | |||
| 17 | Compute = 5, | 20 | Compute = 5, |
| 18 | }; | 21 | }; |
| 19 | 22 | ||
| 23 | struct SamplerDescriptor { | ||
| 24 | union { | ||
| 25 | BitField<0, 20, Tegra::Shader::TextureType> texture_type; | ||
| 26 | BitField<20, 1, u32> is_array; | ||
| 27 | BitField<21, 1, u32> is_buffer; | ||
| 28 | BitField<22, 1, u32> is_shadow; | ||
| 29 | u32 raw{}; | ||
| 30 | }; | ||
| 31 | |||
| 32 | static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { | ||
| 33 | SamplerDescriptor result{}; | ||
| 34 | switch (tic_texture_type) { | ||
| 35 | case Tegra::Texture::TextureType::Texture1D: { | ||
| 36 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||
| 37 | result.is_array.Assign(0); | ||
| 38 | result.is_buffer.Assign(0); | ||
| 39 | result.is_shadow.Assign(0); | ||
| 40 | return result; | ||
| 41 | } | ||
| 42 | case Tegra::Texture::TextureType::Texture2D: { | ||
| 43 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 44 | result.is_array.Assign(0); | ||
| 45 | result.is_buffer.Assign(0); | ||
| 46 | result.is_shadow.Assign(0); | ||
| 47 | return result; | ||
| 48 | } | ||
| 49 | case Tegra::Texture::TextureType::Texture3D: { | ||
| 50 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); | ||
| 51 | result.is_array.Assign(0); | ||
| 52 | result.is_buffer.Assign(0); | ||
| 53 | result.is_shadow.Assign(0); | ||
| 54 | return result; | ||
| 55 | } | ||
| 56 | case Tegra::Texture::TextureType::TextureCubemap: { | ||
| 57 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | ||
| 58 | result.is_array.Assign(0); | ||
| 59 | result.is_buffer.Assign(0); | ||
| 60 | result.is_shadow.Assign(0); | ||
| 61 | return result; | ||
| 62 | } | ||
| 63 | case Tegra::Texture::TextureType::Texture1DArray: { | ||
| 64 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||
| 65 | result.is_array.Assign(1); | ||
| 66 | result.is_buffer.Assign(0); | ||
| 67 | result.is_shadow.Assign(0); | ||
| 68 | return result; | ||
| 69 | } | ||
| 70 | case Tegra::Texture::TextureType::Texture2DArray: { | ||
| 71 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 72 | result.is_array.Assign(1); | ||
| 73 | result.is_buffer.Assign(0); | ||
| 74 | result.is_shadow.Assign(0); | ||
| 75 | return result; | ||
| 76 | } | ||
| 77 | case Tegra::Texture::TextureType::Texture1DBuffer: { | ||
| 78 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||
| 79 | result.is_array.Assign(0); | ||
| 80 | result.is_buffer.Assign(1); | ||
| 81 | result.is_shadow.Assign(0); | ||
| 82 | return result; | ||
| 83 | } | ||
| 84 | case Tegra::Texture::TextureType::Texture2DNoMipmap: { | ||
| 85 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 86 | result.is_array.Assign(0); | ||
| 87 | result.is_buffer.Assign(0); | ||
| 88 | result.is_shadow.Assign(0); | ||
| 89 | return result; | ||
| 90 | } | ||
| 91 | case Tegra::Texture::TextureType::TextureCubeArray: { | ||
| 92 | result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | ||
| 93 | result.is_array.Assign(1); | ||
| 94 | result.is_buffer.Assign(0); | ||
| 95 | result.is_shadow.Assign(0); | ||
| 96 | return result; | ||
| 97 | } | ||
| 98 | default: { | ||
| 99 | result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||
| 100 | result.is_array.Assign(0); | ||
| 101 | result.is_buffer.Assign(0); | ||
| 102 | result.is_shadow.Assign(0); | ||
| 103 | return result; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | } | ||
| 107 | }; | ||
| 108 | |||
| 20 | class ConstBufferEngineInterface { | 109 | class ConstBufferEngineInterface { |
| 21 | public: | 110 | public: |
| 22 | virtual ~ConstBufferEngineInterface() {} | 111 | virtual ~ConstBufferEngineInterface() {} |
| 23 | virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; | 112 | virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; |
| 113 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; | ||
| 114 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 115 | u64 offset) const = 0; | ||
| 116 | virtual u32 GetBoundBuffer() const = 0; | ||
| 24 | }; | 117 | }; |
| 25 | 118 | ||
| 26 | } | 119 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index ba97c2894..6f00db1c1 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -78,6 +78,24 @@ u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 o | |||
| 78 | return result; | 78 | return result; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||
| 82 | return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||
| 83 | } | ||
| 84 | |||
| 85 | SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 86 | u64 offset) const { | ||
| 87 | ASSERT(stage == ShaderType::Compute); | ||
| 88 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; | ||
| 89 | const GPUVAddr tex_info_address = | ||
| 90 | tex_info_buffer.Address() + offset; | ||
| 91 | |||
| 92 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 93 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); | ||
| 94 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | ||
| 95 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | ||
| 96 | return result; | ||
| 97 | } | ||
| 98 | |||
| 81 | void KeplerCompute::ProcessLaunch() { | 99 | void KeplerCompute::ProcessLaunch() { |
| 82 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | 100 | const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |
| 83 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | 101 | memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index d7e0dfcd6..8e7182727 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -10,8 +10,8 @@ | |||
| 10 | #include "common/bit_field.h" | 10 | #include "common/bit_field.h" |
| 11 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/engines/engine_upload.h" | ||
| 14 | #include "video_core/engines/const_buffer_engine_interface.h" | 13 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 14 | #include "video_core/engines/engine_upload.h" | ||
| 15 | #include "video_core/gpu.h" | 15 | #include "video_core/gpu.h" |
| 16 | #include "video_core/textures/texture.h" | 16 | #include "video_core/textures/texture.h" |
| 17 | 17 | ||
| @@ -38,7 +38,7 @@ namespace Tegra::Engines { | |||
| 38 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ | 38 | #define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
| 39 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | 39 | (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |
| 40 | 40 | ||
| 41 | class KeplerCompute final : public ConstBufferEngineInterface { | 41 | class KeplerCompute final : public ConstBufferEngineInterface { |
| 42 | public: | 42 | public: |
| 43 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 43 | explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 44 | MemoryManager& memory_manager); | 44 | MemoryManager& memory_manager); |
| @@ -204,6 +204,15 @@ public: | |||
| 204 | 204 | ||
| 205 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | 205 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 206 | 206 | ||
| 207 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||
| 208 | |||
| 209 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 210 | u64 offset) const override; | ||
| 211 | |||
| 212 | u32 GetBoundBuffer() const override { | ||
| 213 | return regs.tex_cb_index; | ||
| 214 | } | ||
| 215 | |||
| 207 | private: | 216 | private: |
| 208 | Core::System& system; | 217 | Core::System& system; |
| 209 | VideoCore::RasterizerInterface& rasterizer; | 218 | VideoCore::RasterizerInterface& rasterizer; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 92e38b071..558955451 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -856,4 +856,22 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse | |||
| 856 | return result; | 856 | return result; |
| 857 | } | 857 | } |
| 858 | 858 | ||
| 859 | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||
| 860 | return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||
| 861 | } | ||
| 862 | |||
| 863 | SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 864 | u64 offset) const { | ||
| 865 | ASSERT(stage != ShaderType::Compute); | ||
| 866 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||
| 867 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; | ||
| 868 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; | ||
| 869 | |||
| 870 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||
| 871 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); | ||
| 872 | SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | ||
| 873 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | ||
| 874 | return result; | ||
| 875 | } | ||
| 876 | |||
| 859 | } // namespace Tegra::Engines | 877 | } // namespace Tegra::Engines |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 04d02d208..fa846a621 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -15,8 +15,8 @@ | |||
| 15 | #include "common/common_funcs.h" | 15 | #include "common/common_funcs.h" |
| 16 | #include "common/common_types.h" | 16 | #include "common/common_types.h" |
| 17 | #include "common/math_util.h" | 17 | #include "common/math_util.h" |
| 18 | #include "video_core/engines/const_buffer_info.h" | ||
| 19 | #include "video_core/engines/const_buffer_engine_interface.h" | 18 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 19 | #include "video_core/engines/const_buffer_info.h" | ||
| 20 | #include "video_core/engines/engine_upload.h" | 20 | #include "video_core/engines/engine_upload.h" |
| 21 | #include "video_core/gpu.h" | 21 | #include "video_core/gpu.h" |
| 22 | #include "video_core/macro_interpreter.h" | 22 | #include "video_core/macro_interpreter.h" |
| @@ -1260,6 +1260,15 @@ public: | |||
| 1260 | 1260 | ||
| 1261 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | 1261 | u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; |
| 1262 | 1262 | ||
| 1263 | SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||
| 1264 | |||
| 1265 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||
| 1266 | u64 offset) const override; | ||
| 1267 | |||
| 1268 | u32 GetBoundBuffer() const override { | ||
| 1269 | return regs.tex_cb_index; | ||
| 1270 | } | ||
| 1271 | |||
| 1263 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | 1272 | /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than |
| 1264 | /// we've seen used. | 1273 | /// we've seen used. |
| 1265 | using MacroMemory = std::array<u32, 0x40000>; | 1274 | using MacroMemory = std::array<u32, 0x40000>; |
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp index 6a9e0ed5e..4f5de8ae9 100644 --- a/src/video_core/shader/const_buffer_locker.cpp +++ b/src/video_core/shader/const_buffer_locker.cpp | |||
| @@ -27,43 +27,121 @@ void ConstBufferLocker::SetEngine(Tegra::Engines::ConstBufferEngineInterface* en | |||
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | 29 | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { |
| 30 | if (!keys) { | ||
| 31 | keys = std::make_shared<KeyMap>(); | ||
| 32 | } | ||
| 33 | auto& key_map = *keys; | ||
| 30 | const std::pair<u32, u32> key = {buffer, offset}; | 34 | const std::pair<u32, u32> key = {buffer, offset}; |
| 31 | const auto iter = keys.find(key); | 35 | const auto iter = key_map.find(key); |
| 32 | if (iter != keys.end()) { | 36 | if (iter != key_map.end()) { |
| 33 | return {iter->second}; | 37 | return {iter->second}; |
| 34 | } | 38 | } |
| 35 | if (!IsEngineSet()) { | 39 | if (!IsEngineSet()) { |
| 36 | return {}; | 40 | return {}; |
| 37 | } | 41 | } |
| 38 | const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); | 42 | const u32 value = engine->AccessConstBuffer32(shader_stage, buffer, offset); |
| 39 | keys.emplace(key, value); | 43 | key_map.emplace(key, value); |
| 44 | return {value}; | ||
| 45 | } | ||
| 46 | |||
| 47 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||
| 48 | if (!bound_samplers) { | ||
| 49 | bound_samplers = std::make_shared<BoundSamplerMap>(); | ||
| 50 | } | ||
| 51 | auto& key_map = *bound_samplers; | ||
| 52 | const u32 key = offset; | ||
| 53 | const auto iter = key_map.find(key); | ||
| 54 | if (iter != key_map.end()) { | ||
| 55 | return {iter->second}; | ||
| 56 | } | ||
| 57 | if (!IsEngineSet()) { | ||
| 58 | return {}; | ||
| 59 | } | ||
| 60 | const Tegra::Engines::SamplerDescriptor value = | ||
| 61 | engine->AccessBoundSampler(shader_stage, offset); | ||
| 62 | key_map.emplace(key, value); | ||
| 63 | return {value}; | ||
| 64 | } | ||
| 65 | |||
| 66 | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||
| 67 | u32 buffer, u32 offset) { | ||
| 68 | if (!bindless_samplers) { | ||
| 69 | bindless_samplers = std::make_shared<BindlessSamplerMap>(); | ||
| 70 | } | ||
| 71 | auto& key_map = *bindless_samplers; | ||
| 72 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 73 | const auto iter = key_map.find(key); | ||
| 74 | if (iter != key_map.end()) { | ||
| 75 | return {iter->second}; | ||
| 76 | } | ||
| 77 | if (!IsEngineSet()) { | ||
| 78 | return {}; | ||
| 79 | } | ||
| 80 | const Tegra::Engines::SamplerDescriptor value = | ||
| 81 | engine->AccessBindlessSampler(shader_stage, buffer, offset); | ||
| 82 | key_map.emplace(key, value); | ||
| 40 | return {value}; | 83 | return {value}; |
| 41 | } | 84 | } |
| 42 | 85 | ||
| 43 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | 86 | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { |
| 87 | if (!keys) { | ||
| 88 | keys = std::make_shared<KeyMap>(); | ||
| 89 | } | ||
| 44 | const std::pair<u32, u32> key = {buffer, offset}; | 90 | const std::pair<u32, u32> key = {buffer, offset}; |
| 45 | keys[key] = value; | 91 | (*keys)[key] = value; |
| 46 | } | 92 | } |
| 47 | 93 | ||
| 48 | u32 ConstBufferLocker::NumKeys() const { | 94 | void ConstBufferLocker::InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler) { |
| 49 | return keys.size(); | 95 | if (!bound_samplers) { |
| 96 | bound_samplers = std::make_shared<BoundSamplerMap>(); | ||
| 97 | } | ||
| 98 | (*bound_samplers)[offset] = sampler; | ||
| 50 | } | 99 | } |
| 51 | 100 | ||
| 52 | const std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>& | 101 | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, |
| 53 | ConstBufferLocker::AccessKeys() const { | 102 | Tegra::Engines::SamplerDescriptor sampler) { |
| 54 | return keys; | 103 | if (!bindless_samplers) { |
| 104 | bindless_samplers = std::make_shared<BindlessSamplerMap>(); | ||
| 105 | } | ||
| 106 | const std::pair<u32, u32> key = {buffer, offset}; | ||
| 107 | (*bindless_samplers)[key] = sampler; | ||
| 55 | } | 108 | } |
| 56 | 109 | ||
| 57 | bool ConstBufferLocker::AreKeysConsistant() const { | 110 | bool ConstBufferLocker::IsConsistant() const { |
| 58 | if (!IsEngineSet()) { | 111 | if (!IsEngineSet()) { |
| 59 | return false; | 112 | return false; |
| 60 | } | 113 | } |
| 61 | for (const auto& key_val : keys) { | 114 | if (keys) { |
| 62 | const std::pair<u32, u32> key = key_val.first; | 115 | for (const auto& key_val : *keys) { |
| 63 | const u32 value = key_val.second; | 116 | const std::pair<u32, u32> key = key_val.first; |
| 64 | const u32 other_value = engine->AccessConstBuffer32(shader_stage, key.first, key.second); | 117 | const u32 value = key_val.second; |
| 65 | if (other_value != value) { | 118 | const u32 other_value = |
| 66 | return false; | 119 | engine->AccessConstBuffer32(shader_stage, key.first, key.second); |
| 120 | if (other_value != value) { | ||
| 121 | return false; | ||
| 122 | } | ||
| 123 | } | ||
| 124 | } | ||
| 125 | if (bound_samplers) { | ||
| 126 | for (const auto& sampler_val : *bound_samplers) { | ||
| 127 | const u32 key = sampler_val.first; | ||
| 128 | const Tegra::Engines::SamplerDescriptor value = sampler_val.second; | ||
| 129 | const Tegra::Engines::SamplerDescriptor other_value = | ||
| 130 | engine->AccessBoundSampler(shader_stage, key); | ||
| 131 | if (other_value.raw != value.raw) { | ||
| 132 | return false; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | } | ||
| 136 | if (bindless_samplers) { | ||
| 137 | for (const auto& sampler_val : *bindless_samplers) { | ||
| 138 | const std::pair<u32, u32> key = sampler_val.first; | ||
| 139 | const Tegra::Engines::SamplerDescriptor value = sampler_val.second; | ||
| 140 | const Tegra::Engines::SamplerDescriptor other_value = | ||
| 141 | engine->AccessBindlessSampler(shader_stage, key.first, key.second); | ||
| 142 | if (other_value.raw != value.raw) { | ||
| 143 | return false; | ||
| 144 | } | ||
| 67 | } | 145 | } |
| 68 | } | 146 | } |
| 69 | return true; | 147 | return true; |
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h index 39e62584d..0bc257781 100644 --- a/src/video_core/shader/const_buffer_locker.h +++ b/src/video_core/shader/const_buffer_locker.h | |||
| @@ -11,6 +11,11 @@ | |||
| 11 | 11 | ||
| 12 | namespace VideoCommon::Shader { | 12 | namespace VideoCommon::Shader { |
| 13 | 13 | ||
| 14 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||
| 15 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||
| 16 | using BindlessSamplerMap = | ||
| 17 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||
| 18 | |||
| 14 | class ConstBufferLocker { | 19 | class ConstBufferLocker { |
| 15 | public: | 20 | public: |
| 16 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | 21 | explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); |
| @@ -29,22 +34,67 @@ public: | |||
| 29 | // registered value, if not it will obtain it from maxwell3d and register it. | 34 | // registered value, if not it will obtain it from maxwell3d and register it. |
| 30 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); | 35 | std::optional<u32> ObtainKey(u32 buffer, u32 offset); |
| 31 | 36 | ||
| 37 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||
| 38 | |||
| 39 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||
| 40 | |||
| 32 | // Manually inserts a key. | 41 | // Manually inserts a key. |
| 33 | void InsertKey(u32 buffer, u32 offset, u32 value); | 42 | void InsertKey(u32 buffer, u32 offset, u32 value); |
| 34 | 43 | ||
| 44 | void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 45 | |||
| 46 | void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||
| 47 | |||
| 35 | // Retrieves the number of keys registered. | 48 | // Retrieves the number of keys registered. |
| 36 | u32 NumKeys() const; | 49 | std::size_t NumKeys() const { |
| 50 | if (!keys) { | ||
| 51 | return 0; | ||
| 52 | } | ||
| 53 | return keys->size(); | ||
| 54 | } | ||
| 55 | |||
| 56 | std::size_t NumBoundSamplers() const { | ||
| 57 | if (!bound_samplers) { | ||
| 58 | return 0; | ||
| 59 | } | ||
| 60 | return bound_samplers->size(); | ||
| 61 | } | ||
| 62 | |||
| 63 | std::size_t NumBindlessSamplers() const { | ||
| 64 | if (!bindless_samplers) { | ||
| 65 | return 0; | ||
| 66 | } | ||
| 67 | return bindless_samplers->size(); | ||
| 68 | } | ||
| 37 | 69 | ||
| 38 | // Gives an accessor to the key's database. | 70 | // Gives an accessor to the key's database. |
| 39 | const std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>& AccessKeys() const; | 71 | // Pre: NumKeys > 0 |
| 72 | const KeyMap& AccessKeys() const { | ||
| 73 | return *keys; | ||
| 74 | } | ||
| 75 | |||
| 76 | // Gives an accessor to the sampler's database. | ||
| 77 | // Pre: NumBindlessSamplers > 0 | ||
| 78 | const BoundSamplerMap& AccessBoundSamplers() const { | ||
| 79 | return *bound_samplers; | ||
| 80 | } | ||
| 81 | |||
| 82 | // Gives an accessor to the sampler's database. | ||
| 83 | // Pre: NumBindlessSamplers > 0 | ||
| 84 | const BindlessSamplerMap& AccessBindlessSamplers() const { | ||
| 85 | return *bindless_samplers; | ||
| 86 | } | ||
| 40 | 87 | ||
| 41 | // Checks keys against maxwell3d's current const buffers. Returns true if they | 88 | // Checks keys & samplers against engine's current const buffers. Returns true if they |
| 42 | // are the same value, false otherwise; | 89 | // are the same value, false otherwise; |
| 43 | bool AreKeysConsistant() const; | 90 | bool IsConsistant() const; |
| 44 | 91 | ||
| 45 | private: | 92 | private: |
| 46 | Tegra::Engines::ConstBufferEngineInterface* engine; | 93 | Tegra::Engines::ConstBufferEngineInterface* engine; |
| 47 | Tegra::Engines::ShaderType shader_stage; | 94 | Tegra::Engines::ShaderType shader_stage; |
| 48 | std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash> keys{}; | 95 | // All containers are lazy initialized as most shaders don't use them. |
| 96 | std::shared_ptr<KeyMap> keys{}; | ||
| 97 | std::shared_ptr<BoundSamplerMap> bound_samplers{}; | ||
| 98 | std::shared_ptr<BindlessSamplerMap> bindless_samplers{}; | ||
| 49 | }; | 99 | }; |
| 50 | } // namespace VideoCommon::Shader | 100 | } // namespace VideoCommon::Shader |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 0b934a069..c369e23ad 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 141 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | 141 | const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); |
| 142 | 142 | ||
| 143 | const auto& sampler = | 143 | const auto& sampler = |
| 144 | GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); | 144 | GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); |
| 145 | 145 | ||
| 146 | Node4 values; | 146 | Node4 values; |
| 147 | for (u32 element = 0; element < values.size(); ++element) { | 147 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 165 | // Sadly, not all texture instructions specify the type of texture their sampler | 165 | // Sadly, not all texture instructions specify the type of texture their sampler |
| 166 | // uses. This must be fixed at a later instance. | 166 | // uses. This must be fixed at a later instance. |
| 167 | const auto& sampler = | 167 | const auto& sampler = |
| 168 | is_bindless | 168 | is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); |
| 169 | ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, | ||
| 170 | false) | ||
| 171 | : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); | ||
| 172 | 169 | ||
| 173 | u32 indexer = 0; | 170 | u32 indexer = 0; |
| 174 | switch (instr.txq.query_type) { | 171 | switch (instr.txq.query_type) { |
| @@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 207 | 204 | ||
| 208 | auto texture_type = instr.tmml.texture_type.Value(); | 205 | auto texture_type = instr.tmml.texture_type.Value(); |
| 209 | const bool is_array = instr.tmml.array != 0; | 206 | const bool is_array = instr.tmml.array != 0; |
| 210 | const auto& sampler = is_bindless | 207 | const auto& sampler = |
| 211 | ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) | 208 | is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) |
| 212 | : GetSampler(instr.sampler, texture_type, is_array, false); | 209 | : GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 213 | 210 | ||
| 214 | std::vector<Node> coords; | 211 | std::vector<Node> coords; |
| 215 | 212 | ||
| @@ -285,10 +282,30 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 285 | return pc; | 282 | return pc; |
| 286 | } | 283 | } |
| 287 | 284 | ||
| 288 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | 285 | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, |
| 289 | bool is_array, bool is_shadow) { | 286 | std::optional<SamplerInfo> sampler_info) { |
| 290 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); | 287 | const auto offset = static_cast<std::size_t>(sampler.index.Value()); |
| 291 | 288 | ||
| 289 | Tegra::Shader::TextureType type; | ||
| 290 | bool is_array; | ||
| 291 | bool is_shadow; | ||
| 292 | if (sampler_info) { | ||
| 293 | type = sampler_info->type; | ||
| 294 | is_array = sampler_info->is_array; | ||
| 295 | is_shadow = sampler_info->is_shadow; | ||
| 296 | } else { | ||
| 297 | auto sampler = locker.ObtainBoundSampler(offset); | ||
| 298 | if (sampler) { | ||
| 299 | type = sampler->texture_type.Value(); | ||
| 300 | is_array = sampler->is_array.Value() != 0; | ||
| 301 | is_shadow = sampler->is_shadow.Value() != 0; | ||
| 302 | } else { | ||
| 303 | type = Tegra::Shader::TextureType::Texture2D; | ||
| 304 | is_array = false; | ||
| 305 | is_shadow = false; | ||
| 306 | } | ||
| 307 | } | ||
| 308 | |||
| 292 | // If this sampler has already been used, return the existing mapping. | 309 | // If this sampler has already been used, return the existing mapping. |
| 293 | const auto itr = | 310 | const auto itr = |
| 294 | std::find_if(used_samplers.begin(), used_samplers.end(), | 311 | std::find_if(used_samplers.begin(), used_samplers.end(), |
| @@ -305,13 +322,32 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||
| 305 | return *used_samplers.emplace(entry).first; | 322 | return *used_samplers.emplace(entry).first; |
| 306 | } | 323 | } |
| 307 | 324 | ||
| 308 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | 325 | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, |
| 309 | bool is_array, bool is_shadow) { | 326 | std::optional<SamplerInfo> sampler_info) { |
| 310 | const Node sampler_register = GetRegister(reg); | 327 | const Node sampler_register = GetRegister(reg); |
| 311 | const auto [base_sampler, cbuf_index, cbuf_offset] = | 328 | const auto [base_sampler, cbuf_index, cbuf_offset] = |
| 312 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | 329 | TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 313 | ASSERT(base_sampler != nullptr); | 330 | ASSERT(base_sampler != nullptr); |
| 314 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | 331 | const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |
| 332 | Tegra::Shader::TextureType type; | ||
| 333 | bool is_array; | ||
| 334 | bool is_shadow; | ||
| 335 | if (sampler_info) { | ||
| 336 | type = sampler_info->type; | ||
| 337 | is_array = sampler_info->is_array; | ||
| 338 | is_shadow = sampler_info->is_shadow; | ||
| 339 | } else { | ||
| 340 | auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); | ||
| 341 | if (sampler) { | ||
| 342 | type = sampler->texture_type.Value(); | ||
| 343 | is_array = sampler->is_array.Value() != 0; | ||
| 344 | is_shadow = sampler->is_shadow.Value() != 0; | ||
| 345 | } else { | ||
| 346 | type = Tegra::Shader::TextureType::Texture2D; | ||
| 347 | is_array = false; | ||
| 348 | is_shadow = false; | ||
| 349 | } | ||
| 350 | } | ||
| 315 | 351 | ||
| 316 | // If this sampler has already been used, return the existing mapping. | 352 | // If this sampler has already been used, return the existing mapping. |
| 317 | const auto itr = | 353 | const auto itr = |
| @@ -411,9 +447,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||
| 411 | (texture_type == TextureType::TextureCube && is_array && is_shadow), | 447 | (texture_type == TextureType::TextureCube && is_array && is_shadow), |
| 412 | "This method is not supported."); | 448 | "This method is not supported."); |
| 413 | 449 | ||
| 414 | const auto& sampler = is_bindless | 450 | const auto& sampler = |
| 415 | ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) | 451 | is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) |
| 416 | : GetSampler(instr.sampler, texture_type, is_array, is_shadow); | 452 | : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); |
| 417 | 453 | ||
| 418 | const bool lod_needed = process_mode == TextureProcessMode::LZ || | 454 | const bool lod_needed = process_mode == TextureProcessMode::LZ || |
| 419 | process_mode == TextureProcessMode::LL || | 455 | process_mode == TextureProcessMode::LL || |
| @@ -577,7 +613,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||
| 577 | dc = GetRegister(parameter_register++); | 613 | dc = GetRegister(parameter_register++); |
| 578 | } | 614 | } |
| 579 | 615 | ||
| 580 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | 616 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); |
| 581 | 617 | ||
| 582 | Node4 values; | 618 | Node4 values; |
| 583 | for (u32 element = 0; element < values.size(); ++element) { | 619 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -610,7 +646,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||
| 610 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | 646 | // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; |
| 611 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | 647 | // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; |
| 612 | 648 | ||
| 613 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 649 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 614 | 650 | ||
| 615 | Node4 values; | 651 | Node4 values; |
| 616 | for (u32 element = 0; element < values.size(); ++element) { | 652 | for (u32 element = 0; element < values.size(); ++element) { |
| @@ -646,7 +682,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||
| 646 | // When lod is used always is in gpr20 | 682 | // When lod is used always is in gpr20 |
| 647 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | 683 | const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |
| 648 | 684 | ||
| 649 | const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); | 685 | const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); |
| 650 | 686 | ||
| 651 | Node4 values; | 687 | Node4 values; |
| 652 | for (u32 element = 0; element < values.size(); ++element) { | 688 | for (u32 element = 0; element < values.size(); ++element) { |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e3b568d3e..3a3e381d2 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -173,6 +173,13 @@ public: | |||
| 173 | 173 | ||
| 174 | private: | 174 | private: |
| 175 | friend class ASTDecoder; | 175 | friend class ASTDecoder; |
| 176 | |||
| 177 | struct SamplerInfo { | ||
| 178 | Tegra::Shader::TextureType type; | ||
| 179 | bool is_array; | ||
| 180 | bool is_shadow; | ||
| 181 | }; | ||
| 182 | |||
| 176 | void Decode(); | 183 | void Decode(); |
| 177 | 184 | ||
| 178 | NodeBlock DecodeRange(u32 begin, u32 end); | 185 | NodeBlock DecodeRange(u32 begin, u32 end); |
| @@ -297,12 +304,11 @@ private: | |||
| 297 | 304 | ||
| 298 | /// Accesses a texture sampler | 305 | /// Accesses a texture sampler |
| 299 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, | 306 | const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |
| 300 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow); | 307 | std::optional<SamplerInfo> sampler_info); |
| 301 | 308 | ||
| 302 | // Accesses a texture sampler for a bindless texture. | 309 | // Accesses a texture sampler for a bindless texture. |
| 303 | const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, | 310 | const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, |
| 304 | Tegra::Shader::TextureType type, bool is_array, | 311 | std::optional<SamplerInfo> sampler_info); |
| 305 | bool is_shadow); | ||
| 306 | 312 | ||
| 307 | /// Accesses an image. | 313 | /// Accesses an image. |
| 308 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | 314 | Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |