diff options
| author | 2020-06-04 23:03:49 -0300 | |
|---|---|---|
| committer | 2020-06-05 00:24:51 -0300 | |
| commit | 5b2b6d594c6cfa77c3fb92faee63ad524bfe7204 (patch) | |
| tree | 807efa01ce6b050f9660ab50a15d355d0be05bd5 /src/video_core/renderer_opengl | |
| parent | shader/track: Move bindless tracking to a separate function (diff) | |
| download | yuzu-5b2b6d594c6cfa77c3fb92faee63ad524bfe7204.tar.gz yuzu-5b2b6d594c6cfa77c3fb92faee63ad524bfe7204.tar.xz yuzu-5b2b6d594c6cfa77c3fb92faee63ad524bfe7204.zip | |
shader/texture: Join separate image and sampler pairs offline
Games using D3D idioms can join images and samplers when a shader
executes, instead of baking them into a combined sampler image. This is
also possible on Vulkan.
One approach to solving this would be to use separate samplers on
Vulkan and leave this unimplemented on OpenGL, but we can't do this
because there's no consistent way of determining which constant buffer
holds a sampler and which one an image. We could in theory find the
first bit and if it's in the TIC area, it's an image; but this falls
apart when an image or sampler handle uses an index of zero.
The approach used is to track back to a LOP.OR operation (this is done at
the IR level, not at the ISA level), then track the constant buffers used
as its sources and store this pair. Then, outside of shader execution, join
the sampler and image pair with a bitwise OR operation.
This approach won't work on games that truly use separate samplers in a
meaningful way, for example by pooling textures in a 2D array and
determining at runtime which sampler to use.
This invalidates OpenGL's disk shader cache :)
- Used mostly by D3D ports to Switch
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | 64 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_disk_cache.h | 1 |
3 files changed, 64 insertions, 17 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 55e79aaf6..aedcdcb78 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -65,10 +65,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16; | |||
| 65 | template <typename Engine, typename Entry> | 65 | template <typename Engine, typename Entry> |
| 66 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 66 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 67 | ShaderType shader_type, std::size_t index = 0) { | 67 | ShaderType shader_type, std::size_t index = 0) { |
| 68 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 69 | if (entry.is_separated) { | ||
| 70 | const u32 buffer_1 = entry.buffer; | ||
| 71 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 72 | const u32 offset_1 = entry.offset; | ||
| 73 | const u32 offset_2 = entry.secondary_offset; | ||
| 74 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 75 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 76 | return engine.GetTextureInfo(handle_1 | handle_2); | ||
| 77 | } | ||
| 78 | } | ||
| 68 | if (entry.is_bindless) { | 79 | if (entry.is_bindless) { |
| 69 | const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | 80 | const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 70 | return engine.GetTextureInfo(tex_handle); | 81 | return engine.GetTextureInfo(handle); |
| 71 | } | 82 | } |
| 83 | |||
| 72 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | 84 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); |
| 73 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | 85 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); |
| 74 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | 86 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9e95a122b..653c3f2f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap; | |||
| 29 | 29 | ||
| 30 | namespace { | 30 | namespace { |
| 31 | 31 | ||
| 32 | using VideoCommon::Shader::SeparateSamplerKey; | ||
| 33 | |||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 34 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 35 | ||
| 34 | struct ConstBufferKey { | 36 | struct ConstBufferKey { |
| @@ -37,18 +39,26 @@ struct ConstBufferKey { | |||
| 37 | u32 value = 0; | 39 | u32 value = 0; |
| 38 | }; | 40 | }; |
| 39 | 41 | ||
| 40 | struct BoundSamplerKey { | 42 | struct BoundSamplerEntry { |
| 41 | u32 offset = 0; | 43 | u32 offset = 0; |
| 42 | Tegra::Engines::SamplerDescriptor sampler; | 44 | Tegra::Engines::SamplerDescriptor sampler; |
| 43 | }; | 45 | }; |
| 44 | 46 | ||
| 45 | struct BindlessSamplerKey { | 47 | struct SeparateSamplerEntry { |
| 48 | u32 cbuf1 = 0; | ||
| 49 | u32 cbuf2 = 0; | ||
| 50 | u32 offset1 = 0; | ||
| 51 | u32 offset2 = 0; | ||
| 52 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct BindlessSamplerEntry { | ||
| 46 | u32 cbuf = 0; | 56 | u32 cbuf = 0; |
| 47 | u32 offset = 0; | 57 | u32 offset = 0; |
| 48 | Tegra::Engines::SamplerDescriptor sampler; | 58 | Tegra::Engines::SamplerDescriptor sampler; |
| 49 | }; | 59 | }; |
| 50 | 60 | ||
| 51 | constexpr u32 NativeVersion = 20; | 61 | constexpr u32 NativeVersion = 21; |
| 52 | 62 | ||
| 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 63 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 54 | ShaderCacheVersionHash hash{}; | 64 | ShaderCacheVersionHash hash{}; |
| @@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 87 | u32 texture_handler_size_value; | 97 | u32 texture_handler_size_value; |
| 88 | u32 num_keys; | 98 | u32 num_keys; |
| 89 | u32 num_bound_samplers; | 99 | u32 num_bound_samplers; |
| 100 | u32 num_separate_samplers; | ||
| 90 | u32 num_bindless_samplers; | 101 | u32 num_bindless_samplers; |
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | 102 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || |
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | 103 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || |
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || | 104 | file.ReadArray(&texture_handler_size_value, 1) != 1 || |
| 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | 105 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || |
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | 106 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || |
| 107 | file.ReadArray(&num_separate_samplers, 1) != 1 || | ||
| 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | 108 | file.ReadArray(&num_bindless_samplers, 1) != 1) { |
| 97 | return false; | 109 | return false; |
| 98 | } | 110 | } |
| @@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 101 | } | 113 | } |
| 102 | 114 | ||
| 103 | std::vector<ConstBufferKey> flat_keys(num_keys); | 115 | std::vector<ConstBufferKey> flat_keys(num_keys); |
| 104 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | 116 | std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); |
| 105 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | 117 | std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); |
| 118 | std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); | ||
| 106 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | 119 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || |
| 107 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | 120 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != |
| 108 | flat_bound_samplers.size() || | 121 | flat_bound_samplers.size() || |
| 122 | file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) != | ||
| 123 | flat_separate_samplers.size() || | ||
| 109 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | 124 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != |
| 110 | flat_bindless_samplers.size()) { | 125 | flat_bindless_samplers.size()) { |
| 111 | return false; | 126 | return false; |
| 112 | } | 127 | } |
| 113 | for (const auto& key : flat_keys) { | 128 | for (const auto& entry : flat_keys) { |
| 114 | keys.insert({{key.cbuf, key.offset}, key.value}); | 129 | keys.insert({{entry.cbuf, entry.offset}, entry.value}); |
| 115 | } | 130 | } |
| 116 | for (const auto& key : flat_bound_samplers) { | 131 | for (const auto& entry : flat_bound_samplers) { |
| 117 | bound_samplers.emplace(key.offset, key.sampler); | 132 | bound_samplers.emplace(entry.offset, entry.sampler); |
| 118 | } | 133 | } |
| 119 | for (const auto& key : flat_bindless_samplers) { | 134 | for (const auto& entry : flat_separate_samplers) { |
| 120 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | 135 | SeparateSamplerKey key; |
| 136 | key.buffers = {entry.cbuf1, entry.cbuf2}; | ||
| 137 | key.offsets = {entry.offset1, entry.offset2}; | ||
| 138 | separate_samplers.emplace(key, entry.sampler); | ||
| 139 | } | ||
| 140 | for (const auto& entry : flat_bindless_samplers) { | ||
| 141 | bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); | ||
| 121 | } | 142 | } |
| 122 | 143 | ||
| 123 | return true; | 144 | return true; |
| @@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | 163 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || |
| 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | 164 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || |
| 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | 165 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || |
| 166 | file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 || | ||
| 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | 167 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { |
| 146 | return false; | 168 | return false; |
| 147 | } | 169 | } |
| @@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 152 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | 174 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); |
| 153 | } | 175 | } |
| 154 | 176 | ||
| 155 | std::vector<BoundSamplerKey> flat_bound_samplers; | 177 | std::vector<BoundSamplerEntry> flat_bound_samplers; |
| 156 | flat_bound_samplers.reserve(bound_samplers.size()); | 178 | flat_bound_samplers.reserve(bound_samplers.size()); |
| 157 | for (const auto& [address, sampler] : bound_samplers) { | 179 | for (const auto& [address, sampler] : bound_samplers) { |
| 158 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | 180 | flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); |
| 181 | } | ||
| 182 | |||
| 183 | std::vector<SeparateSamplerEntry> flat_separate_samplers; | ||
| 184 | flat_separate_samplers.reserve(separate_samplers.size()); | ||
| 185 | for (const auto& [key, sampler] : separate_samplers) { | ||
| 186 | SeparateSamplerEntry entry; | ||
| 187 | std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; | ||
| 188 | std::tie(entry.offset1, entry.offset2) = key.offsets; | ||
| 189 | entry.sampler = sampler; | ||
| 190 | flat_separate_samplers.push_back(entry); | ||
| 159 | } | 191 | } |
| 160 | 192 | ||
| 161 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | 193 | std::vector<BindlessSamplerEntry> flat_bindless_samplers; |
| 162 | flat_bindless_samplers.reserve(bindless_samplers.size()); | 194 | flat_bindless_samplers.reserve(bindless_samplers.size()); |
| 163 | for (const auto& [address, sampler] : bindless_samplers) { | 195 | for (const auto& [address, sampler] : bindless_samplers) { |
| 164 | flat_bindless_samplers.push_back( | 196 | flat_bindless_samplers.push_back( |
| 165 | BindlessSamplerKey{address.first, address.second, sampler}); | 197 | BindlessSamplerEntry{address.first, address.second, sampler}); |
| 166 | } | 198 | } |
| 167 | 199 | ||
| 168 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | 200 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && |
| 169 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | 201 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == |
| 170 | flat_bound_samplers.size() && | 202 | flat_bound_samplers.size() && |
| 203 | file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) == | ||
| 204 | flat_separate_samplers.size() && | ||
| 171 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | 205 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == |
| 172 | flat_bindless_samplers.size(); | 206 | flat_bindless_samplers.size(); |
| 173 | } | 207 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index d5be52e40..a79cef0e9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry { | |||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | 57 | VideoCommon::Shader::ComputeInfo compute_info; |
| 58 | VideoCommon::Shader::KeyMap keys; | 58 | VideoCommon::Shader::KeyMap keys; |
| 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; |
| 60 | VideoCommon::Shader::SeparateSamplerMap separate_samplers; | ||
| 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | 61 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |
| 61 | }; | 62 | }; |
| 62 | 63 | ||