summaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2020-06-04 23:03:49 -0300
committerGravatar ReinUsesLisp2020-06-05 00:24:51 -0300
commit5b2b6d594c6cfa77c3fb92faee63ad524bfe7204 (patch)
tree807efa01ce6b050f9660ab50a15d355d0be05bd5 /src/video_core/renderer_opengl
parentshader/track: Move bindless tracking to a separate function (diff)
downloadyuzu-5b2b6d594c6cfa77c3fb92faee63ad524bfe7204.tar.gz
yuzu-5b2b6d594c6cfa77c3fb92faee63ad524bfe7204.tar.xz
yuzu-5b2b6d594c6cfa77c3fb92faee63ad524bfe7204.zip
shader/texture: Join separate image and sampler pairs offline
Games using D3D idioms can join images and samplers when a shader executes, instead of baking them into a combined image sampler. This is also possible on Vulkan. One approach to this problem would be to use separate samplers on Vulkan and leave this unimplemented on OpenGL, but we can't do this because there's no consistent way of determining which constant buffer holds a sampler and which one holds an image. We could in theory find the first bit and, if it's in the TIC area, treat it as an image; but this falls apart when an image or sampler handle uses an index of zero. The approach used is to track a LOP.OR operation (this is done at an IR level, not at an ISA level), then track the constant buffers used as its sources and store this pair. Then, outside of shader execution, join the sampler and image pair with a bitwise OR operation. This approach won't work on games that truly use separate samplers in a meaningful way; for example, pooling textures in a 2D array and determining at runtime which sampler to use. This invalidates OpenGL's disk shader cache :) - Used mostly by D3D ports to Switch
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 16
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.cpp 64
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_disk_cache.h 1
3 files changed, 64 insertions, 17 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 55e79aaf6..aedcdcb78 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -65,10 +65,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
65template <typename Engine, typename Entry> 65template <typename Engine, typename Entry>
66Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 66Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
67 ShaderType shader_type, std::size_t index = 0) { 67 ShaderType shader_type, std::size_t index = 0) {
68 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
69 if (entry.is_separated) {
70 const u32 buffer_1 = entry.buffer;
71 const u32 buffer_2 = entry.secondary_buffer;
72 const u32 offset_1 = entry.offset;
73 const u32 offset_2 = entry.secondary_offset;
74 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
75 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
76 return engine.GetTextureInfo(handle_1 | handle_2);
77 }
78 }
68 if (entry.is_bindless) { 79 if (entry.is_bindless) {
69 const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); 80 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
70 return engine.GetTextureInfo(tex_handle); 81 return engine.GetTextureInfo(handle);
71 } 82 }
83
72 const auto& gpu_profile = engine.AccessGuestDriverProfile(); 84 const auto& gpu_profile = engine.AccessGuestDriverProfile();
73 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); 85 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
74 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 86 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
29 29
30namespace { 30namespace {
31 31
32using VideoCommon::Shader::SeparateSamplerKey;
33
32using ShaderCacheVersionHash = std::array<u8, 64>; 34using ShaderCacheVersionHash = std::array<u8, 64>;
33 35
34struct ConstBufferKey { 36struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
37 u32 value = 0; 39 u32 value = 0;
38}; 40};
39 41
40struct BoundSamplerKey { 42struct BoundSamplerEntry {
41 u32 offset = 0; 43 u32 offset = 0;
42 Tegra::Engines::SamplerDescriptor sampler; 44 Tegra::Engines::SamplerDescriptor sampler;
43}; 45};
44 46
45struct BindlessSamplerKey { 47struct SeparateSamplerEntry {
48 u32 cbuf1 = 0;
49 u32 cbuf2 = 0;
50 u32 offset1 = 0;
51 u32 offset2 = 0;
52 Tegra::Engines::SamplerDescriptor sampler;
53};
54
55struct BindlessSamplerEntry {
46 u32 cbuf = 0; 56 u32 cbuf = 0;
47 u32 offset = 0; 57 u32 offset = 0;
48 Tegra::Engines::SamplerDescriptor sampler; 58 Tegra::Engines::SamplerDescriptor sampler;
49}; 59};
50 60
51constexpr u32 NativeVersion = 20; 61constexpr u32 NativeVersion = 21;
52 62
53ShaderCacheVersionHash GetShaderCacheVersionHash() { 63ShaderCacheVersionHash GetShaderCacheVersionHash() {
54 ShaderCacheVersionHash hash{}; 64 ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
87 u32 texture_handler_size_value; 97 u32 texture_handler_size_value;
88 u32 num_keys; 98 u32 num_keys;
89 u32 num_bound_samplers; 99 u32 num_bound_samplers;
100 u32 num_separate_samplers;
90 u32 num_bindless_samplers; 101 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || 102 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 || 103 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 || 104 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || 105 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || 106 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
107 file.ReadArray(&num_separate_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) { 108 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false; 109 return false;
98 } 110 }
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
101 } 113 }
102 114
103 std::vector<ConstBufferKey> flat_keys(num_keys); 115 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); 116 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); 117 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
118 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || 119 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != 120 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() || 121 flat_bound_samplers.size() ||
122 file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
123 flat_separate_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != 124 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) { 125 flat_bindless_samplers.size()) {
111 return false; 126 return false;
112 } 127 }
113 for (const auto& key : flat_keys) { 128 for (const auto& entry : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value}); 129 keys.insert({{entry.cbuf, entry.offset}, entry.value});
115 } 130 }
116 for (const auto& key : flat_bound_samplers) { 131 for (const auto& entry : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler); 132 bound_samplers.emplace(entry.offset, entry.sampler);
118 } 133 }
119 for (const auto& key : flat_bindless_samplers) { 134 for (const auto& entry : flat_separate_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); 135 SeparateSamplerKey key;
136 key.buffers = {entry.cbuf1, entry.cbuf2};
137 key.offsets = {entry.offset1, entry.offset2};
138 separate_samplers.emplace(key, entry.sampler);
139 }
140 for (const auto& entry : flat_bindless_samplers) {
141 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
121 } 142 }
122 143
123 return true; 144 return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || 163 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 || 164 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || 165 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
166 file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { 167 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false; 168 return false;
147 } 169 }
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); 174 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 } 175 }
154 176
155 std::vector<BoundSamplerKey> flat_bound_samplers; 177 std::vector<BoundSamplerEntry> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size()); 178 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) { 179 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); 180 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
181 }
182
183 std::vector<SeparateSamplerEntry> flat_separate_samplers;
184 flat_separate_samplers.reserve(separate_samplers.size());
185 for (const auto& [key, sampler] : separate_samplers) {
186 SeparateSamplerEntry entry;
187 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
188 std::tie(entry.offset1, entry.offset2) = key.offsets;
189 entry.sampler = sampler;
190 flat_separate_samplers.push_back(entry);
159 } 191 }
160 192
161 std::vector<BindlessSamplerKey> flat_bindless_samplers; 193 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size()); 194 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) { 195 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back( 196 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler}); 197 BindlessSamplerEntry{address.first, address.second, sampler});
166 } 198 }
167 199
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && 200 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == 201 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() && 202 flat_bound_samplers.size() &&
203 file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
204 flat_separate_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == 205 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size(); 206 flat_bindless_samplers.size();
173} 207}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
57 VideoCommon::Shader::ComputeInfo compute_info; 57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys; 58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers; 59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers; 61 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
61}; 62};
62 63