summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/core/hle/kernel/process.cpp6
-rw-r--r--src/core/hle/kernel/resource_limit.cpp6
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h16
-rw-r--r--src/video_core/engines/const_buffer_engine_interface.h1
-rw-r--r--src/video_core/engines/kepler_compute.cpp5
-rw-r--r--src/video_core/engines/kepler_compute.h2
-rw-r--r--src/video_core/engines/maxwell_3d.cpp5
-rw-r--r--src/video_core/engines/maxwell_3d.h2
-rw-r--r--src/video_core/rasterizer_cache.cpp7
-rw-r--r--src/video_core/rasterizer_cache.h253
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp87
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h51
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp72
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h33
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp18
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h2
-rw-r--r--src/video_core/shader/decode/texture.cpp55
-rw-r--r--src/video_core/shader/node.h75
-rw-r--r--src/video_core/shader/node_helper.h2
-rw-r--r--src/video_core/shader/registry.cpp20
-rw-r--r--src/video_core/shader/registry.h35
-rw-r--r--src/video_core/shader/shader_ir.h14
-rw-r--r--src/video_core/shader/track.cpp78
-rw-r--r--src/video_core/shader_cache.h228
31 files changed, 661 insertions, 545 deletions
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 36724569f..c4c5199b1 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const {
132 132
133u64 Process::GetTotalPhysicalMemoryAvailable() const { 133u64 Process::GetTotalPhysicalMemoryAvailable() const {
134 const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + 134 const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) +
135 page_table->GetTotalHeapSize() + image_size + main_thread_stack_size}; 135 page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size +
136 main_thread_stack_size};
136 137
137 if (capacity < memory_usage_capacity) { 138 if (capacity < memory_usage_capacity) {
138 return capacity; 139 return capacity;
@@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const {
146} 147}
147 148
148u64 Process::GetTotalPhysicalMemoryUsed() const { 149u64 Process::GetTotalPhysicalMemoryUsed() const {
149 return image_size + main_thread_stack_size + page_table->GetTotalHeapSize(); 150 return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() +
151 GetSystemResourceSize();
150} 152}
151 153
152u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { 154u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const {
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp
index d9beaa3a4..212e442f4 100644
--- a/src/core/hle/kernel/resource_limit.cpp
+++ b/src/core/hle/kernel/resource_limit.cpp
@@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) {
24 const std::size_t index{ResourceTypeToIndex(resource)}; 24 const std::size_t index{ResourceTypeToIndex(resource)};
25 25
26 s64 new_value = current[index] + amount; 26 s64 new_value = current[index] + amount;
27 while (new_value > limit[index] && available[index] + amount <= limit[index]) { 27 if (new_value > limit[index] && available[index] + amount <= limit[index]) {
28 // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout 28 // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout
29 new_value = current[index] + amount; 29 new_value = current[index] + amount;
30
31 if (timeout >= 0) {
32 break;
33 }
34 } 30 }
35 31
36 if (new_value <= limit[index]) { 32 if (new_value <= limit[index]) {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 2bf8d68ce..39d5d8401 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -49,8 +49,6 @@ add_library(video_core STATIC
49 query_cache.h 49 query_cache.h
50 rasterizer_accelerated.cpp 50 rasterizer_accelerated.cpp
51 rasterizer_accelerated.h 51 rasterizer_accelerated.h
52 rasterizer_cache.cpp
53 rasterizer_cache.h
54 rasterizer_interface.h 52 rasterizer_interface.h
55 renderer_base.cpp 53 renderer_base.cpp
56 renderer_base.h 54 renderer_base.h
@@ -93,6 +91,7 @@ add_library(video_core STATIC
93 renderer_opengl/utils.h 91 renderer_opengl/utils.h
94 sampler_cache.cpp 92 sampler_cache.cpp
95 sampler_cache.h 93 sampler_cache.h
94 shader_cache.h
96 shader/decode/arithmetic.cpp 95 shader/decode/arithmetic.cpp
97 shader/decode/arithmetic_immediate.cpp 96 shader/decode/arithmetic_immediate.cpp
98 shader/decode/bfe.cpp 97 shader/decode/bfe.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b88fce2cd..77ae34339 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -110,19 +110,23 @@ public:
110 }); 110 });
111 } 111 }
112 112
113 void Map(std::size_t max_size) { 113 /// Prepares the buffer cache for data uploading
114 /// @param max_size Maximum number of bytes that will be uploaded
115 /// @return True when a stream buffer invalidation was required, false otherwise
116 bool Map(std::size_t max_size) {
114 std::lock_guard lock{mutex}; 117 std::lock_guard lock{mutex};
115 118
119 bool invalidated;
116 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); 120 std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
117 buffer_offset = buffer_offset_base; 121 buffer_offset = buffer_offset_base;
122
123 return invalidated;
118 } 124 }
119 125
120 /// Finishes the upload stream, returns true on bindings invalidation. 126 /// Finishes the upload stream
121 bool Unmap() { 127 void Unmap() {
122 std::lock_guard lock{mutex}; 128 std::lock_guard lock{mutex};
123
124 stream_buffer->Unmap(buffer_offset - buffer_offset_base); 129 stream_buffer->Unmap(buffer_offset - buffer_offset_base);
125 return std::exchange(invalidated, false);
126 } 130 }
127 131
128 void TickFrame() { 132 void TickFrame() {
@@ -576,8 +580,6 @@ private:
576 std::unique_ptr<StreamBuffer> stream_buffer; 580 std::unique_ptr<StreamBuffer> stream_buffer;
577 BufferType stream_buffer_handle{}; 581 BufferType stream_buffer_handle{};
578 582
579 bool invalidated = false;
580
581 u8* buffer_ptr = nullptr; 583 u8* buffer_ptr = nullptr;
582 u64 buffer_offset = 0; 584 u64 buffer_offset = 0;
583 u64 buffer_offset_base = 0; 585 u64 buffer_offset_base = 0;
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index ebe139504..f46e81bb7 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -93,6 +93,7 @@ public:
93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; 93 virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 94 virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
95 u64 offset) const = 0; 95 u64 offset) const = 0;
96 virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
96 virtual u32 GetBoundBuffer() const = 0; 97 virtual u32 GetBoundBuffer() const = 0;
97 98
98 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; 99 virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f6237fc6a..a82b06a38 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
92 ASSERT(stage == ShaderType::Compute); 92 ASSERT(stage == ShaderType::Compute);
93 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; 93 const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
94 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; 94 const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
95 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
96}
95 97
96 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 98SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
99 const Texture::TextureHandle tex_handle{handle};
97 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 100 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
98 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 101 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
99 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 102 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 18ceedfaf..b7f668d88 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -219,6 +219,8 @@ public:
219 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 219 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
220 u64 offset) const override; 220 u64 offset) const override;
221 221
222 SamplerDescriptor AccessSampler(u32 handle) const override;
223
222 u32 GetBoundBuffer() const override { 224 u32 GetBoundBuffer() const override {
223 return regs.tex_cb_index; 225 return regs.tex_cb_index;
224 } 226 }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e46b153f9..ea3c8a963 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
740 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; 740 const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
741 const auto& tex_info_buffer = shader.const_buffers[const_buffer]; 741 const auto& tex_info_buffer = shader.const_buffers[const_buffer];
742 const GPUVAddr tex_info_address = tex_info_buffer.address + offset; 742 const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
743 return AccessSampler(memory_manager.Read<u32>(tex_info_address));
744}
743 745
744 const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; 746SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
747 const Texture::TextureHandle tex_handle{handle};
745 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); 748 const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
746 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); 749 SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
747 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); 750 result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 79fc9bbea..d5fe25065 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1404,6 +1404,8 @@ public:
1404 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, 1404 SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
1405 u64 offset) const override; 1405 u64 offset) const override;
1406 1406
1407 SamplerDescriptor AccessSampler(u32 handle) const override;
1408
1407 u32 GetBoundBuffer() const override { 1409 u32 GetBoundBuffer() const override {
1408 return regs.tex_cb_index; 1410 return regs.tex_cb_index;
1409 } 1411 }
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp
deleted file mode 100644
index 093b2cdf4..000000000
--- a/src/video_core/rasterizer_cache.cpp
+++ /dev/null
@@ -1,7 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "video_core/rasterizer_cache.h"
6
7RasterizerCacheObject::~RasterizerCacheObject() = default;
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h
deleted file mode 100644
index 096ee337c..000000000
--- a/src/video_core/rasterizer_cache.h
+++ /dev/null
@@ -1,253 +0,0 @@
1// Copyright 2018 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <mutex>
8#include <set>
9#include <unordered_map>
10
11#include <boost/icl/interval_map.hpp>
12#include <boost/range/iterator_range_core.hpp>
13
14#include "common/common_types.h"
15#include "core/settings.h"
16#include "video_core/gpu.h"
17#include "video_core/rasterizer_interface.h"
18
19class RasterizerCacheObject {
20public:
21 explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
22
23 virtual ~RasterizerCacheObject();
24
25 VAddr GetCpuAddr() const {
26 return cpu_addr;
27 }
28
29 /// Gets the size of the shader in guest memory, required for cache management
30 virtual std::size_t GetSizeInBytes() const = 0;
31
32 /// Sets whether the cached object should be considered registered
33 void SetIsRegistered(bool registered) {
34 is_registered = registered;
35 }
36
37 /// Returns true if the cached object is registered
38 bool IsRegistered() const {
39 return is_registered;
40 }
41
42 /// Returns true if the cached object is dirty
43 bool IsDirty() const {
44 return is_dirty;
45 }
46
47 /// Returns ticks from when this cached object was last modified
48 u64 GetLastModifiedTicks() const {
49 return last_modified_ticks;
50 }
51
52 /// Marks an object as recently modified, used to specify whether it is clean or dirty
53 template <class T>
54 void MarkAsModified(bool dirty, T& cache) {
55 is_dirty = dirty;
56 last_modified_ticks = cache.GetModifiedTicks();
57 }
58
59 void SetMemoryMarked(bool is_memory_marked_) {
60 is_memory_marked = is_memory_marked_;
61 }
62
63 bool IsMemoryMarked() const {
64 return is_memory_marked;
65 }
66
67 void SetSyncPending(bool is_sync_pending_) {
68 is_sync_pending = is_sync_pending_;
69 }
70
71 bool IsSyncPending() const {
72 return is_sync_pending;
73 }
74
75private:
76 bool is_registered{}; ///< Whether the object is currently registered with the cache
77 bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
78 bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
79 bool is_sync_pending{}; ///< Whether the object is pending deletion.
80 u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
81 VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space
82};
83
84template <class T>
85class RasterizerCache : NonCopyable {
86 friend class RasterizerCacheObject;
87
88public:
89 explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
90
91 /// Write any cached resources overlapping the specified region back to memory
92 void FlushRegion(VAddr addr, std::size_t size) {
93 std::lock_guard lock{mutex};
94
95 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
96 for (auto& object : objects) {
97 FlushObject(object);
98 }
99 }
100
101 /// Mark the specified region as being invalidated
102 void InvalidateRegion(VAddr addr, u64 size) {
103 std::lock_guard lock{mutex};
104
105 const auto& objects{GetSortedObjectsFromRegion(addr, size)};
106 for (auto& object : objects) {
107 if (!object->IsRegistered()) {
108 // Skip duplicates
109 continue;
110 }
111 Unregister(object);
112 }
113 }
114
115 void OnCPUWrite(VAddr addr, std::size_t size) {
116 std::lock_guard lock{mutex};
117
118 for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
119 if (object->IsRegistered()) {
120 UnmarkMemory(object);
121 object->SetSyncPending(true);
122 marked_for_unregister.emplace_back(object);
123 }
124 }
125 }
126
127 void SyncGuestHost() {
128 std::lock_guard lock{mutex};
129
130 for (const auto& object : marked_for_unregister) {
131 if (object->IsRegistered()) {
132 object->SetSyncPending(false);
133 Unregister(object);
134 }
135 }
136 marked_for_unregister.clear();
137 }
138
139 /// Invalidates everything in the cache
140 void InvalidateAll() {
141 std::lock_guard lock{mutex};
142
143 while (interval_cache.begin() != interval_cache.end()) {
144 Unregister(*interval_cache.begin()->second.begin());
145 }
146 }
147
148protected:
149 /// Tries to get an object from the cache with the specified cache address
150 T TryGet(VAddr addr) const {
151 const auto iter = map_cache.find(addr);
152 if (iter != map_cache.end())
153 return iter->second;
154 return nullptr;
155 }
156
157 /// Register an object into the cache
158 virtual void Register(const T& object) {
159 std::lock_guard lock{mutex};
160
161 object->SetIsRegistered(true);
162 interval_cache.add({GetInterval(object), ObjectSet{object}});
163 map_cache.insert({object->GetCpuAddr(), object});
164 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
165 object->SetMemoryMarked(true);
166 }
167
168 /// Unregisters an object from the cache
169 virtual void Unregister(const T& object) {
170 std::lock_guard lock{mutex};
171
172 UnmarkMemory(object);
173 object->SetIsRegistered(false);
174 if (object->IsSyncPending()) {
175 marked_for_unregister.remove(object);
176 object->SetSyncPending(false);
177 }
178 const VAddr addr = object->GetCpuAddr();
179 interval_cache.subtract({GetInterval(object), ObjectSet{object}});
180 map_cache.erase(addr);
181 }
182
183 void UnmarkMemory(const T& object) {
184 if (!object->IsMemoryMarked()) {
185 return;
186 }
187 rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
188 object->SetMemoryMarked(false);
189 }
190
191 /// Returns a ticks counter used for tracking when cached objects were last modified
192 u64 GetModifiedTicks() {
193 std::lock_guard lock{mutex};
194
195 return ++modified_ticks;
196 }
197
198 virtual void FlushObjectInner(const T& object) = 0;
199
200 /// Flushes the specified object, updating appropriate cache state as needed
201 void FlushObject(const T& object) {
202 std::lock_guard lock{mutex};
203
204 if (!object->IsDirty()) {
205 return;
206 }
207 FlushObjectInner(object);
208 object->MarkAsModified(false, *this);
209 }
210
211 std::recursive_mutex mutex;
212
213private:
214 /// Returns a list of cached objects from the specified memory region, ordered by access time
215 std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
216 if (size == 0) {
217 return {};
218 }
219
220 std::vector<T> objects;
221 const ObjectInterval interval{addr, addr + size};
222 for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
223 for (auto& cached_object : pair.second) {
224 if (!cached_object) {
225 continue;
226 }
227 objects.push_back(cached_object);
228 }
229 }
230
231 std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
232 return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
233 });
234
235 return objects;
236 }
237
238 using ObjectSet = std::set<T>;
239 using ObjectCache = std::unordered_map<VAddr, T>;
240 using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
241 using ObjectInterval = typename IntervalCache::interval_type;
242
243 static auto GetInterval(const T& object) {
244 return ObjectInterval::right_open(object->GetCpuAddr(),
245 object->GetCpuAddr() + object->GetSizeInBytes());
246 }
247
248 ObjectCache map_cache;
249 IntervalCache interval_cache; ///< Cache of objects
250 u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
251 VideoCore::RasterizerInterface& rasterizer;
252 std::list<T> marked_for_unregister;
253};
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index a9e86cfc7..679b9b1d7 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -10,7 +10,6 @@
10#include "common/common_types.h" 10#include "common/common_types.h"
11#include "video_core/buffer_cache/buffer_cache.h" 11#include "video_core/buffer_cache/buffer_cache.h"
12#include "video_core/engines/maxwell_3d.h" 12#include "video_core/engines/maxwell_3d.h"
13#include "video_core/rasterizer_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h" 13#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_stream_buffer.h" 14#include "video_core/renderer_opengl/gl_stream_buffer.h"
16 15
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f802fd384..2d6c11320 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -30,6 +30,7 @@
30#include "video_core/renderer_opengl/gl_shader_cache.h" 30#include "video_core/renderer_opengl/gl_shader_cache.h"
31#include "video_core/renderer_opengl/maxwell_to_gl.h" 31#include "video_core/renderer_opengl/maxwell_to_gl.h"
32#include "video_core/renderer_opengl/renderer_opengl.h" 32#include "video_core/renderer_opengl/renderer_opengl.h"
33#include "video_core/shader_cache.h"
33 34
34namespace OpenGL { 35namespace OpenGL {
35 36
@@ -65,10 +66,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
65template <typename Engine, typename Entry> 66template <typename Engine, typename Entry>
66Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 67Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
67 ShaderType shader_type, std::size_t index = 0) { 68 ShaderType shader_type, std::size_t index = 0) {
69 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
70 if (entry.is_separated) {
71 const u32 buffer_1 = entry.buffer;
72 const u32 buffer_2 = entry.secondary_buffer;
73 const u32 offset_1 = entry.offset;
74 const u32 offset_2 = entry.secondary_offset;
75 const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
76 const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
77 return engine.GetTextureInfo(handle_1 | handle_2);
78 }
79 }
68 if (entry.is_bindless) { 80 if (entry.is_bindless) {
69 const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); 81 const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
70 return engine.GetTextureInfo(tex_handle); 82 return engine.GetTextureInfo(handle);
71 } 83 }
84
72 const auto& gpu_profile = engine.AccessGuestDriverProfile(); 85 const auto& gpu_profile = engine.AccessGuestDriverProfile();
73 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); 86 const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
74 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { 87 if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
@@ -310,7 +323,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
310 continue; 323 continue;
311 } 324 }
312 325
313 Shader shader{shader_cache.GetStageProgram(program)}; 326 Shader* const shader = shader_cache.GetStageProgram(program);
314 327
315 if (device.UseAssemblyShaders()) { 328 if (device.UseAssemblyShaders()) {
316 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this 329 // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
@@ -604,7 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
604 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); 617 (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
605 618
606 // Prepare the vertex array. 619 // Prepare the vertex array.
607 buffer_cache.Map(buffer_size); 620 const bool invalidated = buffer_cache.Map(buffer_size);
621
622 if (invalidated) {
623 // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
624 auto& dirty = gpu.dirty.flags;
625 dirty[Dirty::VertexBuffers] = true;
626 for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
627 dirty[index] = true;
628 }
629 }
608 630
609 // Prepare vertex array format. 631 // Prepare vertex array format.
610 SetupVertexFormat(); 632 SetupVertexFormat();
@@ -870,7 +892,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
870 return true; 892 return true;
871} 893}
872 894
873void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { 895void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
874 static constexpr std::array PARAMETER_LUT = { 896 static constexpr std::array PARAMETER_LUT = {
875 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, 897 GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
876 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, 898 GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
@@ -900,7 +922,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
900 } 922 }
901} 923}
902 924
903void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { 925void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
904 MICROPROFILE_SCOPE(OpenGL_UBO); 926 MICROPROFILE_SCOPE(OpenGL_UBO);
905 const auto& launch_desc = system.GPU().KeplerCompute().launch_description; 927 const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
906 const auto& entries = kernel->GetEntries(); 928 const auto& entries = kernel->GetEntries();
@@ -969,7 +991,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
969 } 991 }
970} 992}
971 993
972void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { 994void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
973 auto& gpu{system.GPU()}; 995 auto& gpu{system.GPU()};
974 auto& memory_manager{gpu.MemoryManager()}; 996 auto& memory_manager{gpu.MemoryManager()};
975 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; 997 const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
@@ -984,7 +1006,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
984 } 1006 }
985} 1007}
986 1008
987void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { 1009void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
988 auto& gpu{system.GPU()}; 1010 auto& gpu{system.GPU()};
989 auto& memory_manager{gpu.MemoryManager()}; 1011 auto& memory_manager{gpu.MemoryManager()};
990 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; 1012 const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
@@ -1007,7 +1029,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
1007 static_cast<GLsizeiptr>(size)); 1029 static_cast<GLsizeiptr>(size));
1008} 1030}
1009 1031
1010void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { 1032void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
1011 MICROPROFILE_SCOPE(OpenGL_Texture); 1033 MICROPROFILE_SCOPE(OpenGL_Texture);
1012 const auto& maxwell3d = system.GPU().Maxwell3D(); 1034 const auto& maxwell3d = system.GPU().Maxwell3D();
1013 u32 binding = device.GetBaseBindings(stage_index).sampler; 1035 u32 binding = device.GetBaseBindings(stage_index).sampler;
@@ -1020,7 +1042,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
1020 } 1042 }
1021} 1043}
1022 1044
1023void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { 1045void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
1024 MICROPROFILE_SCOPE(OpenGL_Texture); 1046 MICROPROFILE_SCOPE(OpenGL_Texture);
1025 const auto& compute = system.GPU().KeplerCompute(); 1047 const auto& compute = system.GPU().KeplerCompute();
1026 u32 binding = 0; 1048 u32 binding = 0;
@@ -1049,7 +1071,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
1049 } 1071 }
1050} 1072}
1051 1073
1052void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { 1074void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
1053 const auto& maxwell3d = system.GPU().Maxwell3D(); 1075 const auto& maxwell3d = system.GPU().Maxwell3D();
1054 u32 binding = device.GetBaseBindings(stage_index).image; 1076 u32 binding = device.GetBaseBindings(stage_index).image;
1055 for (const auto& entry : shader->GetEntries().images) { 1077 for (const auto& entry : shader->GetEntries().images) {
@@ -1059,7 +1081,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
1059 } 1081 }
1060} 1082}
1061 1083
1062void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { 1084void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
1063 const auto& compute = system.GPU().KeplerCompute(); 1085 const auto& compute = system.GPU().KeplerCompute();
1064 u32 binding = 0; 1086 u32 binding = 0;
1065 for (const auto& entry : shader->GetEntries().images) { 1087 for (const auto& entry : shader->GetEntries().images) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 7abc8fdbd..4f082592f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -19,7 +19,6 @@
19#include "video_core/engines/const_buffer_info.h" 19#include "video_core/engines/const_buffer_info.h"
20#include "video_core/engines/maxwell_3d.h" 20#include "video_core/engines/maxwell_3d.h"
21#include "video_core/rasterizer_accelerated.h" 21#include "video_core/rasterizer_accelerated.h"
22#include "video_core/rasterizer_cache.h"
23#include "video_core/rasterizer_interface.h" 22#include "video_core/rasterizer_interface.h"
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 23#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 24#include "video_core/renderer_opengl/gl_device.h"
@@ -100,10 +99,10 @@ private:
100 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); 99 void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
101 100
102 /// Configures the current constbuffers to use for the draw command. 101 /// Configures the current constbuffers to use for the draw command.
103 void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); 102 void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
104 103
105 /// Configures the current constbuffers to use for the kernel invocation. 104 /// Configures the current constbuffers to use for the kernel invocation.
106 void SetupComputeConstBuffers(const Shader& kernel); 105 void SetupComputeConstBuffers(Shader* kernel);
107 106
108 /// Configures a constant buffer. 107 /// Configures a constant buffer.
109 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, 108 void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
@@ -111,30 +110,30 @@ private:
111 std::size_t unified_offset); 110 std::size_t unified_offset);
112 111
113 /// Configures the current global memory entries to use for the draw command. 112 /// Configures the current global memory entries to use for the draw command.
114 void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); 113 void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
115 114
116 /// Configures the current global memory entries to use for the kernel invocation. 115 /// Configures the current global memory entries to use for the kernel invocation.
117 void SetupComputeGlobalMemory(const Shader& kernel); 116 void SetupComputeGlobalMemory(Shader* kernel);
118 117
119 /// Configures a constant buffer. 118 /// Configures a constant buffer.
120 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, 119 void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
121 std::size_t size); 120 std::size_t size);
122 121
123 /// Configures the current textures to use for the draw command. 122 /// Configures the current textures to use for the draw command.
124 void SetupDrawTextures(std::size_t stage_index, const Shader& shader); 123 void SetupDrawTextures(std::size_t stage_index, Shader* shader);
125 124
126 /// Configures the textures used in a compute shader. 125 /// Configures the textures used in a compute shader.
127 void SetupComputeTextures(const Shader& kernel); 126 void SetupComputeTextures(Shader* kernel);
128 127
129 /// Configures a texture. 128 /// Configures a texture.
130 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, 129 void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
131 const SamplerEntry& entry); 130 const SamplerEntry& entry);
132 131
133 /// Configures images in a graphics shader. 132 /// Configures images in a graphics shader.
134 void SetupDrawImages(std::size_t stage_index, const Shader& shader); 133 void SetupDrawImages(std::size_t stage_index, Shader* shader);
135 134
136 /// Configures images in a compute shader. 135 /// Configures images in a compute shader.
137 void SetupComputeImages(const Shader& shader); 136 void SetupComputeImages(Shader* shader);
138 137
139 /// Configures an image. 138 /// Configures an image.
140 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); 139 void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index a991ca64a..c28486b1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -29,6 +29,7 @@
29#include "video_core/shader/memory_util.h" 29#include "video_core/shader/memory_util.h"
30#include "video_core/shader/registry.h" 30#include "video_core/shader/registry.h"
31#include "video_core/shader/shader_ir.h" 31#include "video_core/shader/shader_ir.h"
32#include "video_core/shader_cache.h"
32 33
33namespace OpenGL { 34namespace OpenGL {
34 35
@@ -194,12 +195,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
194 195
195} // Anonymous namespace 196} // Anonymous namespace
196 197
197CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 198Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
198 std::shared_ptr<VideoCommon::Shader::Registry> registry, 199 ProgramSharedPtr program_)
199 ShaderEntries entries, ProgramSharedPtr program_) 200 : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
200 : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
201 size_in_bytes{size_in_bytes}, program{std::move(program_)} {
202 // Assign either the assembly program or source program. We can't have both.
203 handle = program->assembly_program.handle; 201 handle = program->assembly_program.handle;
204 if (handle == 0) { 202 if (handle == 0) {
205 handle = program->source_program.handle; 203 handle = program->source_program.handle;
@@ -207,16 +205,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
207 ASSERT(handle != 0); 205 ASSERT(handle != 0);
208} 206}
209 207
210CachedShader::~CachedShader() = default; 208Shader::~Shader() = default;
211 209
212GLuint CachedShader::GetHandle() const { 210GLuint Shader::GetHandle() const {
213 DEBUG_ASSERT(registry->IsConsistent()); 211 DEBUG_ASSERT(registry->IsConsistent());
214 return handle; 212 return handle;
215} 213}
216 214
217Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, 215std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
218 Maxwell::ShaderProgram program_type, ProgramCode code, 216 Maxwell::ShaderProgram program_type,
219 ProgramCode code_b) { 217 ProgramCode code, ProgramCode code_b) {
220 const auto shader_type = GetShaderType(program_type); 218 const auto shader_type = GetShaderType(program_type);
221 const std::size_t size_in_bytes = code.size() * sizeof(u64); 219 const std::size_t size_in_bytes = code.size() * sizeof(u64);
222 220
@@ -241,12 +239,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
241 entry.bindless_samplers = registry->GetBindlessSamplers(); 239 entry.bindless_samplers = registry->GetBindlessSamplers();
242 params.disk_cache.SaveEntry(std::move(entry)); 240 params.disk_cache.SaveEntry(std::move(entry));
243 241
244 return std::shared_ptr<CachedShader>( 242 return std::unique_ptr<Shader>(new Shader(
245 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), 243 std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
246 MakeEntries(params.device, ir, shader_type), std::move(program)));
247} 244}
248 245
249Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { 246std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
247 ProgramCode code) {
250 const std::size_t size_in_bytes = code.size() * sizeof(u64); 248 const std::size_t size_in_bytes = code.size() * sizeof(u64);
251 249
252 auto& engine = params.system.GPU().KeplerCompute(); 250 auto& engine = params.system.GPU().KeplerCompute();
@@ -266,23 +264,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
266 entry.bindless_samplers = registry->GetBindlessSamplers(); 264 entry.bindless_samplers = registry->GetBindlessSamplers();
267 params.disk_cache.SaveEntry(std::move(entry)); 265 params.disk_cache.SaveEntry(std::move(entry));
268 266
269 return std::shared_ptr<CachedShader>( 267 return std::unique_ptr<Shader>(new Shader(std::move(registry),
270 new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), 268 MakeEntries(params.device, ir, ShaderType::Compute),
271 MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); 269 std::move(program)));
272} 270}
273 271
274Shader CachedShader::CreateFromCache(const ShaderParameters& params, 272std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
275 const PrecompiledShader& precompiled_shader, 273 const PrecompiledShader& precompiled_shader) {
276 std::size_t size_in_bytes) { 274 return std::unique_ptr<Shader>(new Shader(
277 return std::shared_ptr<CachedShader>( 275 precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
278 new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
279 precompiled_shader.entries, precompiled_shader.program));
280} 276}
281 277
282ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 278ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
283 Core::Frontend::EmuWindow& emu_window, const Device& device) 279 Core::Frontend::EmuWindow& emu_window, const Device& device)
284 : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, 280 : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
285 disk_cache{system} {} 281 emu_window{emu_window}, device{device}, disk_cache{system} {}
282
283ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
286 284
287void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, 285void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
288 const VideoCore::DiskResourceLoadCallback& callback) { 286 const VideoCore::DiskResourceLoadCallback& callback) {
@@ -436,7 +434,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
436 return program; 434 return program;
437} 435}
438 436
439Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { 437Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
440 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { 438 if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
441 return last_shaders[static_cast<std::size_t>(program)]; 439 return last_shaders[static_cast<std::size_t>(program)];
442 } 440 }
@@ -446,8 +444,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
446 444
447 // Look up shader in the cache based on address 445 // Look up shader in the cache based on address
448 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; 446 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
449 Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; 447 if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
450 if (shader) {
451 return last_shaders[static_cast<std::size_t>(program)] = shader; 448 return last_shaders[static_cast<std::size_t>(program)] = shader;
452 } 449 }
453 450
@@ -468,30 +465,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
468 const ShaderParameters params{system, disk_cache, device, 465 const ShaderParameters params{system, disk_cache, device,
469 *cpu_addr, host_ptr, unique_identifier}; 466 *cpu_addr, host_ptr, unique_identifier};
470 467
468 std::unique_ptr<Shader> shader;
471 const auto found = runtime_cache.find(unique_identifier); 469 const auto found = runtime_cache.find(unique_identifier);
472 if (found == runtime_cache.end()) { 470 if (found == runtime_cache.end()) {
473 shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), 471 shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
474 std::move(code_b));
475 } else { 472 } else {
476 const std::size_t size_in_bytes = code.size() * sizeof(u64); 473 shader = Shader::CreateFromCache(params, found->second);
477 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
478 } 474 }
479 475
476 Shader* const result = shader.get();
480 if (cpu_addr) { 477 if (cpu_addr) {
481 Register(shader); 478 Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
482 } else { 479 } else {
483 null_shader = shader; 480 null_shader = std::move(shader);
484 } 481 }
485 482
486 return last_shaders[static_cast<std::size_t>(program)] = shader; 483 return last_shaders[static_cast<std::size_t>(program)] = result;
487} 484}
488 485
489Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { 486Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
490 auto& memory_manager{system.GPU().MemoryManager()}; 487 auto& memory_manager{system.GPU().MemoryManager()};
491 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; 488 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
492 489
493 auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; 490 if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
494 if (kernel) {
495 return kernel; 491 return kernel;
496 } 492 }
497 493
@@ -503,20 +499,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
503 const ShaderParameters params{system, disk_cache, device, 499 const ShaderParameters params{system, disk_cache, device,
504 *cpu_addr, host_ptr, unique_identifier}; 500 *cpu_addr, host_ptr, unique_identifier};
505 501
502 std::unique_ptr<Shader> kernel;
506 const auto found = runtime_cache.find(unique_identifier); 503 const auto found = runtime_cache.find(unique_identifier);
507 if (found == runtime_cache.end()) { 504 if (found == runtime_cache.end()) {
508 kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); 505 kernel = Shader::CreateKernelFromMemory(params, std::move(code));
509 } else { 506 } else {
510 const std::size_t size_in_bytes = code.size() * sizeof(u64); 507 kernel = Shader::CreateFromCache(params, found->second);
511 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
512 } 508 }
513 509
510 Shader* const result = kernel.get();
514 if (cpu_addr) { 511 if (cpu_addr) {
515 Register(kernel); 512 Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
516 } else { 513 } else {
517 null_kernel = kernel; 514 null_kernel = std::move(kernel);
518 } 515 }
519 return kernel; 516 return result;
520} 517}
521 518
522} // namespace OpenGL 519} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index b2ae8d7f9..6848f1388 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -18,12 +18,12 @@
18 18
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "video_core/engines/shader_type.h" 20#include "video_core/engines/shader_type.h"
21#include "video_core/rasterizer_cache.h"
22#include "video_core/renderer_opengl/gl_resource_manager.h" 21#include "video_core/renderer_opengl/gl_resource_manager.h"
23#include "video_core/renderer_opengl/gl_shader_decompiler.h" 22#include "video_core/renderer_opengl/gl_shader_decompiler.h"
24#include "video_core/renderer_opengl/gl_shader_disk_cache.h" 23#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
25#include "video_core/shader/registry.h" 24#include "video_core/shader/registry.h"
26#include "video_core/shader/shader_ir.h" 25#include "video_core/shader/shader_ir.h"
26#include "video_core/shader_cache.h"
27 27
28namespace Core { 28namespace Core {
29class System; 29class System;
@@ -35,12 +35,10 @@ class EmuWindow;
35 35
36namespace OpenGL { 36namespace OpenGL {
37 37
38class CachedShader;
39class Device; 38class Device;
40class RasterizerOpenGL; 39class RasterizerOpenGL;
41struct UnspecializedShader; 40struct UnspecializedShader;
42 41
43using Shader = std::shared_ptr<CachedShader>;
44using Maxwell = Tegra::Engines::Maxwell3D::Regs; 42using Maxwell = Tegra::Engines::Maxwell3D::Regs;
45 43
46struct ProgramHandle { 44struct ProgramHandle {
@@ -64,62 +62,53 @@ struct ShaderParameters {
64 u64 unique_identifier; 62 u64 unique_identifier;
65}; 63};
66 64
67class CachedShader final : public RasterizerCacheObject { 65class Shader final {
68public: 66public:
69 ~CachedShader(); 67 ~Shader();
70 68
71 /// Gets the GL program handle for the shader 69 /// Gets the GL program handle for the shader
72 GLuint GetHandle() const; 70 GLuint GetHandle() const;
73 71
74 /// Returns the size in bytes of the shader
75 std::size_t GetSizeInBytes() const override {
76 return size_in_bytes;
77 }
78
79 /// Gets the shader entries for the shader 72 /// Gets the shader entries for the shader
80 const ShaderEntries& GetEntries() const { 73 const ShaderEntries& GetEntries() const {
81 return entries; 74 return entries;
82 } 75 }
83 76
84 static Shader CreateStageFromMemory(const ShaderParameters& params, 77 static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
85 Maxwell::ShaderProgram program_type, 78 Maxwell::ShaderProgram program_type,
86 ProgramCode program_code, ProgramCode program_code_b); 79 ProgramCode program_code,
87 static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); 80 ProgramCode program_code_b);
81 static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
82 ProgramCode code);
88 83
89 static Shader CreateFromCache(const ShaderParameters& params, 84 static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
90 const PrecompiledShader& precompiled_shader, 85 const PrecompiledShader& precompiled_shader);
91 std::size_t size_in_bytes);
92 86
93private: 87private:
94 explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, 88 explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
95 std::shared_ptr<VideoCommon::Shader::Registry> registry, 89 ProgramSharedPtr program);
96 ShaderEntries entries, ProgramSharedPtr program);
97 90
98 std::shared_ptr<VideoCommon::Shader::Registry> registry; 91 std::shared_ptr<VideoCommon::Shader::Registry> registry;
99 ShaderEntries entries; 92 ShaderEntries entries;
100 std::size_t size_in_bytes = 0;
101 ProgramSharedPtr program; 93 ProgramSharedPtr program;
102 GLuint handle = 0; 94 GLuint handle = 0;
103}; 95};
104 96
105class ShaderCacheOpenGL final : public RasterizerCache<Shader> { 97class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
106public: 98public:
107 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, 99 explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
108 Core::Frontend::EmuWindow& emu_window, const Device& device); 100 Core::Frontend::EmuWindow& emu_window, const Device& device);
101 ~ShaderCacheOpenGL() override;
109 102
110 /// Loads disk cache for the current game 103 /// Loads disk cache for the current game
111 void LoadDiskCache(const std::atomic_bool& stop_loading, 104 void LoadDiskCache(const std::atomic_bool& stop_loading,
112 const VideoCore::DiskResourceLoadCallback& callback); 105 const VideoCore::DiskResourceLoadCallback& callback);
113 106
114 /// Gets the current specified shader stage program 107 /// Gets the current specified shader stage program
115 Shader GetStageProgram(Maxwell::ShaderProgram program); 108 Shader* GetStageProgram(Maxwell::ShaderProgram program);
116 109
117 /// Gets a compute kernel in the passed address 110 /// Gets a compute kernel in the passed address
118 Shader GetComputeKernel(GPUVAddr code_addr); 111 Shader* GetComputeKernel(GPUVAddr code_addr);
119
120protected:
121 // We do not have to flush this cache as things in it are never modified by us.
122 void FlushObjectInner(const Shader& object) override {}
123 112
124private: 113private:
125 ProgramSharedPtr GeneratePrecompiledProgram( 114 ProgramSharedPtr GeneratePrecompiledProgram(
@@ -132,10 +121,10 @@ private:
132 ShaderDiskCacheOpenGL disk_cache; 121 ShaderDiskCacheOpenGL disk_cache;
133 std::unordered_map<u64, PrecompiledShader> runtime_cache; 122 std::unordered_map<u64, PrecompiledShader> runtime_cache;
134 123
135 Shader null_shader{}; 124 std::unique_ptr<Shader> null_shader;
136 Shader null_kernel{}; 125 std::unique_ptr<Shader> null_kernel;
137 126
138 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 127 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
139}; 128};
140 129
141} // namespace OpenGL 130} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 9e95a122b..653c3f2f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
29 29
30namespace { 30namespace {
31 31
32using VideoCommon::Shader::SeparateSamplerKey;
33
32using ShaderCacheVersionHash = std::array<u8, 64>; 34using ShaderCacheVersionHash = std::array<u8, 64>;
33 35
34struct ConstBufferKey { 36struct ConstBufferKey {
@@ -37,18 +39,26 @@ struct ConstBufferKey {
37 u32 value = 0; 39 u32 value = 0;
38}; 40};
39 41
40struct BoundSamplerKey { 42struct BoundSamplerEntry {
41 u32 offset = 0; 43 u32 offset = 0;
42 Tegra::Engines::SamplerDescriptor sampler; 44 Tegra::Engines::SamplerDescriptor sampler;
43}; 45};
44 46
45struct BindlessSamplerKey { 47struct SeparateSamplerEntry {
48 u32 cbuf1 = 0;
49 u32 cbuf2 = 0;
50 u32 offset1 = 0;
51 u32 offset2 = 0;
52 Tegra::Engines::SamplerDescriptor sampler;
53};
54
55struct BindlessSamplerEntry {
46 u32 cbuf = 0; 56 u32 cbuf = 0;
47 u32 offset = 0; 57 u32 offset = 0;
48 Tegra::Engines::SamplerDescriptor sampler; 58 Tegra::Engines::SamplerDescriptor sampler;
49}; 59};
50 60
51constexpr u32 NativeVersion = 20; 61constexpr u32 NativeVersion = 21;
52 62
53ShaderCacheVersionHash GetShaderCacheVersionHash() { 63ShaderCacheVersionHash GetShaderCacheVersionHash() {
54 ShaderCacheVersionHash hash{}; 64 ShaderCacheVersionHash hash{};
@@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
87 u32 texture_handler_size_value; 97 u32 texture_handler_size_value;
88 u32 num_keys; 98 u32 num_keys;
89 u32 num_bound_samplers; 99 u32 num_bound_samplers;
100 u32 num_separate_samplers;
90 u32 num_bindless_samplers; 101 u32 num_bindless_samplers;
91 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || 102 if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
92 file.ReadArray(&is_texture_handler_size_known, 1) != 1 || 103 file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
93 file.ReadArray(&texture_handler_size_value, 1) != 1 || 104 file.ReadArray(&texture_handler_size_value, 1) != 1 ||
94 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || 105 file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
95 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || 106 file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
107 file.ReadArray(&num_separate_samplers, 1) != 1 ||
96 file.ReadArray(&num_bindless_samplers, 1) != 1) { 108 file.ReadArray(&num_bindless_samplers, 1) != 1) {
97 return false; 109 return false;
98 } 110 }
@@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
101 } 113 }
102 114
103 std::vector<ConstBufferKey> flat_keys(num_keys); 115 std::vector<ConstBufferKey> flat_keys(num_keys);
104 std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); 116 std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
105 std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); 117 std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
118 std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
106 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || 119 if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
107 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != 120 file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
108 flat_bound_samplers.size() || 121 flat_bound_samplers.size() ||
122 file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
123 flat_separate_samplers.size() ||
109 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != 124 file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
110 flat_bindless_samplers.size()) { 125 flat_bindless_samplers.size()) {
111 return false; 126 return false;
112 } 127 }
113 for (const auto& key : flat_keys) { 128 for (const auto& entry : flat_keys) {
114 keys.insert({{key.cbuf, key.offset}, key.value}); 129 keys.insert({{entry.cbuf, entry.offset}, entry.value});
115 } 130 }
116 for (const auto& key : flat_bound_samplers) { 131 for (const auto& entry : flat_bound_samplers) {
117 bound_samplers.emplace(key.offset, key.sampler); 132 bound_samplers.emplace(entry.offset, entry.sampler);
118 } 133 }
119 for (const auto& key : flat_bindless_samplers) { 134 for (const auto& entry : flat_separate_samplers) {
120 bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); 135 SeparateSamplerKey key;
136 key.buffers = {entry.cbuf1, entry.cbuf2};
137 key.offsets = {entry.offset1, entry.offset2};
138 separate_samplers.emplace(key, entry.sampler);
139 }
140 for (const auto& entry : flat_bindless_samplers) {
141 bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
121 } 142 }
122 143
123 return true; 144 return true;
@@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
142 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || 163 file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
143 file.WriteObject(static_cast<u32>(keys.size())) != 1 || 164 file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
144 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || 165 file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
166 file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
145 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { 167 file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
146 return false; 168 return false;
147 } 169 }
@@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
152 flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); 174 flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
153 } 175 }
154 176
155 std::vector<BoundSamplerKey> flat_bound_samplers; 177 std::vector<BoundSamplerEntry> flat_bound_samplers;
156 flat_bound_samplers.reserve(bound_samplers.size()); 178 flat_bound_samplers.reserve(bound_samplers.size());
157 for (const auto& [address, sampler] : bound_samplers) { 179 for (const auto& [address, sampler] : bound_samplers) {
158 flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); 180 flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
181 }
182
183 std::vector<SeparateSamplerEntry> flat_separate_samplers;
184 flat_separate_samplers.reserve(separate_samplers.size());
185 for (const auto& [key, sampler] : separate_samplers) {
186 SeparateSamplerEntry entry;
187 std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
188 std::tie(entry.offset1, entry.offset2) = key.offsets;
189 entry.sampler = sampler;
190 flat_separate_samplers.push_back(entry);
159 } 191 }
160 192
161 std::vector<BindlessSamplerKey> flat_bindless_samplers; 193 std::vector<BindlessSamplerEntry> flat_bindless_samplers;
162 flat_bindless_samplers.reserve(bindless_samplers.size()); 194 flat_bindless_samplers.reserve(bindless_samplers.size());
163 for (const auto& [address, sampler] : bindless_samplers) { 195 for (const auto& [address, sampler] : bindless_samplers) {
164 flat_bindless_samplers.push_back( 196 flat_bindless_samplers.push_back(
165 BindlessSamplerKey{address.first, address.second, sampler}); 197 BindlessSamplerEntry{address.first, address.second, sampler});
166 } 198 }
167 199
168 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && 200 return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
169 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == 201 file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
170 flat_bound_samplers.size() && 202 flat_bound_samplers.size() &&
203 file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
204 flat_separate_samplers.size() &&
171 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == 205 file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
172 flat_bindless_samplers.size(); 206 flat_bindless_samplers.size();
173} 207}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index d5be52e40..a79cef0e9 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
57 VideoCommon::Shader::ComputeInfo compute_info; 57 VideoCommon::Shader::ComputeInfo compute_info;
58 VideoCommon::Shader::KeyMap keys; 58 VideoCommon::Shader::KeyMap keys;
59 VideoCommon::Shader::BoundSamplerMap bound_samplers; 59 VideoCommon::Shader::BoundSamplerMap bound_samplers;
60 VideoCommon::Shader::SeparateSamplerMap separate_samplers;
60 VideoCommon::Shader::BindlessSamplerMap bindless_samplers; 61 VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
61}; 62};
62 63
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index a54583e7d..65cb3c8ad 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -8,7 +8,6 @@
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/buffer_cache/buffer_cache.h" 10#include "video_core/buffer_cache/buffer_cache.h"
11#include "video_core/rasterizer_cache.h"
12#include "video_core/renderer_vulkan/vk_memory_manager.h" 11#include "video_core/renderer_vulkan/vk_memory_manager.h"
13#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 12#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
14#include "video_core/renderer_vulkan/vk_stream_buffer.h" 13#include "video_core/renderer_vulkan/vk_stream_buffer.h"
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index b8ccf164f..ea66e621e 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,6 +27,7 @@
27#include "video_core/renderer_vulkan/wrapper.h" 27#include "video_core/renderer_vulkan/wrapper.h"
28#include "video_core/shader/compiler_settings.h" 28#include "video_core/shader/compiler_settings.h"
29#include "video_core/shader/memory_util.h" 29#include "video_core/shader/memory_util.h"
30#include "video_core/shader_cache.h"
30 31
31namespace Vulkan { 32namespace Vulkan {
32 33
@@ -132,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
132 return std::memcmp(&rhs, this, sizeof *this) == 0; 133 return std::memcmp(&rhs, this, sizeof *this) == 0;
133} 134}
134 135
135CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, 136Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
136 GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, 137 VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
137 u32 main_offset) 138 : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
138 : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
139 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, 139 registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
140 compiler_settings, registry}, 140 compiler_settings, registry},
141 entries{GenerateShaderEntries(shader_ir)} {} 141 entries{GenerateShaderEntries(shader_ir)} {}
142 142
143CachedShader::~CachedShader() = default; 143Shader::~Shader() = default;
144 144
145Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( 145Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
146 Core::System& system, Tegra::Engines::ShaderType stage) { 146 Tegra::Engines::ShaderType stage) {
147 if (stage == Tegra::Engines::ShaderType::Compute) { 147 if (stage == ShaderType::Compute) {
148 return system.GPU().KeplerCompute(); 148 return system.GPU().KeplerCompute();
149 } else { 149 } else {
150 return system.GPU().Maxwell3D(); 150 return system.GPU().Maxwell3D();
@@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
156 VKDescriptorPool& descriptor_pool, 156 VKDescriptorPool& descriptor_pool,
157 VKUpdateDescriptorQueue& update_descriptor_queue, 157 VKUpdateDescriptorQueue& update_descriptor_queue,
158 VKRenderPassCache& renderpass_cache) 158 VKRenderPassCache& renderpass_cache)
159 : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, 159 : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
160 descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, 160 scheduler{scheduler}, descriptor_pool{descriptor_pool},
161 renderpass_cache{renderpass_cache} {} 161 update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
162 162
163VKPipelineCache::~VKPipelineCache() = default; 163VKPipelineCache::~VKPipelineCache() = default;
164 164
165std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { 165std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
166 const auto& gpu = system.GPU().Maxwell3D(); 166 const auto& gpu = system.GPU().Maxwell3D();
167 167
168 std::array<Shader, Maxwell::MaxShaderProgram> shaders; 168 std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
169 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { 169 for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
170 const auto program{static_cast<Maxwell::ShaderProgram>(index)}; 170 const auto program{static_cast<Maxwell::ShaderProgram>(index)};
171 171
@@ -178,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
178 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 178 const GPUVAddr program_addr{GetShaderAddress(system, program)};
179 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 179 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
180 ASSERT(cpu_addr); 180 ASSERT(cpu_addr);
181 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; 181
182 if (!shader) { 182 Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
183 if (!result) {
183 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 184 const auto host_ptr{memory_manager.GetPointer(program_addr)};
184 185
185 // No shader found - create a new one 186 // No shader found - create a new one
186 constexpr u32 stage_offset = STAGE_MAIN_OFFSET; 187 constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
187 const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); 188 const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
188 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); 189 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
190 const std::size_t size_in_bytes = code.size() * sizeof(u64);
191
192 auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
193 stage_offset);
194 result = shader.get();
189 195
190 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
191 std::move(code), stage_offset);
192 if (cpu_addr) { 196 if (cpu_addr) {
193 Register(shader); 197 Register(std::move(shader), *cpu_addr, size_in_bytes);
194 } else { 198 } else {
195 null_shader = shader; 199 null_shader = std::move(shader);
196 } 200 }
197 } 201 }
198 shaders[index] = std::move(shader); 202 shaders[index] = result;
199 } 203 }
200 return last_shaders = shaders; 204 return last_shaders = shaders;
201} 205}
@@ -236,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
236 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 240 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
237 ASSERT(cpu_addr); 241 ASSERT(cpu_addr);
238 242
239 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; 243 Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
240 if (!shader) { 244 if (!shader) {
241 // No shader found - create a new one 245 // No shader found - create a new one
242 const auto host_ptr = memory_manager.GetPointer(program_addr); 246 const auto host_ptr = memory_manager.GetPointer(program_addr);
243 247
244 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); 248 ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
245 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 249 const std::size_t size_in_bytes = code.size() * sizeof(u64);
246 program_addr, *cpu_addr, std::move(code), 250
247 KERNEL_MAIN_OFFSET); 251 auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
252 std::move(code), KERNEL_MAIN_OFFSET);
253 shader = shader_info.get();
254
248 if (cpu_addr) { 255 if (cpu_addr) {
249 Register(shader); 256 Register(std::move(shader_info), *cpu_addr, size_in_bytes);
250 } else { 257 } else {
251 null_kernel = shader; 258 null_kernel = std::move(shader_info);
252 } 259 }
253 } 260 }
254 261
@@ -264,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
264 return *entry; 271 return *entry;
265} 272}
266 273
267void VKPipelineCache::Unregister(const Shader& shader) { 274void VKPipelineCache::OnShaderRemoval(Shader* shader) {
268 bool finished = false; 275 bool finished = false;
269 const auto Finish = [&] { 276 const auto Finish = [&] {
270 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and 277 // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
@@ -296,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
296 Finish(); 303 Finish();
297 it = compute_cache.erase(it); 304 it = compute_cache.erase(it);
298 } 305 }
299
300 RasterizerCache::Unregister(shader);
301} 306}
302 307
303std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> 308std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
@@ -332,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
332 } 337 }
333 338
334 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); 339 const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
335 const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); 340 const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
336 const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; 341 Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
337 ASSERT(shader);
338 342
339 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 343 const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
340 const auto program_type = GetShaderType(program_enum); 344 const ShaderType program_type = GetShaderType(program_enum);
341 const auto& entries = shader->GetEntries(); 345 const auto& entries = shader->GetEntries();
342 program[stage] = { 346 program[stage] = {
343 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), 347 Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 0b5796fef..0a36e5112 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -17,7 +17,6 @@
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "video_core/engines/const_buffer_engine_interface.h" 18#include "video_core/engines/const_buffer_engine_interface.h"
19#include "video_core/engines/maxwell_3d.h" 19#include "video_core/engines/maxwell_3d.h"
20#include "video_core/rasterizer_cache.h"
21#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 20#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
22#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" 21#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
23#include "video_core/renderer_vulkan/vk_renderpass_cache.h" 22#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -26,6 +25,7 @@
26#include "video_core/shader/memory_util.h" 25#include "video_core/shader/memory_util.h"
27#include "video_core/shader/registry.h" 26#include "video_core/shader/registry.h"
28#include "video_core/shader/shader_ir.h" 27#include "video_core/shader/shader_ir.h"
28#include "video_core/shader_cache.h"
29 29
30namespace Core { 30namespace Core {
31class System; 31class System;
@@ -41,8 +41,6 @@ class VKFence;
41class VKScheduler; 41class VKScheduler;
42class VKUpdateDescriptorQueue; 42class VKUpdateDescriptorQueue;
43 43
44class CachedShader;
45using Shader = std::shared_ptr<CachedShader>;
46using Maxwell = Tegra::Engines::Maxwell3D::Regs; 44using Maxwell = Tegra::Engines::Maxwell3D::Regs;
47 45
48struct GraphicsPipelineCacheKey { 46struct GraphicsPipelineCacheKey {
@@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
102 100
103namespace Vulkan { 101namespace Vulkan {
104 102
105class CachedShader final : public RasterizerCacheObject { 103class Shader {
106public: 104public:
107 explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, 105 explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
108 VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, 106 VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
109 u32 main_offset); 107 ~Shader();
110 ~CachedShader();
111 108
112 GPUVAddr GetGpuAddr() const { 109 GPUVAddr GetGpuAddr() const {
113 return gpu_addr; 110 return gpu_addr;
114 } 111 }
115 112
116 std::size_t GetSizeInBytes() const override {
117 return program_code.size() * sizeof(u64);
118 }
119
120 VideoCommon::Shader::ShaderIR& GetIR() { 113 VideoCommon::Shader::ShaderIR& GetIR() {
121 return shader_ir; 114 return shader_ir;
122 } 115 }
@@ -144,25 +137,23 @@ private:
144 ShaderEntries entries; 137 ShaderEntries entries;
145}; 138};
146 139
147class VKPipelineCache final : public RasterizerCache<Shader> { 140class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
148public: 141public:
149 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, 142 explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
150 const VKDevice& device, VKScheduler& scheduler, 143 const VKDevice& device, VKScheduler& scheduler,
151 VKDescriptorPool& descriptor_pool, 144 VKDescriptorPool& descriptor_pool,
152 VKUpdateDescriptorQueue& update_descriptor_queue, 145 VKUpdateDescriptorQueue& update_descriptor_queue,
153 VKRenderPassCache& renderpass_cache); 146 VKRenderPassCache& renderpass_cache);
154 ~VKPipelineCache(); 147 ~VKPipelineCache() override;
155 148
156 std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); 149 std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
157 150
158 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); 151 VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
159 152
160 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); 153 VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
161 154
162protected: 155protected:
163 void Unregister(const Shader& shader) override; 156 void OnShaderRemoval(Shader* shader) final;
164
165 void FlushObjectInner(const Shader& object) override {}
166 157
167private: 158private:
168 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( 159 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
@@ -175,10 +166,10 @@ private:
175 VKUpdateDescriptorQueue& update_descriptor_queue; 166 VKUpdateDescriptorQueue& update_descriptor_queue;
176 VKRenderPassCache& renderpass_cache; 167 VKRenderPassCache& renderpass_cache;
177 168
178 Shader null_shader{}; 169 std::unique_ptr<Shader> null_shader;
179 Shader null_kernel{}; 170 std::unique_ptr<Shader> null_kernel;
180 171
181 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 172 std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
182 173
183 GraphicsPipelineCacheKey last_graphics_key; 174 GraphicsPipelineCacheKey last_graphics_key;
184 VKGraphicsPipeline* last_graphics_pipeline = nullptr; 175 VKGraphicsPipeline* last_graphics_pipeline = nullptr;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 19b8f9da3..184b2238a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -38,6 +38,7 @@
38#include "video_core/renderer_vulkan/vk_texture_cache.h" 38#include "video_core/renderer_vulkan/vk_texture_cache.h"
39#include "video_core/renderer_vulkan/vk_update_descriptor.h" 39#include "video_core/renderer_vulkan/vk_update_descriptor.h"
40#include "video_core/renderer_vulkan/wrapper.h" 40#include "video_core/renderer_vulkan/wrapper.h"
41#include "video_core/shader_cache.h"
41 42
42namespace Vulkan { 43namespace Vulkan {
43 44
@@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
98} 99}
99 100
100std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( 101std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
101 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { 102 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
102 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; 103 std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
103 for (std::size_t i = 0; i < std::size(addresses); ++i) { 104 for (std::size_t i = 0; i < std::size(addresses); ++i) {
104 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; 105 addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
@@ -117,6 +118,17 @@ template <typename Engine, typename Entry>
117Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, 118Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
118 std::size_t stage, std::size_t index = 0) { 119 std::size_t stage, std::size_t index = 0) {
119 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); 120 const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
121 if constexpr (std::is_same_v<Entry, SamplerEntry>) {
122 if (entry.is_separated) {
123 const u32 buffer_1 = entry.buffer;
124 const u32 buffer_2 = entry.secondary_buffer;
125 const u32 offset_1 = entry.offset;
126 const u32 offset_2 = entry.secondary_offset;
127 const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
128 const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
129 return engine.GetTextureInfo(handle_1 | handle_2);
130 }
131 }
120 if (entry.is_bindless) { 132 if (entry.is_bindless) {
121 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); 133 const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
122 return engine.GetTextureInfo(tex_handle); 134 return engine.GetTextureInfo(tex_handle);
@@ -776,12 +788,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
776} 788}
777 789
778void RasterizerVulkan::SetupShaderDescriptors( 790void RasterizerVulkan::SetupShaderDescriptors(
779 const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { 791 const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
780 texture_cache.GuardSamplers(true); 792 texture_cache.GuardSamplers(true);
781 793
782 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { 794 for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
783 // Skip VertexA stage 795 // Skip VertexA stage
784 const auto& shader = shaders[stage + 1]; 796 Shader* const shader = shaders[stage + 1];
785 if (!shader) { 797 if (!shader) {
786 continue; 798 continue;
787 } 799 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 04be37a5e..c8c187606 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -168,7 +168,7 @@ private:
168 bool is_indexed, bool is_instanced); 168 bool is_indexed, bool is_instanced);
169 169
170 /// Setup descriptors in the graphics pipeline. 170 /// Setup descriptors in the graphics pipeline.
171 void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); 171 void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
172 172
173 void SetupImageTransitions(Texceptions texceptions, 173 void SetupImageTransitions(Texceptions texceptions,
174 const std::array<View, Maxwell::NumRenderTargets>& color_attachments, 174 const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 8f0bb996e..29ebf65ba 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
357 return pc; 357 return pc;
358} 358}
359 359
360ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, 360ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
361 std::optional<u32> buffer) { 361 SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
362 if (info.IsComplete()) { 362 if (info.IsComplete()) {
363 return info; 363 return info;
364 } 364 }
365 const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
366 : registry.ObtainBoundSampler(offset);
367 if (!sampler) { 365 if (!sampler) {
368 LOG_WARNING(HW_GPU, "Unknown sampler info"); 366 LOG_WARNING(HW_GPU, "Unknown sampler info");
369 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); 367 info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
381 379
382std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, 380std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
383 SamplerInfo sampler_info) { 381 SamplerInfo sampler_info) {
384 const auto offset = static_cast<u32>(sampler.index.Value()); 382 const u32 offset = static_cast<u32>(sampler.index.Value());
385 const auto info = GetSamplerInfo(sampler_info, offset); 383 const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
386 384
387 // If this sampler has already been used, return the existing mapping. 385 // If this sampler has already been used, return the existing mapping.
388 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 386 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
404 const Node sampler_register = GetRegister(reg); 402 const Node sampler_register = GetRegister(reg);
405 const auto [base_node, tracked_sampler_info] = 403 const auto [base_node, tracked_sampler_info] =
406 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); 404 TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
407 ASSERT(base_node != nullptr); 405 if (!base_node) {
408 if (base_node == nullptr) { 406 UNREACHABLE();
409 return std::nullopt; 407 return std::nullopt;
410 } 408 }
411 409
412 if (const auto bindless_sampler_info = 410 if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
413 std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { 411 const u32 buffer = sampler_info->index;
414 const u32 buffer = bindless_sampler_info->GetIndex(); 412 const u32 offset = sampler_info->offset;
415 const u32 offset = bindless_sampler_info->GetOffset(); 413 info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
416 info = GetSamplerInfo(info, offset, buffer);
417 414
418 // If this sampler has already been used, return the existing mapping. 415 // If this sampler has already been used, return the existing mapping.
419 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), 416 const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
420 [buffer = buffer, offset = offset](const Sampler& entry) { 417 [buffer, offset](const Sampler& entry) {
421 return entry.buffer == buffer && entry.offset == offset; 418 return entry.buffer == buffer && entry.offset == offset;
422 }); 419 });
423 if (it != used_samplers.end()) { 420 if (it != used_samplers.end()) {
@@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
431 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, 428 return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
432 *info.is_shadow, *info.is_buffer, false); 429 *info.is_shadow, *info.is_buffer, false);
433 } 430 }
434 if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { 431 if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
435 const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; 432 const std::pair indices = sampler_info->indices;
436 index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); 433 const std::pair offsets = sampler_info->offsets;
437 info = GetSamplerInfo(info, base_offset); 434 info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
435
436 // Try to use an already created sampler if it exists
437 const auto it = std::find_if(
438 used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
439 return offsets == std::pair{entry.offset, entry.secondary_offset} &&
440 indices == std::pair{entry.buffer, entry.secondary_buffer};
441 });
442 if (it != used_samplers.end()) {
443 ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
444 it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
445 return *it;
446 }
447
448 // Otherwise create a new mapping for this sampler
449 const u32 next_index = static_cast<u32>(used_samplers.size());
450 return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
451 *info.is_shadow, *info.is_buffer);
452 }
453 if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
454 const u32 base_offset = sampler_info->base_offset / 4;
455 index_var = GetCustomVariable(sampler_info->bindless_var);
456 info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
438 457
439 // If this sampler has already been used, return the existing mapping. 458 // If this sampler has already been used, return the existing mapping.
440 const auto it = std::find_if( 459 const auto it = std::find_if(
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index c5e5165ff..8f230d57a 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
275using Node4 = std::array<Node, 4>; 275using Node4 = std::array<Node, 4>;
276using NodeBlock = std::vector<Node>; 276using NodeBlock = std::vector<Node>;
277 277
278class BindlessSamplerNode; 278struct ArraySamplerNode;
279class ArraySamplerNode; 279struct BindlessSamplerNode;
280struct SeparateSamplerNode;
280 281
281using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; 282using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
282using TrackSampler = std::shared_ptr<TrackSamplerData>; 283using TrackSampler = std::shared_ptr<TrackSamplerData>;
283 284
284struct Sampler { 285struct Sampler {
@@ -288,63 +289,51 @@ struct Sampler {
288 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, 289 : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
289 is_buffer{is_buffer}, is_indexed{is_indexed} {} 290 is_buffer{is_buffer}, is_indexed{is_indexed} {}
290 291
292 /// Separate sampler constructor
293 constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
294 Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
295 bool is_buffer)
296 : index{index}, offset{offsets.first}, secondary_offset{offsets.second},
297 buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
298 is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
299
291 /// Bindless samplers constructor 300 /// Bindless samplers constructor
292 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, 301 constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
293 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) 302 bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
294 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, 303 : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
295 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} 304 is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
296 305
297 u32 index = 0; ///< Emulated index given for the this sampler. 306 u32 index = 0; ///< Emulated index given for the this sampler.
298 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. 307 u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
299 u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). 308 u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
300 u32 size = 1; ///< Size of the sampler. 309 u32 buffer = 0; ///< Buffer where the bindless sampler is read.
310 u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
311 u32 size = 1; ///< Size of the sampler.
301 312
302 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) 313 Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
303 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. 314 bool is_array = false; ///< Whether the texture is being sampled as an array texture or not.
304 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. 315 bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not.
305 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. 316 bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
306 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. 317 bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
307 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. 318 bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
319 bool is_separated = false; ///< Whether the image and sampler is separated or not.
308}; 320};
309 321
310/// Represents a tracked bindless sampler into a direct const buffer 322/// Represents a tracked bindless sampler into a direct const buffer
311class ArraySamplerNode final { 323struct ArraySamplerNode {
312public:
313 explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
314 : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
315
316 constexpr u32 GetIndex() const {
317 return index;
318 }
319
320 constexpr u32 GetBaseOffset() const {
321 return base_offset;
322 }
323
324 constexpr u32 GetIndexVar() const {
325 return bindless_var;
326 }
327
328private:
329 u32 index; 324 u32 index;
330 u32 base_offset; 325 u32 base_offset;
331 u32 bindless_var; 326 u32 bindless_var;
332}; 327};
333 328
334/// Represents a tracked bindless sampler into a direct const buffer 329/// Represents a tracked separate sampler image pair that was folded statically
335class BindlessSamplerNode final { 330struct SeparateSamplerNode {
336public: 331 std::pair<u32, u32> indices;
337 explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} 332 std::pair<u32, u32> offsets;
338 333};
339 constexpr u32 GetIndex() const {
340 return index;
341 }
342
343 constexpr u32 GetOffset() const {
344 return offset;
345 }
346 334
347private: 335/// Represents a tracked bindless sampler into a direct const buffer
336struct BindlessSamplerNode {
348 u32 index; 337 u32 index;
349 u32 offset; 338 u32 offset;
350}; 339};
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 11231bbea..1e0886185 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
48template <typename T, typename... Args> 48template <typename T, typename... Args>
49TrackSampler MakeTrackSampler(Args&&... args) { 49TrackSampler MakeTrackSampler(Args&&... args) {
50 static_assert(std::is_convertible_v<T, TrackSamplerData>); 50 static_assert(std::is_convertible_v<T, TrackSamplerData>);
51 return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); 51 return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
52} 52}
53 53
54template <typename... Args> 54template <typename... Args>
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index af70b3f35..cdf274e54 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
93 return value; 93 return value;
94} 94}
95 95
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
97 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
98 SeparateSamplerKey key;
99 key.buffers = buffers;
100 key.offsets = offsets;
101 const auto iter = separate_samplers.find(key);
102 if (iter != separate_samplers.end()) {
103 return iter->second;
104 }
105 if (!engine) {
106 return std::nullopt;
107 }
108
109 const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
110 const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
111 const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
112 separate_samplers.emplace(key, value);
113 return value;
114}
115
96std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, 116std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
97 u32 offset) { 117 u32 offset) {
98 const std::pair key = {buffer, offset}; 118 const std::pair key = {buffer, offset};
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 0c80d35fd..231206765 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -19,8 +19,39 @@
19 19
20namespace VideoCommon::Shader { 20namespace VideoCommon::Shader {
21 21
22struct SeparateSamplerKey {
23 std::pair<u32, u32> buffers;
24 std::pair<u32, u32> offsets;
25};
26
27} // namespace VideoCommon::Shader
28
29namespace std {
30
31template <>
32struct hash<VideoCommon::Shader::SeparateSamplerKey> {
33 std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
34 return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^
35 key.offsets.second);
36 }
37};
38
39template <>
40struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
41 bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
42 const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
43 return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
44 }
45};
46
47} // namespace std
48
49namespace VideoCommon::Shader {
50
22using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; 51using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
23using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; 52using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
53using SeparateSamplerMap =
54 std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
24using BindlessSamplerMap = 55using BindlessSamplerMap =
25 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; 56 std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
26 57
@@ -73,6 +104,9 @@ public:
73 104
74 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); 105 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
75 106
107 std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
108 std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
109
76 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); 110 std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
77 111
78 /// Inserts a key. 112 /// Inserts a key.
@@ -128,6 +162,7 @@ private:
128 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; 162 Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
129 KeyMap keys; 163 KeyMap keys;
130 BoundSamplerMap bound_samplers; 164 BoundSamplerMap bound_samplers;
165 SeparateSamplerMap separate_samplers;
131 BindlessSamplerMap bindless_samplers; 166 BindlessSamplerMap bindless_samplers;
132 u32 bound_buffer; 167 u32 bound_buffer;
133 GraphicsInfo graphics_info; 168 GraphicsInfo graphics_info;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 15ae152f2..3a98b2104 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -330,8 +330,8 @@ private:
330 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); 330 OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
331 331
332 /// Queries the missing sampler info from the execution context. 332 /// Queries the missing sampler info from the execution context.
333 SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, 333 SamplerInfo GetSamplerInfo(SamplerInfo info,
334 std::optional<u32> buffer = std::nullopt); 334 std::optional<Tegra::Engines::SamplerDescriptor> sampler);
335 335
336 /// Accesses a texture sampler. 336 /// Accesses a texture sampler.
337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); 337 std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@@ -409,8 +409,14 @@ private:
409 409
410 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; 410 std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
411 411
412 std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, 412 std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
413 s64 cursor); 413 s64 cursor);
414
415 std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
416 const OperationNode& operation,
417 Node gpr, Node base_offset,
418 Node tracked, const NodeBlock& code,
419 s64 cursor);
414 420
415 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; 421 std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
416 422
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index eb97bfd41..d5ed81442 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -14,6 +14,7 @@
14namespace VideoCommon::Shader { 14namespace VideoCommon::Shader {
15 15
16namespace { 16namespace {
17
17std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, 18std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
18 OperationCode operation_code) { 19 OperationCode operation_code) {
19 for (; cursor >= 0; --cursor) { 20 for (; cursor >= 0; --cursor) {
@@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
63 if (const auto operation = std::get_if<OperationNode>(&*node)) { 64 if (const auto operation = std::get_if<OperationNode>(&*node)) {
64 operation->SetAmendIndex(amend_index); 65 operation->SetAmendIndex(amend_index);
65 return true; 66 return true;
66 } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { 67 }
68 if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
67 conditional->SetAmendIndex(amend_index); 69 conditional->SetAmendIndex(amend_index);
68 return true; 70 return true;
69 } 71 }
@@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
72 74
73} // Anonymous namespace 75} // Anonymous namespace
74 76
75std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, 77std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
76 s64 cursor) { 78 s64 cursor) {
77 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 79 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
80 const u32 cbuf_index = cbuf->GetIndex();
81
78 // Constant buffer found, test if it's an immediate 82 // Constant buffer found, test if it's an immediate
79 const auto& offset = cbuf->GetOffset(); 83 const auto& offset = cbuf->GetOffset();
80 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { 84 if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
81 auto track = 85 auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
82 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
83 return {tracked, track}; 86 return {tracked, track};
84 } 87 }
85 if (const auto operation = std::get_if<OperationNode>(&*offset)) { 88 if (const auto operation = std::get_if<OperationNode>(&*offset)) {
86 const u32 bound_buffer = registry.GetBoundBuffer(); 89 const u32 bound_buffer = registry.GetBoundBuffer();
87 if (bound_buffer != cbuf->GetIndex()) { 90 if (bound_buffer != cbuf_index) {
88 return {}; 91 return {};
89 } 92 }
90 const auto pair = DecoupleIndirectRead(*operation); 93 if (const std::optional pair = DecoupleIndirectRead(*operation)) {
91 if (!pair) { 94 auto [gpr, base_offset] = *pair;
92 return {}; 95 return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
96 code, cursor);
93 } 97 }
94 auto [gpr, base_offset] = *pair;
95 const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
96 const auto& gpu_driver = registry.AccessGuestDriverProfile();
97 const u32 bindless_cv = NewCustomVariable();
98 Node op =
99 Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
100
101 const Node cv_node = GetCustomVariable(bindless_cv);
102 Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
103 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
104 AmendNodeCv(amend_index, code[cursor]);
105 // TODO Implement Bindless Index custom variable
106 auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
107 offset_inm->GetValue(), bindless_cv);
108 return {tracked, track};
109 } 98 }
110 return {}; 99 return {};
111 } 100 }
@@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
122 return TrackBindlessSampler(source, code, new_cursor); 111 return TrackBindlessSampler(source, code, new_cursor);
123 } 112 }
124 if (const auto operation = std::get_if<OperationNode>(&*tracked)) { 113 if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
125 for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { 114 const OperationNode& op = *operation;
126 if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); 115
127 std::get<0>(found)) { 116 const OperationCode opcode = operation->GetCode();
128 // Cbuf found in operand. 117 if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
118 ASSERT(op.GetOperandsCount() == 2);
119 auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
120 auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
121 if (node_a && node_b) {
122 auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
123 std::pair{offset_a, offset_b});
124 return {tracked, std::move(track)};
125 }
126 }
127 std::size_t i = op.GetOperandsCount();
128 while (i--) {
129 if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
130 // Constant buffer found in operand.
129 return found; 131 return found;
130 } 132 }
131 } 133 }
@@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
139 return {}; 141 return {};
140} 142}
141 143
144std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
145 const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
146 const NodeBlock& code, s64 cursor) {
147 const auto offset_imm = std::get<ImmediateNode>(*base_offset);
148 const auto& gpu_driver = registry.AccessGuestDriverProfile();
149 const u32 bindless_cv = NewCustomVariable();
150 const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
151 Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
152
153 Node cv_node = GetCustomVariable(bindless_cv);
154 Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
155 const std::size_t amend_index = DeclareAmend(std::move(amend_op));
156 AmendNodeCv(amend_index, code[cursor]);
157
158 // TODO: Implement bindless index custom variable
159 auto track =
160 MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
161 return {tracked, track};
162}
163
142std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, 164std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
143 s64 cursor) const { 165 s64 cursor) const {
144 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { 166 if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
new file mode 100644
index 000000000..a23c23886
--- /dev/null
+++ b/src/video_core/shader_cache.h
@@ -0,0 +1,228 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <memory>
9#include <mutex>
10#include <unordered_map>
11#include <utility>
12#include <vector>
13
14#include "common/assert.h"
15#include "common/common_types.h"
16#include "video_core/rasterizer_interface.h"
17
18namespace VideoCommon {
19
20template <class T>
21class ShaderCache {
22 static constexpr u64 PAGE_SHIFT = 14;
23
24 struct Entry {
25 VAddr addr_start;
26 VAddr addr_end;
27 T* data;
28
29 bool is_memory_marked = true;
30
31 constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
32 return start < addr_end && addr_start < end;
33 }
34 };
35
36public:
37 virtual ~ShaderCache() = default;
38
39 /// @brief Removes shaders inside a given region
40 /// @note Checks for ranges
41 /// @param addr Start address of the invalidation
42 /// @param size Number of bytes of the invalidation
43 void InvalidateRegion(VAddr addr, std::size_t size) {
44 std::scoped_lock lock{invalidation_mutex};
45 InvalidatePagesInRegion(addr, size);
46 RemovePendingShaders();
47 }
48
49 /// @brief Unmarks a memory region as cached and marks it for removal
50 /// @param addr Start address of the CPU write operation
51 /// @param size Number of bytes of the CPU write operation
52 void OnCPUWrite(VAddr addr, std::size_t size) {
53 std::lock_guard lock{invalidation_mutex};
54 InvalidatePagesInRegion(addr, size);
55 }
56
57 /// @brief Flushes delayed removal operations
58 void SyncGuestHost() {
59 std::scoped_lock lock{invalidation_mutex};
60 RemovePendingShaders();
61 }
62
63 /// @brief Tries to obtain a cached shader starting in a given address
64 /// @note Doesn't check for ranges, the given address has to be the start of the shader
65 /// @param addr Start address of the shader, this doesn't cache for region
66 /// @return Pointer to a valid shader, nullptr when nothing is found
67 T* TryGet(VAddr addr) const {
68 std::scoped_lock lock{lookup_mutex};
69
70 const auto it = lookup_cache.find(addr);
71 if (it == lookup_cache.end()) {
72 return nullptr;
73 }
74 return it->second->data;
75 }
76
77protected:
78 explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
79
80 /// @brief Register in the cache a given entry
81 /// @param data Shader to store in the cache
82 /// @param addr Start address of the shader that will be registered
83 /// @param size Size in bytes of the shader
84 void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
85 std::scoped_lock lock{invalidation_mutex, lookup_mutex};
86
87 const VAddr addr_end = addr + size;
88 Entry* const entry = NewEntry(addr, addr_end, data.get());
89
90 const u64 page_end = addr_end >> PAGE_SHIFT;
91 for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
92 invalidation_cache[page].push_back(entry);
93 }
94
95 storage.push_back(std::move(data));
96
97 rasterizer.UpdatePagesCachedCount(addr, size, 1);
98 }
99
100 /// @brief Called when a shader is going to be removed
101 /// @param shader Shader that will be removed
102 /// @pre invalidation_cache is locked
103 /// @pre lookup_mutex is locked
104 virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
105
106private:
107 /// @brief Invalidate pages in a given region
108 /// @pre invalidation_mutex is locked
109 void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
110 const VAddr addr_end = addr + size;
111 const u64 page_end = addr_end >> PAGE_SHIFT;
112 for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
113 const auto it = invalidation_cache.find(page);
114 if (it == invalidation_cache.end()) {
115 continue;
116 }
117
118 std::vector<Entry*>& entries = it->second;
119 InvalidatePageEntries(entries, addr, addr_end);
120
121 // If there's nothing else in this page, remove it to avoid overpopulating the hash map.
122 if (entries.empty()) {
123 invalidation_cache.erase(it);
124 }
125 }
126 }
127
128 /// @brief Remove shaders marked for deletion
129 /// @pre invalidation_mutex is locked
130 void RemovePendingShaders() {
131 if (marked_for_removal.empty()) {
132 return;
133 }
134 std::scoped_lock lock{lookup_mutex};
135
136 std::vector<T*> removed_shaders;
137 removed_shaders.reserve(marked_for_removal.size());
138
139 for (Entry* const entry : marked_for_removal) {
140 if (lookup_cache.erase(entry->addr_start) > 0) {
141 removed_shaders.push_back(entry->data);
142 }
143 }
144 marked_for_removal.clear();
145
146 if (!removed_shaders.empty()) {
147 RemoveShadersFromStorage(std::move(removed_shaders));
148 }
149 }
150
151 /// @brief Invalidates entries in a given range for the passed page
152 /// @param entries Vector of entries in the page, it will be modified on overlaps
153 /// @param addr Start address of the invalidation
154 /// @param addr_end Non-inclusive end address of the invalidation
155 /// @pre invalidation_mutex is locked
156 void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
157 auto it = entries.begin();
158 while (it != entries.end()) {
159 Entry* const entry = *it;
160 if (!entry->Overlaps(addr, addr_end)) {
161 ++it;
162 continue;
163 }
164 UnmarkMemory(entry);
165 marked_for_removal.push_back(entry);
166
167 it = entries.erase(it);
168 }
169 }
170
171 /// @brief Unmarks an entry from the rasterizer cache
172 /// @param entry Entry to unmark from memory
173 void UnmarkMemory(Entry* entry) {
174 if (!entry->is_memory_marked) {
175 return;
176 }
177 entry->is_memory_marked = false;
178
179 const VAddr addr = entry->addr_start;
180 const std::size_t size = entry->addr_end - addr;
181 rasterizer.UpdatePagesCachedCount(addr, size, -1);
182 }
183
184 /// @brief Removes a vector of shaders from a list
185 /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
186 /// @pre invalidation_mutex is locked
187 /// @pre lookup_mutex is locked
188 void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
189 // Remove duplicates
190 std::sort(removed_shaders.begin(), removed_shaders.end());
191 removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
192 removed_shaders.end());
193
194 // Now that there are no duplicates, we can notify removals
195 for (T* const shader : removed_shaders) {
196 OnShaderRemoval(shader);
197 }
198
199 // Remove them from the cache
200 const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
201 return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
202 removed_shaders.end();
203 };
204 storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
205 }
206
207 /// @brief Creates a new entry in the lookup cache and returns its pointer
208 /// @pre lookup_mutex is locked
209 Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
210 auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
211 Entry* const entry_pointer = entry.get();
212
213 lookup_cache.emplace(addr, std::move(entry));
214 return entry_pointer;
215 }
216
217 VideoCore::RasterizerInterface& rasterizer;
218
219 mutable std::mutex lookup_mutex;
220 std::mutex invalidation_mutex;
221
222 std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
223 std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
224 std::vector<std::unique_ptr<T>> storage;
225 std::vector<Entry*> marked_for_removal;
226};
227
228} // namespace VideoCommon