diff options
Diffstat (limited to 'src')
31 files changed, 661 insertions, 545 deletions
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 36724569f..c4c5199b1 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -132,7 +132,8 @@ std::shared_ptr<ResourceLimit> Process::GetResourceLimit() const { | |||
| 132 | 132 | ||
| 133 | u64 Process::GetTotalPhysicalMemoryAvailable() const { | 133 | u64 Process::GetTotalPhysicalMemoryAvailable() const { |
| 134 | const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + | 134 | const u64 capacity{resource_limit->GetCurrentResourceValue(ResourceType::PhysicalMemory) + |
| 135 | page_table->GetTotalHeapSize() + image_size + main_thread_stack_size}; | 135 | page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size + |
| 136 | main_thread_stack_size}; | ||
| 136 | 137 | ||
| 137 | if (capacity < memory_usage_capacity) { | 138 | if (capacity < memory_usage_capacity) { |
| 138 | return capacity; | 139 | return capacity; |
| @@ -146,7 +147,8 @@ u64 Process::GetTotalPhysicalMemoryAvailableWithoutSystemResource() const { | |||
| 146 | } | 147 | } |
| 147 | 148 | ||
| 148 | u64 Process::GetTotalPhysicalMemoryUsed() const { | 149 | u64 Process::GetTotalPhysicalMemoryUsed() const { |
| 149 | return image_size + main_thread_stack_size + page_table->GetTotalHeapSize(); | 150 | return image_size + main_thread_stack_size + page_table->GetTotalHeapSize() + |
| 151 | GetSystemResourceSize(); | ||
| 150 | } | 152 | } |
| 151 | 153 | ||
| 152 | u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { | 154 | u64 Process::GetTotalPhysicalMemoryUsedWithoutSystemResource() const { |
diff --git a/src/core/hle/kernel/resource_limit.cpp b/src/core/hle/kernel/resource_limit.cpp index d9beaa3a4..212e442f4 100644 --- a/src/core/hle/kernel/resource_limit.cpp +++ b/src/core/hle/kernel/resource_limit.cpp | |||
| @@ -24,13 +24,9 @@ bool ResourceLimit::Reserve(ResourceType resource, s64 amount, u64 timeout) { | |||
| 24 | const std::size_t index{ResourceTypeToIndex(resource)}; | 24 | const std::size_t index{ResourceTypeToIndex(resource)}; |
| 25 | 25 | ||
| 26 | s64 new_value = current[index] + amount; | 26 | s64 new_value = current[index] + amount; |
| 27 | while (new_value > limit[index] && available[index] + amount <= limit[index]) { | 27 | if (new_value > limit[index] && available[index] + amount <= limit[index]) { |
| 28 | // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout | 28 | // TODO(bunnei): This is wrong for multicore, we should wait the calling thread for timeout |
| 29 | new_value = current[index] + amount; | 29 | new_value = current[index] + amount; |
| 30 | |||
| 31 | if (timeout >= 0) { | ||
| 32 | break; | ||
| 33 | } | ||
| 34 | } | 30 | } |
| 35 | 31 | ||
| 36 | if (new_value <= limit[index]) { | 32 | if (new_value <= limit[index]) { |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 2bf8d68ce..39d5d8401 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -49,8 +49,6 @@ add_library(video_core STATIC | |||
| 49 | query_cache.h | 49 | query_cache.h |
| 50 | rasterizer_accelerated.cpp | 50 | rasterizer_accelerated.cpp |
| 51 | rasterizer_accelerated.h | 51 | rasterizer_accelerated.h |
| 52 | rasterizer_cache.cpp | ||
| 53 | rasterizer_cache.h | ||
| 54 | rasterizer_interface.h | 52 | rasterizer_interface.h |
| 55 | renderer_base.cpp | 53 | renderer_base.cpp |
| 56 | renderer_base.h | 54 | renderer_base.h |
| @@ -93,6 +91,7 @@ add_library(video_core STATIC | |||
| 93 | renderer_opengl/utils.h | 91 | renderer_opengl/utils.h |
| 94 | sampler_cache.cpp | 92 | sampler_cache.cpp |
| 95 | sampler_cache.h | 93 | sampler_cache.h |
| 94 | shader_cache.h | ||
| 96 | shader/decode/arithmetic.cpp | 95 | shader/decode/arithmetic.cpp |
| 97 | shader/decode/arithmetic_immediate.cpp | 96 | shader/decode/arithmetic_immediate.cpp |
| 98 | shader/decode/bfe.cpp | 97 | shader/decode/bfe.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b88fce2cd..77ae34339 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -110,19 +110,23 @@ public: | |||
| 110 | }); | 110 | }); |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | void Map(std::size_t max_size) { | 113 | /// Prepares the buffer cache for data uploading |
| 114 | /// @param max_size Maximum number of bytes that will be uploaded | ||
| 115 | /// @return True when a stream buffer invalidation was required, false otherwise | ||
| 116 | bool Map(std::size_t max_size) { | ||
| 114 | std::lock_guard lock{mutex}; | 117 | std::lock_guard lock{mutex}; |
| 115 | 118 | ||
| 119 | bool invalidated; | ||
| 116 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); | 120 | std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); |
| 117 | buffer_offset = buffer_offset_base; | 121 | buffer_offset = buffer_offset_base; |
| 122 | |||
| 123 | return invalidated; | ||
| 118 | } | 124 | } |
| 119 | 125 | ||
| 120 | /// Finishes the upload stream, returns true on bindings invalidation. | 126 | /// Finishes the upload stream |
| 121 | bool Unmap() { | 127 | void Unmap() { |
| 122 | std::lock_guard lock{mutex}; | 128 | std::lock_guard lock{mutex}; |
| 123 | |||
| 124 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); | 129 | stream_buffer->Unmap(buffer_offset - buffer_offset_base); |
| 125 | return std::exchange(invalidated, false); | ||
| 126 | } | 130 | } |
| 127 | 131 | ||
| 128 | void TickFrame() { | 132 | void TickFrame() { |
| @@ -576,8 +580,6 @@ private: | |||
| 576 | std::unique_ptr<StreamBuffer> stream_buffer; | 580 | std::unique_ptr<StreamBuffer> stream_buffer; |
| 577 | BufferType stream_buffer_handle{}; | 581 | BufferType stream_buffer_handle{}; |
| 578 | 582 | ||
| 579 | bool invalidated = false; | ||
| 580 | |||
| 581 | u8* buffer_ptr = nullptr; | 583 | u8* buffer_ptr = nullptr; |
| 582 | u64 buffer_offset = 0; | 584 | u64 buffer_offset = 0; |
| 583 | u64 buffer_offset_base = 0; | 585 | u64 buffer_offset_base = 0; |
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h index ebe139504..f46e81bb7 100644 --- a/src/video_core/engines/const_buffer_engine_interface.h +++ b/src/video_core/engines/const_buffer_engine_interface.h | |||
| @@ -93,6 +93,7 @@ public: | |||
| 93 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; | 93 | virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; |
| 94 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 94 | virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 95 | u64 offset) const = 0; | 95 | u64 offset) const = 0; |
| 96 | virtual SamplerDescriptor AccessSampler(u32 handle) const = 0; | ||
| 96 | virtual u32 GetBoundBuffer() const = 0; | 97 | virtual u32 GetBoundBuffer() const = 0; |
| 97 | 98 | ||
| 98 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; | 99 | virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index f6237fc6a..a82b06a38 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con | |||
| 92 | ASSERT(stage == ShaderType::Compute); | 92 | ASSERT(stage == ShaderType::Compute); |
| 93 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; | 93 | const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; |
| 94 | const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; | 94 | const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; |
| 95 | return AccessSampler(memory_manager.Read<u32>(tex_info_address)); | ||
| 96 | } | ||
| 95 | 97 | ||
| 96 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 98 | SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { |
| 99 | const Texture::TextureHandle tex_handle{handle}; | ||
| 97 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 100 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 98 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 101 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 99 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 102 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 18ceedfaf..b7f668d88 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -219,6 +219,8 @@ public: | |||
| 219 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 219 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 220 | u64 offset) const override; | 220 | u64 offset) const override; |
| 221 | 221 | ||
| 222 | SamplerDescriptor AccessSampler(u32 handle) const override; | ||
| 223 | |||
| 222 | u32 GetBoundBuffer() const override { | 224 | u32 GetBoundBuffer() const override { |
| 223 | return regs.tex_cb_index; | 225 | return regs.tex_cb_index; |
| 224 | } | 226 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index e46b153f9..ea3c8a963 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b | |||
| 740 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | 740 | const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; |
| 741 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; | 741 | const auto& tex_info_buffer = shader.const_buffers[const_buffer]; |
| 742 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; | 742 | const GPUVAddr tex_info_address = tex_info_buffer.address + offset; |
| 743 | return AccessSampler(memory_manager.Read<u32>(tex_info_address)); | ||
| 744 | } | ||
| 743 | 745 | ||
| 744 | const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | 746 | SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { |
| 747 | const Texture::TextureHandle tex_handle{handle}; | ||
| 745 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); | 748 | const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); |
| 746 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); | 749 | SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); |
| 747 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | 750 | result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 79fc9bbea..d5fe25065 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -1404,6 +1404,8 @@ public: | |||
| 1404 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | 1404 | SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, |
| 1405 | u64 offset) const override; | 1405 | u64 offset) const override; |
| 1406 | 1406 | ||
| 1407 | SamplerDescriptor AccessSampler(u32 handle) const override; | ||
| 1408 | |||
| 1407 | u32 GetBoundBuffer() const override { | 1409 | u32 GetBoundBuffer() const override { |
| 1408 | return regs.tex_cb_index; | 1410 | return regs.tex_cb_index; |
| 1409 | } | 1411 | } |
diff --git a/src/video_core/rasterizer_cache.cpp b/src/video_core/rasterizer_cache.cpp deleted file mode 100644 index 093b2cdf4..000000000 --- a/src/video_core/rasterizer_cache.cpp +++ /dev/null | |||
| @@ -1,7 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/rasterizer_cache.h" | ||
| 6 | |||
| 7 | RasterizerCacheObject::~RasterizerCacheObject() = default; | ||
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h deleted file mode 100644 index 096ee337c..000000000 --- a/src/video_core/rasterizer_cache.h +++ /dev/null | |||
| @@ -1,253 +0,0 @@ | |||
| 1 | // Copyright 2018 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <mutex> | ||
| 8 | #include <set> | ||
| 9 | #include <unordered_map> | ||
| 10 | |||
| 11 | #include <boost/icl/interval_map.hpp> | ||
| 12 | #include <boost/range/iterator_range_core.hpp> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "core/settings.h" | ||
| 16 | #include "video_core/gpu.h" | ||
| 17 | #include "video_core/rasterizer_interface.h" | ||
| 18 | |||
| 19 | class RasterizerCacheObject { | ||
| 20 | public: | ||
| 21 | explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} | ||
| 22 | |||
| 23 | virtual ~RasterizerCacheObject(); | ||
| 24 | |||
| 25 | VAddr GetCpuAddr() const { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | /// Gets the size of the shader in guest memory, required for cache management | ||
| 30 | virtual std::size_t GetSizeInBytes() const = 0; | ||
| 31 | |||
| 32 | /// Sets whether the cached object should be considered registered | ||
| 33 | void SetIsRegistered(bool registered) { | ||
| 34 | is_registered = registered; | ||
| 35 | } | ||
| 36 | |||
| 37 | /// Returns true if the cached object is registered | ||
| 38 | bool IsRegistered() const { | ||
| 39 | return is_registered; | ||
| 40 | } | ||
| 41 | |||
| 42 | /// Returns true if the cached object is dirty | ||
| 43 | bool IsDirty() const { | ||
| 44 | return is_dirty; | ||
| 45 | } | ||
| 46 | |||
| 47 | /// Returns ticks from when this cached object was last modified | ||
| 48 | u64 GetLastModifiedTicks() const { | ||
| 49 | return last_modified_ticks; | ||
| 50 | } | ||
| 51 | |||
| 52 | /// Marks an object as recently modified, used to specify whether it is clean or dirty | ||
| 53 | template <class T> | ||
| 54 | void MarkAsModified(bool dirty, T& cache) { | ||
| 55 | is_dirty = dirty; | ||
| 56 | last_modified_ticks = cache.GetModifiedTicks(); | ||
| 57 | } | ||
| 58 | |||
| 59 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 60 | is_memory_marked = is_memory_marked_; | ||
| 61 | } | ||
| 62 | |||
| 63 | bool IsMemoryMarked() const { | ||
| 64 | return is_memory_marked; | ||
| 65 | } | ||
| 66 | |||
| 67 | void SetSyncPending(bool is_sync_pending_) { | ||
| 68 | is_sync_pending = is_sync_pending_; | ||
| 69 | } | ||
| 70 | |||
| 71 | bool IsSyncPending() const { | ||
| 72 | return is_sync_pending; | ||
| 73 | } | ||
| 74 | |||
| 75 | private: | ||
| 76 | bool is_registered{}; ///< Whether the object is currently registered with the cache | ||
| 77 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) | ||
| 78 | bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory. | ||
| 79 | bool is_sync_pending{}; ///< Whether the object is pending deletion. | ||
| 80 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing | ||
| 81 | VAddr cpu_addr{}; ///< Cpu address memory, unique from emulated virtual address space | ||
| 82 | }; | ||
| 83 | |||
| 84 | template <class T> | ||
| 85 | class RasterizerCache : NonCopyable { | ||
| 86 | friend class RasterizerCacheObject; | ||
| 87 | |||
| 88 | public: | ||
| 89 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | ||
| 90 | |||
| 91 | /// Write any cached resources overlapping the specified region back to memory | ||
| 92 | void FlushRegion(VAddr addr, std::size_t size) { | ||
| 93 | std::lock_guard lock{mutex}; | ||
| 94 | |||
| 95 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||
| 96 | for (auto& object : objects) { | ||
| 97 | FlushObject(object); | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | /// Mark the specified region as being invalidated | ||
| 102 | void InvalidateRegion(VAddr addr, u64 size) { | ||
| 103 | std::lock_guard lock{mutex}; | ||
| 104 | |||
| 105 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||
| 106 | for (auto& object : objects) { | ||
| 107 | if (!object->IsRegistered()) { | ||
| 108 | // Skip duplicates | ||
| 109 | continue; | ||
| 110 | } | ||
| 111 | Unregister(object); | ||
| 112 | } | ||
| 113 | } | ||
| 114 | |||
| 115 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 116 | std::lock_guard lock{mutex}; | ||
| 117 | |||
| 118 | for (const auto& object : GetSortedObjectsFromRegion(addr, size)) { | ||
| 119 | if (object->IsRegistered()) { | ||
| 120 | UnmarkMemory(object); | ||
| 121 | object->SetSyncPending(true); | ||
| 122 | marked_for_unregister.emplace_back(object); | ||
| 123 | } | ||
| 124 | } | ||
| 125 | } | ||
| 126 | |||
| 127 | void SyncGuestHost() { | ||
| 128 | std::lock_guard lock{mutex}; | ||
| 129 | |||
| 130 | for (const auto& object : marked_for_unregister) { | ||
| 131 | if (object->IsRegistered()) { | ||
| 132 | object->SetSyncPending(false); | ||
| 133 | Unregister(object); | ||
| 134 | } | ||
| 135 | } | ||
| 136 | marked_for_unregister.clear(); | ||
| 137 | } | ||
| 138 | |||
| 139 | /// Invalidates everything in the cache | ||
| 140 | void InvalidateAll() { | ||
| 141 | std::lock_guard lock{mutex}; | ||
| 142 | |||
| 143 | while (interval_cache.begin() != interval_cache.end()) { | ||
| 144 | Unregister(*interval_cache.begin()->second.begin()); | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | protected: | ||
| 149 | /// Tries to get an object from the cache with the specified cache address | ||
| 150 | T TryGet(VAddr addr) const { | ||
| 151 | const auto iter = map_cache.find(addr); | ||
| 152 | if (iter != map_cache.end()) | ||
| 153 | return iter->second; | ||
| 154 | return nullptr; | ||
| 155 | } | ||
| 156 | |||
| 157 | /// Register an object into the cache | ||
| 158 | virtual void Register(const T& object) { | ||
| 159 | std::lock_guard lock{mutex}; | ||
| 160 | |||
| 161 | object->SetIsRegistered(true); | ||
| 162 | interval_cache.add({GetInterval(object), ObjectSet{object}}); | ||
| 163 | map_cache.insert({object->GetCpuAddr(), object}); | ||
| 164 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | ||
| 165 | object->SetMemoryMarked(true); | ||
| 166 | } | ||
| 167 | |||
| 168 | /// Unregisters an object from the cache | ||
| 169 | virtual void Unregister(const T& object) { | ||
| 170 | std::lock_guard lock{mutex}; | ||
| 171 | |||
| 172 | UnmarkMemory(object); | ||
| 173 | object->SetIsRegistered(false); | ||
| 174 | if (object->IsSyncPending()) { | ||
| 175 | marked_for_unregister.remove(object); | ||
| 176 | object->SetSyncPending(false); | ||
| 177 | } | ||
| 178 | const VAddr addr = object->GetCpuAddr(); | ||
| 179 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | ||
| 180 | map_cache.erase(addr); | ||
| 181 | } | ||
| 182 | |||
| 183 | void UnmarkMemory(const T& object) { | ||
| 184 | if (!object->IsMemoryMarked()) { | ||
| 185 | return; | ||
| 186 | } | ||
| 187 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | ||
| 188 | object->SetMemoryMarked(false); | ||
| 189 | } | ||
| 190 | |||
| 191 | /// Returns a ticks counter used for tracking when cached objects were last modified | ||
| 192 | u64 GetModifiedTicks() { | ||
| 193 | std::lock_guard lock{mutex}; | ||
| 194 | |||
| 195 | return ++modified_ticks; | ||
| 196 | } | ||
| 197 | |||
| 198 | virtual void FlushObjectInner(const T& object) = 0; | ||
| 199 | |||
| 200 | /// Flushes the specified object, updating appropriate cache state as needed | ||
| 201 | void FlushObject(const T& object) { | ||
| 202 | std::lock_guard lock{mutex}; | ||
| 203 | |||
| 204 | if (!object->IsDirty()) { | ||
| 205 | return; | ||
| 206 | } | ||
| 207 | FlushObjectInner(object); | ||
| 208 | object->MarkAsModified(false, *this); | ||
| 209 | } | ||
| 210 | |||
| 211 | std::recursive_mutex mutex; | ||
| 212 | |||
| 213 | private: | ||
| 214 | /// Returns a list of cached objects from the specified memory region, ordered by access time | ||
| 215 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { | ||
| 216 | if (size == 0) { | ||
| 217 | return {}; | ||
| 218 | } | ||
| 219 | |||
| 220 | std::vector<T> objects; | ||
| 221 | const ObjectInterval interval{addr, addr + size}; | ||
| 222 | for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) { | ||
| 223 | for (auto& cached_object : pair.second) { | ||
| 224 | if (!cached_object) { | ||
| 225 | continue; | ||
| 226 | } | ||
| 227 | objects.push_back(cached_object); | ||
| 228 | } | ||
| 229 | } | ||
| 230 | |||
| 231 | std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool { | ||
| 232 | return a->GetLastModifiedTicks() < b->GetLastModifiedTicks(); | ||
| 233 | }); | ||
| 234 | |||
| 235 | return objects; | ||
| 236 | } | ||
| 237 | |||
| 238 | using ObjectSet = std::set<T>; | ||
| 239 | using ObjectCache = std::unordered_map<VAddr, T>; | ||
| 240 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; | ||
| 241 | using ObjectInterval = typename IntervalCache::interval_type; | ||
| 242 | |||
| 243 | static auto GetInterval(const T& object) { | ||
| 244 | return ObjectInterval::right_open(object->GetCpuAddr(), | ||
| 245 | object->GetCpuAddr() + object->GetSizeInBytes()); | ||
| 246 | } | ||
| 247 | |||
| 248 | ObjectCache map_cache; | ||
| 249 | IntervalCache interval_cache; ///< Cache of objects | ||
| 250 | u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing | ||
| 251 | VideoCore::RasterizerInterface& rasterizer; | ||
| 252 | std::list<T> marked_for_unregister; | ||
| 253 | }; | ||
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index a9e86cfc7..679b9b1d7 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | 11 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 12 | #include "video_core/engines/maxwell_3d.h" |
| 13 | #include "video_core/rasterizer_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 16 | 15 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f802fd384..2d6c11320 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" | 30 | #include "video_core/renderer_opengl/gl_shader_cache.h" |
| 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 31 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 32 | #include "video_core/renderer_opengl/renderer_opengl.h" | 32 | #include "video_core/renderer_opengl/renderer_opengl.h" |
| 33 | #include "video_core/shader_cache.h" | ||
| 33 | 34 | ||
| 34 | namespace OpenGL { | 35 | namespace OpenGL { |
| 35 | 36 | ||
| @@ -65,10 +66,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16; | |||
| 65 | template <typename Engine, typename Entry> | 66 | template <typename Engine, typename Entry> |
| 66 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 67 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 67 | ShaderType shader_type, std::size_t index = 0) { | 68 | ShaderType shader_type, std::size_t index = 0) { |
| 69 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 70 | if (entry.is_separated) { | ||
| 71 | const u32 buffer_1 = entry.buffer; | ||
| 72 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 73 | const u32 offset_1 = entry.offset; | ||
| 74 | const u32 offset_2 = entry.secondary_offset; | ||
| 75 | const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1); | ||
| 76 | const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2); | ||
| 77 | return engine.GetTextureInfo(handle_1 | handle_2); | ||
| 78 | } | ||
| 79 | } | ||
| 68 | if (entry.is_bindless) { | 80 | if (entry.is_bindless) { |
| 69 | const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); | 81 | const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); |
| 70 | return engine.GetTextureInfo(tex_handle); | 82 | return engine.GetTextureInfo(handle); |
| 71 | } | 83 | } |
| 84 | |||
| 72 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); | 85 | const auto& gpu_profile = engine.AccessGuestDriverProfile(); |
| 73 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); | 86 | const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); |
| 74 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { | 87 | if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { |
| @@ -310,7 +323,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 310 | continue; | 323 | continue; |
| 311 | } | 324 | } |
| 312 | 325 | ||
| 313 | Shader shader{shader_cache.GetStageProgram(program)}; | 326 | Shader* const shader = shader_cache.GetStageProgram(program); |
| 314 | 327 | ||
| 315 | if (device.UseAssemblyShaders()) { | 328 | if (device.UseAssemblyShaders()) { |
| 316 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this | 329 | // Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this |
| @@ -604,7 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 604 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 617 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); |
| 605 | 618 | ||
| 606 | // Prepare the vertex array. | 619 | // Prepare the vertex array. |
| 607 | buffer_cache.Map(buffer_size); | 620 | const bool invalidated = buffer_cache.Map(buffer_size); |
| 621 | |||
| 622 | if (invalidated) { | ||
| 623 | // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty | ||
| 624 | auto& dirty = gpu.dirty.flags; | ||
| 625 | dirty[Dirty::VertexBuffers] = true; | ||
| 626 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 627 | dirty[index] = true; | ||
| 628 | } | ||
| 629 | } | ||
| 608 | 630 | ||
| 609 | // Prepare vertex array format. | 631 | // Prepare vertex array format. |
| 610 | SetupVertexFormat(); | 632 | SetupVertexFormat(); |
| @@ -870,7 +892,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 870 | return true; | 892 | return true; |
| 871 | } | 893 | } |
| 872 | 894 | ||
| 873 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { | 895 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { |
| 874 | static constexpr std::array PARAMETER_LUT = { | 896 | static constexpr std::array PARAMETER_LUT = { |
| 875 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | 897 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| 876 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | 898 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, |
| @@ -900,7 +922,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad | |||
| 900 | } | 922 | } |
| 901 | } | 923 | } |
| 902 | 924 | ||
| 903 | void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | 925 | void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { |
| 904 | MICROPROFILE_SCOPE(OpenGL_UBO); | 926 | MICROPROFILE_SCOPE(OpenGL_UBO); |
| 905 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | 927 | const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |
| 906 | const auto& entries = kernel->GetEntries(); | 928 | const auto& entries = kernel->GetEntries(); |
| @@ -969,7 +991,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | |||
| 969 | } | 991 | } |
| 970 | } | 992 | } |
| 971 | 993 | ||
| 972 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { | 994 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { |
| 973 | auto& gpu{system.GPU()}; | 995 | auto& gpu{system.GPU()}; |
| 974 | auto& memory_manager{gpu.MemoryManager()}; | 996 | auto& memory_manager{gpu.MemoryManager()}; |
| 975 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; | 997 | const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; |
| @@ -984,7 +1006,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad | |||
| 984 | } | 1006 | } |
| 985 | } | 1007 | } |
| 986 | 1008 | ||
| 987 | void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | 1009 | void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { |
| 988 | auto& gpu{system.GPU()}; | 1010 | auto& gpu{system.GPU()}; |
| 989 | auto& memory_manager{gpu.MemoryManager()}; | 1011 | auto& memory_manager{gpu.MemoryManager()}; |
| 990 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | 1012 | const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; |
| @@ -1007,7 +1029,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e | |||
| 1007 | static_cast<GLsizeiptr>(size)); | 1029 | static_cast<GLsizeiptr>(size)); |
| 1008 | } | 1030 | } |
| 1009 | 1031 | ||
| 1010 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { | 1032 | void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { |
| 1011 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1033 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 1012 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 1034 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1013 | u32 binding = device.GetBaseBindings(stage_index).sampler; | 1035 | u32 binding = device.GetBaseBindings(stage_index).sampler; |
| @@ -1020,7 +1042,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& | |||
| 1020 | } | 1042 | } |
| 1021 | } | 1043 | } |
| 1022 | 1044 | ||
| 1023 | void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { | 1045 | void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { |
| 1024 | MICROPROFILE_SCOPE(OpenGL_Texture); | 1046 | MICROPROFILE_SCOPE(OpenGL_Texture); |
| 1025 | const auto& compute = system.GPU().KeplerCompute(); | 1047 | const auto& compute = system.GPU().KeplerCompute(); |
| 1026 | u32 binding = 0; | 1048 | u32 binding = 0; |
| @@ -1049,7 +1071,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu | |||
| 1049 | } | 1071 | } |
| 1050 | } | 1072 | } |
| 1051 | 1073 | ||
| 1052 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { | 1074 | void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { |
| 1053 | const auto& maxwell3d = system.GPU().Maxwell3D(); | 1075 | const auto& maxwell3d = system.GPU().Maxwell3D(); |
| 1054 | u32 binding = device.GetBaseBindings(stage_index).image; | 1076 | u32 binding = device.GetBaseBindings(stage_index).image; |
| 1055 | for (const auto& entry : shader->GetEntries().images) { | 1077 | for (const auto& entry : shader->GetEntries().images) { |
| @@ -1059,7 +1081,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh | |||
| 1059 | } | 1081 | } |
| 1060 | } | 1082 | } |
| 1061 | 1083 | ||
| 1062 | void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | 1084 | void RasterizerOpenGL::SetupComputeImages(Shader* shader) { |
| 1063 | const auto& compute = system.GPU().KeplerCompute(); | 1085 | const auto& compute = system.GPU().KeplerCompute(); |
| 1064 | u32 binding = 0; | 1086 | u32 binding = 0; |
| 1065 | for (const auto& entry : shader->GetEntries().images) { | 1087 | for (const auto& entry : shader->GetEntries().images) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 7abc8fdbd..4f082592f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -19,7 +19,6 @@ | |||
| 19 | #include "video_core/engines/const_buffer_info.h" | 19 | #include "video_core/engines/const_buffer_info.h" |
| 20 | #include "video_core/engines/maxwell_3d.h" | 20 | #include "video_core/engines/maxwell_3d.h" |
| 21 | #include "video_core/rasterizer_accelerated.h" | 21 | #include "video_core/rasterizer_accelerated.h" |
| 22 | #include "video_core/rasterizer_cache.h" | ||
| 23 | #include "video_core/rasterizer_interface.h" | 22 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 23 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 24 | #include "video_core/renderer_opengl/gl_device.h" |
| @@ -100,10 +99,10 @@ private: | |||
| 100 | void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); | 99 | void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil); |
| 101 | 100 | ||
| 102 | /// Configures the current constbuffers to use for the draw command. | 101 | /// Configures the current constbuffers to use for the draw command. |
| 103 | void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); | 102 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); |
| 104 | 103 | ||
| 105 | /// Configures the current constbuffers to use for the kernel invocation. | 104 | /// Configures the current constbuffers to use for the kernel invocation. |
| 106 | void SetupComputeConstBuffers(const Shader& kernel); | 105 | void SetupComputeConstBuffers(Shader* kernel); |
| 107 | 106 | ||
| 108 | /// Configures a constant buffer. | 107 | /// Configures a constant buffer. |
| 109 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | 108 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, |
| @@ -111,30 +110,30 @@ private: | |||
| 111 | std::size_t unified_offset); | 110 | std::size_t unified_offset); |
| 112 | 111 | ||
| 113 | /// Configures the current global memory entries to use for the draw command. | 112 | /// Configures the current global memory entries to use for the draw command. |
| 114 | void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); | 113 | void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); |
| 115 | 114 | ||
| 116 | /// Configures the current global memory entries to use for the kernel invocation. | 115 | /// Configures the current global memory entries to use for the kernel invocation. |
| 117 | void SetupComputeGlobalMemory(const Shader& kernel); | 116 | void SetupComputeGlobalMemory(Shader* kernel); |
| 118 | 117 | ||
| 119 | /// Configures a constant buffer. | 118 | /// Configures a constant buffer. |
| 120 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | 119 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, |
| 121 | std::size_t size); | 120 | std::size_t size); |
| 122 | 121 | ||
| 123 | /// Configures the current textures to use for the draw command. | 122 | /// Configures the current textures to use for the draw command. |
| 124 | void SetupDrawTextures(std::size_t stage_index, const Shader& shader); | 123 | void SetupDrawTextures(std::size_t stage_index, Shader* shader); |
| 125 | 124 | ||
| 126 | /// Configures the textures used in a compute shader. | 125 | /// Configures the textures used in a compute shader. |
| 127 | void SetupComputeTextures(const Shader& kernel); | 126 | void SetupComputeTextures(Shader* kernel); |
| 128 | 127 | ||
| 129 | /// Configures a texture. | 128 | /// Configures a texture. |
| 130 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, | 129 | void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, |
| 131 | const SamplerEntry& entry); | 130 | const SamplerEntry& entry); |
| 132 | 131 | ||
| 133 | /// Configures images in a graphics shader. | 132 | /// Configures images in a graphics shader. |
| 134 | void SetupDrawImages(std::size_t stage_index, const Shader& shader); | 133 | void SetupDrawImages(std::size_t stage_index, Shader* shader); |
| 135 | 134 | ||
| 136 | /// Configures images in a compute shader. | 135 | /// Configures images in a compute shader. |
| 137 | void SetupComputeImages(const Shader& shader); | 136 | void SetupComputeImages(Shader* shader); |
| 138 | 137 | ||
| 139 | /// Configures an image. | 138 | /// Configures an image. |
| 140 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); | 139 | void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index a991ca64a..c28486b1d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -29,6 +29,7 @@ | |||
| 29 | #include "video_core/shader/memory_util.h" | 29 | #include "video_core/shader/memory_util.h" |
| 30 | #include "video_core/shader/registry.h" | 30 | #include "video_core/shader/registry.h" |
| 31 | #include "video_core/shader/shader_ir.h" | 31 | #include "video_core/shader/shader_ir.h" |
| 32 | #include "video_core/shader_cache.h" | ||
| 32 | 33 | ||
| 33 | namespace OpenGL { | 34 | namespace OpenGL { |
| 34 | 35 | ||
| @@ -194,12 +195,9 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 194 | 195 | ||
| 195 | } // Anonymous namespace | 196 | } // Anonymous namespace |
| 196 | 197 | ||
| 197 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 198 | Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_, |
| 198 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 199 | ProgramSharedPtr program_) |
| 199 | ShaderEntries entries, ProgramSharedPtr program_) | 200 | : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} { |
| 200 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, | ||
| 201 | size_in_bytes{size_in_bytes}, program{std::move(program_)} { | ||
| 202 | // Assign either the assembly program or source program. We can't have both. | ||
| 203 | handle = program->assembly_program.handle; | 201 | handle = program->assembly_program.handle; |
| 204 | if (handle == 0) { | 202 | if (handle == 0) { |
| 205 | handle = program->source_program.handle; | 203 | handle = program->source_program.handle; |
| @@ -207,16 +205,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | |||
| 207 | ASSERT(handle != 0); | 205 | ASSERT(handle != 0); |
| 208 | } | 206 | } |
| 209 | 207 | ||
| 210 | CachedShader::~CachedShader() = default; | 208 | Shader::~Shader() = default; |
| 211 | 209 | ||
| 212 | GLuint CachedShader::GetHandle() const { | 210 | GLuint Shader::GetHandle() const { |
| 213 | DEBUG_ASSERT(registry->IsConsistent()); | 211 | DEBUG_ASSERT(registry->IsConsistent()); |
| 214 | return handle; | 212 | return handle; |
| 215 | } | 213 | } |
| 216 | 214 | ||
| 217 | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | 215 | std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params, |
| 218 | Maxwell::ShaderProgram program_type, ProgramCode code, | 216 | Maxwell::ShaderProgram program_type, |
| 219 | ProgramCode code_b) { | 217 | ProgramCode code, ProgramCode code_b) { |
| 220 | const auto shader_type = GetShaderType(program_type); | 218 | const auto shader_type = GetShaderType(program_type); |
| 221 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 219 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 222 | 220 | ||
| @@ -241,12 +239,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 241 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 239 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 242 | params.disk_cache.SaveEntry(std::move(entry)); | 240 | params.disk_cache.SaveEntry(std::move(entry)); |
| 243 | 241 | ||
| 244 | return std::shared_ptr<CachedShader>( | 242 | return std::unique_ptr<Shader>(new Shader( |
| 245 | new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), | 243 | std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program))); |
| 246 | MakeEntries(params.device, ir, shader_type), std::move(program))); | ||
| 247 | } | 244 | } |
| 248 | 245 | ||
| 249 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 246 | std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, |
| 247 | ProgramCode code) { | ||
| 250 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 248 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 251 | 249 | ||
| 252 | auto& engine = params.system.GPU().KeplerCompute(); | 250 | auto& engine = params.system.GPU().KeplerCompute(); |
| @@ -266,23 +264,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog | |||
| 266 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 264 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 267 | params.disk_cache.SaveEntry(std::move(entry)); | 265 | params.disk_cache.SaveEntry(std::move(entry)); |
| 268 | 266 | ||
| 269 | return std::shared_ptr<CachedShader>( | 267 | return std::unique_ptr<Shader>(new Shader(std::move(registry), |
| 270 | new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry), | 268 | MakeEntries(params.device, ir, ShaderType::Compute), |
| 271 | MakeEntries(params.device, ir, ShaderType::Compute), std::move(program))); | 269 | std::move(program))); |
| 272 | } | 270 | } |
| 273 | 271 | ||
| 274 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 272 | std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, |
| 275 | const PrecompiledShader& precompiled_shader, | 273 | const PrecompiledShader& precompiled_shader) { |
| 276 | std::size_t size_in_bytes) { | 274 | return std::unique_ptr<Shader>(new Shader( |
| 277 | return std::shared_ptr<CachedShader>( | 275 | precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); |
| 278 | new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, | ||
| 279 | precompiled_shader.entries, precompiled_shader.program)); | ||
| 280 | } | 276 | } |
| 281 | 277 | ||
| 282 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 278 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| 283 | Core::Frontend::EmuWindow& emu_window, const Device& device) | 279 | Core::Frontend::EmuWindow& emu_window, const Device& device) |
| 284 | : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, | 280 | : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, |
| 285 | disk_cache{system} {} | 281 | emu_window{emu_window}, device{device}, disk_cache{system} {} |
| 282 | |||
| 283 | ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; | ||
| 286 | 284 | ||
| 287 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | 285 | void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, |
| 288 | const VideoCore::DiskResourceLoadCallback& callback) { | 286 | const VideoCore::DiskResourceLoadCallback& callback) { |
| @@ -436,7 +434,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||
| 436 | return program; | 434 | return program; |
| 437 | } | 435 | } |
| 438 | 436 | ||
| 439 | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | 437 | Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { |
| 440 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { | 438 | if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { |
| 441 | return last_shaders[static_cast<std::size_t>(program)]; | 439 | return last_shaders[static_cast<std::size_t>(program)]; |
| 442 | } | 440 | } |
| @@ -446,8 +444,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 446 | 444 | ||
| 447 | // Look up shader in the cache based on address | 445 | // Look up shader in the cache based on address |
| 448 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | 446 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 449 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; | 447 | if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { |
| 450 | if (shader) { | ||
| 451 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 448 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 452 | } | 449 | } |
| 453 | 450 | ||
| @@ -468,30 +465,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 468 | const ShaderParameters params{system, disk_cache, device, | 465 | const ShaderParameters params{system, disk_cache, device, |
| 469 | *cpu_addr, host_ptr, unique_identifier}; | 466 | *cpu_addr, host_ptr, unique_identifier}; |
| 470 | 467 | ||
| 468 | std::unique_ptr<Shader> shader; | ||
| 471 | const auto found = runtime_cache.find(unique_identifier); | 469 | const auto found = runtime_cache.find(unique_identifier); |
| 472 | if (found == runtime_cache.end()) { | 470 | if (found == runtime_cache.end()) { |
| 473 | shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | 471 | shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); |
| 474 | std::move(code_b)); | ||
| 475 | } else { | 472 | } else { |
| 476 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 473 | shader = Shader::CreateFromCache(params, found->second); |
| 477 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 478 | } | 474 | } |
| 479 | 475 | ||
| 476 | Shader* const result = shader.get(); | ||
| 480 | if (cpu_addr) { | 477 | if (cpu_addr) { |
| 481 | Register(shader); | 478 | Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64)); |
| 482 | } else { | 479 | } else { |
| 483 | null_shader = shader; | 480 | null_shader = std::move(shader); |
| 484 | } | 481 | } |
| 485 | 482 | ||
| 486 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 483 | return last_shaders[static_cast<std::size_t>(program)] = result; |
| 487 | } | 484 | } |
| 488 | 485 | ||
| 489 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | 486 | Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { |
| 490 | auto& memory_manager{system.GPU().MemoryManager()}; | 487 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 491 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; | 488 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; |
| 492 | 489 | ||
| 493 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; | 490 | if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { |
| 494 | if (kernel) { | ||
| 495 | return kernel; | 491 | return kernel; |
| 496 | } | 492 | } |
| 497 | 493 | ||
| @@ -503,20 +499,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 503 | const ShaderParameters params{system, disk_cache, device, | 499 | const ShaderParameters params{system, disk_cache, device, |
| 504 | *cpu_addr, host_ptr, unique_identifier}; | 500 | *cpu_addr, host_ptr, unique_identifier}; |
| 505 | 501 | ||
| 502 | std::unique_ptr<Shader> kernel; | ||
| 506 | const auto found = runtime_cache.find(unique_identifier); | 503 | const auto found = runtime_cache.find(unique_identifier); |
| 507 | if (found == runtime_cache.end()) { | 504 | if (found == runtime_cache.end()) { |
| 508 | kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | 505 | kernel = Shader::CreateKernelFromMemory(params, std::move(code)); |
| 509 | } else { | 506 | } else { |
| 510 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 507 | kernel = Shader::CreateFromCache(params, found->second); |
| 511 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | ||
| 512 | } | 508 | } |
| 513 | 509 | ||
| 510 | Shader* const result = kernel.get(); | ||
| 514 | if (cpu_addr) { | 511 | if (cpu_addr) { |
| 515 | Register(kernel); | 512 | Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64)); |
| 516 | } else { | 513 | } else { |
| 517 | null_kernel = kernel; | 514 | null_kernel = std::move(kernel); |
| 518 | } | 515 | } |
| 519 | return kernel; | 516 | return result; |
| 520 | } | 517 | } |
| 521 | 518 | ||
| 522 | } // namespace OpenGL | 519 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index b2ae8d7f9..6848f1388 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -18,12 +18,12 @@ | |||
| 18 | 18 | ||
| 19 | #include "common/common_types.h" | 19 | #include "common/common_types.h" |
| 20 | #include "video_core/engines/shader_type.h" | 20 | #include "video_core/engines/shader_type.h" |
| 21 | #include "video_core/rasterizer_cache.h" | ||
| 22 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 23 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 22 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 24 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | 23 | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" |
| 25 | #include "video_core/shader/registry.h" | 24 | #include "video_core/shader/registry.h" |
| 26 | #include "video_core/shader/shader_ir.h" | 25 | #include "video_core/shader/shader_ir.h" |
| 26 | #include "video_core/shader_cache.h" | ||
| 27 | 27 | ||
| 28 | namespace Core { | 28 | namespace Core { |
| 29 | class System; | 29 | class System; |
| @@ -35,12 +35,10 @@ class EmuWindow; | |||
| 35 | 35 | ||
| 36 | namespace OpenGL { | 36 | namespace OpenGL { |
| 37 | 37 | ||
| 38 | class CachedShader; | ||
| 39 | class Device; | 38 | class Device; |
| 40 | class RasterizerOpenGL; | 39 | class RasterizerOpenGL; |
| 41 | struct UnspecializedShader; | 40 | struct UnspecializedShader; |
| 42 | 41 | ||
| 43 | using Shader = std::shared_ptr<CachedShader>; | ||
| 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 42 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 45 | 43 | ||
| 46 | struct ProgramHandle { | 44 | struct ProgramHandle { |
| @@ -64,62 +62,53 @@ struct ShaderParameters { | |||
| 64 | u64 unique_identifier; | 62 | u64 unique_identifier; |
| 65 | }; | 63 | }; |
| 66 | 64 | ||
| 67 | class CachedShader final : public RasterizerCacheObject { | 65 | class Shader final { |
| 68 | public: | 66 | public: |
| 69 | ~CachedShader(); | 67 | ~Shader(); |
| 70 | 68 | ||
| 71 | /// Gets the GL program handle for the shader | 69 | /// Gets the GL program handle for the shader |
| 72 | GLuint GetHandle() const; | 70 | GLuint GetHandle() const; |
| 73 | 71 | ||
| 74 | /// Returns the size in bytes of the shader | ||
| 75 | std::size_t GetSizeInBytes() const override { | ||
| 76 | return size_in_bytes; | ||
| 77 | } | ||
| 78 | |||
| 79 | /// Gets the shader entries for the shader | 72 | /// Gets the shader entries for the shader |
| 80 | const ShaderEntries& GetEntries() const { | 73 | const ShaderEntries& GetEntries() const { |
| 81 | return entries; | 74 | return entries; |
| 82 | } | 75 | } |
| 83 | 76 | ||
| 84 | static Shader CreateStageFromMemory(const ShaderParameters& params, | 77 | static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params, |
| 85 | Maxwell::ShaderProgram program_type, | 78 | Maxwell::ShaderProgram program_type, |
| 86 | ProgramCode program_code, ProgramCode program_code_b); | 79 | ProgramCode program_code, |
| 87 | static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | 80 | ProgramCode program_code_b); |
| 81 | static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params, | ||
| 82 | ProgramCode code); | ||
| 88 | 83 | ||
| 89 | static Shader CreateFromCache(const ShaderParameters& params, | 84 | static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params, |
| 90 | const PrecompiledShader& precompiled_shader, | 85 | const PrecompiledShader& precompiled_shader); |
| 91 | std::size_t size_in_bytes); | ||
| 92 | 86 | ||
| 93 | private: | 87 | private: |
| 94 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, | 88 | explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries, |
| 95 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 89 | ProgramSharedPtr program); |
| 96 | ShaderEntries entries, ProgramSharedPtr program); | ||
| 97 | 90 | ||
| 98 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 91 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 99 | ShaderEntries entries; | 92 | ShaderEntries entries; |
| 100 | std::size_t size_in_bytes = 0; | ||
| 101 | ProgramSharedPtr program; | 93 | ProgramSharedPtr program; |
| 102 | GLuint handle = 0; | 94 | GLuint handle = 0; |
| 103 | }; | 95 | }; |
| 104 | 96 | ||
| 105 | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | 97 | class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { |
| 106 | public: | 98 | public: |
| 107 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 99 | explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| 108 | Core::Frontend::EmuWindow& emu_window, const Device& device); | 100 | Core::Frontend::EmuWindow& emu_window, const Device& device); |
| 101 | ~ShaderCacheOpenGL() override; | ||
| 109 | 102 | ||
| 110 | /// Loads disk cache for the current game | 103 | /// Loads disk cache for the current game |
| 111 | void LoadDiskCache(const std::atomic_bool& stop_loading, | 104 | void LoadDiskCache(const std::atomic_bool& stop_loading, |
| 112 | const VideoCore::DiskResourceLoadCallback& callback); | 105 | const VideoCore::DiskResourceLoadCallback& callback); |
| 113 | 106 | ||
| 114 | /// Gets the current specified shader stage program | 107 | /// Gets the current specified shader stage program |
| 115 | Shader GetStageProgram(Maxwell::ShaderProgram program); | 108 | Shader* GetStageProgram(Maxwell::ShaderProgram program); |
| 116 | 109 | ||
| 117 | /// Gets a compute kernel in the passed address | 110 | /// Gets a compute kernel in the passed address |
| 118 | Shader GetComputeKernel(GPUVAddr code_addr); | 111 | Shader* GetComputeKernel(GPUVAddr code_addr); |
| 119 | |||
| 120 | protected: | ||
| 121 | // We do not have to flush this cache as things in it are never modified by us. | ||
| 122 | void FlushObjectInner(const Shader& object) override {} | ||
| 123 | 112 | ||
| 124 | private: | 113 | private: |
| 125 | ProgramSharedPtr GeneratePrecompiledProgram( | 114 | ProgramSharedPtr GeneratePrecompiledProgram( |
| @@ -132,10 +121,10 @@ private: | |||
| 132 | ShaderDiskCacheOpenGL disk_cache; | 121 | ShaderDiskCacheOpenGL disk_cache; |
| 133 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | 122 | std::unordered_map<u64, PrecompiledShader> runtime_cache; |
| 134 | 123 | ||
| 135 | Shader null_shader{}; | 124 | std::unique_ptr<Shader> null_shader; |
| 136 | Shader null_kernel{}; | 125 | std::unique_ptr<Shader> null_kernel; |
| 137 | 126 | ||
| 138 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 127 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |
| 139 | }; | 128 | }; |
| 140 | 129 | ||
| 141 | } // namespace OpenGL | 130 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 9e95a122b..653c3f2f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp | |||
| @@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap; | |||
| 29 | 29 | ||
| 30 | namespace { | 30 | namespace { |
| 31 | 31 | ||
| 32 | using VideoCommon::Shader::SeparateSamplerKey; | ||
| 33 | |||
| 32 | using ShaderCacheVersionHash = std::array<u8, 64>; | 34 | using ShaderCacheVersionHash = std::array<u8, 64>; |
| 33 | 35 | ||
| 34 | struct ConstBufferKey { | 36 | struct ConstBufferKey { |
| @@ -37,18 +39,26 @@ struct ConstBufferKey { | |||
| 37 | u32 value = 0; | 39 | u32 value = 0; |
| 38 | }; | 40 | }; |
| 39 | 41 | ||
| 40 | struct BoundSamplerKey { | 42 | struct BoundSamplerEntry { |
| 41 | u32 offset = 0; | 43 | u32 offset = 0; |
| 42 | Tegra::Engines::SamplerDescriptor sampler; | 44 | Tegra::Engines::SamplerDescriptor sampler; |
| 43 | }; | 45 | }; |
| 44 | 46 | ||
| 45 | struct BindlessSamplerKey { | 47 | struct SeparateSamplerEntry { |
| 48 | u32 cbuf1 = 0; | ||
| 49 | u32 cbuf2 = 0; | ||
| 50 | u32 offset1 = 0; | ||
| 51 | u32 offset2 = 0; | ||
| 52 | Tegra::Engines::SamplerDescriptor sampler; | ||
| 53 | }; | ||
| 54 | |||
| 55 | struct BindlessSamplerEntry { | ||
| 46 | u32 cbuf = 0; | 56 | u32 cbuf = 0; |
| 47 | u32 offset = 0; | 57 | u32 offset = 0; |
| 48 | Tegra::Engines::SamplerDescriptor sampler; | 58 | Tegra::Engines::SamplerDescriptor sampler; |
| 49 | }; | 59 | }; |
| 50 | 60 | ||
| 51 | constexpr u32 NativeVersion = 20; | 61 | constexpr u32 NativeVersion = 21; |
| 52 | 62 | ||
| 53 | ShaderCacheVersionHash GetShaderCacheVersionHash() { | 63 | ShaderCacheVersionHash GetShaderCacheVersionHash() { |
| 54 | ShaderCacheVersionHash hash{}; | 64 | ShaderCacheVersionHash hash{}; |
| @@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 87 | u32 texture_handler_size_value; | 97 | u32 texture_handler_size_value; |
| 88 | u32 num_keys; | 98 | u32 num_keys; |
| 89 | u32 num_bound_samplers; | 99 | u32 num_bound_samplers; |
| 100 | u32 num_separate_samplers; | ||
| 90 | u32 num_bindless_samplers; | 101 | u32 num_bindless_samplers; |
| 91 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | 102 | if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || |
| 92 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | 103 | file.ReadArray(&is_texture_handler_size_known, 1) != 1 || |
| 93 | file.ReadArray(&texture_handler_size_value, 1) != 1 || | 104 | file.ReadArray(&texture_handler_size_value, 1) != 1 || |
| 94 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | 105 | file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || |
| 95 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | 106 | file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || |
| 107 | file.ReadArray(&num_separate_samplers, 1) != 1 || | ||
| 96 | file.ReadArray(&num_bindless_samplers, 1) != 1) { | 108 | file.ReadArray(&num_bindless_samplers, 1) != 1) { |
| 97 | return false; | 109 | return false; |
| 98 | } | 110 | } |
| @@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | |||
| 101 | } | 113 | } |
| 102 | 114 | ||
| 103 | std::vector<ConstBufferKey> flat_keys(num_keys); | 115 | std::vector<ConstBufferKey> flat_keys(num_keys); |
| 104 | std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); | 116 | std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers); |
| 105 | std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); | 117 | std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers); |
| 118 | std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers); | ||
| 106 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || | 119 | if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || |
| 107 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != | 120 | file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != |
| 108 | flat_bound_samplers.size() || | 121 | flat_bound_samplers.size() || |
| 122 | file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) != | ||
| 123 | flat_separate_samplers.size() || | ||
| 109 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != | 124 | file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != |
| 110 | flat_bindless_samplers.size()) { | 125 | flat_bindless_samplers.size()) { |
| 111 | return false; | 126 | return false; |
| 112 | } | 127 | } |
| 113 | for (const auto& key : flat_keys) { | 128 | for (const auto& entry : flat_keys) { |
| 114 | keys.insert({{key.cbuf, key.offset}, key.value}); | 129 | keys.insert({{entry.cbuf, entry.offset}, entry.value}); |
| 115 | } | 130 | } |
| 116 | for (const auto& key : flat_bound_samplers) { | 131 | for (const auto& entry : flat_bound_samplers) { |
| 117 | bound_samplers.emplace(key.offset, key.sampler); | 132 | bound_samplers.emplace(entry.offset, entry.sampler); |
| 118 | } | 133 | } |
| 119 | for (const auto& key : flat_bindless_samplers) { | 134 | for (const auto& entry : flat_separate_samplers) { |
| 120 | bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | 135 | SeparateSamplerKey key; |
| 136 | key.buffers = {entry.cbuf1, entry.cbuf2}; | ||
| 137 | key.offsets = {entry.offset1, entry.offset2}; | ||
| 138 | separate_samplers.emplace(key, entry.sampler); | ||
| 139 | } | ||
| 140 | for (const auto& entry : flat_bindless_samplers) { | ||
| 141 | bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler}); | ||
| 121 | } | 142 | } |
| 122 | 143 | ||
| 123 | return true; | 144 | return true; |
| @@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 142 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | 163 | file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || |
| 143 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || | 164 | file.WriteObject(static_cast<u32>(keys.size())) != 1 || |
| 144 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | 165 | file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || |
| 166 | file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 || | ||
| 145 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | 167 | file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { |
| 146 | return false; | 168 | return false; |
| 147 | } | 169 | } |
| @@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | |||
| 152 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); | 174 | flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); |
| 153 | } | 175 | } |
| 154 | 176 | ||
| 155 | std::vector<BoundSamplerKey> flat_bound_samplers; | 177 | std::vector<BoundSamplerEntry> flat_bound_samplers; |
| 156 | flat_bound_samplers.reserve(bound_samplers.size()); | 178 | flat_bound_samplers.reserve(bound_samplers.size()); |
| 157 | for (const auto& [address, sampler] : bound_samplers) { | 179 | for (const auto& [address, sampler] : bound_samplers) { |
| 158 | flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); | 180 | flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler}); |
| 181 | } | ||
| 182 | |||
| 183 | std::vector<SeparateSamplerEntry> flat_separate_samplers; | ||
| 184 | flat_separate_samplers.reserve(separate_samplers.size()); | ||
| 185 | for (const auto& [key, sampler] : separate_samplers) { | ||
| 186 | SeparateSamplerEntry entry; | ||
| 187 | std::tie(entry.cbuf1, entry.cbuf2) = key.buffers; | ||
| 188 | std::tie(entry.offset1, entry.offset2) = key.offsets; | ||
| 189 | entry.sampler = sampler; | ||
| 190 | flat_separate_samplers.push_back(entry); | ||
| 159 | } | 191 | } |
| 160 | 192 | ||
| 161 | std::vector<BindlessSamplerKey> flat_bindless_samplers; | 193 | std::vector<BindlessSamplerEntry> flat_bindless_samplers; |
| 162 | flat_bindless_samplers.reserve(bindless_samplers.size()); | 194 | flat_bindless_samplers.reserve(bindless_samplers.size()); |
| 163 | for (const auto& [address, sampler] : bindless_samplers) { | 195 | for (const auto& [address, sampler] : bindless_samplers) { |
| 164 | flat_bindless_samplers.push_back( | 196 | flat_bindless_samplers.push_back( |
| 165 | BindlessSamplerKey{address.first, address.second, sampler}); | 197 | BindlessSamplerEntry{address.first, address.second, sampler}); |
| 166 | } | 198 | } |
| 167 | 199 | ||
| 168 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && | 200 | return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && |
| 169 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == | 201 | file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == |
| 170 | flat_bound_samplers.size() && | 202 | flat_bound_samplers.size() && |
| 203 | file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) == | ||
| 204 | flat_separate_samplers.size() && | ||
| 171 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == | 205 | file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == |
| 172 | flat_bindless_samplers.size(); | 206 | flat_bindless_samplers.size(); |
| 173 | } | 207 | } |
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index d5be52e40..a79cef0e9 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h | |||
| @@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry { | |||
| 57 | VideoCommon::Shader::ComputeInfo compute_info; | 57 | VideoCommon::Shader::ComputeInfo compute_info; |
| 58 | VideoCommon::Shader::KeyMap keys; | 58 | VideoCommon::Shader::KeyMap keys; |
| 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; | 59 | VideoCommon::Shader::BoundSamplerMap bound_samplers; |
| 60 | VideoCommon::Shader::SeparateSamplerMap separate_samplers; | ||
| 60 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | 61 | VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |
| 61 | }; | 62 | }; |
| 62 | 63 | ||
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index a54583e7d..65cb3c8ad 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -8,7 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 10 | #include "video_core/buffer_cache/buffer_cache.h" |
| 11 | #include "video_core/rasterizer_cache.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 11 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 13 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 12 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 14 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index b8ccf164f..ea66e621e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "video_core/renderer_vulkan/wrapper.h" | 27 | #include "video_core/renderer_vulkan/wrapper.h" |
| 28 | #include "video_core/shader/compiler_settings.h" | 28 | #include "video_core/shader/compiler_settings.h" |
| 29 | #include "video_core/shader/memory_util.h" | 29 | #include "video_core/shader/memory_util.h" |
| 30 | #include "video_core/shader_cache.h" | ||
| 30 | 31 | ||
| 31 | namespace Vulkan { | 32 | namespace Vulkan { |
| 32 | 33 | ||
| @@ -132,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con | |||
| 132 | return std::memcmp(&rhs, this, sizeof *this) == 0; | 133 | return std::memcmp(&rhs, this, sizeof *this) == 0; |
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, | 136 | Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, |
| 136 | GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, | 137 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset) |
| 137 | u32 main_offset) | 138 | : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, |
| 138 | : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, | ||
| 139 | registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, | 139 | registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, |
| 140 | compiler_settings, registry}, | 140 | compiler_settings, registry}, |
| 141 | entries{GenerateShaderEntries(shader_ir)} {} | 141 | entries{GenerateShaderEntries(shader_ir)} {} |
| 142 | 142 | ||
| 143 | CachedShader::~CachedShader() = default; | 143 | Shader::~Shader() = default; |
| 144 | 144 | ||
| 145 | Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine( | 145 | Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, |
| 146 | Core::System& system, Tegra::Engines::ShaderType stage) { | 146 | Tegra::Engines::ShaderType stage) { |
| 147 | if (stage == Tegra::Engines::ShaderType::Compute) { | 147 | if (stage == ShaderType::Compute) { |
| 148 | return system.GPU().KeplerCompute(); | 148 | return system.GPU().KeplerCompute(); |
| 149 | } else { | 149 | } else { |
| 150 | return system.GPU().Maxwell3D(); | 150 | return system.GPU().Maxwell3D(); |
| @@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri | |||
| 156 | VKDescriptorPool& descriptor_pool, | 156 | VKDescriptorPool& descriptor_pool, |
| 157 | VKUpdateDescriptorQueue& update_descriptor_queue, | 157 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 158 | VKRenderPassCache& renderpass_cache) | 158 | VKRenderPassCache& renderpass_cache) |
| 159 | : RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler}, | 159 | : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, |
| 160 | descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue}, | 160 | scheduler{scheduler}, descriptor_pool{descriptor_pool}, |
| 161 | renderpass_cache{renderpass_cache} {} | 161 | update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} |
| 162 | 162 | ||
| 163 | VKPipelineCache::~VKPipelineCache() = default; | 163 | VKPipelineCache::~VKPipelineCache() = default; |
| 164 | 164 | ||
| 165 | std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | 165 | std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { |
| 166 | const auto& gpu = system.GPU().Maxwell3D(); | 166 | const auto& gpu = system.GPU().Maxwell3D(); |
| 167 | 167 | ||
| 168 | std::array<Shader, Maxwell::MaxShaderProgram> shaders; | 168 | std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; |
| 169 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | 169 | for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { |
| 170 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | 170 | const auto program{static_cast<Maxwell::ShaderProgram>(index)}; |
| 171 | 171 | ||
| @@ -178,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 178 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 178 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; |
| 179 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 179 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 180 | ASSERT(cpu_addr); | 180 | ASSERT(cpu_addr); |
| 181 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; | 181 | |
| 182 | if (!shader) { | 182 | Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); |
| 183 | if (!result) { | ||
| 183 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 184 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; |
| 184 | 185 | ||
| 185 | // No shader found - create a new one | 186 | // No shader found - create a new one |
| 186 | constexpr u32 stage_offset = STAGE_MAIN_OFFSET; | 187 | constexpr u32 stage_offset = STAGE_MAIN_OFFSET; |
| 187 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); | 188 | const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); |
| 188 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); | 189 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); |
| 190 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||
| 191 | |||
| 192 | auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), | ||
| 193 | stage_offset); | ||
| 194 | result = shader.get(); | ||
| 189 | 195 | ||
| 190 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | ||
| 191 | std::move(code), stage_offset); | ||
| 192 | if (cpu_addr) { | 196 | if (cpu_addr) { |
| 193 | Register(shader); | 197 | Register(std::move(shader), *cpu_addr, size_in_bytes); |
| 194 | } else { | 198 | } else { |
| 195 | null_shader = shader; | 199 | null_shader = std::move(shader); |
| 196 | } | 200 | } |
| 197 | } | 201 | } |
| 198 | shaders[index] = std::move(shader); | 202 | shaders[index] = result; |
| 199 | } | 203 | } |
| 200 | return last_shaders = shaders; | 204 | return last_shaders = shaders; |
| 201 | } | 205 | } |
| @@ -236,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 236 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 240 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 237 | ASSERT(cpu_addr); | 241 | ASSERT(cpu_addr); |
| 238 | 242 | ||
| 239 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; | 243 | Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); |
| 240 | if (!shader) { | 244 | if (!shader) { |
| 241 | // No shader found - create a new one | 245 | // No shader found - create a new one |
| 242 | const auto host_ptr = memory_manager.GetPointer(program_addr); | 246 | const auto host_ptr = memory_manager.GetPointer(program_addr); |
| 243 | 247 | ||
| 244 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); | 248 | ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); |
| 245 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | 249 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 246 | program_addr, *cpu_addr, std::move(code), | 250 | |
| 247 | KERNEL_MAIN_OFFSET); | 251 | auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, |
| 252 | std::move(code), KERNEL_MAIN_OFFSET); | ||
| 253 | shader = shader_info.get(); | ||
| 254 | |||
| 248 | if (cpu_addr) { | 255 | if (cpu_addr) { |
| 249 | Register(shader); | 256 | Register(std::move(shader_info), *cpu_addr, size_in_bytes); |
| 250 | } else { | 257 | } else { |
| 251 | null_kernel = shader; | 258 | null_kernel = std::move(shader_info); |
| 252 | } | 259 | } |
| 253 | } | 260 | } |
| 254 | 261 | ||
| @@ -264,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 264 | return *entry; | 271 | return *entry; |
| 265 | } | 272 | } |
| 266 | 273 | ||
| 267 | void VKPipelineCache::Unregister(const Shader& shader) { | 274 | void VKPipelineCache::OnShaderRemoval(Shader* shader) { |
| 268 | bool finished = false; | 275 | bool finished = false; |
| 269 | const auto Finish = [&] { | 276 | const auto Finish = [&] { |
| 270 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and | 277 | // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and |
| @@ -296,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) { | |||
| 296 | Finish(); | 303 | Finish(); |
| 297 | it = compute_cache.erase(it); | 304 | it = compute_cache.erase(it); |
| 298 | } | 305 | } |
| 299 | |||
| 300 | RasterizerCache::Unregister(shader); | ||
| 301 | } | 306 | } |
| 302 | 307 | ||
| 303 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> | 308 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> |
| @@ -332,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 332 | } | 337 | } |
| 333 | 338 | ||
| 334 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | 339 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); |
| 335 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | 340 | const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
| 336 | const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; | 341 | Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); |
| 337 | ASSERT(shader); | ||
| 338 | 342 | ||
| 339 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 343 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
| 340 | const auto program_type = GetShaderType(program_enum); | 344 | const ShaderType program_type = GetShaderType(program_enum); |
| 341 | const auto& entries = shader->GetEntries(); | 345 | const auto& entries = shader->GetEntries(); |
| 342 | program[stage] = { | 346 | program[stage] = { |
| 343 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), | 347 | Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization), |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0b5796fef..0a36e5112 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -17,7 +17,6 @@ | |||
| 17 | #include "common/common_types.h" | 17 | #include "common/common_types.h" |
| 18 | #include "video_core/engines/const_buffer_engine_interface.h" | 18 | #include "video_core/engines/const_buffer_engine_interface.h" |
| 19 | #include "video_core/engines/maxwell_3d.h" | 19 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/rasterizer_cache.h" | ||
| 21 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 20 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 22 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 21 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| 23 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | 22 | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" |
| @@ -26,6 +25,7 @@ | |||
| 26 | #include "video_core/shader/memory_util.h" | 25 | #include "video_core/shader/memory_util.h" |
| 27 | #include "video_core/shader/registry.h" | 26 | #include "video_core/shader/registry.h" |
| 28 | #include "video_core/shader/shader_ir.h" | 27 | #include "video_core/shader/shader_ir.h" |
| 28 | #include "video_core/shader_cache.h" | ||
| 29 | 29 | ||
| 30 | namespace Core { | 30 | namespace Core { |
| 31 | class System; | 31 | class System; |
| @@ -41,8 +41,6 @@ class VKFence; | |||
| 41 | class VKScheduler; | 41 | class VKScheduler; |
| 42 | class VKUpdateDescriptorQueue; | 42 | class VKUpdateDescriptorQueue; |
| 43 | 43 | ||
| 44 | class CachedShader; | ||
| 45 | using Shader = std::shared_ptr<CachedShader>; | ||
| 46 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 44 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 47 | 45 | ||
| 48 | struct GraphicsPipelineCacheKey { | 46 | struct GraphicsPipelineCacheKey { |
| @@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> { | |||
| 102 | 100 | ||
| 103 | namespace Vulkan { | 101 | namespace Vulkan { |
| 104 | 102 | ||
| 105 | class CachedShader final : public RasterizerCacheObject { | 103 | class Shader { |
| 106 | public: | 104 | public: |
| 107 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, | 105 | explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, |
| 108 | VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, | 106 | VideoCommon::Shader::ProgramCode program_code, u32 main_offset); |
| 109 | u32 main_offset); | 107 | ~Shader(); |
| 110 | ~CachedShader(); | ||
| 111 | 108 | ||
| 112 | GPUVAddr GetGpuAddr() const { | 109 | GPUVAddr GetGpuAddr() const { |
| 113 | return gpu_addr; | 110 | return gpu_addr; |
| 114 | } | 111 | } |
| 115 | 112 | ||
| 116 | std::size_t GetSizeInBytes() const override { | ||
| 117 | return program_code.size() * sizeof(u64); | ||
| 118 | } | ||
| 119 | |||
| 120 | VideoCommon::Shader::ShaderIR& GetIR() { | 113 | VideoCommon::Shader::ShaderIR& GetIR() { |
| 121 | return shader_ir; | 114 | return shader_ir; |
| 122 | } | 115 | } |
| @@ -144,25 +137,23 @@ private: | |||
| 144 | ShaderEntries entries; | 137 | ShaderEntries entries; |
| 145 | }; | 138 | }; |
| 146 | 139 | ||
| 147 | class VKPipelineCache final : public RasterizerCache<Shader> { | 140 | class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { |
| 148 | public: | 141 | public: |
| 149 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, | 142 | explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, |
| 150 | const VKDevice& device, VKScheduler& scheduler, | 143 | const VKDevice& device, VKScheduler& scheduler, |
| 151 | VKDescriptorPool& descriptor_pool, | 144 | VKDescriptorPool& descriptor_pool, |
| 152 | VKUpdateDescriptorQueue& update_descriptor_queue, | 145 | VKUpdateDescriptorQueue& update_descriptor_queue, |
| 153 | VKRenderPassCache& renderpass_cache); | 146 | VKRenderPassCache& renderpass_cache); |
| 154 | ~VKPipelineCache(); | 147 | ~VKPipelineCache() override; |
| 155 | 148 | ||
| 156 | std::array<Shader, Maxwell::MaxShaderProgram> GetShaders(); | 149 | std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); |
| 157 | 150 | ||
| 158 | VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); | 151 | VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); |
| 159 | 152 | ||
| 160 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); | 153 | VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); |
| 161 | 154 | ||
| 162 | protected: | 155 | protected: |
| 163 | void Unregister(const Shader& shader) override; | 156 | void OnShaderRemoval(Shader* shader) final; |
| 164 | |||
| 165 | void FlushObjectInner(const Shader& object) override {} | ||
| 166 | 157 | ||
| 167 | private: | 158 | private: |
| 168 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( | 159 | std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( |
| @@ -175,10 +166,10 @@ private: | |||
| 175 | VKUpdateDescriptorQueue& update_descriptor_queue; | 166 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 176 | VKRenderPassCache& renderpass_cache; | 167 | VKRenderPassCache& renderpass_cache; |
| 177 | 168 | ||
| 178 | Shader null_shader{}; | 169 | std::unique_ptr<Shader> null_shader; |
| 179 | Shader null_kernel{}; | 170 | std::unique_ptr<Shader> null_kernel; |
| 180 | 171 | ||
| 181 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 172 | std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{}; |
| 182 | 173 | ||
| 183 | GraphicsPipelineCacheKey last_graphics_key; | 174 | GraphicsPipelineCacheKey last_graphics_key; |
| 184 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; | 175 | VKGraphicsPipeline* last_graphics_pipeline = nullptr; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 19b8f9da3..184b2238a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 38 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 39 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 39 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 40 | #include "video_core/renderer_vulkan/wrapper.h" | 40 | #include "video_core/renderer_vulkan/wrapper.h" |
| 41 | #include "video_core/shader_cache.h" | ||
| 41 | 42 | ||
| 42 | namespace Vulkan { | 43 | namespace Vulkan { |
| 43 | 44 | ||
| @@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) { | |||
| 98 | } | 99 | } |
| 99 | 100 | ||
| 100 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( | 101 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( |
| 101 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | 102 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 102 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; | 103 | std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; |
| 103 | for (std::size_t i = 0; i < std::size(addresses); ++i) { | 104 | for (std::size_t i = 0; i < std::size(addresses); ++i) { |
| 104 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; | 105 | addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; |
| @@ -117,6 +118,17 @@ template <typename Engine, typename Entry> | |||
| 117 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, | 118 | Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, |
| 118 | std::size_t stage, std::size_t index = 0) { | 119 | std::size_t stage, std::size_t index = 0) { |
| 119 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); | 120 | const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); |
| 121 | if constexpr (std::is_same_v<Entry, SamplerEntry>) { | ||
| 122 | if (entry.is_separated) { | ||
| 123 | const u32 buffer_1 = entry.buffer; | ||
| 124 | const u32 buffer_2 = entry.secondary_buffer; | ||
| 125 | const u32 offset_1 = entry.offset; | ||
| 126 | const u32 offset_2 = entry.secondary_offset; | ||
| 127 | const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1); | ||
| 128 | const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2); | ||
| 129 | return engine.GetTextureInfo(handle_1 | handle_2); | ||
| 130 | } | ||
| 131 | } | ||
| 120 | if (entry.is_bindless) { | 132 | if (entry.is_bindless) { |
| 121 | const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); | 133 | const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); |
| 122 | return engine.GetTextureInfo(tex_handle); | 134 | return engine.GetTextureInfo(tex_handle); |
| @@ -776,12 +788,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt | |||
| 776 | } | 788 | } |
| 777 | 789 | ||
| 778 | void RasterizerVulkan::SetupShaderDescriptors( | 790 | void RasterizerVulkan::SetupShaderDescriptors( |
| 779 | const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { | 791 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { |
| 780 | texture_cache.GuardSamplers(true); | 792 | texture_cache.GuardSamplers(true); |
| 781 | 793 | ||
| 782 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 794 | for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 783 | // Skip VertexA stage | 795 | // Skip VertexA stage |
| 784 | const auto& shader = shaders[stage + 1]; | 796 | Shader* const shader = shaders[stage + 1]; |
| 785 | if (!shader) { | 797 | if (!shader) { |
| 786 | continue; | 798 | continue; |
| 787 | } | 799 | } |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 04be37a5e..c8c187606 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -168,7 +168,7 @@ private: | |||
| 168 | bool is_indexed, bool is_instanced); | 168 | bool is_indexed, bool is_instanced); |
| 169 | 169 | ||
| 170 | /// Setup descriptors in the graphics pipeline. | 170 | /// Setup descriptors in the graphics pipeline. |
| 171 | void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders); | 171 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); |
| 172 | 172 | ||
| 173 | void SetupImageTransitions(Texceptions texceptions, | 173 | void SetupImageTransitions(Texceptions texceptions, |
| 174 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, | 174 | const std::array<View, Maxwell::NumRenderTargets>& color_attachments, |
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 8f0bb996e..29ebf65ba 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp | |||
| @@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||
| 357 | return pc; | 357 | return pc; |
| 358 | } | 358 | } |
| 359 | 359 | ||
| 360 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, | 360 | ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo( |
| 361 | std::optional<u32> buffer) { | 361 | SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) { |
| 362 | if (info.IsComplete()) { | 362 | if (info.IsComplete()) { |
| 363 | return info; | 363 | return info; |
| 364 | } | 364 | } |
| 365 | const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset) | ||
| 366 | : registry.ObtainBoundSampler(offset); | ||
| 367 | if (!sampler) { | 365 | if (!sampler) { |
| 368 | LOG_WARNING(HW_GPU, "Unknown sampler info"); | 366 | LOG_WARNING(HW_GPU, "Unknown sampler info"); |
| 369 | info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); | 367 | info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); |
| @@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, | |||
| 381 | 379 | ||
| 382 | std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, | 380 | std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, |
| 383 | SamplerInfo sampler_info) { | 381 | SamplerInfo sampler_info) { |
| 384 | const auto offset = static_cast<u32>(sampler.index.Value()); | 382 | const u32 offset = static_cast<u32>(sampler.index.Value()); |
| 385 | const auto info = GetSamplerInfo(sampler_info, offset); | 383 | const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset)); |
| 386 | 384 | ||
| 387 | // If this sampler has already been used, return the existing mapping. | 385 | // If this sampler has already been used, return the existing mapping. |
| 388 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | 386 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), |
| @@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 404 | const Node sampler_register = GetRegister(reg); | 402 | const Node sampler_register = GetRegister(reg); |
| 405 | const auto [base_node, tracked_sampler_info] = | 403 | const auto [base_node, tracked_sampler_info] = |
| 406 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); | 404 | TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); |
| 407 | ASSERT(base_node != nullptr); | 405 | if (!base_node) { |
| 408 | if (base_node == nullptr) { | 406 | UNREACHABLE(); |
| 409 | return std::nullopt; | 407 | return std::nullopt; |
| 410 | } | 408 | } |
| 411 | 409 | ||
| 412 | if (const auto bindless_sampler_info = | 410 | if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { |
| 413 | std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { | 411 | const u32 buffer = sampler_info->index; |
| 414 | const u32 buffer = bindless_sampler_info->GetIndex(); | 412 | const u32 offset = sampler_info->offset; |
| 415 | const u32 offset = bindless_sampler_info->GetOffset(); | 413 | info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset)); |
| 416 | info = GetSamplerInfo(info, offset, buffer); | ||
| 417 | 414 | ||
| 418 | // If this sampler has already been used, return the existing mapping. | 415 | // If this sampler has already been used, return the existing mapping. |
| 419 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), | 416 | const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), |
| 420 | [buffer = buffer, offset = offset](const Sampler& entry) { | 417 | [buffer, offset](const Sampler& entry) { |
| 421 | return entry.buffer == buffer && entry.offset == offset; | 418 | return entry.buffer == buffer && entry.offset == offset; |
| 422 | }); | 419 | }); |
| 423 | if (it != used_samplers.end()) { | 420 | if (it != used_samplers.end()) { |
| @@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, | |||
| 431 | return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, | 428 | return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, |
| 432 | *info.is_shadow, *info.is_buffer, false); | 429 | *info.is_shadow, *info.is_buffer, false); |
| 433 | } | 430 | } |
| 434 | if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { | 431 | if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) { |
| 435 | const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; | 432 | const std::pair indices = sampler_info->indices; |
| 436 | index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); | 433 | const std::pair offsets = sampler_info->offsets; |
| 437 | info = GetSamplerInfo(info, base_offset); | 434 | info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets)); |
| 435 | |||
| 436 | // Try to use an already created sampler if it exists | ||
| 437 | const auto it = std::find_if( | ||
| 438 | used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) { | ||
| 439 | return offsets == std::pair{entry.offset, entry.secondary_offset} && | ||
| 440 | indices == std::pair{entry.buffer, entry.secondary_buffer}; | ||
| 441 | }); | ||
| 442 | if (it != used_samplers.end()) { | ||
| 443 | ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array && | ||
| 444 | it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer); | ||
| 445 | return *it; | ||
| 446 | } | ||
| 447 | |||
| 448 | // Otherwise create a new mapping for this sampler | ||
| 449 | const u32 next_index = static_cast<u32>(used_samplers.size()); | ||
| 450 | return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array, | ||
| 451 | *info.is_shadow, *info.is_buffer); | ||
| 452 | } | ||
| 453 | if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { | ||
| 454 | const u32 base_offset = sampler_info->base_offset / 4; | ||
| 455 | index_var = GetCustomVariable(sampler_info->bindless_var); | ||
| 456 | info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset)); | ||
| 438 | 457 | ||
| 439 | // If this sampler has already been used, return the existing mapping. | 458 | // If this sampler has already been used, return the existing mapping. |
| 440 | const auto it = std::find_if( | 459 | const auto it = std::find_if( |
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index c5e5165ff..8f230d57a 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h | |||
| @@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>; | |||
| 275 | using Node4 = std::array<Node, 4>; | 275 | using Node4 = std::array<Node, 4>; |
| 276 | using NodeBlock = std::vector<Node>; | 276 | using NodeBlock = std::vector<Node>; |
| 277 | 277 | ||
| 278 | class BindlessSamplerNode; | 278 | struct ArraySamplerNode; |
| 279 | class ArraySamplerNode; | 279 | struct BindlessSamplerNode; |
| 280 | struct SeparateSamplerNode; | ||
| 280 | 281 | ||
| 281 | using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; | 282 | using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>; |
| 282 | using TrackSampler = std::shared_ptr<TrackSamplerData>; | 283 | using TrackSampler = std::shared_ptr<TrackSamplerData>; |
| 283 | 284 | ||
| 284 | struct Sampler { | 285 | struct Sampler { |
| @@ -288,63 +289,51 @@ struct Sampler { | |||
| 288 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, | 289 | : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, |
| 289 | is_buffer{is_buffer}, is_indexed{is_indexed} {} | 290 | is_buffer{is_buffer}, is_indexed{is_indexed} {} |
| 290 | 291 | ||
| 292 | /// Separate sampler constructor | ||
| 293 | constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers, | ||
| 294 | Tegra::Shader::TextureType type, bool is_array, bool is_shadow, | ||
| 295 | bool is_buffer) | ||
| 296 | : index{index}, offset{offsets.first}, secondary_offset{offsets.second}, | ||
| 297 | buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array}, | ||
| 298 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {} | ||
| 299 | |||
| 291 | /// Bindless samplers constructor | 300 | /// Bindless samplers constructor |
| 292 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, | 301 | constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, |
| 293 | bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) | 302 | bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) |
| 294 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, | 303 | : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array}, |
| 295 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} | 304 | is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {} |
| 296 | 305 | ||
| 297 | u32 index = 0; ///< Emulated index given for the this sampler. | 306 | u32 index = 0; ///< Emulated index given for the this sampler. |
| 298 | u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. | 307 | u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. |
| 299 | u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). | 308 | u32 secondary_offset = 0; ///< Secondary offset in the const buffer. |
| 300 | u32 size = 1; ///< Size of the sampler. | 309 | u32 buffer = 0; ///< Buffer where the bindless sampler is read. |
| 310 | u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read. | ||
| 311 | u32 size = 1; ///< Size of the sampler. | ||
| 301 | 312 | ||
| 302 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) | 313 | Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) |
| 303 | bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. | 314 | bool is_array = false; ///< Whether the texture is being sampled as an array texture or not. |
| 304 | bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. | 315 | bool is_shadow = false; ///< Whether the texture is being sampled as a depth texture or not. |
| 305 | bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. | 316 | bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. |
| 306 | bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. | 317 | bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. |
| 307 | bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. | 318 | bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. |
| 319 | bool is_separated = false; ///< Whether the image and sampler is separated or not. | ||
| 308 | }; | 320 | }; |
| 309 | 321 | ||
| 310 | /// Represents a tracked bindless sampler into a direct const buffer | 322 | /// Represents a tracked bindless sampler into a direct const buffer |
| 311 | class ArraySamplerNode final { | 323 | struct ArraySamplerNode { |
| 312 | public: | ||
| 313 | explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var) | ||
| 314 | : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {} | ||
| 315 | |||
| 316 | constexpr u32 GetIndex() const { | ||
| 317 | return index; | ||
| 318 | } | ||
| 319 | |||
| 320 | constexpr u32 GetBaseOffset() const { | ||
| 321 | return base_offset; | ||
| 322 | } | ||
| 323 | |||
| 324 | constexpr u32 GetIndexVar() const { | ||
| 325 | return bindless_var; | ||
| 326 | } | ||
| 327 | |||
| 328 | private: | ||
| 329 | u32 index; | 324 | u32 index; |
| 330 | u32 base_offset; | 325 | u32 base_offset; |
| 331 | u32 bindless_var; | 326 | u32 bindless_var; |
| 332 | }; | 327 | }; |
| 333 | 328 | ||
| 334 | /// Represents a tracked bindless sampler into a direct const buffer | 329 | /// Represents a tracked separate sampler image pair that was folded statically |
| 335 | class BindlessSamplerNode final { | 330 | struct SeparateSamplerNode { |
| 336 | public: | 331 | std::pair<u32, u32> indices; |
| 337 | explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {} | 332 | std::pair<u32, u32> offsets; |
| 338 | 333 | }; | |
| 339 | constexpr u32 GetIndex() const { | ||
| 340 | return index; | ||
| 341 | } | ||
| 342 | |||
| 343 | constexpr u32 GetOffset() const { | ||
| 344 | return offset; | ||
| 345 | } | ||
| 346 | 334 | ||
| 347 | private: | 335 | /// Represents a tracked bindless sampler into a direct const buffer |
| 336 | struct BindlessSamplerNode { | ||
| 348 | u32 index; | 337 | u32 index; |
| 349 | u32 offset; | 338 | u32 offset; |
| 350 | }; | 339 | }; |
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h index 11231bbea..1e0886185 100644 --- a/src/video_core/shader/node_helper.h +++ b/src/video_core/shader/node_helper.h | |||
| @@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) { | |||
| 48 | template <typename T, typename... Args> | 48 | template <typename T, typename... Args> |
| 49 | TrackSampler MakeTrackSampler(Args&&... args) { | 49 | TrackSampler MakeTrackSampler(Args&&... args) { |
| 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); | 50 | static_assert(std::is_convertible_v<T, TrackSamplerData>); |
| 51 | return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); | 51 | return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...}); |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | template <typename... Args> | 54 | template <typename... Args> |
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index af70b3f35..cdf274e54 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp | |||
| @@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) { | |||
| 93 | return value; | 93 | return value; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler( | ||
| 97 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) { | ||
| 98 | SeparateSamplerKey key; | ||
| 99 | key.buffers = buffers; | ||
| 100 | key.offsets = offsets; | ||
| 101 | const auto iter = separate_samplers.find(key); | ||
| 102 | if (iter != separate_samplers.end()) { | ||
| 103 | return iter->second; | ||
| 104 | } | ||
| 105 | if (!engine) { | ||
| 106 | return std::nullopt; | ||
| 107 | } | ||
| 108 | |||
| 109 | const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first); | ||
| 110 | const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second); | ||
| 111 | const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2); | ||
| 112 | separate_samplers.emplace(key, value); | ||
| 113 | return value; | ||
| 114 | } | ||
| 115 | |||
| 96 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, | 116 | std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, |
| 97 | u32 offset) { | 117 | u32 offset) { |
| 98 | const std::pair key = {buffer, offset}; | 118 | const std::pair key = {buffer, offset}; |
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 0c80d35fd..231206765 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h | |||
| @@ -19,8 +19,39 @@ | |||
| 19 | 19 | ||
| 20 | namespace VideoCommon::Shader { | 20 | namespace VideoCommon::Shader { |
| 21 | 21 | ||
| 22 | struct SeparateSamplerKey { | ||
| 23 | std::pair<u32, u32> buffers; | ||
| 24 | std::pair<u32, u32> offsets; | ||
| 25 | }; | ||
| 26 | |||
| 27 | } // namespace VideoCommon::Shader | ||
| 28 | |||
| 29 | namespace std { | ||
| 30 | |||
| 31 | template <> | ||
| 32 | struct hash<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 33 | std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept { | ||
| 34 | return std::hash<u32>{}(key.buffers.first ^ key.buffers.second ^ key.offsets.first ^ | ||
| 35 | key.offsets.second); | ||
| 36 | } | ||
| 37 | }; | ||
| 38 | |||
| 39 | template <> | ||
| 40 | struct equal_to<VideoCommon::Shader::SeparateSamplerKey> { | ||
| 41 | bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs, | ||
| 42 | const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept { | ||
| 43 | return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets; | ||
| 44 | } | ||
| 45 | }; | ||
| 46 | |||
| 47 | } // namespace std | ||
| 48 | |||
| 49 | namespace VideoCommon::Shader { | ||
| 50 | |||
| 22 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | 51 | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; |
| 23 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | 52 | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; |
| 53 | using SeparateSamplerMap = | ||
| 54 | std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>; | ||
| 24 | using BindlessSamplerMap = | 55 | using BindlessSamplerMap = |
| 25 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | 56 | std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; |
| 26 | 57 | ||
| @@ -73,6 +104,9 @@ public: | |||
| 73 | 104 | ||
| 74 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | 105 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); |
| 75 | 106 | ||
| 107 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler( | ||
| 108 | std::pair<u32, u32> buffers, std::pair<u32, u32> offsets); | ||
| 109 | |||
| 76 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | 110 | std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |
| 77 | 111 | ||
| 78 | /// Inserts a key. | 112 | /// Inserts a key. |
| @@ -128,6 +162,7 @@ private: | |||
| 128 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | 162 | Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; |
| 129 | KeyMap keys; | 163 | KeyMap keys; |
| 130 | BoundSamplerMap bound_samplers; | 164 | BoundSamplerMap bound_samplers; |
| 165 | SeparateSamplerMap separate_samplers; | ||
| 131 | BindlessSamplerMap bindless_samplers; | 166 | BindlessSamplerMap bindless_samplers; |
| 132 | u32 bound_buffer; | 167 | u32 bound_buffer; |
| 133 | GraphicsInfo graphics_info; | 168 | GraphicsInfo graphics_info; |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 15ae152f2..3a98b2104 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -330,8 +330,8 @@ private: | |||
| 330 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); | 330 | OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); |
| 331 | 331 | ||
| 332 | /// Queries the missing sampler info from the execution context. | 332 | /// Queries the missing sampler info from the execution context. |
| 333 | SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, | 333 | SamplerInfo GetSamplerInfo(SamplerInfo info, |
| 334 | std::optional<u32> buffer = std::nullopt); | 334 | std::optional<Tegra::Engines::SamplerDescriptor> sampler); |
| 335 | 335 | ||
| 336 | /// Accesses a texture sampler. | 336 | /// Accesses a texture sampler. |
| 337 | std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); | 337 | std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); |
| @@ -409,8 +409,14 @@ private: | |||
| 409 | 409 | ||
| 410 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; | 410 | std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 411 | 411 | ||
| 412 | std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, | 412 | std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, |
| 413 | s64 cursor); | 413 | s64 cursor); |
| 414 | |||
| 415 | std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf, | ||
| 416 | const OperationNode& operation, | ||
| 417 | Node gpr, Node base_offset, | ||
| 418 | Node tracked, const NodeBlock& code, | ||
| 419 | s64 cursor); | ||
| 414 | 420 | ||
| 415 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; | 421 | std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; |
| 416 | 422 | ||
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index eb97bfd41..d5ed81442 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | namespace VideoCommon::Shader { | 14 | namespace VideoCommon::Shader { |
| 15 | 15 | ||
| 16 | namespace { | 16 | namespace { |
| 17 | |||
| 17 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | 18 | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, |
| 18 | OperationCode operation_code) { | 19 | OperationCode operation_code) { |
| 19 | for (; cursor >= 0; --cursor) { | 20 | for (; cursor >= 0; --cursor) { |
| @@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { | |||
| 63 | if (const auto operation = std::get_if<OperationNode>(&*node)) { | 64 | if (const auto operation = std::get_if<OperationNode>(&*node)) { |
| 64 | operation->SetAmendIndex(amend_index); | 65 | operation->SetAmendIndex(amend_index); |
| 65 | return true; | 66 | return true; |
| 66 | } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | 67 | } |
| 68 | if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { | ||
| 67 | conditional->SetAmendIndex(amend_index); | 69 | conditional->SetAmendIndex(amend_index); |
| 68 | return true; | 70 | return true; |
| 69 | } | 71 | } |
| @@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) { | |||
| 72 | 74 | ||
| 73 | } // Anonymous namespace | 75 | } // Anonymous namespace |
| 74 | 76 | ||
| 75 | std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, | 77 | std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, |
| 76 | s64 cursor) { | 78 | s64 cursor) { |
| 77 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 79 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
| 80 | const u32 cbuf_index = cbuf->GetIndex(); | ||
| 81 | |||
| 78 | // Constant buffer found, test if it's an immediate | 82 | // Constant buffer found, test if it's an immediate |
| 79 | const auto& offset = cbuf->GetOffset(); | 83 | const auto& offset = cbuf->GetOffset(); |
| 80 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | 84 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |
| 81 | auto track = | 85 | auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue()); |
| 82 | MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | ||
| 83 | return {tracked, track}; | 86 | return {tracked, track}; |
| 84 | } | 87 | } |
| 85 | if (const auto operation = std::get_if<OperationNode>(&*offset)) { | 88 | if (const auto operation = std::get_if<OperationNode>(&*offset)) { |
| 86 | const u32 bound_buffer = registry.GetBoundBuffer(); | 89 | const u32 bound_buffer = registry.GetBoundBuffer(); |
| 87 | if (bound_buffer != cbuf->GetIndex()) { | 90 | if (bound_buffer != cbuf_index) { |
| 88 | return {}; | 91 | return {}; |
| 89 | } | 92 | } |
| 90 | const auto pair = DecoupleIndirectRead(*operation); | 93 | if (const std::optional pair = DecoupleIndirectRead(*operation)) { |
| 91 | if (!pair) { | 94 | auto [gpr, base_offset] = *pair; |
| 92 | return {}; | 95 | return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked, |
| 96 | code, cursor); | ||
| 93 | } | 97 | } |
| 94 | auto [gpr, base_offset] = *pair; | ||
| 95 | const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); | ||
| 96 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 97 | const u32 bindless_cv = NewCustomVariable(); | ||
| 98 | Node op = | ||
| 99 | Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); | ||
| 100 | |||
| 101 | const Node cv_node = GetCustomVariable(bindless_cv); | ||
| 102 | Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); | ||
| 103 | const std::size_t amend_index = DeclareAmend(std::move(amend_op)); | ||
| 104 | AmendNodeCv(amend_index, code[cursor]); | ||
| 105 | // TODO Implement Bindless Index custom variable | ||
| 106 | auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), | ||
| 107 | offset_inm->GetValue(), bindless_cv); | ||
| 108 | return {tracked, track}; | ||
| 109 | } | 98 | } |
| 110 | return {}; | 99 | return {}; |
| 111 | } | 100 | } |
| @@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 122 | return TrackBindlessSampler(source, code, new_cursor); | 111 | return TrackBindlessSampler(source, code, new_cursor); |
| 123 | } | 112 | } |
| 124 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { | 113 | if (const auto operation = std::get_if<OperationNode>(&*tracked)) { |
| 125 | for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { | 114 | const OperationNode& op = *operation; |
| 126 | if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor); | 115 | |
| 127 | std::get<0>(found)) { | 116 | const OperationCode opcode = operation->GetCode(); |
| 128 | // Cbuf found in operand. | 117 | if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) { |
| 118 | ASSERT(op.GetOperandsCount() == 2); | ||
| 119 | auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor); | ||
| 120 | auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor); | ||
| 121 | if (node_a && node_b) { | ||
| 122 | auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b}, | ||
| 123 | std::pair{offset_a, offset_b}); | ||
| 124 | return {tracked, std::move(track)}; | ||
| 125 | } | ||
| 126 | } | ||
| 127 | std::size_t i = op.GetOperandsCount(); | ||
| 128 | while (i--) { | ||
| 129 | if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) { | ||
| 130 | // Constant buffer found in operand. | ||
| 129 | return found; | 131 | return found; |
| 130 | } | 132 | } |
| 131 | } | 133 | } |
| @@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | |||
| 139 | return {}; | 141 | return {}; |
| 140 | } | 142 | } |
| 141 | 143 | ||
| 144 | std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead( | ||
| 145 | const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked, | ||
| 146 | const NodeBlock& code, s64 cursor) { | ||
| 147 | const auto offset_imm = std::get<ImmediateNode>(*base_offset); | ||
| 148 | const auto& gpu_driver = registry.AccessGuestDriverProfile(); | ||
| 149 | const u32 bindless_cv = NewCustomVariable(); | ||
| 150 | const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize(); | ||
| 151 | Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size)); | ||
| 152 | |||
| 153 | Node cv_node = GetCustomVariable(bindless_cv); | ||
| 154 | Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op)); | ||
| 155 | const std::size_t amend_index = DeclareAmend(std::move(amend_op)); | ||
| 156 | AmendNodeCv(amend_index, code[cursor]); | ||
| 157 | |||
| 158 | // TODO: Implement bindless index custom variable | ||
| 159 | auto track = | ||
| 160 | MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv); | ||
| 161 | return {tracked, track}; | ||
| 162 | } | ||
| 163 | |||
| 142 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, | 164 | std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, |
| 143 | s64 cursor) const { | 165 | s64 cursor) const { |
| 144 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { | 166 | if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { |
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h new file mode 100644 index 000000000..a23c23886 --- /dev/null +++ b/src/video_core/shader_cache.h | |||
| @@ -0,0 +1,228 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <memory> | ||
| 9 | #include <mutex> | ||
| 10 | #include <unordered_map> | ||
| 11 | #include <utility> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include "common/assert.h" | ||
| 15 | #include "common/common_types.h" | ||
| 16 | #include "video_core/rasterizer_interface.h" | ||
| 17 | |||
| 18 | namespace VideoCommon { | ||
| 19 | |||
| 20 | template <class T> | ||
| 21 | class ShaderCache { | ||
| 22 | static constexpr u64 PAGE_SHIFT = 14; | ||
| 23 | |||
| 24 | struct Entry { | ||
| 25 | VAddr addr_start; | ||
| 26 | VAddr addr_end; | ||
| 27 | T* data; | ||
| 28 | |||
| 29 | bool is_memory_marked = true; | ||
| 30 | |||
| 31 | constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { | ||
| 32 | return start < addr_end && addr_start < end; | ||
| 33 | } | ||
| 34 | }; | ||
| 35 | |||
| 36 | public: | ||
| 37 | virtual ~ShaderCache() = default; | ||
| 38 | |||
| 39 | /// @brief Removes shaders inside a given region | ||
| 40 | /// @note Checks for ranges | ||
| 41 | /// @param addr Start address of the invalidation | ||
| 42 | /// @param size Number of bytes of the invalidation | ||
| 43 | void InvalidateRegion(VAddr addr, std::size_t size) { | ||
| 44 | std::scoped_lock lock{invalidation_mutex}; | ||
| 45 | InvalidatePagesInRegion(addr, size); | ||
| 46 | RemovePendingShaders(); | ||
| 47 | } | ||
| 48 | |||
| 49 | /// @brief Unmarks a memory region as cached and marks it for removal | ||
| 50 | /// @param addr Start address of the CPU write operation | ||
| 51 | /// @param size Number of bytes of the CPU write operation | ||
| 52 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 53 | std::lock_guard lock{invalidation_mutex}; | ||
| 54 | InvalidatePagesInRegion(addr, size); | ||
| 55 | } | ||
| 56 | |||
| 57 | /// @brief Flushes delayed removal operations | ||
| 58 | void SyncGuestHost() { | ||
| 59 | std::scoped_lock lock{invalidation_mutex}; | ||
| 60 | RemovePendingShaders(); | ||
| 61 | } | ||
| 62 | |||
| 63 | /// @brief Tries to obtain a cached shader starting in a given address | ||
| 64 | /// @note Doesn't check for ranges, the given address has to be the start of the shader | ||
| 65 | /// @param addr Start address of the shader, this doesn't cache for region | ||
| 66 | /// @return Pointer to a valid shader, nullptr when nothing is found | ||
| 67 | T* TryGet(VAddr addr) const { | ||
| 68 | std::scoped_lock lock{lookup_mutex}; | ||
| 69 | |||
| 70 | const auto it = lookup_cache.find(addr); | ||
| 71 | if (it == lookup_cache.end()) { | ||
| 72 | return nullptr; | ||
| 73 | } | ||
| 74 | return it->second->data; | ||
| 75 | } | ||
| 76 | |||
| 77 | protected: | ||
| 78 | explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} | ||
| 79 | |||
| 80 | /// @brief Register in the cache a given entry | ||
| 81 | /// @param data Shader to store in the cache | ||
| 82 | /// @param addr Start address of the shader that will be registered | ||
| 83 | /// @param size Size in bytes of the shader | ||
| 84 | void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { | ||
| 85 | std::scoped_lock lock{invalidation_mutex, lookup_mutex}; | ||
| 86 | |||
| 87 | const VAddr addr_end = addr + size; | ||
| 88 | Entry* const entry = NewEntry(addr, addr_end, data.get()); | ||
| 89 | |||
| 90 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 91 | for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 92 | invalidation_cache[page].push_back(entry); | ||
| 93 | } | ||
| 94 | |||
| 95 | storage.push_back(std::move(data)); | ||
| 96 | |||
| 97 | rasterizer.UpdatePagesCachedCount(addr, size, 1); | ||
| 98 | } | ||
| 99 | |||
| 100 | /// @brief Called when a shader is going to be removed | ||
| 101 | /// @param shader Shader that will be removed | ||
| 102 | /// @pre invalidation_cache is locked | ||
| 103 | /// @pre lookup_mutex is locked | ||
| 104 | virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} | ||
| 105 | |||
| 106 | private: | ||
| 107 | /// @brief Invalidate pages in a given region | ||
| 108 | /// @pre invalidation_mutex is locked | ||
| 109 | void InvalidatePagesInRegion(VAddr addr, std::size_t size) { | ||
| 110 | const VAddr addr_end = addr + size; | ||
| 111 | const u64 page_end = addr_end >> PAGE_SHIFT; | ||
| 112 | for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) { | ||
| 113 | const auto it = invalidation_cache.find(page); | ||
| 114 | if (it == invalidation_cache.end()) { | ||
| 115 | continue; | ||
| 116 | } | ||
| 117 | |||
| 118 | std::vector<Entry*>& entries = it->second; | ||
| 119 | InvalidatePageEntries(entries, addr, addr_end); | ||
| 120 | |||
| 121 | // If there's nothing else in this page, remove it to avoid overpopulating the hash map. | ||
| 122 | if (entries.empty()) { | ||
| 123 | invalidation_cache.erase(it); | ||
| 124 | } | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | /// @brief Remove shaders marked for deletion | ||
| 129 | /// @pre invalidation_mutex is locked | ||
| 130 | void RemovePendingShaders() { | ||
| 131 | if (marked_for_removal.empty()) { | ||
| 132 | return; | ||
| 133 | } | ||
| 134 | std::scoped_lock lock{lookup_mutex}; | ||
| 135 | |||
| 136 | std::vector<T*> removed_shaders; | ||
| 137 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 138 | |||
| 139 | for (Entry* const entry : marked_for_removal) { | ||
| 140 | if (lookup_cache.erase(entry->addr_start) > 0) { | ||
| 141 | removed_shaders.push_back(entry->data); | ||
| 142 | } | ||
| 143 | } | ||
| 144 | marked_for_removal.clear(); | ||
| 145 | |||
| 146 | if (!removed_shaders.empty()) { | ||
| 147 | RemoveShadersFromStorage(std::move(removed_shaders)); | ||
| 148 | } | ||
| 149 | } | ||
| 150 | |||
| 151 | /// @brief Invalidates entries in a given range for the passed page | ||
| 152 | /// @param entries Vector of entries in the page, it will be modified on overlaps | ||
| 153 | /// @param addr Start address of the invalidation | ||
| 154 | /// @param addr_end Non-inclusive end address of the invalidation | ||
| 155 | /// @pre invalidation_mutex is locked | ||
| 156 | void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { | ||
| 157 | auto it = entries.begin(); | ||
| 158 | while (it != entries.end()) { | ||
| 159 | Entry* const entry = *it; | ||
| 160 | if (!entry->Overlaps(addr, addr_end)) { | ||
| 161 | ++it; | ||
| 162 | continue; | ||
| 163 | } | ||
| 164 | UnmarkMemory(entry); | ||
| 165 | marked_for_removal.push_back(entry); | ||
| 166 | |||
| 167 | it = entries.erase(it); | ||
| 168 | } | ||
| 169 | } | ||
| 170 | |||
| 171 | /// @brief Unmarks an entry from the rasterizer cache | ||
| 172 | /// @param entry Entry to unmark from memory | ||
| 173 | void UnmarkMemory(Entry* entry) { | ||
| 174 | if (!entry->is_memory_marked) { | ||
| 175 | return; | ||
| 176 | } | ||
| 177 | entry->is_memory_marked = false; | ||
| 178 | |||
| 179 | const VAddr addr = entry->addr_start; | ||
| 180 | const std::size_t size = entry->addr_end - addr; | ||
| 181 | rasterizer.UpdatePagesCachedCount(addr, size, -1); | ||
| 182 | } | ||
| 183 | |||
| 184 | /// @brief Removes a vector of shaders from a list | ||
| 185 | /// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates | ||
| 186 | /// @pre invalidation_mutex is locked | ||
| 187 | /// @pre lookup_mutex is locked | ||
| 188 | void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { | ||
| 189 | // Remove duplicates | ||
| 190 | std::sort(removed_shaders.begin(), removed_shaders.end()); | ||
| 191 | removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()), | ||
| 192 | removed_shaders.end()); | ||
| 193 | |||
| 194 | // Now that there are no duplicates, we can notify removals | ||
| 195 | for (T* const shader : removed_shaders) { | ||
| 196 | OnShaderRemoval(shader); | ||
| 197 | } | ||
| 198 | |||
| 199 | // Remove them from the cache | ||
| 200 | const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) { | ||
| 201 | return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != | ||
| 202 | removed_shaders.end(); | ||
| 203 | }; | ||
| 204 | storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end()); | ||
| 205 | } | ||
| 206 | |||
| 207 | /// @brief Creates a new entry in the lookup cache and returns its pointer | ||
| 208 | /// @pre lookup_mutex is locked | ||
| 209 | Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { | ||
| 210 | auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); | ||
| 211 | Entry* const entry_pointer = entry.get(); | ||
| 212 | |||
| 213 | lookup_cache.emplace(addr, std::move(entry)); | ||
| 214 | return entry_pointer; | ||
| 215 | } | ||
| 216 | |||
    /// Rasterizer interface used to update cached-page reference counts
    VideoCore::RasterizerInterface& rasterizer;

    /// Guards lookup_cache; mutable so it can be locked from const member functions
    mutable std::mutex lookup_mutex;
    /// Guards invalidation state; locked before lookup_mutex when both are needed
    std::mutex invalidation_mutex;

    /// Owns the entries, keyed by the start address of the cached shader
    std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
    /// Non-owning entry pointers bucketed by page number, used for invalidation
    std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
    /// Owns the shader objects themselves
    std::vector<std::unique_ptr<T>> storage;
    /// Entries queued for removal from lookup_cache and storage
    std::vector<Entry*> marked_for_removal;
| 226 | }; | ||
| 227 | |||
| 228 | } // namespace VideoCommon | ||