summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/audio_core/stream.cpp7
-rw-r--r--src/audio_core/stream.h6
-rw-r--r--src/common/common_sizes.h1
-rw-r--r--src/common/settings.cpp2
-rw-r--r--src/common/settings.h1
-rw-r--r--src/core/hle/service/audio/audout_u.cpp10
-rw-r--r--src/video_core/buffer_cache/buffer_base.h11
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h161
-rw-r--r--src/video_core/engines/maxwell_3d.h1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h9
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp6
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h11
-rw-r--r--src/video_core/surface.cpp7
-rw-r--r--src/video_core/surface.h2
-rw-r--r--src/video_core/texture_cache/image_base.cpp37
-rw-r--r--src/video_core/texture_cache/image_base.h12
-rw-r--r--src/video_core/texture_cache/slot_vector.h70
-rw-r--r--src/video_core/texture_cache/texture_cache.h151
-rw-r--r--src/video_core/texture_cache/util.cpp2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h9
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.cpp22
-rw-r--r--src/video_core/vulkan_common/vulkan_memory_allocator.h5
-rw-r--r--src/yuzu/configuration/config.cpp2
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp6
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui10
-rw-r--r--src/yuzu_cmd/default_ini.h4
29 files changed, 514 insertions, 66 deletions
diff --git a/src/audio_core/stream.cpp b/src/audio_core/stream.cpp
index ad6c587c2..5a30f55a7 100644
--- a/src/audio_core/stream.cpp
+++ b/src/audio_core/stream.cpp
@@ -107,9 +107,12 @@ void Stream::PlayNextBuffer(std::chrono::nanoseconds ns_late) {
107 active_buffer = queued_buffers.front(); 107 active_buffer = queued_buffers.front();
108 queued_buffers.pop(); 108 queued_buffers.pop();
109 109
110 VolumeAdjustSamples(active_buffer->GetSamples(), game_volume); 110 auto& samples = active_buffer->GetSamples();
111 111
112 sink_stream.EnqueueSamples(GetNumChannels(), active_buffer->GetSamples()); 112 VolumeAdjustSamples(samples, game_volume);
113
114 sink_stream.EnqueueSamples(GetNumChannels(), samples);
115 played_samples += samples.size();
113 116
114 const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer); 117 const auto buffer_release_ns = GetBufferReleaseNS(*active_buffer);
115 118
diff --git a/src/audio_core/stream.h b/src/audio_core/stream.h
index 559844b9b..dbd97ec9c 100644
--- a/src/audio_core/stream.h
+++ b/src/audio_core/stream.h
@@ -89,6 +89,11 @@ public:
89 return sample_rate; 89 return sample_rate;
90 } 90 }
91 91
92 /// Gets the number of samples played so far
93 [[nodiscard]] u64 GetPlayedSampleCount() const {
94 return played_samples;
95 }
96
92 /// Gets the number of channels 97 /// Gets the number of channels
93 [[nodiscard]] u32 GetNumChannels() const; 98 [[nodiscard]] u32 GetNumChannels() const;
94 99
@@ -106,6 +111,7 @@ private:
106 [[nodiscard]] std::chrono::nanoseconds GetBufferReleaseNS(const Buffer& buffer) const; 111 [[nodiscard]] std::chrono::nanoseconds GetBufferReleaseNS(const Buffer& buffer) const;
107 112
108 u32 sample_rate; ///< Sample rate of the stream 113 u32 sample_rate; ///< Sample rate of the stream
114 u64 played_samples{}; ///< The current played sample count
109 Format format; ///< Format of the stream 115 Format format; ///< Format of the stream
110 float game_volume = 1.0f; ///< The volume the game currently has set 116 float game_volume = 1.0f; ///< The volume the game currently has set
111 ReleaseCallback release_callback; ///< Buffer release callback for the stream 117 ReleaseCallback release_callback; ///< Buffer release callback for the stream
diff --git a/src/common/common_sizes.h b/src/common/common_sizes.h
index 7e9fd968b..d07b7ee5a 100644
--- a/src/common/common_sizes.h
+++ b/src/common/common_sizes.h
@@ -24,6 +24,7 @@ enum : u64 {
24 Size_128_MB = 128ULL * Size_1_MB, 24 Size_128_MB = 128ULL * Size_1_MB,
25 Size_448_MB = 448ULL * Size_1_MB, 25 Size_448_MB = 448ULL * Size_1_MB,
26 Size_507_MB = 507ULL * Size_1_MB, 26 Size_507_MB = 507ULL * Size_1_MB,
27 Size_512_MB = 512ULL * Size_1_MB,
27 Size_562_MB = 562ULL * Size_1_MB, 28 Size_562_MB = 562ULL * Size_1_MB,
28 Size_1554_MB = 1554ULL * Size_1_MB, 29 Size_1554_MB = 1554ULL * Size_1_MB,
29 Size_2048_MB = 2048ULL * Size_1_MB, 30 Size_2048_MB = 2048ULL * Size_1_MB,
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 6397308ec..e1bb4b7ff 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -59,6 +59,7 @@ void LogSettings() {
59 log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); 59 log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
60 log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); 60 log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue());
61 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); 61 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
62 log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
62 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); 63 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
63 log_setting("Audio_OutputEngine", values.sink_id); 64 log_setting("Audio_OutputEngine", values.sink_id);
64 log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); 65 log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
@@ -142,6 +143,7 @@ void RestoreGlobalState(bool is_powered_on) {
142 values.use_assembly_shaders.SetGlobal(true); 143 values.use_assembly_shaders.SetGlobal(true);
143 values.use_asynchronous_shaders.SetGlobal(true); 144 values.use_asynchronous_shaders.SetGlobal(true);
144 values.use_fast_gpu_time.SetGlobal(true); 145 values.use_fast_gpu_time.SetGlobal(true);
146 values.use_caches_gc.SetGlobal(true);
145 values.bg_red.SetGlobal(true); 147 values.bg_red.SetGlobal(true);
146 values.bg_green.SetGlobal(true); 148 values.bg_green.SetGlobal(true);
147 values.bg_blue.SetGlobal(true); 149 values.bg_blue.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 85554eac4..82ec18e27 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -154,6 +154,7 @@ struct Values {
154 Setting<bool> use_assembly_shaders; 154 Setting<bool> use_assembly_shaders;
155 Setting<bool> use_asynchronous_shaders; 155 Setting<bool> use_asynchronous_shaders;
156 Setting<bool> use_fast_gpu_time; 156 Setting<bool> use_fast_gpu_time;
157 Setting<bool> use_caches_gc;
157 158
158 Setting<float> bg_red; 159 Setting<float> bg_red;
159 Setting<float> bg_green; 160 Setting<float> bg_green;
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 804c6b10c..92d4510b1 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -58,7 +58,7 @@ public:
58 {7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"}, 58 {7, &IAudioOut::AppendAudioOutBufferImpl, "AppendAudioOutBufferAuto"},
59 {8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"}, 59 {8, &IAudioOut::GetReleasedAudioOutBufferImpl, "GetReleasedAudioOutBufferAuto"},
60 {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"}, 60 {9, &IAudioOut::GetAudioOutBufferCount, "GetAudioOutBufferCount"},
61 {10, nullptr, "GetAudioOutPlayedSampleCount"}, 61 {10, &IAudioOut::GetAudioOutPlayedSampleCount, "GetAudioOutPlayedSampleCount"},
62 {11, &IAudioOut::FlushAudioOutBuffers, "FlushAudioOutBuffers"}, 62 {11, &IAudioOut::FlushAudioOutBuffers, "FlushAudioOutBuffers"},
63 {12, &IAudioOut::SetAudioOutVolume, "SetAudioOutVolume"}, 63 {12, &IAudioOut::SetAudioOutVolume, "SetAudioOutVolume"},
64 {13, &IAudioOut::GetAudioOutVolume, "GetAudioOutVolume"}, 64 {13, &IAudioOut::GetAudioOutVolume, "GetAudioOutVolume"},
@@ -186,6 +186,14 @@ private:
186 rb.Push(static_cast<u32>(stream->GetQueueSize())); 186 rb.Push(static_cast<u32>(stream->GetQueueSize()));
187 } 187 }
188 188
189 void GetAudioOutPlayedSampleCount(Kernel::HLERequestContext& ctx) {
190 LOG_DEBUG(Service_Audio, "called");
191
192 IPC::ResponseBuilder rb{ctx, 4};
193 rb.Push(ResultSuccess);
194 rb.Push(stream->GetPlayedSampleCount());
195 }
196
189 void FlushAudioOutBuffers(Kernel::HLERequestContext& ctx) { 197 void FlushAudioOutBuffers(Kernel::HLERequestContext& ctx) {
190 LOG_DEBUG(Service_Audio, "called"); 198 LOG_DEBUG(Service_Audio, "called");
191 199
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index a39505903..b121d36a3 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -256,6 +256,16 @@ public:
256 stream_score += score; 256 stream_score += score;
257 } 257 }
258 258
259 /// Sets the new frame tick
260 void SetFrameTick(u64 new_frame_tick) noexcept {
261 frame_tick = new_frame_tick;
262 }
263
264 /// Returns the new frame tick
265 [[nodiscard]] u64 FrameTick() const noexcept {
266 return frame_tick;
267 }
268
259 /// Returns the likeliness of this being a stream buffer 269 /// Returns the likeliness of this being a stream buffer
260 [[nodiscard]] int StreamScore() const noexcept { 270 [[nodiscard]] int StreamScore() const noexcept {
261 return stream_score; 271 return stream_score;
@@ -586,6 +596,7 @@ private:
586 RasterizerInterface* rasterizer = nullptr; 596 RasterizerInterface* rasterizer = nullptr;
587 VAddr cpu_addr = 0; 597 VAddr cpu_addr = 0;
588 Words words; 598 Words words;
599 u64 frame_tick = 0;
589 BufferFlagBits flags{}; 600 BufferFlagBits flags{};
590 int stream_score = 0; 601 int stream_score = 0;
591}; 602};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index d371b842f..6d04d00da 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -16,6 +16,7 @@
16 16
17#include <boost/container/small_vector.hpp> 17#include <boost/container/small_vector.hpp>
18 18
19#include "common/common_sizes.h"
19#include "common/common_types.h" 20#include "common/common_types.h"
20#include "common/div_ceil.h" 21#include "common/div_ceil.h"
21#include "common/microprofile.h" 22#include "common/microprofile.h"
@@ -65,6 +66,9 @@ class BufferCache {
65 66
66 static constexpr BufferId NULL_BUFFER_ID{0}; 67 static constexpr BufferId NULL_BUFFER_ID{0};
67 68
69 static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB;
70 static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB;
71
68 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 72 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
69 73
70 using Runtime = typename P::Runtime; 74 using Runtime = typename P::Runtime;
@@ -102,6 +106,8 @@ public:
102 106
103 void TickFrame(); 107 void TickFrame();
104 108
109 void RunGarbageCollector();
110
105 void WriteMemory(VAddr cpu_addr, u64 size); 111 void WriteMemory(VAddr cpu_addr, u64 size);
106 112
107 void CachedWriteMemory(VAddr cpu_addr, u64 size); 113 void CachedWriteMemory(VAddr cpu_addr, u64 size);
@@ -243,6 +249,8 @@ private:
243 template <bool insert> 249 template <bool insert>
244 void ChangeRegister(BufferId buffer_id); 250 void ChangeRegister(BufferId buffer_id);
245 251
252 void TouchBuffer(Buffer& buffer) const noexcept;
253
246 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 254 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
247 255
248 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); 256 bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
@@ -255,6 +263,10 @@ private:
255 263
256 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); 264 void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
257 265
266 void DownloadBufferMemory(Buffer& buffer_id);
267
268 void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
269
258 void DeleteBuffer(BufferId buffer_id); 270 void DeleteBuffer(BufferId buffer_id);
259 271
260 void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); 272 void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
@@ -319,6 +331,10 @@ private:
319 size_t immediate_buffer_capacity = 0; 331 size_t immediate_buffer_capacity = 0;
320 std::unique_ptr<u8[]> immediate_buffer_alloc; 332 std::unique_ptr<u8[]> immediate_buffer_alloc;
321 333
334 typename SlotVector<Buffer>::Iterator deletion_iterator;
335 u64 frame_tick = 0;
336 u64 total_used_memory = 0;
337
322 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; 338 std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table;
323}; 339};
324 340
@@ -332,6 +348,28 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
332 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { 348 gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
333 // Ensure the first slot is used for the null buffer 349 // Ensure the first slot is used for the null buffer
334 void(slot_buffers.insert(runtime, NullBufferParams{})); 350 void(slot_buffers.insert(runtime, NullBufferParams{}));
351 deletion_iterator = slot_buffers.end();
352}
353
354template <class P>
355void BufferCache<P>::RunGarbageCollector() {
356 const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
357 const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
358 int num_iterations = aggressive_gc ? 64 : 32;
359 for (; num_iterations > 0; --num_iterations) {
360 if (deletion_iterator == slot_buffers.end()) {
361 deletion_iterator = slot_buffers.begin();
362 }
363 ++deletion_iterator;
364 if (deletion_iterator == slot_buffers.end()) {
365 break;
366 }
367 const auto [buffer_id, buffer] = *deletion_iterator;
368 if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
369 DownloadBufferMemory(*buffer);
370 DeleteBuffer(buffer_id);
371 }
372 }
335} 373}
336 374
337template <class P> 375template <class P>
@@ -349,6 +387,10 @@ void BufferCache<P>::TickFrame() {
349 const bool skip_preferred = hits * 256 < shots * 251; 387 const bool skip_preferred = hits * 256 < shots * 251;
350 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 388 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
351 389
390 if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
391 RunGarbageCollector();
392 }
393 ++frame_tick;
352 delayed_destruction_ring.Tick(); 394 delayed_destruction_ring.Tick();
353} 395}
354 396
@@ -371,50 +413,8 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
371 413
372template <class P> 414template <class P>
373void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { 415void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
374 ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { 416 ForEachBufferInRange(cpu_addr, size,
375 boost::container::small_vector<BufferCopy, 1> copies; 417 [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
376 u64 total_size_bytes = 0;
377 u64 largest_copy = 0;
378 buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
379 copies.push_back(BufferCopy{
380 .src_offset = range_offset,
381 .dst_offset = total_size_bytes,
382 .size = range_size,
383 });
384 total_size_bytes += range_size;
385 largest_copy = std::max(largest_copy, range_size);
386 });
387 if (total_size_bytes == 0) {
388 return;
389 }
390 MICROPROFILE_SCOPE(GPU_DownloadMemory);
391
392 if constexpr (USE_MEMORY_MAPS) {
393 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
394 const u8* const mapped_memory = download_staging.mapped_span.data();
395 const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
396 for (BufferCopy& copy : copies) {
397 // Modify copies to have the staging offset in mind
398 copy.dst_offset += download_staging.offset;
399 }
400 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
401 runtime.Finish();
402 for (const BufferCopy& copy : copies) {
403 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
404 // Undo the modified offset
405 const u64 dst_offset = copy.dst_offset - download_staging.offset;
406 const u8* copy_mapped_memory = mapped_memory + dst_offset;
407 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
408 }
409 } else {
410 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
411 for (const BufferCopy& copy : copies) {
412 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
413 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
414 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
415 }
416 }
417 });
418} 418}
419 419
420template <class P> 420template <class P>
@@ -640,6 +640,7 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
640template <class P> 640template <class P>
641void BufferCache<P>::BindHostIndexBuffer() { 641void BufferCache<P>::BindHostIndexBuffer() {
642 Buffer& buffer = slot_buffers[index_buffer.buffer_id]; 642 Buffer& buffer = slot_buffers[index_buffer.buffer_id];
643 TouchBuffer(buffer);
643 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 644 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
644 const u32 size = index_buffer.size; 645 const u32 size = index_buffer.size;
645 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); 646 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -658,6 +659,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
658 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { 659 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
659 const Binding& binding = vertex_buffers[index]; 660 const Binding& binding = vertex_buffers[index];
660 Buffer& buffer = slot_buffers[binding.buffer_id]; 661 Buffer& buffer = slot_buffers[binding.buffer_id];
662 TouchBuffer(buffer);
661 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 663 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
662 if (!flags[Dirty::VertexBuffer0 + index]) { 664 if (!flags[Dirty::VertexBuffer0 + index]) {
663 continue; 665 continue;
@@ -693,6 +695,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
693 const VAddr cpu_addr = binding.cpu_addr; 695 const VAddr cpu_addr = binding.cpu_addr;
694 const u32 size = binding.size; 696 const u32 size = binding.size;
695 Buffer& buffer = slot_buffers[binding.buffer_id]; 697 Buffer& buffer = slot_buffers[binding.buffer_id];
698 TouchBuffer(buffer);
696 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 699 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
697 size <= uniform_buffer_skip_cache_size && 700 size <= uniform_buffer_skip_cache_size &&
698 !buffer.IsRegionGpuModified(cpu_addr, size); 701 !buffer.IsRegionGpuModified(cpu_addr, size);
@@ -744,6 +747,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
744 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { 747 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
745 const Binding& binding = storage_buffers[stage][index]; 748 const Binding& binding = storage_buffers[stage][index];
746 Buffer& buffer = slot_buffers[binding.buffer_id]; 749 Buffer& buffer = slot_buffers[binding.buffer_id];
750 TouchBuffer(buffer);
747 const u32 size = binding.size; 751 const u32 size = binding.size;
748 SynchronizeBuffer(buffer, binding.cpu_addr, size); 752 SynchronizeBuffer(buffer, binding.cpu_addr, size);
749 753
@@ -766,6 +770,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
766 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { 770 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
767 const Binding& binding = transform_feedback_buffers[index]; 771 const Binding& binding = transform_feedback_buffers[index];
768 Buffer& buffer = slot_buffers[binding.buffer_id]; 772 Buffer& buffer = slot_buffers[binding.buffer_id];
773 TouchBuffer(buffer);
769 const u32 size = binding.size; 774 const u32 size = binding.size;
770 SynchronizeBuffer(buffer, binding.cpu_addr, size); 775 SynchronizeBuffer(buffer, binding.cpu_addr, size);
771 776
@@ -784,6 +789,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
784 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { 789 ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) {
785 const Binding& binding = compute_uniform_buffers[index]; 790 const Binding& binding = compute_uniform_buffers[index];
786 Buffer& buffer = slot_buffers[binding.buffer_id]; 791 Buffer& buffer = slot_buffers[binding.buffer_id];
792 TouchBuffer(buffer);
787 const u32 size = binding.size; 793 const u32 size = binding.size;
788 SynchronizeBuffer(buffer, binding.cpu_addr, size); 794 SynchronizeBuffer(buffer, binding.cpu_addr, size);
789 795
@@ -803,6 +809,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
803 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 809 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
804 const Binding& binding = compute_storage_buffers[index]; 810 const Binding& binding = compute_storage_buffers[index];
805 Buffer& buffer = slot_buffers[binding.buffer_id]; 811 Buffer& buffer = slot_buffers[binding.buffer_id];
812 TouchBuffer(buffer);
806 const u32 size = binding.size; 813 const u32 size = binding.size;
807 SynchronizeBuffer(buffer, binding.cpu_addr, size); 814 SynchronizeBuffer(buffer, binding.cpu_addr, size);
808 815
@@ -1101,6 +1108,7 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1101 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1108 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1102 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1109 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1103 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1110 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
1111 TouchBuffer(slot_buffers[new_buffer_id]);
1104 for (const BufferId overlap_id : overlap.ids) { 1112 for (const BufferId overlap_id : overlap.ids) {
1105 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1113 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1106 } 1114 }
@@ -1122,8 +1130,14 @@ template <class P>
1122template <bool insert> 1130template <bool insert>
1123void BufferCache<P>::ChangeRegister(BufferId buffer_id) { 1131void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1124 const Buffer& buffer = slot_buffers[buffer_id]; 1132 const Buffer& buffer = slot_buffers[buffer_id];
1133 const auto size = buffer.SizeBytes();
1134 if (insert) {
1135 total_used_memory += Common::AlignUp(size, 1024);
1136 } else {
1137 total_used_memory -= Common::AlignUp(size, 1024);
1138 }
1125 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1139 const VAddr cpu_addr_begin = buffer.CpuAddr();
1126 const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); 1140 const VAddr cpu_addr_end = cpu_addr_begin + size;
1127 const u64 page_begin = cpu_addr_begin / PAGE_SIZE; 1141 const u64 page_begin = cpu_addr_begin / PAGE_SIZE;
1128 const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); 1142 const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE);
1129 for (u64 page = page_begin; page != page_end; ++page) { 1143 for (u64 page = page_begin; page != page_end; ++page) {
@@ -1136,6 +1150,11 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1136} 1150}
1137 1151
1138template <class P> 1152template <class P>
1153void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
1154 buffer.SetFrameTick(frame_tick);
1155}
1156
1157template <class P>
1139bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { 1158bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
1140 if (buffer.CpuAddr() == 0) { 1159 if (buffer.CpuAddr() == 0) {
1141 return true; 1160 return true;
@@ -1212,6 +1231,57 @@ void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes,
1212} 1231}
1213 1232
1214template <class P> 1233template <class P>
1234void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
1235 DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
1236}
1237
1238template <class P>
1239void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
1240 boost::container::small_vector<BufferCopy, 1> copies;
1241 u64 total_size_bytes = 0;
1242 u64 largest_copy = 0;
1243 buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
1244 copies.push_back(BufferCopy{
1245 .src_offset = range_offset,
1246 .dst_offset = total_size_bytes,
1247 .size = range_size,
1248 });
1249 total_size_bytes += range_size;
1250 largest_copy = std::max(largest_copy, range_size);
1251 });
1252 if (total_size_bytes == 0) {
1253 return;
1254 }
1255 MICROPROFILE_SCOPE(GPU_DownloadMemory);
1256
1257 if constexpr (USE_MEMORY_MAPS) {
1258 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
1259 const u8* const mapped_memory = download_staging.mapped_span.data();
1260 const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
1261 for (BufferCopy& copy : copies) {
1262 // Modify copies to have the staging offset in mind
1263 copy.dst_offset += download_staging.offset;
1264 }
1265 runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
1266 runtime.Finish();
1267 for (const BufferCopy& copy : copies) {
1268 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
1269 // Undo the modified offset
1270 const u64 dst_offset = copy.dst_offset - download_staging.offset;
1271 const u8* copy_mapped_memory = mapped_memory + dst_offset;
1272 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
1273 }
1274 } else {
1275 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
1276 for (const BufferCopy& copy : copies) {
1277 buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
1278 const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
1279 cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
1280 }
1281 }
1282}
1283
1284template <class P>
1215void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { 1285void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
1216 const auto scalar_replace = [buffer_id](Binding& binding) { 1286 const auto scalar_replace = [buffer_id](Binding& binding) {
1217 if (binding.buffer_id == buffer_id) { 1287 if (binding.buffer_id == buffer_id) {
@@ -1236,6 +1306,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
1236 1306
1237 Unregister(buffer_id); 1307 Unregister(buffer_id);
1238 delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); 1308 delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
1309 slot_buffers.erase(buffer_id);
1239 1310
1240 NotifyBufferDeletion(); 1311 NotifyBufferDeletion();
1241} 1312}
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ffed42a29..335383955 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -242,6 +242,7 @@ public:
242 return 4; 242 return 4;
243 default: 243 default:
244 UNREACHABLE(); 244 UNREACHABLE();
245 return 1;
245 } 246 }
246 } 247 }
247 248
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 9b4038615..23948feed 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -737,6 +737,8 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
737 } 737 }
738} 738}
739 739
740Image::~Image() = default;
741
740void Image::UploadMemory(const ImageBufferMap& map, 742void Image::UploadMemory(const ImageBufferMap& map,
741 std::span<const VideoCommon::BufferImageCopy> copies) { 743 std::span<const VideoCommon::BufferImageCopy> copies) {
742 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); 744 glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer);
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index df8be12ff..25fe61566 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -143,6 +143,14 @@ public:
143 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 143 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
144 VAddr cpu_addr); 144 VAddr cpu_addr);
145 145
146 ~Image();
147
148 Image(const Image&) = delete;
149 Image& operator=(const Image&) = delete;
150
151 Image(Image&&) = default;
152 Image& operator=(Image&&) = default;
153
146 void UploadMemory(const ImageBufferMap& map, 154 void UploadMemory(const ImageBufferMap& map,
147 std::span<const VideoCommon::BufferImageCopy> copies); 155 std::span<const VideoCommon::BufferImageCopy> copies);
148 156
@@ -235,6 +243,7 @@ struct TextureCacheParams {
235 static constexpr bool ENABLE_VALIDATION = true; 243 static constexpr bool ENABLE_VALIDATION = true;
236 static constexpr bool FRAMEBUFFER_BLITS = true; 244 static constexpr bool FRAMEBUFFER_BLITS = true;
237 static constexpr bool HAS_EMULATED_COPIES = true; 245 static constexpr bool HAS_EMULATED_COPIES = true;
246 static constexpr bool HAS_DEVICE_MEMORY_INFO = false;
238 247
239 using Runtime = OpenGL::TextureCacheRuntime; 248 using Runtime = OpenGL::TextureCacheRuntime;
240 using Image = OpenGL::Image; 249 using Image = OpenGL::Image;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 52860b4cf..a2ab4d1ee 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
818 }); 818 });
819} 819}
820 820
821u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
822 return device.GetDeviceLocalMemory();
823}
824
821Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, 825Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_,
822 VAddr cpu_addr_) 826 VAddr cpu_addr_)
823 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, 827 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler},
@@ -876,6 +880,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_
876 } 880 }
877} 881}
878 882
883Image::~Image() = default;
884
879void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { 885void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) {
880 // TODO: Move this to another API 886 // TODO: Move this to another API
881 scheduler->RequestOutsideRenderPassOperationContext(); 887 scheduler->RequestOutsideRenderPassOperationContext();
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4a57d378b..172bcdf98 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -97,6 +97,8 @@ struct TextureCacheRuntime {
97 // All known Vulkan drivers can natively handle BGR textures 97 // All known Vulkan drivers can natively handle BGR textures
98 return true; 98 return true;
99 } 99 }
100
101 u64 GetDeviceLocalMemory() const;
100}; 102};
101 103
102class Image : public VideoCommon::ImageBase { 104class Image : public VideoCommon::ImageBase {
@@ -104,6 +106,14 @@ public:
104 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, 106 explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
105 VAddr cpu_addr); 107 VAddr cpu_addr);
106 108
109 ~Image();
110
111 Image(const Image&) = delete;
112 Image& operator=(const Image&) = delete;
113
114 Image(Image&&) = default;
115 Image& operator=(Image&&) = default;
116
107 void UploadMemory(const StagingBufferRef& map, 117 void UploadMemory(const StagingBufferRef& map,
108 std::span<const VideoCommon::BufferImageCopy> copies); 118 std::span<const VideoCommon::BufferImageCopy> copies);
109 119
@@ -257,6 +267,7 @@ struct TextureCacheParams {
257 static constexpr bool ENABLE_VALIDATION = true; 267 static constexpr bool ENABLE_VALIDATION = true;
258 static constexpr bool FRAMEBUFFER_BLITS = false; 268 static constexpr bool FRAMEBUFFER_BLITS = false;
259 static constexpr bool HAS_EMULATED_COPIES = false; 269 static constexpr bool HAS_EMULATED_COPIES = false;
270 static constexpr bool HAS_DEVICE_MEMORY_INFO = true;
260 271
261 using Runtime = Vulkan::TextureCacheRuntime; 272 using Runtime = Vulkan::TextureCacheRuntime;
262 using Image = Vulkan::Image; 273 using Image = Vulkan::Image;
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 6308aef94..eb1746265 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -283,4 +283,11 @@ std::pair<u32, u32> GetASTCBlockSize(PixelFormat format) {
283 return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; 283 return {DefaultBlockWidth(format), DefaultBlockHeight(format)};
284} 284}
285 285
286u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) {
287 constexpr u64 RGBA8_PIXEL_SIZE = 4;
288 const u64 base_block_size = static_cast<u64>(DefaultBlockWidth(format)) *
289 static_cast<u64>(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE;
290 return (base_size * base_block_size) / BytesPerBlock(format);
291}
292
286} // namespace VideoCore::Surface 293} // namespace VideoCore::Surface
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index c40ab89d0..1503db81f 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format);
462 462
463std::pair<u32, u32> GetASTCBlockSize(PixelFormat format); 463std::pair<u32, u32> GetASTCBlockSize(PixelFormat format);
464 464
465u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format);
466
465} // namespace VideoCore::Surface 467} // namespace VideoCore::Surface
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index 9914926b3..ad69d32d1 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -113,6 +113,43 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie
113 image_view_ids.push_back(image_view_id); 113 image_view_ids.push_back(image_view_id);
114} 114}
115 115
116bool ImageBase::IsSafeDownload() const noexcept {
117 // Skip images that were not modified from the GPU
118 if (False(flags & ImageFlagBits::GpuModified)) {
119 return false;
120 }
121 // Skip images that .are. modified from the CPU
122 // We don't want to write sensitive data from the guest
123 if (True(flags & ImageFlagBits::CpuModified)) {
124 return false;
125 }
126 if (info.num_samples > 1) {
127 LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
128 return false;
129 }
130 return true;
131}
132
133void ImageBase::CheckBadOverlapState() {
134 if (False(flags & ImageFlagBits::BadOverlap)) {
135 return;
136 }
137 if (!overlapping_images.empty()) {
138 return;
139 }
140 flags &= ~ImageFlagBits::BadOverlap;
141}
142
143void ImageBase::CheckAliasState() {
144 if (False(flags & ImageFlagBits::Alias)) {
145 return;
146 }
147 if (!aliased_images.empty()) {
148 return;
149 }
150 flags &= ~ImageFlagBits::Alias;
151}
152
116void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { 153void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
117 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; 154 static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
118 ASSERT(lhs.info.type == rhs.info.type); 155 ASSERT(lhs.info.type == rhs.info.type);
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index b7f3b7e43..e326cab71 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 {
25 Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted 25 Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
26 Registered = 1 << 6, ///< True when the image is registered 26 Registered = 1 << 6, ///< True when the image is registered
27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked 27 Picked = 1 << 7, ///< Temporary flag to mark the image as picked
28
29 // Garbage Collection Flags
30 BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
31 ///< garbage collection priority
32 Alias = 1 << 9, ///< This image has aliases and has priority on garbage
33 ///< collection
28}; 34};
29DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) 35DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
30 36
@@ -44,11 +50,16 @@ struct ImageBase {
44 50
45 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); 51 void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id);
46 52
53 [[nodiscard]] bool IsSafeDownload() const noexcept;
54
47 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { 55 [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
48 const VAddr overlap_end = overlap_cpu_addr + overlap_size; 56 const VAddr overlap_end = overlap_cpu_addr + overlap_size;
49 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; 57 return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
50 } 58 }
51 59
60 void CheckBadOverlapState();
61 void CheckAliasState();
62
52 ImageInfo info; 63 ImageInfo info;
53 64
54 u32 guest_size_bytes = 0; 65 u32 guest_size_bytes = 0;
@@ -72,6 +83,7 @@ struct ImageBase {
72 std::vector<SubresourceBase> slice_subresources; 83 std::vector<SubresourceBase> slice_subresources;
73 84
74 std::vector<AliasedImage> aliased_images; 85 std::vector<AliasedImage> aliased_images;
86 std::vector<ImageId> overlapping_images;
75}; 87};
76 88
77struct ImageAllocBase { 89struct ImageAllocBase {
diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h
index eae3be6ea..6180b8c0e 100644
--- a/src/video_core/texture_cache/slot_vector.h
+++ b/src/video_core/texture_cache/slot_vector.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <bit>
8#include <concepts> 9#include <concepts>
9#include <numeric> 10#include <numeric>
10#include <type_traits> 11#include <type_traits>
@@ -32,6 +33,60 @@ template <class T>
32requires std::is_nothrow_move_assignable_v<T>&& 33requires std::is_nothrow_move_assignable_v<T>&&
33 std::is_nothrow_move_constructible_v<T> class SlotVector { 34 std::is_nothrow_move_constructible_v<T> class SlotVector {
34public: 35public:
36 class Iterator {
37 friend SlotVector<T>;
38
39 public:
40 constexpr Iterator() = default;
41
42 Iterator& operator++() noexcept {
43 const u64* const bitset = slot_vector->stored_bitset.data();
44 const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
45 if (id.index < size) {
46 do {
47 ++id.index;
48 } while (id.index < size && !IsValid(bitset));
49 if (id.index == size) {
50 id.index = SlotId::INVALID_INDEX;
51 }
52 }
53 return *this;
54 }
55
56 Iterator operator++(int) noexcept {
57 const Iterator copy{*this};
58 ++*this;
59 return copy;
60 }
61
62 bool operator==(const Iterator& other) const noexcept {
63 return id.index == other.id.index;
64 }
65
66 bool operator!=(const Iterator& other) const noexcept {
67 return id.index != other.id.index;
68 }
69
70 std::pair<SlotId, T*> operator*() const noexcept {
71 return {id, std::addressof((*slot_vector)[id])};
72 }
73
74 T* operator->() const noexcept {
75 return std::addressof((*slot_vector)[id]);
76 }
77
78 private:
79 Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
80 : slot_vector{slot_vector_}, id{id_} {}
81
82 bool IsValid(const u64* bitset) const noexcept {
83 return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
84 }
85
86 SlotVector<T>* slot_vector;
87 SlotId id;
88 };
89
35 ~SlotVector() noexcept { 90 ~SlotVector() noexcept {
36 size_t index = 0; 91 size_t index = 0;
37 for (u64 bits : stored_bitset) { 92 for (u64 bits : stored_bitset) {
@@ -70,6 +125,20 @@ public:
70 ResetStorageBit(id.index); 125 ResetStorageBit(id.index);
71 } 126 }
72 127
128 [[nodiscard]] Iterator begin() noexcept {
129 const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; });
130 if (it == stored_bitset.end()) {
131 return end();
132 }
133 const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin()));
134 const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
135 return Iterator(this, first_id);
136 }
137
138 [[nodiscard]] Iterator end() noexcept {
139 return Iterator(this, SlotId{SlotId::INVALID_INDEX});
140 }
141
73private: 142private:
74 struct NonTrivialDummy { 143 struct NonTrivialDummy {
75 NonTrivialDummy() noexcept {} 144 NonTrivialDummy() noexcept {}
@@ -140,7 +209,6 @@ private:
140 209
141 Entry* values = nullptr; 210 Entry* values = nullptr;
142 size_t values_capacity = 0; 211 size_t values_capacity = 0;
143 size_t values_size = 0;
144 212
145 std::vector<u64> stored_bitset; 213 std::vector<u64> stored_bitset;
146 std::vector<u32> free_list; 214 std::vector<u32> free_list;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 59b7c678b..e7f8478b4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -20,8 +20,10 @@
20 20
21#include "common/alignment.h" 21#include "common/alignment.h"
22#include "common/common_funcs.h" 22#include "common/common_funcs.h"
23#include "common/common_sizes.h"
23#include "common/common_types.h" 24#include "common/common_types.h"
24#include "common/logging/log.h" 25#include "common/logging/log.h"
26#include "common/settings.h"
25#include "video_core/compatible_formats.h" 27#include "video_core/compatible_formats.h"
26#include "video_core/delayed_destruction_ring.h" 28#include "video_core/delayed_destruction_ring.h"
27#include "video_core/dirty_flags.h" 29#include "video_core/dirty_flags.h"
@@ -69,12 +71,17 @@ class TextureCache {
69 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; 71 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
70 /// True when some copies have to be emulated 72 /// True when some copies have to be emulated
71 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; 73 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
74 /// True when the API can provide info about the memory of the device.
75 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
72 76
73 /// Image view ID for null descriptors 77 /// Image view ID for null descriptors
74 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; 78 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
75 /// Sampler ID for bugged sampler ids 79 /// Sampler ID for bugged sampler ids
76 static constexpr SamplerId NULL_SAMPLER_ID{0}; 80 static constexpr SamplerId NULL_SAMPLER_ID{0};
77 81
82 static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB;
83 static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB;
84
78 using Runtime = typename P::Runtime; 85 using Runtime = typename P::Runtime;
79 using Image = typename P::Image; 86 using Image = typename P::Image;
80 using ImageAlloc = typename P::ImageAlloc; 87 using ImageAlloc = typename P::ImageAlloc;
@@ -103,6 +110,9 @@ public:
103 /// Notify the cache that a new frame has been queued 110 /// Notify the cache that a new frame has been queued
104 void TickFrame(); 111 void TickFrame();
105 112
113 /// Runs the Garbage Collector.
114 void RunGarbageCollector();
115
106 /// Return a constant reference to the given image view id 116 /// Return a constant reference to the given image view id
107 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; 117 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
108 118
@@ -333,6 +343,10 @@ private:
333 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table; 343 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
334 344
335 bool has_deleted_images = false; 345 bool has_deleted_images = false;
346 u64 total_used_memory = 0;
347 u64 minimum_memory;
348 u64 expected_memory;
349 u64 critical_memory;
336 350
337 SlotVector<Image> slot_images; 351 SlotVector<Image> slot_images;
338 SlotVector<ImageView> slot_image_views; 352 SlotVector<ImageView> slot_image_views;
@@ -353,6 +367,7 @@ private:
353 367
354 u64 modification_tick = 0; 368 u64 modification_tick = 0;
355 u64 frame_tick = 0; 369 u64 frame_tick = 0;
370 typename SlotVector<Image>::Iterator deletion_iterator;
356}; 371};
357 372
358template <class P> 373template <class P>
@@ -373,11 +388,94 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
373 // This way the null resource becomes a compile time constant 388 // This way the null resource becomes a compile time constant
374 void(slot_image_views.insert(runtime, NullImageParams{})); 389 void(slot_image_views.insert(runtime, NullImageParams{}));
375 void(slot_samplers.insert(runtime, sampler_descriptor)); 390 void(slot_samplers.insert(runtime, sampler_descriptor));
391
392 deletion_iterator = slot_images.begin();
393
394 if constexpr (HAS_DEVICE_MEMORY_INFO) {
395 const auto device_memory = runtime.GetDeviceLocalMemory();
396 const u64 possible_expected_memory = (device_memory * 3) / 10;
397 const u64 possible_critical_memory = (device_memory * 6) / 10;
398 expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
399 critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
400 minimum_memory = 0;
401 } else {
402 // on OGL we can be more conservatives as the driver takes care.
403 expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB;
404 critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB;
405 minimum_memory = expected_memory;
406 }
407}
408
409template <class P>
410void TextureCache<P>::RunGarbageCollector() {
411 const bool high_priority_mode = total_used_memory >= expected_memory;
412 const bool aggressive_mode = total_used_memory >= critical_memory;
413 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
414 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
415 for (; num_iterations > 0; --num_iterations) {
416 if (deletion_iterator == slot_images.end()) {
417 deletion_iterator = slot_images.begin();
418 if (deletion_iterator == slot_images.end()) {
419 break;
420 }
421 }
422 auto [image_id, image_tmp] = *deletion_iterator;
423 Image* image = image_tmp; // fix clang error.
424 const bool is_alias = True(image->flags & ImageFlagBits::Alias);
425 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
426 const bool must_download = image->IsSafeDownload();
427 bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
428 const u64 ticks_needed =
429 is_bad_overlap
430 ? ticks_to_destroy >> 4
431 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
432 should_care |= aggressive_mode;
433 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
434 if (is_bad_overlap) {
435 const bool overlap_check = std::ranges::all_of(
436 image->overlapping_images, [&, image](const ImageId& overlap_id) {
437 auto& overlap = slot_images[overlap_id];
438 return overlap.frame_tick >= image->frame_tick;
439 });
440 if (!overlap_check) {
441 ++deletion_iterator;
442 continue;
443 }
444 }
445 if (!is_bad_overlap && must_download) {
446 const bool alias_check = std::ranges::none_of(
447 image->aliased_images, [&, image](const AliasedImage& alias) {
448 auto& alias_image = slot_images[alias.id];
449 return (alias_image.frame_tick < image->frame_tick) ||
450 (alias_image.modification_tick < image->modification_tick);
451 });
452
453 if (alias_check) {
454 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
455 const auto copies = FullDownloadCopies(image->info);
456 image->DownloadMemory(map, copies);
457 runtime.Finish();
458 SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
459 }
460 }
461 if (True(image->flags & ImageFlagBits::Tracked)) {
462 UntrackImage(*image);
463 }
464 UnregisterImage(image_id);
465 DeleteImage(image_id);
466 if (is_bad_overlap) {
467 ++num_iterations;
468 }
469 }
470 ++deletion_iterator;
471 }
376} 472}
377 473
378template <class P> 474template <class P>
379void TextureCache<P>::TickFrame() { 475void TextureCache<P>::TickFrame() {
380 // Tick sentenced resources in this order to ensure they are destroyed in the right order 476 if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
477 RunGarbageCollector();
478 }
381 sentenced_images.Tick(); 479 sentenced_images.Tick();
382 sentenced_framebuffers.Tick(); 480 sentenced_framebuffers.Tick();
383 sentenced_image_view.Tick(); 481 sentenced_image_view.Tick();
@@ -568,17 +666,7 @@ template <class P>
568void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { 666void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
569 std::vector<ImageId> images; 667 std::vector<ImageId> images;
570 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { 668 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
571 // Skip images that were not modified from the GPU 669 if (!image.IsSafeDownload()) {
572 if (False(image.flags & ImageFlagBits::GpuModified)) {
573 return;
574 }
575 // Skip images that .are. modified from the CPU
576 // We don't want to write sensitive data from the guest
577 if (True(image.flags & ImageFlagBits::CpuModified)) {
578 return;
579 }
580 if (image.info.num_samples > 1) {
581 LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented");
582 return; 670 return;
583 } 671 }
584 image.flags &= ~ImageFlagBits::GpuModified; 672 image.flags &= ~ImageFlagBits::GpuModified;
@@ -967,6 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
967 std::vector<ImageId> overlap_ids; 1055 std::vector<ImageId> overlap_ids;
968 std::vector<ImageId> left_aliased_ids; 1056 std::vector<ImageId> left_aliased_ids;
969 std::vector<ImageId> right_aliased_ids; 1057 std::vector<ImageId> right_aliased_ids;
1058 std::vector<ImageId> bad_overlap_ids;
970 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { 1059 ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
971 if (info.type != overlap.info.type) { 1060 if (info.type != overlap.info.type) {
972 return; 1061 return;
@@ -992,9 +1081,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
992 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); 1081 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
993 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { 1082 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
994 left_aliased_ids.push_back(overlap_id); 1083 left_aliased_ids.push_back(overlap_id);
1084 overlap.flags |= ImageFlagBits::Alias;
995 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, 1085 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
996 broken_views, native_bgr)) { 1086 broken_views, native_bgr)) {
997 right_aliased_ids.push_back(overlap_id); 1087 right_aliased_ids.push_back(overlap_id);
1088 overlap.flags |= ImageFlagBits::Alias;
1089 } else {
1090 bad_overlap_ids.push_back(overlap_id);
1091 overlap.flags |= ImageFlagBits::BadOverlap;
998 } 1092 }
999 }); 1093 });
1000 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); 1094 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
@@ -1022,10 +1116,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1022 for (const ImageId aliased_id : right_aliased_ids) { 1116 for (const ImageId aliased_id : right_aliased_ids) {
1023 ImageBase& aliased = slot_images[aliased_id]; 1117 ImageBase& aliased = slot_images[aliased_id];
1024 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); 1118 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
1119 new_image.flags |= ImageFlagBits::Alias;
1025 } 1120 }
1026 for (const ImageId aliased_id : left_aliased_ids) { 1121 for (const ImageId aliased_id : left_aliased_ids) {
1027 ImageBase& aliased = slot_images[aliased_id]; 1122 ImageBase& aliased = slot_images[aliased_id];
1028 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); 1123 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1124 new_image.flags |= ImageFlagBits::Alias;
1125 }
1126 for (const ImageId aliased_id : bad_overlap_ids) {
1127 ImageBase& aliased = slot_images[aliased_id];
1128 aliased.overlapping_images.push_back(new_image_id);
1129 new_image.overlapping_images.push_back(aliased_id);
1130 new_image.flags |= ImageFlagBits::BadOverlap;
1029 } 1131 }
1030 RegisterImage(new_image_id); 1132 RegisterImage(new_image_id);
1031 return new_image_id; 1133 return new_image_id;
@@ -1195,6 +1297,13 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1195 image.flags |= ImageFlagBits::Registered; 1297 image.flags |= ImageFlagBits::Registered;
1196 ForEachPage(image.cpu_addr, image.guest_size_bytes, 1298 ForEachPage(image.cpu_addr, image.guest_size_bytes,
1197 [this, image_id](u64 page) { page_table[page].push_back(image_id); }); 1299 [this, image_id](u64 page) { page_table[page].push_back(image_id); });
1300 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1301 if ((IsPixelFormatASTC(image.info.format) &&
1302 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1303 True(image.flags & ImageFlagBits::Converted)) {
1304 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1305 }
1306 total_used_memory += Common::AlignUp(tentative_size, 1024);
1198} 1307}
1199 1308
1200template <class P> 1309template <class P>
@@ -1203,6 +1312,14 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1203 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), 1312 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1204 "Trying to unregister an already registered image"); 1313 "Trying to unregister an already registered image");
1205 image.flags &= ~ImageFlagBits::Registered; 1314 image.flags &= ~ImageFlagBits::Registered;
1315 image.flags &= ~ImageFlagBits::BadOverlap;
1316 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1317 if ((IsPixelFormatASTC(image.info.format) &&
1318 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1319 True(image.flags & ImageFlagBits::Converted)) {
1320 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1321 }
1322 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1206 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { 1323 ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1207 const auto page_it = page_table.find(page); 1324 const auto page_it = page_table.find(page);
1208 if (page_it == page_table.end()) { 1325 if (page_it == page_table.end()) {
@@ -1276,9 +1393,19 @@ void TextureCache<P>::DeleteImage(ImageId image_id) {
1276 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { 1393 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1277 return other_alias.id == image_id; 1394 return other_alias.id == image_id;
1278 }); 1395 });
1396 other_image.CheckAliasState();
1279 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", 1397 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1280 num_removed_aliases); 1398 num_removed_aliases);
1281 } 1399 }
1400 for (const ImageId overlap_id : image.overlapping_images) {
1401 ImageBase& other_image = slot_images[overlap_id];
1402 [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
1403 other_image.overlapping_images,
1404 [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
1405 other_image.CheckBadOverlapState();
1406 ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}",
1407 num_removed_overlaps);
1408 }
1282 for (const ImageViewId image_view_id : image_view_ids) { 1409 for (const ImageViewId image_view_id : image_view_ids) {
1283 sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); 1410 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1284 slot_image_views.erase(image_view_id); 1411 slot_image_views.erase(image_view_id);
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 6835fd747..4efe042b6 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
581 581
582 for (s32 layer = 0; layer < info.resources.layers; ++layer) { 582 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
583 const std::span<const u8> src = input.subspan(host_offset); 583 const std::span<const u8> src = input.subspan(host_offset);
584 gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
585
584 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, 586 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
585 num_tiles.depth, block.height, block.depth); 587 num_tiles.depth, block.height, block.depth);
586 588
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 64206b3d2..707a8b8fb 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
408 } 408 }
409 logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); 409 logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld);
410 410
411 CollectPhysicalMemoryInfo();
411 CollectTelemetryParameters(); 412 CollectTelemetryParameters();
412 CollectToolingInfo(); 413 CollectToolingInfo();
413 414
@@ -818,6 +819,17 @@ void Device::CollectTelemetryParameters() {
818 } 819 }
819} 820}
820 821
822void Device::CollectPhysicalMemoryInfo() {
823 const auto mem_properties = physical.GetMemoryProperties();
824 const std::size_t num_properties = mem_properties.memoryHeapCount;
825 device_access_memory = 0;
826 for (std::size_t element = 0; element < num_properties; element++) {
827 if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
828 device_access_memory += mem_properties.memoryHeaps[element].size;
829 }
830 }
831}
832
821void Device::CollectToolingInfo() { 833void Device::CollectToolingInfo() {
822 if (!ext_tooling_info) { 834 if (!ext_tooling_info) {
823 return; 835 return;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 67d70cd22..a1aba973b 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -225,6 +225,10 @@ public:
225 return use_asynchronous_shaders; 225 return use_asynchronous_shaders;
226 } 226 }
227 227
228 u64 GetDeviceLocalMemory() const {
229 return device_access_memory;
230 }
231
228private: 232private:
229 /// Checks if the physical device is suitable. 233 /// Checks if the physical device is suitable.
230 void CheckSuitability(bool requires_swapchain) const; 234 void CheckSuitability(bool requires_swapchain) const;
@@ -244,6 +248,9 @@ private:
244 /// Collects information about attached tools. 248 /// Collects information about attached tools.
245 void CollectToolingInfo(); 249 void CollectToolingInfo();
246 250
251 /// Collects information about the device's local memory.
252 void CollectPhysicalMemoryInfo();
253
247 /// Returns a list of queue initialization descriptors. 254 /// Returns a list of queue initialization descriptors.
248 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const; 255 std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
249 256
@@ -302,6 +309,8 @@ private:
302 309
303 /// Nsight Aftermath GPU crash tracker 310 /// Nsight Aftermath GPU crash tracker
304 std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker; 311 std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
312
313 u64 device_access_memory;
305}; 314};
306 315
307} // namespace Vulkan 316} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 5edd06ebc..aa173d19e 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
69 69
70class MemoryAllocation { 70class MemoryAllocation {
71public: 71public:
72 explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties, 72 explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
73 u64 allocation_size_, u32 type) 73 VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
74 : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, 74 : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
75 shifted_memory_type{1U << type} {} 75 property_flags{properties}, shifted_memory_type{1U << type} {}
76 76
77#if defined(_WIN32) || defined(__unix__) 77#if defined(_WIN32) || defined(__unix__)
78 ~MemoryAllocation() { 78 ~MemoryAllocation() {
@@ -106,6 +106,10 @@ public:
106 const auto it = std::ranges::find(commits, begin, &Range::begin); 106 const auto it = std::ranges::find(commits, begin, &Range::begin);
107 ASSERT_MSG(it != commits.end(), "Invalid commit"); 107 ASSERT_MSG(it != commits.end(), "Invalid commit");
108 commits.erase(it); 108 commits.erase(it);
109 if (commits.empty()) {
110 // Do not call any code involving 'this' after this call, the object will be destroyed
111 allocator->ReleaseMemory(this);
112 }
109 } 113 }
110 114
111 [[nodiscard]] std::span<u8> Map() { 115 [[nodiscard]] std::span<u8> Map() {
@@ -171,6 +175,7 @@ private:
171 return candidate; 175 return candidate;
172 } 176 }
173 177
 178 MemoryAllocator* const allocator; ///< Parent memory allocator.
174 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. 179 const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
175 const u64 allocation_size; ///< Size of this allocation. 180 const u64 allocation_size; ///< Size of this allocation.
176 const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. 181 const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
@@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask,
275 return false; 280 return false;
276 } 281 }
277 } 282 }
278 allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type)); 283 allocations.push_back(
284 std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
279 return true; 285 return true;
280} 286}
281 287
288void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
289 const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
290 ASSERT(it != allocations.end());
291 allocations.erase(it);
292}
293
282std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, 294std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
283 VkMemoryPropertyFlags flags) { 295 VkMemoryPropertyFlags flags) {
284 for (auto& allocation : allocations) { 296 for (auto& allocation : allocations) {
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index db12d02f4..b61e931e0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -69,6 +69,8 @@ private:
69/// Memory allocator container. 69/// Memory allocator container.
70/// Allocates and releases memory allocations on demand. 70/// Allocates and releases memory allocations on demand.
71class MemoryAllocator { 71class MemoryAllocator {
72 friend MemoryAllocation;
73
72public: 74public:
73 /** 75 /**
74 * Construct memory allocator 76 * Construct memory allocator
@@ -104,6 +106,9 @@ private:
104 /// Tries to allocate a chunk of memory. 106 /// Tries to allocate a chunk of memory.
105 bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); 107 bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
106 108
109 /// Releases a chunk of memory.
110 void ReleaseMemory(MemoryAllocation* alloc);
111
107 /// Tries to allocate a memory commit. 112 /// Tries to allocate a memory commit.
108 std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements, 113 std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
109 VkMemoryPropertyFlags flags); 114 VkMemoryPropertyFlags flags);
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index a1e726dc7..62bafc453 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -822,6 +822,7 @@ void Config::ReadRendererValues() {
822 QStringLiteral("use_asynchronous_shaders"), false); 822 QStringLiteral("use_asynchronous_shaders"), false);
823 ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), 823 ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"),
824 true); 824 true);
825 ReadSettingGlobal(Settings::values.use_caches_gc, QStringLiteral("use_caches_gc"), false);
825 ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0); 826 ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0);
826 ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0); 827 ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0);
827 ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0); 828 ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0);
@@ -1410,6 +1411,7 @@ void Config::SaveRendererValues() {
1410 Settings::values.use_asynchronous_shaders, false); 1411 Settings::values.use_asynchronous_shaders, false);
1411 WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, 1412 WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time,
1412 true); 1413 true);
1414 WriteSettingGlobal(QStringLiteral("use_caches_gc"), Settings::values.use_caches_gc, false);
1413 // Cast to double because Qt's written float values are not human-readable 1415 // Cast to double because Qt's written float values are not human-readable
1414 WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0); 1416 WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0);
1415 WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0); 1417 WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 1af87e849..8d13c9857 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -31,6 +31,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
31 ui->disable_fps_limit->setChecked(Settings::values.disable_fps_limit.GetValue()); 31 ui->disable_fps_limit->setChecked(Settings::values.disable_fps_limit.GetValue());
32 ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); 32 ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue());
33 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); 33 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
34 ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
34 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); 35 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
35 36
36 if (Settings::IsConfiguringGlobal()) { 37 if (Settings::IsConfiguringGlobal()) {
@@ -65,6 +66,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
65 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 66 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
66 ui->use_asynchronous_shaders, 67 ui->use_asynchronous_shaders,
67 use_asynchronous_shaders); 68 use_asynchronous_shaders);
69 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc,
70 use_caches_gc);
68 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, 71 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
69 ui->use_fast_gpu_time, use_fast_gpu_time); 72 ui->use_fast_gpu_time, use_fast_gpu_time);
70 73
@@ -105,6 +108,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
105 ui->use_asynchronous_shaders->setEnabled( 108 ui->use_asynchronous_shaders->setEnabled(
106 Settings::values.use_asynchronous_shaders.UsingGlobal()); 109 Settings::values.use_asynchronous_shaders.UsingGlobal());
107 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 110 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
111 ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());
108 ui->anisotropic_filtering_combobox->setEnabled( 112 ui->anisotropic_filtering_combobox->setEnabled(
109 Settings::values.max_anisotropy.UsingGlobal()); 113 Settings::values.max_anisotropy.UsingGlobal());
110 114
@@ -121,6 +125,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
121 use_asynchronous_shaders); 125 use_asynchronous_shaders);
122 ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, 126 ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
123 Settings::values.use_fast_gpu_time, use_fast_gpu_time); 127 Settings::values.use_fast_gpu_time, use_fast_gpu_time);
128 ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc,
129 use_caches_gc);
124 ConfigurationShared::SetColoredComboBox( 130 ConfigurationShared::SetColoredComboBox(
125 ui->gpu_accuracy, ui->label_gpu_accuracy, 131 ui->gpu_accuracy, ui->label_gpu_accuracy,
126 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); 132 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index c19c34851..6ac5f20ec 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -39,4 +39,5 @@ private:
39 ConfigurationShared::CheckState use_assembly_shaders; 39 ConfigurationShared::CheckState use_assembly_shaders;
40 ConfigurationShared::CheckState use_asynchronous_shaders; 40 ConfigurationShared::CheckState use_asynchronous_shaders;
41 ConfigurationShared::CheckState use_fast_gpu_time; 41 ConfigurationShared::CheckState use_fast_gpu_time;
42 ConfigurationShared::CheckState use_caches_gc;
42}; 43};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 824cb2fb2..18c43629e 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -122,6 +122,16 @@
122 </widget> 122 </widget>
123 </item> 123 </item>
124 <item> 124 <item>
125 <widget class="QCheckBox" name="use_caches_gc">
126 <property name="toolTip">
127 <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
128 </property>
129 <property name="text">
130 <string>Enable GPU cache garbage collection (experimental)</string>
131 </property>
132 </widget>
133 </item>
134 <item>
125 <widget class="QWidget" name="af_layout" native="true"> 135 <widget class="QWidget" name="af_layout" native="true">
126 <layout class="QHBoxLayout" name="horizontalLayout_1"> 136 <layout class="QHBoxLayout" name="horizontalLayout_1">
127 <property name="leftMargin"> 137 <property name="leftMargin">
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index c960ccf89..cc9850aad 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -227,6 +227,10 @@ use_asynchronous_gpu_emulation =
227# 0: Off, 1 (default): On 227# 0: Off, 1 (default): On
228use_vsync = 228use_vsync =
229 229
230# Whether to use garbage collection or not for GPU caches.
231# 0 (default): Off, 1: On
232use_caches_gc =
233
230# The clear color for the renderer. What shows up on the sides of the bottom screen. 234# The clear color for the renderer. What shows up on the sides of the bottom screen.
231# Must be in range of 0.0-1.0. Defaults to 1.0 for all. 235# Must be in range of 0.0-1.0. Defaults to 1.0 for all.
232bg_red = 236bg_red =