summary | refs | log | tree | commit | diff
path: root/src/video_core/buffer_cache
diff options
context:
space:
mode:
author: Fernando Sahmkow, 2021-08-15 15:35:53 +0200
committer: Fernando Sahmkow, 2021-08-28 17:54:12 +0200
commit: d540d284b5711f044678191bbab858de626103a9 (patch)
tree: 42839b218c848973c1886c7b288d2708821130a5 /src/video_core/buffer_cache
parent: Merge pull request #6929 from yuzu-emu/revert-6870-trace-back-stack-back-stac... (diff)
download: yuzu-d540d284b5711f044678191bbab858de626103a9.tar.gz
yuzu-d540d284b5711f044678191bbab858de626103a9.tar.xz
yuzu-d540d284b5711f044678191bbab858de626103a9.zip
VideoCore: Rework Garbage Collection.
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- src/video_core/buffer_cache/buffer_base.h 13
-rw-r--r-- src/video_core/buffer_cache/buffer_cache.h 61
2 files changed, 35 insertions, 39 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index c3318095c..4b696a60f 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -261,16 +261,6 @@ public:
261 stream_score += score; 261 stream_score += score;
262 } 262 }
263 263
264 /// Sets the new frame tick
265 void SetFrameTick(u64 new_frame_tick) noexcept {
266 frame_tick = new_frame_tick;
267 }
268
269 /// Returns the new frame tick
270 [[nodiscard]] u64 FrameTick() const noexcept {
271 return frame_tick;
272 }
273
274 /// Returns the likeliness of this being a stream buffer 264 /// Returns the likeliness of this being a stream buffer
275 [[nodiscard]] int StreamScore() const noexcept { 265 [[nodiscard]] int StreamScore() const noexcept {
276 return stream_score; 266 return stream_score;
@@ -307,6 +297,8 @@ public:
307 return words.size_bytes; 297 return words.size_bytes;
308 } 298 }
309 299
300 size_t lru_id;
301
310private: 302private:
311 template <Type type> 303 template <Type type>
312 u64* Array() noexcept { 304 u64* Array() noexcept {
@@ -603,7 +595,6 @@ private:
603 RasterizerInterface* rasterizer = nullptr; 595 RasterizerInterface* rasterizer = nullptr;
604 VAddr cpu_addr = 0; 596 VAddr cpu_addr = 0;
605 Words words; 597 Words words;
606 u64 frame_tick = 0;
607 BufferFlagBits flags{}; 598 BufferFlagBits flags{};
608 int stream_score = 0; 599 int stream_score = 0;
609}; 600};
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 3b43554f9..a0217908a 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -20,6 +20,7 @@
20#include "common/common_types.h" 20#include "common/common_types.h"
21#include "common/div_ceil.h" 21#include "common/div_ceil.h"
22#include "common/literals.h" 22#include "common/literals.h"
23#include "common/lru_cache.h"
23#include "common/microprofile.h" 24#include "common/microprofile.h"
24#include "common/scope_exit.h" 25#include "common/scope_exit.h"
25#include "common/settings.h" 26#include "common/settings.h"
@@ -77,7 +78,7 @@ class BufferCache {
77 78
78 static constexpr BufferId NULL_BUFFER_ID{0}; 79 static constexpr BufferId NULL_BUFFER_ID{0};
79 80
80 static constexpr u64 EXPECTED_MEMORY = 512_MiB; 81 static constexpr u64 EXPECTED_MEMORY = 256_MiB;
81 static constexpr u64 CRITICAL_MEMORY = 1_GiB; 82 static constexpr u64 CRITICAL_MEMORY = 1_GiB;
82 83
83 using Maxwell = Tegra::Engines::Maxwell3D::Regs; 84 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
@@ -330,7 +331,7 @@ private:
330 template <bool insert> 331 template <bool insert>
331 void ChangeRegister(BufferId buffer_id); 332 void ChangeRegister(BufferId buffer_id);
332 333
333 void TouchBuffer(Buffer& buffer) const noexcept; 334 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
334 335
335 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 336 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
336 337
@@ -428,7 +429,11 @@ private:
428 size_t immediate_buffer_capacity = 0; 429 size_t immediate_buffer_capacity = 0;
429 std::unique_ptr<u8[]> immediate_buffer_alloc; 430 std::unique_ptr<u8[]> immediate_buffer_alloc;
430 431
431 typename SlotVector<Buffer>::Iterator deletion_iterator; 432 struct LRUItemParams {
433 using ObjectType = BufferId;
434 using TickType = u64;
435 };
436 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
432 u64 frame_tick = 0; 437 u64 frame_tick = 0;
433 u64 total_used_memory = 0; 438 u64 total_used_memory = 0;
434 439
@@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
445 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { 450 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
446 // Ensure the first slot is used for the null buffer 451 // Ensure the first slot is used for the null buffer
447 void(slot_buffers.insert(runtime, NullBufferParams{})); 452 void(slot_buffers.insert(runtime, NullBufferParams{}));
448 deletion_iterator = slot_buffers.end();
449 common_ranges.clear(); 453 common_ranges.clear();
450} 454}
451 455
@@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() {
454 const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; 458 const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
455 const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; 459 const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
456 int num_iterations = aggressive_gc ? 64 : 32; 460 int num_iterations = aggressive_gc ? 64 : 32;
457 for (; num_iterations > 0; --num_iterations) { 461 const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
458 if (deletion_iterator == slot_buffers.end()) { 462 if (num_iterations == 0) {
459 deletion_iterator = slot_buffers.begin(); 463 return true;
460 }
461 ++deletion_iterator;
462 if (deletion_iterator == slot_buffers.end()) {
463 break;
464 }
465 const auto [buffer_id, buffer] = *deletion_iterator;
466 if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
467 DownloadBufferMemory(*buffer);
468 DeleteBuffer(buffer_id);
469 } 464 }
470 } 465 --num_iterations;
466 auto& buffer = slot_buffers[buffer_id];
467 DownloadBufferMemory(buffer);
468 DeleteBuffer(buffer_id);
469 return false;
470 };
471 lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
471} 472}
472 473
473template <class P> 474template <class P>
@@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
954template <class P> 955template <class P>
955void BufferCache<P>::BindHostIndexBuffer() { 956void BufferCache<P>::BindHostIndexBuffer() {
956 Buffer& buffer = slot_buffers[index_buffer.buffer_id]; 957 Buffer& buffer = slot_buffers[index_buffer.buffer_id];
957 TouchBuffer(buffer); 958 TouchBuffer(buffer, index_buffer.buffer_id);
958 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 959 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
959 const u32 size = index_buffer.size; 960 const u32 size = index_buffer.size;
960 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); 961 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
975 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { 976 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
976 const Binding& binding = vertex_buffers[index]; 977 const Binding& binding = vertex_buffers[index];
977 Buffer& buffer = slot_buffers[binding.buffer_id]; 978 Buffer& buffer = slot_buffers[binding.buffer_id];
978 TouchBuffer(buffer); 979 TouchBuffer(buffer, binding.buffer_id);
979 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 980 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
980 if (!flags[Dirty::VertexBuffer0 + index]) { 981 if (!flags[Dirty::VertexBuffer0 + index]) {
981 continue; 982 continue;
@@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
1011 const VAddr cpu_addr = binding.cpu_addr; 1012 const VAddr cpu_addr = binding.cpu_addr;
1012 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); 1013 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
1013 Buffer& buffer = slot_buffers[binding.buffer_id]; 1014 Buffer& buffer = slot_buffers[binding.buffer_id];
1014 TouchBuffer(buffer); 1015 TouchBuffer(buffer, binding.buffer_id);
1015 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 1016 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
1016 size <= uniform_buffer_skip_cache_size && 1017 size <= uniform_buffer_skip_cache_size &&
1017 !buffer.IsRegionGpuModified(cpu_addr, size); 1018 !buffer.IsRegionGpuModified(cpu_addr, size);
@@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
1083 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { 1084 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
1084 const Binding& binding = storage_buffers[stage][index]; 1085 const Binding& binding = storage_buffers[stage][index];
1085 Buffer& buffer = slot_buffers[binding.buffer_id]; 1086 Buffer& buffer = slot_buffers[binding.buffer_id];
1086 TouchBuffer(buffer); 1087 TouchBuffer(buffer, binding.buffer_id);
1087 const u32 size = binding.size; 1088 const u32 size = binding.size;
1088 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1089 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1089 1090
@@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
1128 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { 1129 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
1129 const Binding& binding = transform_feedback_buffers[index]; 1130 const Binding& binding = transform_feedback_buffers[index];
1130 Buffer& buffer = slot_buffers[binding.buffer_id]; 1131 Buffer& buffer = slot_buffers[binding.buffer_id];
1131 TouchBuffer(buffer); 1132 TouchBuffer(buffer, binding.buffer_id);
1132 const u32 size = binding.size; 1133 const u32 size = binding.size;
1133 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1134 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1134 1135
@@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1148 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { 1149 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
1149 const Binding& binding = compute_uniform_buffers[index]; 1150 const Binding& binding = compute_uniform_buffers[index];
1150 Buffer& buffer = slot_buffers[binding.buffer_id]; 1151 Buffer& buffer = slot_buffers[binding.buffer_id];
1151 TouchBuffer(buffer); 1152 TouchBuffer(buffer, binding.buffer_id);
1152 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); 1153 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
1153 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1154 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1154 1155
@@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1168 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 1169 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
1169 const Binding& binding = compute_storage_buffers[index]; 1170 const Binding& binding = compute_storage_buffers[index];
1170 Buffer& buffer = slot_buffers[binding.buffer_id]; 1171 Buffer& buffer = slot_buffers[binding.buffer_id];
1171 TouchBuffer(buffer); 1172 TouchBuffer(buffer, binding.buffer_id);
1172 const u32 size = binding.size; 1173 const u32 size = binding.size;
1173 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1174 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1174 1175
@@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1513 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1514 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1514 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1515 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1515 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1516 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
1516 TouchBuffer(slot_buffers[new_buffer_id]);
1517 for (const BufferId overlap_id : overlap.ids) { 1517 for (const BufferId overlap_id : overlap.ids) {
1518 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1518 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1519 } 1519 }
1520 Register(new_buffer_id); 1520 Register(new_buffer_id);
1521 TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id);
1521 return new_buffer_id; 1522 return new_buffer_id;
1522} 1523}
1523 1524
@@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) {
1534template <class P> 1535template <class P>
1535template <bool insert> 1536template <bool insert>
1536void BufferCache<P>::ChangeRegister(BufferId buffer_id) { 1537void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1537 const Buffer& buffer = slot_buffers[buffer_id]; 1538 Buffer& buffer = slot_buffers[buffer_id];
1538 const auto size = buffer.SizeBytes(); 1539 const auto size = buffer.SizeBytes();
1539 if (insert) { 1540 if (insert) {
1540 total_used_memory += Common::AlignUp(size, 1024); 1541 total_used_memory += Common::AlignUp(size, 1024);
1542 buffer.lru_id = lru_cache.Insert(buffer_id, frame_tick);
1541 } else { 1543 } else {
1542 total_used_memory -= Common::AlignUp(size, 1024); 1544 total_used_memory -= Common::AlignUp(size, 1024);
1545 lru_cache.Free(buffer.lru_id);
1543 } 1546 }
1544 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1547 const VAddr cpu_addr_begin = buffer.CpuAddr();
1545 const VAddr cpu_addr_end = cpu_addr_begin + size; 1548 const VAddr cpu_addr_end = cpu_addr_begin + size;
@@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1555} 1558}
1556 1559
1557template <class P> 1560template <class P>
1558void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { 1561void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1559 buffer.SetFrameTick(frame_tick); 1562 if (buffer_id != NULL_BUFFER_ID) {
1563 lru_cache.Touch(buffer.lru_id, frame_tick);
1564 }
1560} 1565}
1561 1566
1562template <class P> 1567template <class P>