path: root/src
Diffstat (limited to 'src')
 -rw-r--r--  src/common/logging/backend.cpp                                      |   1
 -rw-r--r--  src/common/lru_cache.h                                              | 140
 -rw-r--r--  src/common/settings.cpp                                             |   2
 -rw-r--r--  src/common/settings.h                                               |   1
 -rw-r--r--  src/core/cpu_manager.cpp                                            |  25
 -rw-r--r--  src/core/cpu_manager.h                                              |   6
 -rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.cpp                     |  25
 -rw-r--r--  src/core/hle/service/nvflinger/buffer_queue.h                       |  11
 -rw-r--r--  src/core/hle/service/nvflinger/nvflinger.cpp                        |  15
 -rw-r--r--  src/core/hle/service/nvflinger/nvflinger.h                          |   3
 -rw-r--r--  src/core/hle/service/vi/display/vi_display.cpp                      |  17
 -rw-r--r--  src/core/hle/service/vi/display/vi_display.h                        |  13
 -rw-r--r--  src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp |   5
 -rw-r--r--  src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp | 127
 -rw-r--r--  src/shader_recompiler/frontend/maxwell/structured_control_flow.h   |   9
 -rw-r--r--  src/shader_recompiler/frontend/maxwell/translate_program.cpp       |   2
 -rw-r--r--  src/shader_recompiler/host_translate_info.h                         |   5
 -rw-r--r--  src/video_core/buffer_cache/buffer_base.h                           |  20
 -rw-r--r--  src/video_core/buffer_cache/buffer_cache.h                          |  61
 -rw-r--r--  src/video_core/engines/maxwell_3d.h                                 |   8
 -rw-r--r--  src/video_core/renderer_opengl/gl_device.h                          |   4
 -rw-r--r--  src/video_core/renderer_opengl/gl_shader_cache.cpp                  |   1
 -rw-r--r--  src/video_core/renderer_vulkan/vk_blit_screen.cpp                   |   4
 -rw-r--r--  src/video_core/renderer_vulkan/vk_pipeline_cache.cpp                |   2
 -rw-r--r--  src/video_core/texture_cache/image_base.h                           |   2
 -rw-r--r--  src/video_core/texture_cache/texture_cache.h                        |  92
 -rw-r--r--  src/video_core/texture_cache/texture_cache_base.h                   |   8
 -rw-r--r--  src/video_core/textures/decoders.cpp                                |   8
 -rw-r--r--  src/yuzu/configuration/config.cpp                                   |   2
 -rw-r--r--  src/yuzu/configuration/configure_graphics.ui                        |   2
 -rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.cpp              |   6
 -rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.h                |   1
 -rw-r--r--  src/yuzu/configuration/configure_graphics_advanced.ui               |  14
 -rw-r--r--  src/yuzu/game_list.cpp                                              |   8
 -rw-r--r--  src/yuzu_cmd/config.cpp                                             |   1
 35 files changed, 445 insertions(+), 206 deletions(-)
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index 949384fd3..e40d117d6 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -18,6 +18,7 @@
18#include "common/fs/fs_paths.h" 18#include "common/fs/fs_paths.h"
19#include "common/fs/path_util.h" 19#include "common/fs/path_util.h"
20#include "common/literals.h" 20#include "common/literals.h"
21#include "common/thread.h"
21 22
22#include "common/logging/backend.h" 23#include "common/logging/backend.h"
23#include "common/logging/log.h" 24#include "common/logging/log.h"
diff --git a/src/common/lru_cache.h b/src/common/lru_cache.h
new file mode 100644
index 000000000..365488ba5
--- /dev/null
+++ b/src/common/lru_cache.h
@@ -0,0 +1,140 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2+ or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <deque>
8#include <memory>
9#include <type_traits>
10
11#include "common/common_types.h"
12
13namespace Common {
14
15template <class Traits>
16class LeastRecentlyUsedCache {
17 using ObjectType = typename Traits::ObjectType;
18 using TickType = typename Traits::TickType;
19
20 struct Item {
21 ObjectType obj;
22 TickType tick;
23 Item* next{};
24 Item* prev{};
25 };
26
27public:
28 LeastRecentlyUsedCache() : first_item{}, last_item{} {}
29 ~LeastRecentlyUsedCache() = default;
30
31 size_t Insert(ObjectType obj, TickType tick) {
32 const auto new_id = Build();
33 auto& item = item_pool[new_id];
34 item.obj = obj;
35 item.tick = tick;
36 Attach(item);
37 return new_id;
38 }
39
40 void Touch(size_t id, TickType tick) {
41 auto& item = item_pool[id];
42 if (item.tick >= tick) {
43 return;
44 }
45 item.tick = tick;
46 if (&item == last_item) {
47 return;
48 }
49 Detach(item);
50 Attach(item);
51 }
52
53 void Free(size_t id) {
54 auto& item = item_pool[id];
55 Detach(item);
56 item.prev = nullptr;
57 item.next = nullptr;
58 free_items.push_back(id);
59 }
60
61 template <typename Func>
62 void ForEachItemBelow(TickType tick, Func&& func) {
63 static constexpr bool RETURNS_BOOL =
64 std::is_same_v<std::invoke_result<Func, ObjectType>, bool>;
65 Item* iterator = first_item;
66 while (iterator) {
67 if (static_cast<s64>(tick) - static_cast<s64>(iterator->tick) < 0) {
68 return;
69 }
70 Item* next = iterator->next;
71 if constexpr (RETURNS_BOOL) {
72 if (func(iterator->obj)) {
73 return;
74 }
75 } else {
76 func(iterator->obj);
77 }
78 iterator = next;
79 }
80 }
81
82private:
83 size_t Build() {
84 if (free_items.empty()) {
85 const size_t item_id = item_pool.size();
86 auto& item = item_pool.emplace_back();
87 item.next = nullptr;
88 item.prev = nullptr;
89 return item_id;
90 }
91 const size_t item_id = free_items.front();
92 free_items.pop_front();
93 auto& item = item_pool[item_id];
94 item.next = nullptr;
95 item.prev = nullptr;
96 return item_id;
97 }
98
99 void Attach(Item& item) {
100 if (!first_item) {
101 first_item = &item;
102 }
103 if (!last_item) {
104 last_item = &item;
105 } else {
106 item.prev = last_item;
107 last_item->next = &item;
108 item.next = nullptr;
109 last_item = &item;
110 }
111 }
112
113 void Detach(Item& item) {
114 if (item.prev) {
115 item.prev->next = item.next;
116 }
117 if (item.next) {
118 item.next->prev = item.prev;
119 }
120 if (&item == first_item) {
121 first_item = item.next;
122 if (first_item) {
123 first_item->prev = nullptr;
124 }
125 }
126 if (&item == last_item) {
127 last_item = item.prev;
128 if (last_item) {
129 last_item->next = nullptr;
130 }
131 }
132 }
133
134 std::deque<Item> item_pool;
135 std::deque<size_t> free_items;
136 Item* first_item{};
137 Item* last_item{};
138};
139
140} // namespace Common
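The new Common::LeastRecentlyUsedCache keeps an intrusive doubly linked list over a deque-backed item pool: Insert() appends an object with its tick and returns a stable pool index, Touch() moves an entry to the most-recently-used end when given a newer tick, Free() returns the slot to a free list, and ForEachItemBelow() walks entries oldest-first and stops at the first entry whose tick is newer than the cutoff. Returning true from the callback is intended to stop the walk early, though the RETURNS_BOOL check above compares against std::invoke_result rather than std::invoke_result_t, so that branch is not taken as written. A minimal usage sketch follows; the CacheTraits struct and the int payload are illustrative, not part of the patch.

#include "common/lru_cache.h"

// Traits supply the two types the template expects.
struct CacheTraits {
    using ObjectType = int;  // any cheap-to-copy handle, e.g. a slot ID
    using TickType = u64;
};

void Example() {
    Common::LeastRecentlyUsedCache<CacheTraits> cache;
    u64 tick = 0;

    const auto id_a = cache.Insert(100, tick);    // oldest entry
    const auto id_b = cache.Insert(200, ++tick);
    cache.Touch(id_a, ++tick);                    // object 100 is now the most recently used

    // Visit every object last touched at or before the cutoff, oldest first.
    cache.ForEachItemBelow(tick - 1, [](int obj) {
        // evict obj here; return true to stop early once a budget is exhausted
        return false;
    });

    cache.Free(id_b);                             // the slot is recycled by a later Insert
}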
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 996315999..fd3b639cd 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -59,7 +59,6 @@ void LogSettings() {
59 log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); 59 log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
60 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); 60 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
61 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); 61 log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
62 log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
63 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); 62 log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
64 log_setting("Audio_OutputEngine", values.sink_id.GetValue()); 63 log_setting("Audio_OutputEngine", values.sink_id.GetValue());
65 log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); 64 log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
@@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) {
143 values.shader_backend.SetGlobal(true); 142 values.shader_backend.SetGlobal(true);
144 values.use_asynchronous_shaders.SetGlobal(true); 143 values.use_asynchronous_shaders.SetGlobal(true);
145 values.use_fast_gpu_time.SetGlobal(true); 144 values.use_fast_gpu_time.SetGlobal(true);
146 values.use_caches_gc.SetGlobal(true);
147 values.bg_red.SetGlobal(true); 145 values.bg_red.SetGlobal(true);
148 values.bg_green.SetGlobal(true); 146 values.bg_green.SetGlobal(true);
149 values.bg_blue.SetGlobal(true); 147 values.bg_blue.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 20769d310..ec4d381e8 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -475,7 +475,6 @@ struct Values {
475 ShaderBackend::SPIRV, "shader_backend"}; 475 ShaderBackend::SPIRV, "shader_backend"};
476 Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; 476 Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
477 Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; 477 Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
478 Setting<bool> use_caches_gc{false, "use_caches_gc"};
479 478
480 Setting<u8> bg_red{0, "bg_red"}; 479 Setting<u8> bg_red{0, "bg_red"};
481 Setting<u8> bg_green{0, "bg_green"}; 480 Setting<u8> bg_green{0, "bg_green"};
diff --git a/src/core/cpu_manager.cpp b/src/core/cpu_manager.cpp
index 7e195346b..77efcabf0 100644
--- a/src/core/cpu_manager.cpp
+++ b/src/core/cpu_manager.cpp
@@ -21,34 +21,25 @@ namespace Core {
21CpuManager::CpuManager(System& system_) : system{system_} {} 21CpuManager::CpuManager(System& system_) : system{system_} {}
22CpuManager::~CpuManager() = default; 22CpuManager::~CpuManager() = default;
23 23
24void CpuManager::ThreadStart(CpuManager& cpu_manager, std::size_t core) { 24void CpuManager::ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager,
25 cpu_manager.RunThread(core); 25 std::size_t core) {
26 cpu_manager.RunThread(stop_token, core);
26} 27}
27 28
28void CpuManager::Initialize() { 29void CpuManager::Initialize() {
29 running_mode = true; 30 running_mode = true;
30 if (is_multicore) { 31 if (is_multicore) {
31 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) { 32 for (std::size_t core = 0; core < Core::Hardware::NUM_CPU_CORES; core++) {
32 core_data[core].host_thread = 33 core_data[core].host_thread = std::jthread(ThreadStart, std::ref(*this), core);
33 std::make_unique<std::thread>(ThreadStart, std::ref(*this), core);
34 } 34 }
35 } else { 35 } else {
36 core_data[0].host_thread = std::make_unique<std::thread>(ThreadStart, std::ref(*this), 0); 36 core_data[0].host_thread = std::jthread(ThreadStart, std::ref(*this), 0);
37 } 37 }
38} 38}
39 39
40void CpuManager::Shutdown() { 40void CpuManager::Shutdown() {
41 running_mode = false; 41 running_mode = false;
42 Pause(false); 42 Pause(false);
43 if (is_multicore) {
44 for (auto& data : core_data) {
45 data.host_thread->join();
46 data.host_thread.reset();
47 }
48 } else {
49 core_data[0].host_thread->join();
50 core_data[0].host_thread.reset();
51 }
52} 43}
53 44
54std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() { 45std::function<void(void*)> CpuManager::GetGuestThreadStartFunc() {
@@ -317,7 +308,7 @@ void CpuManager::Pause(bool paused) {
317 } 308 }
318} 309}
319 310
320void CpuManager::RunThread(std::size_t core) { 311void CpuManager::RunThread(std::stop_token stop_token, std::size_t core) {
321 /// Initialization 312 /// Initialization
322 system.RegisterCoreThread(core); 313 system.RegisterCoreThread(core);
323 std::string name; 314 std::string name;
@@ -361,6 +352,10 @@ void CpuManager::RunThread(std::size_t core) {
361 return; 352 return;
362 } 353 }
363 354
355 if (stop_token.stop_requested()) {
356 break;
357 }
358
364 auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread(); 359 auto current_thread = system.Kernel().CurrentScheduler()->GetCurrentThread();
365 data.is_running = true; 360 data.is_running = true;
366 Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext()); 361 Common::Fiber::YieldTo(data.host_context, *current_thread->GetHostContext());
diff --git a/src/core/cpu_manager.h b/src/core/cpu_manager.h
index 140263b09..9d92d4af0 100644
--- a/src/core/cpu_manager.h
+++ b/src/core/cpu_manager.h
@@ -78,9 +78,9 @@ private:
78 void SingleCoreRunSuspendThread(); 78 void SingleCoreRunSuspendThread();
79 void SingleCorePause(bool paused); 79 void SingleCorePause(bool paused);
80 80
81 static void ThreadStart(CpuManager& cpu_manager, std::size_t core); 81 static void ThreadStart(std::stop_token stop_token, CpuManager& cpu_manager, std::size_t core);
82 82
83 void RunThread(std::size_t core); 83 void RunThread(std::stop_token stop_token, std::size_t core);
84 84
85 struct CoreData { 85 struct CoreData {
86 std::shared_ptr<Common::Fiber> host_context; 86 std::shared_ptr<Common::Fiber> host_context;
@@ -89,7 +89,7 @@ private:
89 std::atomic<bool> is_running; 89 std::atomic<bool> is_running;
90 std::atomic<bool> is_paused; 90 std::atomic<bool> is_paused;
91 std::atomic<bool> initialized; 91 std::atomic<bool> initialized;
92 std::unique_ptr<std::thread> host_thread; 92 std::jthread host_thread;
93 }; 93 };
94 94
95 std::atomic<bool> running_mode{}; 95 std::atomic<bool> running_mode{};
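The core threads now use std::jthread instead of std::unique_ptr<std::thread>: jthread passes a std::stop_token as the first argument of the callable and calls request_stop() plus join() from its destructor, which is why Shutdown() no longer joins and resets each thread by hand and why RunThread() can poll stop_token.stop_requested(). A condensed sketch of the pattern, with a hypothetical Worker standing in for CpuManager:

#include <cstddef>
#include <functional>
#include <stop_token>
#include <thread>

struct Worker {
    // jthread detects that the callable accepts a stop_token and supplies one automatically.
    static void ThreadStart(std::stop_token stop_token, Worker& worker, std::size_t core) {
        worker.Run(stop_token, core);
    }

    void Run(std::stop_token stop_token, std::size_t core) {
        while (!stop_token.stop_requested()) {
            // one slice of work for this core...
        }
    }

    std::jthread host_thread;
};

int main() {
    Worker worker;
    worker.host_thread = std::jthread(Worker::ThreadStart, std::ref(worker), std::size_t{0});
    // Destroying (or reassigning) host_thread requests stop and joins, mirroring the new Shutdown().
}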
diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp
index 59ddf6298..b4c3a6099 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.cpp
+++ b/src/core/hle/service/nvflinger/buffer_queue.cpp
@@ -9,17 +9,20 @@
9#include "core/core.h" 9#include "core/core.h"
10#include "core/hle/kernel/k_writable_event.h" 10#include "core/hle/kernel/k_writable_event.h"
11#include "core/hle/kernel/kernel.h" 11#include "core/hle/kernel/kernel.h"
12#include "core/hle/service/kernel_helpers.h"
12#include "core/hle/service/nvflinger/buffer_queue.h" 13#include "core/hle/service/nvflinger/buffer_queue.h"
13 14
14namespace Service::NVFlinger { 15namespace Service::NVFlinger {
15 16
16BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_) 17BufferQueue::BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
17 : id(id_), layer_id(layer_id_), buffer_wait_event{kernel} { 18 KernelHelpers::ServiceContext& service_context_)
18 Kernel::KAutoObject::Create(std::addressof(buffer_wait_event)); 19 : id(id_), layer_id(layer_id_), service_context{service_context_} {
19 buffer_wait_event.Initialize("BufferQueue:WaitEvent"); 20 buffer_wait_event = service_context.CreateEvent("BufferQueue:WaitEvent");
20} 21}
21 22
22BufferQueue::~BufferQueue() = default; 23BufferQueue::~BufferQueue() {
24 service_context.CloseEvent(buffer_wait_event);
25}
23 26
24void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) { 27void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer) {
25 ASSERT(slot < buffer_slots); 28 ASSERT(slot < buffer_slots);
@@ -41,7 +44,7 @@ void BufferQueue::SetPreallocatedBuffer(u32 slot, const IGBPBuffer& igbp_buffer)
41 .multi_fence = {}, 44 .multi_fence = {},
42 }; 45 };
43 46
44 buffer_wait_event.GetWritableEvent().Signal(); 47 buffer_wait_event->GetWritableEvent().Signal();
45} 48}
46 49
47std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width, 50std::optional<std::pair<u32, Service::Nvidia::MultiFence*>> BufferQueue::DequeueBuffer(u32 width,
@@ -119,7 +122,7 @@ void BufferQueue::CancelBuffer(u32 slot, const Service::Nvidia::MultiFence& mult
119 } 122 }
120 free_buffers_condition.notify_one(); 123 free_buffers_condition.notify_one();
121 124
122 buffer_wait_event.GetWritableEvent().Signal(); 125 buffer_wait_event->GetWritableEvent().Signal();
123} 126}
124 127
125std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() { 128std::optional<std::reference_wrapper<const BufferQueue::Buffer>> BufferQueue::AcquireBuffer() {
@@ -154,7 +157,7 @@ void BufferQueue::ReleaseBuffer(u32 slot) {
154 } 157 }
155 free_buffers_condition.notify_one(); 158 free_buffers_condition.notify_one();
156 159
157 buffer_wait_event.GetWritableEvent().Signal(); 160 buffer_wait_event->GetWritableEvent().Signal();
158} 161}
159 162
160void BufferQueue::Connect() { 163void BufferQueue::Connect() {
@@ -169,7 +172,7 @@ void BufferQueue::Disconnect() {
169 std::unique_lock lock{queue_sequence_mutex}; 172 std::unique_lock lock{queue_sequence_mutex};
170 queue_sequence.clear(); 173 queue_sequence.clear();
171 } 174 }
172 buffer_wait_event.GetWritableEvent().Signal(); 175 buffer_wait_event->GetWritableEvent().Signal();
173 is_connect = false; 176 is_connect = false;
174 free_buffers_condition.notify_one(); 177 free_buffers_condition.notify_one();
175} 178}
@@ -189,11 +192,11 @@ u32 BufferQueue::Query(QueryType type) {
189} 192}
190 193
191Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() { 194Kernel::KWritableEvent& BufferQueue::GetWritableBufferWaitEvent() {
192 return buffer_wait_event.GetWritableEvent(); 195 return buffer_wait_event->GetWritableEvent();
193} 196}
194 197
195Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() { 198Kernel::KReadableEvent& BufferQueue::GetBufferWaitEvent() {
196 return buffer_wait_event.GetReadableEvent(); 199 return buffer_wait_event->GetReadableEvent();
197} 200}
198 201
199} // namespace Service::NVFlinger 202} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h
index 61e337ac5..759247eb0 100644
--- a/src/core/hle/service/nvflinger/buffer_queue.h
+++ b/src/core/hle/service/nvflinger/buffer_queue.h
@@ -24,6 +24,10 @@ class KReadableEvent;
24class KWritableEvent; 24class KWritableEvent;
25} // namespace Kernel 25} // namespace Kernel
26 26
27namespace Service::KernelHelpers {
28class ServiceContext;
29} // namespace Service::KernelHelpers
30
27namespace Service::NVFlinger { 31namespace Service::NVFlinger {
28 32
29constexpr u32 buffer_slots = 0x40; 33constexpr u32 buffer_slots = 0x40;
@@ -54,7 +58,8 @@ public:
54 NativeWindowFormat = 2, 58 NativeWindowFormat = 2,
55 }; 59 };
56 60
57 explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_); 61 explicit BufferQueue(Kernel::KernelCore& kernel, u32 id_, u64 layer_id_,
62 KernelHelpers::ServiceContext& service_context_);
58 ~BufferQueue(); 63 ~BufferQueue();
59 64
60 enum class BufferTransformFlags : u32 { 65 enum class BufferTransformFlags : u32 {
@@ -130,12 +135,14 @@ private:
130 std::list<u32> free_buffers; 135 std::list<u32> free_buffers;
131 std::array<Buffer, buffer_slots> buffers; 136 std::array<Buffer, buffer_slots> buffers;
132 std::list<u32> queue_sequence; 137 std::list<u32> queue_sequence;
133 Kernel::KEvent buffer_wait_event; 138 Kernel::KEvent* buffer_wait_event{};
134 139
135 std::mutex free_buffers_mutex; 140 std::mutex free_buffers_mutex;
136 std::condition_variable free_buffers_condition; 141 std::condition_variable free_buffers_condition;
137 142
138 std::mutex queue_sequence_mutex; 143 std::mutex queue_sequence_mutex;
144
145 KernelHelpers::ServiceContext& service_context;
139}; 146};
140 147
141} // namespace Service::NVFlinger 148} // namespace Service::NVFlinger
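BufferQueue no longer embeds a Kernel::KEvent by value and initializes it through KAutoObject::Create; it borrows the owning service's KernelHelpers::ServiceContext, creates the event in the constructor and closes it in the destructor, holding only a non-owning pointer in between. The same pattern is applied to VI::Display further down. A condensed sketch of that lifecycle, with a hypothetical MyService in place of BufferQueue (the ServiceContext and KEvent calls are the ones used by this change):

// Requires "core/hle/service/kernel_helpers.h" and "core/hle/kernel/k_event.h".
class MyService {
public:
    explicit MyService(Service::KernelHelpers::ServiceContext& service_context_)
        : service_context{service_context_} {
        // The ServiceContext owns kernel-object creation for the service.
        wait_event = service_context.CreateEvent("MyService:WaitEvent");
    }

    ~MyService() {
        // Events created through the context must be returned to it.
        service_context.CloseEvent(wait_event);
    }

    void Notify() {
        wait_event->GetWritableEvent().Signal();
    }

    Kernel::KReadableEvent& GetWaitEvent() {
        return wait_event->GetReadableEvent();
    }

private:
    Kernel::KEvent* wait_event{};
    Service::KernelHelpers::ServiceContext& service_context;
};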
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 941748970..00bff8caf 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -61,12 +61,13 @@ void NVFlinger::SplitVSync() {
61 } 61 }
62} 62}
63 63
64NVFlinger::NVFlinger(Core::System& system_) : system(system_) { 64NVFlinger::NVFlinger(Core::System& system_)
65 displays.emplace_back(0, "Default", system); 65 : system(system_), service_context(system_, "nvflinger") {
66 displays.emplace_back(1, "External", system); 66 displays.emplace_back(0, "Default", service_context, system);
67 displays.emplace_back(2, "Edid", system); 67 displays.emplace_back(1, "External", service_context, system);
68 displays.emplace_back(3, "Internal", system); 68 displays.emplace_back(2, "Edid", service_context, system);
69 displays.emplace_back(4, "Null", system); 69 displays.emplace_back(3, "Internal", service_context, system);
70 displays.emplace_back(4, "Null", service_context, system);
70 guard = std::make_shared<std::mutex>(); 71 guard = std::make_shared<std::mutex>();
71 72
72 // Schedule the screen composition events 73 // Schedule the screen composition events
@@ -146,7 +147,7 @@ std::optional<u64> NVFlinger::CreateLayer(u64 display_id) {
146void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) { 147void NVFlinger::CreateLayerAtId(VI::Display& display, u64 layer_id) {
147 const u32 buffer_queue_id = next_buffer_queue_id++; 148 const u32 buffer_queue_id = next_buffer_queue_id++;
148 buffer_queues.emplace_back( 149 buffer_queues.emplace_back(
149 std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id)); 150 std::make_unique<BufferQueue>(system.Kernel(), buffer_queue_id, layer_id, service_context));
150 display.CreateLayer(layer_id, *buffer_queues.back()); 151 display.CreateLayer(layer_id, *buffer_queues.back());
151} 152}
152 153
diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h
index d80fd07ef..6d84cafb4 100644
--- a/src/core/hle/service/nvflinger/nvflinger.h
+++ b/src/core/hle/service/nvflinger/nvflinger.h
@@ -15,6 +15,7 @@
15#include <vector> 15#include <vector>
16 16
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "core/hle/service/kernel_helpers.h"
18 19
19namespace Common { 20namespace Common {
20class Event; 21class Event;
@@ -135,6 +136,8 @@ private:
135 std::unique_ptr<std::thread> vsync_thread; 136 std::unique_ptr<std::thread> vsync_thread;
136 std::unique_ptr<Common::Event> wait_event; 137 std::unique_ptr<Common::Event> wait_event;
137 std::atomic<bool> is_running{}; 138 std::atomic<bool> is_running{};
139
140 KernelHelpers::ServiceContext service_context;
138}; 141};
139 142
140} // namespace Service::NVFlinger 143} // namespace Service::NVFlinger
diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp
index 0dd342dbf..b7705c02a 100644
--- a/src/core/hle/service/vi/display/vi_display.cpp
+++ b/src/core/hle/service/vi/display/vi_display.cpp
@@ -12,18 +12,21 @@
12#include "core/hle/kernel/k_event.h" 12#include "core/hle/kernel/k_event.h"
13#include "core/hle/kernel/k_readable_event.h" 13#include "core/hle/kernel/k_readable_event.h"
14#include "core/hle/kernel/k_writable_event.h" 14#include "core/hle/kernel/k_writable_event.h"
15#include "core/hle/service/kernel_helpers.h"
15#include "core/hle/service/vi/display/vi_display.h" 16#include "core/hle/service/vi/display/vi_display.h"
16#include "core/hle/service/vi/layer/vi_layer.h" 17#include "core/hle/service/vi/layer/vi_layer.h"
17 18
18namespace Service::VI { 19namespace Service::VI {
19 20
20Display::Display(u64 id, std::string name_, Core::System& system) 21Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
21 : display_id{id}, name{std::move(name_)}, vsync_event{system.Kernel()} { 22 Core::System& system_)
22 Kernel::KAutoObject::Create(std::addressof(vsync_event)); 23 : display_id{id}, name{std::move(name_)}, service_context{service_context_} {
23 vsync_event.Initialize(fmt::format("Display VSync Event {}", id)); 24 vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id));
24} 25}
25 26
26Display::~Display() = default; 27Display::~Display() {
28 service_context.CloseEvent(vsync_event);
29}
27 30
28Layer& Display::GetLayer(std::size_t index) { 31Layer& Display::GetLayer(std::size_t index) {
29 return *layers.at(index); 32 return *layers.at(index);
@@ -34,11 +37,11 @@ const Layer& Display::GetLayer(std::size_t index) const {
34} 37}
35 38
36Kernel::KReadableEvent& Display::GetVSyncEvent() { 39Kernel::KReadableEvent& Display::GetVSyncEvent() {
37 return vsync_event.GetReadableEvent(); 40 return vsync_event->GetReadableEvent();
38} 41}
39 42
40void Display::SignalVSyncEvent() { 43void Display::SignalVSyncEvent() {
41 vsync_event.GetWritableEvent().Signal(); 44 vsync_event->GetWritableEvent().Signal();
42} 45}
43 46
44void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) { 47void Display::CreateLayer(u64 layer_id, NVFlinger::BufferQueue& buffer_queue) {
diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h
index 166f2a4cc..0979fc421 100644
--- a/src/core/hle/service/vi/display/vi_display.h
+++ b/src/core/hle/service/vi/display/vi_display.h
@@ -18,6 +18,9 @@ class KEvent;
18namespace Service::NVFlinger { 18namespace Service::NVFlinger {
19class BufferQueue; 19class BufferQueue;
20} 20}
21namespace Service::KernelHelpers {
22class ServiceContext;
23} // namespace Service::KernelHelpers
21 24
22namespace Service::VI { 25namespace Service::VI {
23 26
@@ -31,10 +34,13 @@ class Display {
31public: 34public:
32 /// Constructs a display with a given unique ID and name. 35 /// Constructs a display with a given unique ID and name.
33 /// 36 ///
34 /// @param id The unique ID for this display. 37 /// @param id The unique ID for this display.
38 /// @param service_context_ The ServiceContext for the owning service.
35 /// @param name_ The name for this display. 39 /// @param name_ The name for this display.
40 /// @param system_ The global system instance.
36 /// 41 ///
37 Display(u64 id, std::string name_, Core::System& system); 42 Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_,
43 Core::System& system_);
38 ~Display(); 44 ~Display();
39 45
40 /// Gets the unique ID assigned to this display. 46 /// Gets the unique ID assigned to this display.
@@ -98,9 +104,10 @@ public:
98private: 104private:
99 u64 display_id; 105 u64 display_id;
100 std::string name; 106 std::string name;
107 KernelHelpers::ServiceContext& service_context;
101 108
102 std::vector<std::shared_ptr<Layer>> layers; 109 std::vector<std::shared_ptr<Layer>> layers;
103 Kernel::KEvent vsync_event; 110 Kernel::KEvent* vsync_event{};
104}; 111};
105 112
106} // namespace Service::VI 113} // namespace Service::VI
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index 67df46499..68f360b3c 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -380,8 +380,9 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
380 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); 380 return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base));
381 } 381 }
382 case IR::Attribute::FrontFace: 382 case IR::Attribute::FrontFace:
383 return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1, ctx.front_face), 383 return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face),
384 ctx.Const(std::numeric_limits<u32>::max()), ctx.u32_zero_value); 384 ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())),
385 ctx.f32_zero_value);
385 case IR::Attribute::PointSpriteS: 386 case IR::Attribute::PointSpriteS:
386 return ctx.OpLoad(ctx.F32[1], 387 return ctx.OpLoad(ctx.F32[1],
387 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value)); 388 ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, ctx.u32_zero_value));
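The FrontFace attribute is read through the same path as other generic attributes and is therefore expected to produce a 32-bit float, so the select now yields an F32 value: true maps to the float whose bit pattern is all ones and false maps to 0.0f, instead of the previous U32 select whose result type did not match. A host-side equivalent of the fixed select, for illustration only:

#include <bit>
#include <cstdint>
#include <limits>

float FrontFaceAttribute(bool front_face) {
    // Same bit pattern the shader emits: bitcast(0xFFFFFFFF) when front-facing, 0.0f otherwise.
    const float all_ones = std::bit_cast<float>(std::numeric_limits<std::uint32_t>::max());
    return front_face ? all_ones : 0.0f;
}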
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 8b3e0a15c..69eeaa3e6 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
20#include "shader_recompiler/frontend/maxwell/decode.h" 20#include "shader_recompiler/frontend/maxwell/decode.h"
21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h" 21#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
22#include "shader_recompiler/frontend/maxwell/translate/translate.h" 22#include "shader_recompiler/frontend/maxwell/translate/translate.h"
23#include "shader_recompiler/host_translate_info.h"
23#include "shader_recompiler/object_pool.h" 24#include "shader_recompiler/object_pool.h"
24 25
25namespace Shader::Maxwell { 26namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
652public: 653public:
653 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, 654 TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
654 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, 655 ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
655 IR::AbstractSyntaxList& syntax_list_) 656 IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
656 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, 657 : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
657 syntax_list{syntax_list_} { 658 syntax_list{syntax_list_} {
658 Visit(root_stmt, nullptr, nullptr); 659 Visit(root_stmt, nullptr, nullptr);
@@ -660,6 +661,9 @@ public:
660 IR::Block& first_block{*syntax_list.front().data.block}; 661 IR::Block& first_block{*syntax_list.front().data.block};
661 IR::IREmitter ir(first_block, first_block.begin()); 662 IR::IREmitter ir(first_block, first_block.begin());
662 ir.Prologue(); 663 ir.Prologue();
664 if (uses_demote_to_helper && host_info.needs_demote_reorder) {
665 DemoteCombinationPass();
666 }
663 } 667 }
664 668
665private: 669private:
@@ -809,7 +813,14 @@ private:
809 } 813 }
810 case StatementType::Return: { 814 case StatementType::Return: {
811 ensure_block(); 815 ensure_block();
812 IR::IREmitter{*current_block}.Epilogue(); 816 IR::Block* return_block{block_pool.Create(inst_pool)};
817 IR::IREmitter{*return_block}.Epilogue();
818 current_block->AddBranch(return_block);
819
820 auto& merge{syntax_list.emplace_back()};
821 merge.type = IR::AbstractSyntaxNode::Type::Block;
822 merge.data.block = return_block;
823
813 current_block = nullptr; 824 current_block = nullptr;
814 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; 825 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
815 break; 826 break;
@@ -824,6 +835,7 @@ private:
824 auto& merge{syntax_list.emplace_back()}; 835 auto& merge{syntax_list.emplace_back()};
825 merge.type = IR::AbstractSyntaxNode::Type::Block; 836 merge.type = IR::AbstractSyntaxNode::Type::Block;
826 merge.data.block = demote_block; 837 merge.data.block = demote_block;
838 uses_demote_to_helper = true;
827 break; 839 break;
828 } 840 }
829 case StatementType::Unreachable: { 841 case StatementType::Unreachable: {
@@ -855,11 +867,117 @@ private:
855 return block_pool.Create(inst_pool); 867 return block_pool.Create(inst_pool);
856 } 868 }
857 869
870 void DemoteCombinationPass() {
871 using Type = IR::AbstractSyntaxNode::Type;
872 std::vector<IR::Block*> demote_blocks;
873 std::vector<IR::U1> demote_conds;
874 u32 num_epilogues{};
875 u32 branch_depth{};
876 for (const IR::AbstractSyntaxNode& node : syntax_list) {
877 if (node.type == Type::If) {
878 ++branch_depth;
879 }
880 if (node.type == Type::EndIf) {
881 --branch_depth;
882 }
883 if (node.type != Type::Block) {
884 continue;
885 }
886 if (branch_depth > 1) {
887 // Skip reordering nested demote branches.
888 continue;
889 }
890 for (const IR::Inst& inst : node.data.block->Instructions()) {
891 const IR::Opcode op{inst.GetOpcode()};
892 if (op == IR::Opcode::DemoteToHelperInvocation) {
893 demote_blocks.push_back(node.data.block);
894 break;
895 }
896 if (op == IR::Opcode::Epilogue) {
897 ++num_epilogues;
898 }
899 }
900 }
901 if (demote_blocks.size() == 0) {
902 return;
903 }
904 if (num_epilogues > 1) {
905 LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
906 return;
907 }
908 s64 last_iterator_offset{};
909 auto& asl{syntax_list};
910 for (const IR::Block* demote_block : demote_blocks) {
911 const auto start_it{asl.begin() + last_iterator_offset};
912 auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
913 return asn.type == Type::If && asn.data.if_node.body == demote_block;
914 })};
915 if (asl_it == asl.end()) {
916 // Demote without a conditional branch.
917 // No need to proceed since all fragment instances will be demoted regardless.
918 return;
919 }
920 const IR::Block* const end_if = asl_it->data.if_node.merge;
921 demote_conds.push_back(asl_it->data.if_node.cond);
922 last_iterator_offset = std::distance(asl.begin(), asl_it);
923
924 asl_it = asl.erase(asl_it);
925 asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
926 return asn.type == Type::Block && asn.data.block == demote_block;
927 });
928
929 asl_it = asl.erase(asl_it);
930 asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
931 return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
932 });
933 asl_it = asl.erase(asl_it);
934 }
935 const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
936 if (asn.type != Type::Block) {
937 return false;
938 }
939 for (const auto& inst : asn.data.block->Instructions()) {
940 if (inst.GetOpcode() == IR::Opcode::Epilogue) {
941 return true;
942 }
943 }
944 return false;
945 }};
946 const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
947 const auto return_block_it{(reverse_it + 1).base()};
948
949 IR::IREmitter ir{*(return_block_it - 1)->data.block};
950 IR::U1 cond(IR::Value(false));
951 for (const auto& demote_cond : demote_conds) {
952 cond = ir.LogicalOr(cond, demote_cond);
953 }
954 cond.Inst()->DestructiveAddUsage(1);
955
956 IR::AbstractSyntaxNode demote_if_node{};
957 demote_if_node.type = Type::If;
958 demote_if_node.data.if_node.cond = cond;
959 demote_if_node.data.if_node.body = demote_blocks[0];
960 demote_if_node.data.if_node.merge = return_block_it->data.block;
961
962 IR::AbstractSyntaxNode demote_node{};
963 demote_node.type = Type::Block;
964 demote_node.data.block = demote_blocks[0];
965
966 IR::AbstractSyntaxNode demote_endif_node{};
967 demote_endif_node.type = Type::EndIf;
968 demote_endif_node.data.end_if.merge = return_block_it->data.block;
969
970 asl.insert(return_block_it, demote_endif_node);
971 asl.insert(return_block_it, demote_node);
972 asl.insert(return_block_it, demote_if_node);
973 }
974
858 ObjectPool<Statement>& stmt_pool; 975 ObjectPool<Statement>& stmt_pool;
859 ObjectPool<IR::Inst>& inst_pool; 976 ObjectPool<IR::Inst>& inst_pool;
860 ObjectPool<IR::Block>& block_pool; 977 ObjectPool<IR::Block>& block_pool;
861 Environment& env; 978 Environment& env;
862 IR::AbstractSyntaxList& syntax_list; 979 IR::AbstractSyntaxList& syntax_list;
980 bool uses_demote_to_helper{};
863 981
864// TODO: C++20 Remove this when all compilers support constexpr std::vector 982// TODO: C++20 Remove this when all compilers support constexpr std::vector
865#if __cpp_lib_constexpr_vector >= 201907 983#if __cpp_lib_constexpr_vector >= 201907
@@ -871,12 +989,13 @@ private:
871} // Anonymous namespace 989} // Anonymous namespace
872 990
873IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 991IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
874 Environment& env, Flow::CFG& cfg) { 992 Environment& env, Flow::CFG& cfg,
993 const HostTranslateInfo& host_info) {
875 ObjectPool<Statement> stmt_pool{64}; 994 ObjectPool<Statement> stmt_pool{64};
876 GotoPass goto_pass{cfg, stmt_pool}; 995 GotoPass goto_pass{cfg, stmt_pool};
877 Statement& root{goto_pass.RootStatement()}; 996 Statement& root{goto_pass.RootStatement()};
878 IR::AbstractSyntaxList syntax_list; 997 IR::AbstractSyntaxList syntax_list;
879 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; 998 TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
880 return syntax_list; 999 return syntax_list;
881} 1000}
882 1001
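DemoteCombinationPass only runs when the host reports needs_demote_reorder and the translated shader actually uses DemoteToHelperInvocation. It collects the condition of every non-nested `if (cond) demote;` branch, erases those branches from the abstract syntax list, and re-emits a single demote guarded by the logical OR of the collected conditions immediately before the block containing the lone Epilogue, bailing out if there is more than one epilogue or if a demote is unconditional. In C-like pseudocode (do_more_work, demote_to_helper and the conditions are placeholders, not names from the patch):

/* Before the pass: demotes are scattered through the fragment program. */
void fragment_before(void) {
    if (cond_a) demote_to_helper();
    do_more_work();
    if (cond_b) demote_to_helper();
    do_even_more_work();
    /* epilogue / return */
}

/* After the pass on needs_demote_reorder hosts: one combined demote right before the return. */
void fragment_after(void) {
    do_more_work();
    do_even_more_work();
    if (cond_a || cond_b) demote_to_helper();
    /* epilogue / return */
}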
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index 88b083649..e38158da3 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
11#include "shader_recompiler/frontend/maxwell/control_flow.h" 11#include "shader_recompiler/frontend/maxwell/control_flow.h"
12#include "shader_recompiler/object_pool.h" 12#include "shader_recompiler/object_pool.h"
13 13
14namespace Shader::Maxwell { 14namespace Shader {
15struct HostTranslateInfo;
16namespace Maxwell {
15 17
16[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, 18[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
17 ObjectPool<IR::Block>& block_pool, Environment& env, 19 ObjectPool<IR::Block>& block_pool, Environment& env,
18 Flow::CFG& cfg); 20 Flow::CFG& cfg, const HostTranslateInfo& host_info);
19 21
20} // namespace Shader::Maxwell 22} // namespace Maxwell
23} // namespace Shader
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index c067d459c..012d55357 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, 130IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { 131 Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
132 IR::Program program; 132 IR::Program program;
133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); 133 program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
134 program.blocks = GenerateBlocks(program.syntax_list); 134 program.blocks = GenerateBlocks(program.syntax_list);
135 program.post_order_blocks = PostOrder(program.syntax_list.front()); 135 program.post_order_blocks = PostOrder(program.syntax_list.front());
136 program.stage = env.ShaderStage(); 136 program.stage = env.ShaderStage();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 94a584219..96468b2e7 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {
11 11
12/// Misc information about the host 12/// Misc information about the host
13struct HostTranslateInfo { 13struct HostTranslateInfo {
14 bool support_float16{}; ///< True when the device supports 16-bit floats 14 bool support_float16{}; ///< True when the device supports 16-bit floats
15 bool support_int64{}; ///< True when the device supports 64-bit integers 15 bool support_int64{}; ///< True when the device supports 64-bit integers
16 bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
16}; 17};
17 18
18} // namespace Shader 19} // namespace Shader
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index c3318095c..be2113f5a 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -261,16 +261,6 @@ public:
261 stream_score += score; 261 stream_score += score;
262 } 262 }
263 263
264 /// Sets the new frame tick
265 void SetFrameTick(u64 new_frame_tick) noexcept {
266 frame_tick = new_frame_tick;
267 }
268
269 /// Returns the new frame tick
270 [[nodiscard]] u64 FrameTick() const noexcept {
271 return frame_tick;
272 }
273
274 /// Returns the likeliness of this being a stream buffer 264 /// Returns the likeliness of this being a stream buffer
275 [[nodiscard]] int StreamScore() const noexcept { 265 [[nodiscard]] int StreamScore() const noexcept {
276 return stream_score; 266 return stream_score;
@@ -307,6 +297,14 @@ public:
307 return words.size_bytes; 297 return words.size_bytes;
308 } 298 }
309 299
300 size_t getLRUID() const noexcept {
301 return lru_id;
302 }
303
304 void setLRUID(size_t lru_id_) {
305 lru_id = lru_id_;
306 }
307
310private: 308private:
311 template <Type type> 309 template <Type type>
312 u64* Array() noexcept { 310 u64* Array() noexcept {
@@ -603,9 +601,9 @@ private:
603 RasterizerInterface* rasterizer = nullptr; 601 RasterizerInterface* rasterizer = nullptr;
604 VAddr cpu_addr = 0; 602 VAddr cpu_addr = 0;
605 Words words; 603 Words words;
606 u64 frame_tick = 0;
607 BufferFlagBits flags{}; 604 BufferFlagBits flags{};
608 int stream_score = 0; 605 int stream_score = 0;
606 size_t lru_id = SIZE_MAX;
609}; 607};
610 608
611} // namespace VideoCommon 609} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 3b43554f9..7bfd57369 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -20,6 +20,7 @@
20#include "common/common_types.h" 20#include "common/common_types.h"
21#include "common/div_ceil.h" 21#include "common/div_ceil.h"
22#include "common/literals.h" 22#include "common/literals.h"
23#include "common/lru_cache.h"
23#include "common/microprofile.h" 24#include "common/microprofile.h"
24#include "common/scope_exit.h" 25#include "common/scope_exit.h"
25#include "common/settings.h" 26#include "common/settings.h"
@@ -330,7 +331,7 @@ private:
330 template <bool insert> 331 template <bool insert>
331 void ChangeRegister(BufferId buffer_id); 332 void ChangeRegister(BufferId buffer_id);
332 333
333 void TouchBuffer(Buffer& buffer) const noexcept; 334 void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
334 335
335 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); 336 bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
336 337
@@ -428,7 +429,11 @@ private:
428 size_t immediate_buffer_capacity = 0; 429 size_t immediate_buffer_capacity = 0;
429 std::unique_ptr<u8[]> immediate_buffer_alloc; 430 std::unique_ptr<u8[]> immediate_buffer_alloc;
430 431
431 typename SlotVector<Buffer>::Iterator deletion_iterator; 432 struct LRUItemParams {
433 using ObjectType = BufferId;
434 using TickType = u64;
435 };
436 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
432 u64 frame_tick = 0; 437 u64 frame_tick = 0;
433 u64 total_used_memory = 0; 438 u64 total_used_memory = 0;
434 439
@@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
445 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { 450 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
446 // Ensure the first slot is used for the null buffer 451 // Ensure the first slot is used for the null buffer
447 void(slot_buffers.insert(runtime, NullBufferParams{})); 452 void(slot_buffers.insert(runtime, NullBufferParams{}));
448 deletion_iterator = slot_buffers.end();
449 common_ranges.clear(); 453 common_ranges.clear();
450} 454}
451 455
@@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() {
454 const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; 458 const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
455 const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; 459 const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
456 int num_iterations = aggressive_gc ? 64 : 32; 460 int num_iterations = aggressive_gc ? 64 : 32;
457 for (; num_iterations > 0; --num_iterations) { 461 const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
458 if (deletion_iterator == slot_buffers.end()) { 462 if (num_iterations == 0) {
459 deletion_iterator = slot_buffers.begin(); 463 return true;
460 }
461 ++deletion_iterator;
462 if (deletion_iterator == slot_buffers.end()) {
463 break;
464 }
465 const auto [buffer_id, buffer] = *deletion_iterator;
466 if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
467 DownloadBufferMemory(*buffer);
468 DeleteBuffer(buffer_id);
469 } 464 }
470 } 465 --num_iterations;
466 auto& buffer = slot_buffers[buffer_id];
467 DownloadBufferMemory(buffer);
468 DeleteBuffer(buffer_id);
469 return false;
470 };
471 lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
471} 472}
472 473
473template <class P> 474template <class P>
@@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() {
485 const bool skip_preferred = hits * 256 < shots * 251; 486 const bool skip_preferred = hits * 256 < shots * 251;
486 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; 487 uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
487 488
488 if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { 489 if (total_used_memory >= EXPECTED_MEMORY) {
489 RunGarbageCollector(); 490 RunGarbageCollector();
490 } 491 }
491 ++frame_tick; 492 ++frame_tick;
@@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
954template <class P> 955template <class P>
955void BufferCache<P>::BindHostIndexBuffer() { 956void BufferCache<P>::BindHostIndexBuffer() {
956 Buffer& buffer = slot_buffers[index_buffer.buffer_id]; 957 Buffer& buffer = slot_buffers[index_buffer.buffer_id];
957 TouchBuffer(buffer); 958 TouchBuffer(buffer, index_buffer.buffer_id);
958 const u32 offset = buffer.Offset(index_buffer.cpu_addr); 959 const u32 offset = buffer.Offset(index_buffer.cpu_addr);
959 const u32 size = index_buffer.size; 960 const u32 size = index_buffer.size;
960 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); 961 SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
@@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
975 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { 976 for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
976 const Binding& binding = vertex_buffers[index]; 977 const Binding& binding = vertex_buffers[index];
977 Buffer& buffer = slot_buffers[binding.buffer_id]; 978 Buffer& buffer = slot_buffers[binding.buffer_id];
978 TouchBuffer(buffer); 979 TouchBuffer(buffer, binding.buffer_id);
979 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); 980 SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
980 if (!flags[Dirty::VertexBuffer0 + index]) { 981 if (!flags[Dirty::VertexBuffer0 + index]) {
981 continue; 982 continue;
@@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
1011 const VAddr cpu_addr = binding.cpu_addr; 1012 const VAddr cpu_addr = binding.cpu_addr;
1012 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); 1013 const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
1013 Buffer& buffer = slot_buffers[binding.buffer_id]; 1014 Buffer& buffer = slot_buffers[binding.buffer_id];
1014 TouchBuffer(buffer); 1015 TouchBuffer(buffer, binding.buffer_id);
1015 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && 1016 const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
1016 size <= uniform_buffer_skip_cache_size && 1017 size <= uniform_buffer_skip_cache_size &&
1017 !buffer.IsRegionGpuModified(cpu_addr, size); 1018 !buffer.IsRegionGpuModified(cpu_addr, size);
@@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
1083 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { 1084 ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
1084 const Binding& binding = storage_buffers[stage][index]; 1085 const Binding& binding = storage_buffers[stage][index];
1085 Buffer& buffer = slot_buffers[binding.buffer_id]; 1086 Buffer& buffer = slot_buffers[binding.buffer_id];
1086 TouchBuffer(buffer); 1087 TouchBuffer(buffer, binding.buffer_id);
1087 const u32 size = binding.size; 1088 const u32 size = binding.size;
1088 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1089 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1089 1090
@@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
1128 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { 1129 for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
1129 const Binding& binding = transform_feedback_buffers[index]; 1130 const Binding& binding = transform_feedback_buffers[index];
1130 Buffer& buffer = slot_buffers[binding.buffer_id]; 1131 Buffer& buffer = slot_buffers[binding.buffer_id];
1131 TouchBuffer(buffer); 1132 TouchBuffer(buffer, binding.buffer_id);
1132 const u32 size = binding.size; 1133 const u32 size = binding.size;
1133 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1134 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1134 1135
@@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
1148 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { 1149 ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
1149 const Binding& binding = compute_uniform_buffers[index]; 1150 const Binding& binding = compute_uniform_buffers[index];
1150 Buffer& buffer = slot_buffers[binding.buffer_id]; 1151 Buffer& buffer = slot_buffers[binding.buffer_id];
1151 TouchBuffer(buffer); 1152 TouchBuffer(buffer, binding.buffer_id);
1152 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); 1153 const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
1153 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1154 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1154 1155
@@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
1168 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { 1169 ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
1169 const Binding& binding = compute_storage_buffers[index]; 1170 const Binding& binding = compute_storage_buffers[index];
1170 Buffer& buffer = slot_buffers[binding.buffer_id]; 1171 Buffer& buffer = slot_buffers[binding.buffer_id];
1171 TouchBuffer(buffer); 1172 TouchBuffer(buffer, binding.buffer_id);
1172 const u32 size = binding.size; 1173 const u32 size = binding.size;
1173 SynchronizeBuffer(buffer, binding.cpu_addr, size); 1174 SynchronizeBuffer(buffer, binding.cpu_addr, size);
1174 1175
@@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
1513 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); 1514 const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
1514 const u32 size = static_cast<u32>(overlap.end - overlap.begin); 1515 const u32 size = static_cast<u32>(overlap.end - overlap.begin);
1515 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); 1516 const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
1516 TouchBuffer(slot_buffers[new_buffer_id]);
1517 for (const BufferId overlap_id : overlap.ids) { 1517 for (const BufferId overlap_id : overlap.ids) {
1518 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); 1518 JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
1519 } 1519 }
1520 Register(new_buffer_id); 1520 Register(new_buffer_id);
1521 TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id);
1521 return new_buffer_id; 1522 return new_buffer_id;
1522} 1523}
1523 1524
@@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) {
1534template <class P> 1535template <class P>
1535template <bool insert> 1536template <bool insert>
1536void BufferCache<P>::ChangeRegister(BufferId buffer_id) { 1537void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1537 const Buffer& buffer = slot_buffers[buffer_id]; 1538 Buffer& buffer = slot_buffers[buffer_id];
1538 const auto size = buffer.SizeBytes(); 1539 const auto size = buffer.SizeBytes();
1539 if (insert) { 1540 if (insert) {
1540 total_used_memory += Common::AlignUp(size, 1024); 1541 total_used_memory += Common::AlignUp(size, 1024);
1542 buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));
1541 } else { 1543 } else {
1542 total_used_memory -= Common::AlignUp(size, 1024); 1544 total_used_memory -= Common::AlignUp(size, 1024);
1545 lru_cache.Free(buffer.getLRUID());
1543 } 1546 }
1544 const VAddr cpu_addr_begin = buffer.CpuAddr(); 1547 const VAddr cpu_addr_begin = buffer.CpuAddr();
1545 const VAddr cpu_addr_end = cpu_addr_begin + size; 1548 const VAddr cpu_addr_end = cpu_addr_begin + size;
@@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
1555} 1558}
1556 1559
1557template <class P> 1560template <class P>
1558void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { 1561void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
1559 buffer.SetFrameTick(frame_tick); 1562 if (buffer_id != NULL_BUFFER_ID) {
1563 lru_cache.Touch(buffer.getLRUID(), frame_tick);
1564 }
1560} 1565}
1561 1566
1562template <class P> 1567template <class P>
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1aa43523a..7f4ca6282 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -475,10 +475,10 @@ public:
475 475
476 // These values are used by Nouveau and some games. 476 // These values are used by Nouveau and some games.
477 AddGL = 0x8006, 477 AddGL = 0x8006,
478 SubtractGL = 0x8007, 478 MinGL = 0x8007,
479 ReverseSubtractGL = 0x8008, 479 MaxGL = 0x8008,
480 MinGL = 0x800a, 480 SubtractGL = 0x800a,
481 MaxGL = 0x800b 481 ReverseSubtractGL = 0x800b
482 }; 482 };
483 483
484 enum class Factor : u32 { 484 enum class Factor : u32 {
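The previous enum assigned the Nouveau/GL hexadecimal equation values to the wrong operations; the corrected values line up with the standard OpenGL blend-equation tokens:

// GL_FUNC_ADD              = 0x8006  -> AddGL
// GL_MIN                   = 0x8007  -> MinGL
// GL_MAX                   = 0x8008  -> MaxGL
// GL_FUNC_SUBTRACT         = 0x800a  -> SubtractGL
// GL_FUNC_REVERSE_SUBTRACT = 0x800b  -> ReverseSubtractGL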
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee992aed4..de9e41659 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
156 return shader_backend; 156 return shader_backend;
157 } 157 }
158 158
159 bool IsAmd() const {
160 return vendor_name == "ATI Technologies Inc.";
161 }
162
159private: 163private:
160 static bool TestVariableAoffi(); 164 static bool TestVariableAoffi();
161 static bool TestPreciseBug(); 165 static bool TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1f4dda17e..b0e14182e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
219 host_info{ 219 host_info{
220 .support_float16 = false, 220 .support_float16 = false,
221 .support_int64 = device.HasShaderInt64(), 221 .support_int64 = device.HasShaderInt64(),
222 .needs_demote_reorder = device.IsAmd(),
222 } { 223 } {
223 if (use_asynchronous_shaders) { 224 if (use_asynchronous_shaders) {
224 workers = CreateWorkers(); 225 workers = CreateWorkers();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 5c43b8acf..cb0580182 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
159 159
160 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; 160 const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
161 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); 161 const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
162 const size_t size_bytes = GetSizeInBytes(framebuffer);
163 162
164 // TODO(Rodrigo): Read this from HLE 163 // TODO(Rodrigo): Read this from HLE
165 constexpr u32 block_height_log2 = 4; 164 constexpr u32 block_height_log2 = 4;
166 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); 165 const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
166 const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel,
167 framebuffer.stride, framebuffer.height,
168 1, block_height_log2, 0)};
167 Tegra::Texture::UnswizzleTexture( 169 Tegra::Texture::UnswizzleTexture(
168 mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), 170 mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes),
169 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); 171 bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
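The framebuffer handed to the blit screen is block-linear (swizzled), so the number of source bytes is now derived from the stride, height and block height via Tegra::Texture::CalculateSize rather than from the linear GetSizeInBytes helper, matching the parameters later passed to UnswizzleTexture. Annotated form of the new computation (the parameter roles are my reading of CalculateSize, not names from the patch):

const u64 size_bytes = Tegra::Texture::CalculateSize(
    /*tiled=*/true, bytes_per_pixel,
    framebuffer.stride,        // width in pixels of the swizzled source rows
    framebuffer.height,
    /*depth=*/1,
    block_height_log2,         // still the TODO placeholder of 4 from above
    /*block_depth=*/0);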
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f316c4f92..31bfbcb06 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
325 host_info = Shader::HostTranslateInfo{ 325 host_info = Shader::HostTranslateInfo{
326 .support_float16 = device.IsFloat16Supported(), 326 .support_float16 = device.IsFloat16Supported(),
327 .support_int64 = device.IsShaderInt64Supported(), 327 .support_int64 = device.IsShaderInt64Supported(),
328 .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
329 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
328 }; 330 };
329} 331}
330 332
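On Vulkan the same workaround keys off VK_KHR_driver_properties instead of a vendor string, so it covers both the proprietary driver and the open-source AMDVLK driver while leaving Mesa's RADV untouched. A minimal sketch of the condition factored into a helper, assuming driver_id was obtained from VkPhysicalDeviceDriverPropertiesKHR via vkGetPhysicalDeviceProperties2:

    #include <vulkan/vulkan.h>

    // Mirrors the initializer above: reorder demotes only on AMD's own drivers.
    bool NeedsDemoteReorder(VkDriverIdKHR driver_id) {
        return driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
               driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR;
    }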
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index ff1feda9b..0c17a791b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -80,7 +80,7 @@ struct ImageBase {
80 VAddr cpu_addr_end = 0; 80 VAddr cpu_addr_end = 0;
81 81
82 u64 modification_tick = 0; 82 u64 modification_tick = 0;
83 u64 frame_tick = 0; 83 size_t lru_index = SIZE_MAX;
84 84
85 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; 85 std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
86 86
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a087498ff..24b809242 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -5,7 +5,6 @@
5#pragma once 5#pragma once
6 6
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/settings.h"
9#include "video_core/dirty_flags.h" 8#include "video_core/dirty_flags.h"
10#include "video_core/texture_cache/samples_helper.h" 9#include "video_core/texture_cache/samples_helper.h"
11#include "video_core/texture_cache/texture_cache_base.h" 10#include "video_core/texture_cache/texture_cache_base.h"
@@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
43 void(slot_image_views.insert(runtime, NullImageParams{})); 42 void(slot_image_views.insert(runtime, NullImageParams{}));
44 void(slot_samplers.insert(runtime, sampler_descriptor)); 43 void(slot_samplers.insert(runtime, sampler_descriptor));
45 44
46 deletion_iterator = slot_images.begin();
47
48 if constexpr (HAS_DEVICE_MEMORY_INFO) { 45 if constexpr (HAS_DEVICE_MEMORY_INFO) {
49 const auto device_memory = runtime.GetDeviceLocalMemory(); 46 const auto device_memory = runtime.GetDeviceLocalMemory();
50 const u64 possible_expected_memory = (device_memory * 3) / 10; 47 const u64 possible_expected_memory = (device_memory * 3) / 10;
@@ -64,70 +61,38 @@ template <class P>
64void TextureCache<P>::RunGarbageCollector() { 61void TextureCache<P>::RunGarbageCollector() {
65 const bool high_priority_mode = total_used_memory >= expected_memory; 62 const bool high_priority_mode = total_used_memory >= expected_memory;
66 const bool aggressive_mode = total_used_memory >= critical_memory; 63 const bool aggressive_mode = total_used_memory >= critical_memory;
67 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; 64 const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
68 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); 65 size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
69 for (; num_iterations > 0; --num_iterations) { 66 const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
70 if (deletion_iterator == slot_images.end()) { 67 if (num_iterations == 0) {
71 deletion_iterator = slot_images.begin(); 68 return true;
72 if (deletion_iterator == slot_images.end()) {
73 break;
74 }
75 } 69 }
76 auto [image_id, image_tmp] = *deletion_iterator; 70 --num_iterations;
77 Image* image = image_tmp; // fix clang error. 71 auto& image = slot_images[image_id];
78 const bool is_alias = True(image->flags & ImageFlagBits::Alias); 72 const bool must_download = image.IsSafeDownload();
79 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); 73 if (!high_priority_mode && must_download) {
80 const bool must_download = image->IsSafeDownload(); 74 return false;
81 bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
82 const u64 ticks_needed =
83 is_bad_overlap
84 ? ticks_to_destroy >> 4
85 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
86 should_care |= aggressive_mode;
87 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
88 if (is_bad_overlap) {
89 const bool overlap_check = std::ranges::all_of(
90 image->overlapping_images, [&, image](const ImageId& overlap_id) {
91 auto& overlap = slot_images[overlap_id];
92 return overlap.frame_tick >= image->frame_tick;
93 });
94 if (!overlap_check) {
95 ++deletion_iterator;
96 continue;
97 }
98 }
99 if (!is_bad_overlap && must_download) {
100 const bool alias_check = std::ranges::none_of(
101 image->aliased_images, [&, image](const AliasedImage& alias) {
102 auto& alias_image = slot_images[alias.id];
103 return (alias_image.frame_tick < image->frame_tick) ||
104 (alias_image.modification_tick < image->modification_tick);
105 });
106
107 if (alias_check) {
108 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
109 const auto copies = FullDownloadCopies(image->info);
110 image->DownloadMemory(map, copies);
111 runtime.Finish();
112 SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
113 }
114 }
115 if (True(image->flags & ImageFlagBits::Tracked)) {
116 UntrackImage(*image, image_id);
117 }
118 UnregisterImage(image_id);
119 DeleteImage(image_id);
120 if (is_bad_overlap) {
121 ++num_iterations;
122 }
123 } 75 }
124 ++deletion_iterator; 76 if (must_download) {
125 } 77 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
78 const auto copies = FullDownloadCopies(image.info);
79 image.DownloadMemory(map, copies);
80 runtime.Finish();
81 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
82 }
83 if (True(image.flags & ImageFlagBits::Tracked)) {
84 UntrackImage(image, image_id);
85 }
86 UnregisterImage(image_id);
87 DeleteImage(image_id);
88 return false;
89 };
90 lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
126} 91}
127 92
128template <class P> 93template <class P>
129void TextureCache<P>::TickFrame() { 94void TextureCache<P>::TickFrame() {
130 if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { 95 if (total_used_memory > minimum_memory) {
131 RunGarbageCollector(); 96 RunGarbageCollector();
132 } 97 }
133 sentenced_images.Tick(); 98 sentenced_images.Tick();
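The old collector walked a raw deletion iterator with per-image heuristics; the new one sweeps the shared LRU list and only visits images untouched for at least ticks_to_destroy frames, giving up after a bounded number of candidates. A sketch of the three tiers encoded by the two ternaries above (values copied from the hunk; field names are illustrative, and expected_memory/critical_memory are the device-local budgets set up in the constructor):

    #include <cstddef>
    #include <cstdint>

    struct GcTier {
        std::uint64_t ticks_to_destroy; // only evict images idle at least this many frames
        std::size_t max_deletions;      // bound on images processed in one frame
    };

    GcTier SelectTier(std::uint64_t used, std::uint64_t expected, std::uint64_t critical) {
        if (used >= critical) {
            return {10, 10000}; // aggressive: evict almost anything old enough
        }
        if (used >= expected) {
            return {25, 100};   // high priority: also evict images that need a download first
        }
        return {100, 5};        // normal: trim only long-unused images, a few per frame
    }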
@@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1078 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1043 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1079 } 1044 }
1080 total_used_memory += Common::AlignUp(tentative_size, 1024); 1045 total_used_memory += Common::AlignUp(tentative_size, 1024);
1046 image.lru_index = lru_cache.Insert(image_id, frame_tick);
1047
1081 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, 1048 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1082 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); 1049 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1083 if (False(image.flags & ImageFlagBits::Sparse)) { 1050 if (False(image.flags & ImageFlagBits::Sparse)) {
@@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1115 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); 1082 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1116 } 1083 }
1117 total_used_memory -= Common::AlignUp(tentative_size, 1024); 1084 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1085 lru_cache.Free(image.lru_index);
1118 const auto& clear_page_table = 1086 const auto& clear_page_table =
1119 [this, image_id]( 1087 [this, image_id](
1120 u64 page, 1088 u64 page,
@@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
1384 if (is_modification) { 1352 if (is_modification) {
1385 MarkModification(image); 1353 MarkModification(image);
1386 } 1354 }
1387 image.frame_tick = frame_tick; 1355 lru_cache.Touch(image.lru_index, frame_tick);
1388} 1356}
1389 1357
1390template <class P> 1358template <class P>
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index e4ae351cb..d7528ed24 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -14,6 +14,7 @@
14 14
15#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/literals.h" 16#include "common/literals.h"
17#include "common/lru_cache.h"
17#include "video_core/compatible_formats.h" 18#include "video_core/compatible_formats.h"
18#include "video_core/delayed_destruction_ring.h" 19#include "video_core/delayed_destruction_ring.h"
19#include "video_core/engines/fermi_2d.h" 20#include "video_core/engines/fermi_2d.h"
@@ -370,6 +371,12 @@ private:
370 std::vector<ImageId> uncommitted_downloads; 371 std::vector<ImageId> uncommitted_downloads;
371 std::queue<std::vector<ImageId>> committed_downloads; 372 std::queue<std::vector<ImageId>> committed_downloads;
372 373
374 struct LRUItemParams {
375 using ObjectType = ImageId;
376 using TickType = u64;
377 };
378 Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
379
373 static constexpr size_t TICKS_TO_DESTROY = 6; 380 static constexpr size_t TICKS_TO_DESTROY = 6;
374 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; 381 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
375 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; 382 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
@@ -379,7 +386,6 @@ private:
379 386
380 u64 modification_tick = 0; 387 u64 modification_tick = 0;
381 u64 frame_tick = 0; 388 u64 frame_tick = 0;
382 typename SlotVector<Image>::Iterator deletion_iterator;
383}; 389};
384 390
385} // namespace VideoCommon 391} // namespace VideoCommon
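The container the texture cache now drives is the header-only Common::LeastRecentlyUsedCache added in src/common/lru_cache.h. A hedged usage sketch of the four calls made above (Insert/Touch/Free/ForEachItemBelow), compiled against the yuzu tree and with an int standing in for ImageId:

    #include <cstddef>
    #include <cstdint>

    #include "common/lru_cache.h"

    struct ExampleLRUParams {
        using ObjectType = int;          // the texture cache uses ImageId here
        using TickType = std::uint64_t;  // the texture cache passes its frame counter
    };

    int main() {
        Common::LeastRecentlyUsedCache<ExampleLRUParams> lru;
        std::uint64_t frame_tick = 0;

        const std::size_t index = lru.Insert(42, frame_tick); // keep the index on the object
        ++frame_tick;
        lru.Touch(index, frame_tick); // promote to most-recently-used on every access

        // Visit entries last touched before the cutoff; returning true stops the walk early.
        lru.ForEachItemBelow(frame_tick, [](int object) {
            (void)object; // evict `object` here
            return false;
        });

        lru.Free(index); // release the slot when the object is unregistered
        return 0;
    }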
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index c010b9353..24e943e4c 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
63 const u32 unswizzled_offset = 63 const u32 unswizzled_offset =
64 slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; 64 slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL;
65 65
66 if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset);
67 offset >= input.size()) {
68 // TODO(Rodrigo): This is an out of bounds access that should never happen. To
69 // avoid crashing the emulator, break.
70 ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size());
71 break;
72 }
73
74 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; 66 u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset];
75 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; 67 const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset];
76 68
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 377795326..85d292bcc 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -818,7 +818,6 @@ void Config::ReadRendererValues() {
818 ReadGlobalSetting(Settings::values.shader_backend); 818 ReadGlobalSetting(Settings::values.shader_backend);
819 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 819 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
820 ReadGlobalSetting(Settings::values.use_fast_gpu_time); 820 ReadGlobalSetting(Settings::values.use_fast_gpu_time);
821 ReadGlobalSetting(Settings::values.use_caches_gc);
822 ReadGlobalSetting(Settings::values.bg_red); 821 ReadGlobalSetting(Settings::values.bg_red);
823 ReadGlobalSetting(Settings::values.bg_green); 822 ReadGlobalSetting(Settings::values.bg_green);
824 ReadGlobalSetting(Settings::values.bg_blue); 823 ReadGlobalSetting(Settings::values.bg_blue);
@@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() {
1359 Settings::values.shader_backend.UsingGlobal()); 1358 Settings::values.shader_backend.UsingGlobal());
1360 WriteGlobalSetting(Settings::values.use_asynchronous_shaders); 1359 WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
1361 WriteGlobalSetting(Settings::values.use_fast_gpu_time); 1360 WriteGlobalSetting(Settings::values.use_fast_gpu_time);
1362 WriteGlobalSetting(Settings::values.use_caches_gc);
1363 WriteGlobalSetting(Settings::values.bg_red); 1361 WriteGlobalSetting(Settings::values.bg_red);
1364 WriteGlobalSetting(Settings::values.bg_green); 1362 WriteGlobalSetting(Settings::values.bg_green);
1365 WriteGlobalSetting(Settings::values.bg_blue); 1363 WriteGlobalSetting(Settings::values.bg_blue);
diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui
index 099ddbb7c..43f1887d1 100644
--- a/src/yuzu/configuration/configure_graphics.ui
+++ b/src/yuzu/configuration/configure_graphics.ui
@@ -156,7 +156,7 @@
156 <item> 156 <item>
157 <widget class="QCheckBox" name="use_disk_shader_cache"> 157 <widget class="QCheckBox" name="use_disk_shader_cache">
158 <property name="text"> 158 <property name="text">
159 <string>Use disk shader cache</string> 159 <string>Use disk pipeline cache</string>
160 </property> 160 </property>
161 </widget> 161 </widget>
162 </item> 162 </item>
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index a31b8e192..bfd464061 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
28 28
29 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); 29 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
30 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); 30 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
31 ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
32 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); 31 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
33 32
34 if (Settings::IsConfiguringGlobal()) { 33 if (Settings::IsConfiguringGlobal()) {
@@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
55 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 54 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
56 ui->use_asynchronous_shaders, 55 ui->use_asynchronous_shaders,
57 use_asynchronous_shaders); 56 use_asynchronous_shaders);
58 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc,
59 use_caches_gc);
60 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, 57 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
61 ui->use_fast_gpu_time, use_fast_gpu_time); 58 ui->use_fast_gpu_time, use_fast_gpu_time);
62} 59}
@@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
81 ui->use_asynchronous_shaders->setEnabled( 78 ui->use_asynchronous_shaders->setEnabled(
82 Settings::values.use_asynchronous_shaders.UsingGlobal()); 79 Settings::values.use_asynchronous_shaders.UsingGlobal());
83 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 80 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
84 ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());
85 ui->anisotropic_filtering_combobox->setEnabled( 81 ui->anisotropic_filtering_combobox->setEnabled(
86 Settings::values.max_anisotropy.UsingGlobal()); 82 Settings::values.max_anisotropy.UsingGlobal());
87 83
@@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
94 use_asynchronous_shaders); 90 use_asynchronous_shaders);
95 ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, 91 ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
96 Settings::values.use_fast_gpu_time, use_fast_gpu_time); 92 Settings::values.use_fast_gpu_time, use_fast_gpu_time);
97 ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc,
98 use_caches_gc);
99 ConfigurationShared::SetColoredComboBox( 93 ConfigurationShared::SetColoredComboBox(
100 ui->gpu_accuracy, ui->label_gpu_accuracy, 94 ui->gpu_accuracy, ui->label_gpu_accuracy,
101 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); 95 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index 7356e6916..13ba4ff6b 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -37,5 +37,4 @@ private:
37 ConfigurationShared::CheckState use_vsync; 37 ConfigurationShared::CheckState use_vsync;
38 ConfigurationShared::CheckState use_asynchronous_shaders; 38 ConfigurationShared::CheckState use_asynchronous_shaders;
39 ConfigurationShared::CheckState use_fast_gpu_time; 39 ConfigurationShared::CheckState use_fast_gpu_time;
40 ConfigurationShared::CheckState use_caches_gc;
41}; 40};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 4fe6b86ae..b91abc2f0 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -82,7 +82,7 @@
82 <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> 82 <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
83 </property> 83 </property>
84 <property name="text"> 84 <property name="text">
85 <string>Use asynchronous shader building (hack)</string> 85 <string>Use asynchronous shader building (Hack)</string>
86 </property> 86 </property>
87 </widget> 87 </widget>
88 </item> 88 </item>
@@ -92,17 +92,7 @@
92 <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string> 92 <string>Enables Fast GPU Time. This option will force most games to run at their highest native resolution.</string>
93 </property> 93 </property>
94 <property name="text"> 94 <property name="text">
95 <string>Use Fast GPU Time (hack)</string> 95 <string>Use Fast GPU Time (Hack)</string>
96 </property>
97 </widget>
98 </item>
99 <item>
100 <widget class="QCheckBox" name="use_caches_gc">
101 <property name="toolTip">
102 <string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
103 </property>
104 <property name="text">
105 <string>Enable GPU cache garbage collection (experimental)</string>
106 </property> 96 </property>
107 </widget> 97 </widget>
108 </item> 98 </item>
diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp
index e97804220..f9d949e75 100644
--- a/src/yuzu/game_list.cpp
+++ b/src/yuzu/game_list.cpp
@@ -515,16 +515,16 @@ void GameList::AddGamePopup(QMenu& context_menu, u64 program_id, const std::stri
515 QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location")); 515 QAction* open_save_location = context_menu.addAction(tr("Open Save Data Location"));
516 QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location")); 516 QAction* open_mod_location = context_menu.addAction(tr("Open Mod Data Location"));
517 QAction* open_transferable_shader_cache = 517 QAction* open_transferable_shader_cache =
518 context_menu.addAction(tr("Open Transferable Shader Cache")); 518 context_menu.addAction(tr("Open Transferable Pipeline Cache"));
519 context_menu.addSeparator(); 519 context_menu.addSeparator();
520 QMenu* remove_menu = context_menu.addMenu(tr("Remove")); 520 QMenu* remove_menu = context_menu.addMenu(tr("Remove"));
521 QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update")); 521 QAction* remove_update = remove_menu->addAction(tr("Remove Installed Update"));
522 QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC")); 522 QAction* remove_dlc = remove_menu->addAction(tr("Remove All Installed DLC"));
523 QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration")); 523 QAction* remove_custom_config = remove_menu->addAction(tr("Remove Custom Configuration"));
524 QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Shader Cache")); 524 QAction* remove_gl_shader_cache = remove_menu->addAction(tr("Remove OpenGL Pipeline Cache"));
525 QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Shader Cache")); 525 QAction* remove_vk_shader_cache = remove_menu->addAction(tr("Remove Vulkan Pipeline Cache"));
526 remove_menu->addSeparator(); 526 remove_menu->addSeparator();
527 QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Shader Caches")); 527 QAction* remove_shader_cache = remove_menu->addAction(tr("Remove All Pipeline Caches"));
528 QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents")); 528 QAction* remove_all_content = remove_menu->addAction(tr("Remove All Installed Contents"));
529 QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS")); 529 QMenu* dump_romfs_menu = context_menu.addMenu(tr("Dump RomFS"));
530 QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS")); 530 QAction* dump_romfs = dump_romfs_menu->addAction(tr("Dump RomFS"));
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 4f14be524..757dd1ea0 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -468,7 +468,6 @@ void Config::ReadValues() {
468 ReadSetting("Renderer", Settings::values.use_nvdec_emulation); 468 ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
469 ReadSetting("Renderer", Settings::values.accelerate_astc); 469 ReadSetting("Renderer", Settings::values.accelerate_astc);
470 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 470 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
471 ReadSetting("Renderer", Settings::values.use_caches_gc);
472 471
473 ReadSetting("Renderer", Settings::values.bg_red); 472 ReadSetting("Renderer", Settings::values.bg_red);
474 ReadSetting("Renderer", Settings::values.bg_green); 473 ReadSetting("Renderer", Settings::values.bg_green);