summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/settings.cpp10
-rw-r--r--src/core/settings.h11
-rw-r--r--src/core/telemetry_session.cpp16
-rw-r--r--src/video_core/CMakeLists.txt5
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h119
-rw-r--r--src/video_core/buffer_cache/map_interval.h18
-rw-r--r--src/video_core/dma_pusher.cpp3
-rw-r--r--src/video_core/engines/maxwell_3d.cpp8
-rw-r--r--src/video_core/engines/maxwell_dma.cpp11
-rw-r--r--src/video_core/fence_manager.h170
-rw-r--r--src/video_core/gpu.cpp29
-rw-r--r--src/video_core/gpu.h29
-rw-r--r--src/video_core/gpu_asynch.cpp4
-rw-r--r--src/video_core/gpu_asynch.h2
-rw-r--r--src/video_core/gpu_thread.cpp39
-rw-r--r--src/video_core/gpu_thread.h11
-rw-r--r--src/video_core/query_cache.h46
-rw-r--r--src/video_core/rasterizer_interface.h18
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp72
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h53
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp62
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h10
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp17
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp101
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h74
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp16
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h3
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp55
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h8
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp18
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h20
-rw-r--r--src/video_core/texture_cache/surface_base.h18
-rw-r--r--src/video_core/texture_cache/texture_cache.h114
-rw-r--r--src/yuzu/configuration/config.cpp8
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp5
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui33
-rw-r--r--src/yuzu_cmd/config.cpp4
-rw-r--r--src/yuzu_cmd/default_ini.h6
-rw-r--r--src/yuzu_tester/config.cpp4
41 files changed, 1193 insertions, 63 deletions
diff --git a/src/core/settings.cpp b/src/core/settings.cpp
index c1282cb80..cd6c257f5 100644
--- a/src/core/settings.cpp
+++ b/src/core/settings.cpp
@@ -92,7 +92,7 @@ void LogSettings() {
92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); 92 LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); 93 LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); 94 LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
95 LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); 95 LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
96 LogSetting("Renderer_UseAsynchronousGpuEmulation", 96 LogSetting("Renderer_UseAsynchronousGpuEmulation",
97 Settings::values.use_asynchronous_gpu_emulation); 97 Settings::values.use_asynchronous_gpu_emulation);
98 LogSetting("Renderer_UseVsync", Settings::values.use_vsync); 98 LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
@@ -109,4 +109,12 @@ void LogSettings() {
109 LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); 109 LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
110} 110}
111 111
112bool IsGPULevelExtreme() {
113 return values.gpu_accuracy == GPUAccuracy::Extreme;
114}
115
116bool IsGPULevelHigh() {
117 return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
118}
119
112} // namespace Settings 120} // namespace Settings
diff --git a/src/core/settings.h b/src/core/settings.h
index c73d1c596..7d09253f5 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -376,6 +376,12 @@ enum class RendererBackend {
376 Vulkan = 1, 376 Vulkan = 1,
377}; 377};
378 378
379enum class GPUAccuracy : u32 {
380 Normal = 0,
381 High = 1,
382 Extreme = 2,
383};
384
379struct Values { 385struct Values {
380 // System 386 // System
381 bool use_docked_mode; 387 bool use_docked_mode;
@@ -436,7 +442,7 @@ struct Values {
436 bool use_frame_limit; 442 bool use_frame_limit;
437 u16 frame_limit; 443 u16 frame_limit;
438 bool use_disk_shader_cache; 444 bool use_disk_shader_cache;
439 bool use_accurate_gpu_emulation; 445 GPUAccuracy gpu_accuracy;
440 bool use_asynchronous_gpu_emulation; 446 bool use_asynchronous_gpu_emulation;
441 bool use_vsync; 447 bool use_vsync;
442 bool force_30fps_mode; 448 bool force_30fps_mode;
@@ -480,6 +486,9 @@ struct Values {
480 std::map<u64, std::vector<std::string>> disabled_addons; 486 std::map<u64, std::vector<std::string>> disabled_addons;
481} extern values; 487} extern values;
482 488
489bool IsGPULevelExtreme();
490bool IsGPULevelHigh();
491
483void Apply(); 492void Apply();
484void LogSettings(); 493void LogSettings();
485} // namespace Settings 494} // namespace Settings
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index fd5a3ee9f..1c3b03a1c 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
56 return "Unknown"; 56 return "Unknown";
57} 57}
58 58
59static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
60 switch (backend) {
61 case Settings::GPUAccuracy::Normal:
62 return "Normal";
63 case Settings::GPUAccuracy::High:
64 return "High";
65 case Settings::GPUAccuracy::Extreme:
66 return "Extreme";
67 }
68 return "Unknown";
69}
70
59u64 GetTelemetryId() { 71u64 GetTelemetryId() {
60 u64 telemetry_id{}; 72 u64 telemetry_id{};
61 const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + 73 const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
@@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
184 AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); 196 AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
185 AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); 197 AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
186 AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); 198 AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
187 AddField(field_type, "Renderer_UseAccurateGpuEmulation", 199 AddField(field_type, "Renderer_GPUAccuracyLevel",
188 Settings::values.use_accurate_gpu_emulation); 200 TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
189 AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", 201 AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
190 Settings::values.use_asynchronous_gpu_emulation); 202 Settings::values.use_asynchronous_gpu_emulation);
191 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); 203 AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index c0e8f6ab1..8ede4ba9b 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -23,6 +23,7 @@ add_library(video_core STATIC
23 engines/shader_bytecode.h 23 engines/shader_bytecode.h
24 engines/shader_header.h 24 engines/shader_header.h
25 engines/shader_type.h 25 engines/shader_type.h
26 fence_manager.h
26 gpu.cpp 27 gpu.cpp
27 gpu.h 28 gpu.h
28 gpu_asynch.cpp 29 gpu_asynch.cpp
@@ -51,6 +52,8 @@ add_library(video_core STATIC
51 renderer_opengl/gl_buffer_cache.h 52 renderer_opengl/gl_buffer_cache.h
52 renderer_opengl/gl_device.cpp 53 renderer_opengl/gl_device.cpp
53 renderer_opengl/gl_device.h 54 renderer_opengl/gl_device.h
55 renderer_opengl/gl_fence_manager.cpp
56 renderer_opengl/gl_fence_manager.h
54 renderer_opengl/gl_framebuffer_cache.cpp 57 renderer_opengl/gl_framebuffer_cache.cpp
55 renderer_opengl/gl_framebuffer_cache.h 58 renderer_opengl/gl_framebuffer_cache.h
56 renderer_opengl/gl_rasterizer.cpp 59 renderer_opengl/gl_rasterizer.cpp
@@ -176,6 +179,8 @@ if (ENABLE_VULKAN)
176 renderer_vulkan/vk_descriptor_pool.h 179 renderer_vulkan/vk_descriptor_pool.h
177 renderer_vulkan/vk_device.cpp 180 renderer_vulkan/vk_device.cpp
178 renderer_vulkan/vk_device.h 181 renderer_vulkan/vk_device.h
182 renderer_vulkan/vk_fence_manager.cpp
183 renderer_vulkan/vk_fence_manager.h
179 renderer_vulkan/vk_graphics_pipeline.cpp 184 renderer_vulkan/vk_graphics_pipeline.cpp
180 renderer_vulkan/vk_graphics_pipeline.h 185 renderer_vulkan/vk_graphics_pipeline.h
181 renderer_vulkan/vk_image.cpp 186 renderer_vulkan/vk_image.cpp
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 83e7a1cde..510f11089 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,6 +5,7 @@
5#pragma once 5#pragma once
6 6
7#include <array> 7#include <array>
8#include <list>
8#include <memory> 9#include <memory>
9#include <mutex> 10#include <mutex>
10#include <unordered_map> 11#include <unordered_map>
@@ -18,8 +19,10 @@
18 19
19#include "common/alignment.h" 20#include "common/alignment.h"
20#include "common/common_types.h" 21#include "common/common_types.h"
22#include "common/logging/log.h"
21#include "core/core.h" 23#include "core/core.h"
22#include "core/memory.h" 24#include "core/memory.h"
25#include "core/settings.h"
23#include "video_core/buffer_cache/buffer_block.h" 26#include "video_core/buffer_cache/buffer_block.h"
24#include "video_core/buffer_cache/map_interval.h" 27#include "video_core/buffer_cache/map_interval.h"
25#include "video_core/memory_manager.h" 28#include "video_core/memory_manager.h"
@@ -79,6 +82,9 @@ public:
79 auto map = MapAddress(block, gpu_addr, cpu_addr, size); 82 auto map = MapAddress(block, gpu_addr, cpu_addr, size);
80 if (is_written) { 83 if (is_written) {
81 map->MarkAsModified(true, GetModifiedTicks()); 84 map->MarkAsModified(true, GetModifiedTicks());
85 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
86 MarkForAsyncFlush(map);
87 }
82 if (!map->IsWritten()) { 88 if (!map->IsWritten()) {
83 map->MarkAsWritten(true); 89 map->MarkAsWritten(true);
84 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 90 MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -137,11 +143,22 @@ public:
137 }); 143 });
138 for (auto& object : objects) { 144 for (auto& object : objects) {
139 if (object->IsModified() && object->IsRegistered()) { 145 if (object->IsModified() && object->IsRegistered()) {
146 mutex.unlock();
140 FlushMap(object); 147 FlushMap(object);
148 mutex.lock();
141 } 149 }
142 } 150 }
143 } 151 }
144 152
153 bool MustFlushRegion(VAddr addr, std::size_t size) {
154 std::lock_guard lock{mutex};
155
156 const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
157 return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
158 return map->IsModified() && map->IsRegistered();
159 });
160 }
161
145 /// Mark the specified region as being invalidated 162 /// Mark the specified region as being invalidated
146 void InvalidateRegion(VAddr addr, u64 size) { 163 void InvalidateRegion(VAddr addr, u64 size) {
147 std::lock_guard lock{mutex}; 164 std::lock_guard lock{mutex};
@@ -154,6 +171,77 @@ public:
154 } 171 }
155 } 172 }
156 173
174 void OnCPUWrite(VAddr addr, std::size_t size) {
175 std::lock_guard lock{mutex};
176
177 for (const auto& object : GetMapsInRange(addr, size)) {
178 if (object->IsMemoryMarked() && object->IsRegistered()) {
179 UnmarkMemory(object);
180 object->SetSyncPending(true);
181 marked_for_unregister.emplace_back(object);
182 }
183 }
184 }
185
186 void SyncGuestHost() {
187 std::lock_guard lock{mutex};
188
189 for (const auto& object : marked_for_unregister) {
190 if (object->IsRegistered()) {
191 object->SetSyncPending(false);
192 Unregister(object);
193 }
194 }
195 marked_for_unregister.clear();
196 }
197
198 void CommitAsyncFlushes() {
199 if (uncommitted_flushes) {
200 auto commit_list = std::make_shared<std::list<MapInterval>>();
201 for (auto& map : *uncommitted_flushes) {
202 if (map->IsRegistered() && map->IsModified()) {
203 // TODO(Blinkhawk): Implement backend asynchronous flushing
204 // AsyncFlushMap(map)
205 commit_list->push_back(map);
206 }
207 }
208 if (!commit_list->empty()) {
209 committed_flushes.push_back(commit_list);
210 } else {
211 committed_flushes.emplace_back();
212 }
213 } else {
214 committed_flushes.emplace_back();
215 }
216 uncommitted_flushes.reset();
217 }
218
219 bool ShouldWaitAsyncFlushes() const {
220 return !committed_flushes.empty() && committed_flushes.front() != nullptr;
221 }
222
223 bool HasUncommittedFlushes() const {
224 return uncommitted_flushes != nullptr;
225 }
226
227 void PopAsyncFlushes() {
228 if (committed_flushes.empty()) {
229 return;
230 }
231 auto& flush_list = committed_flushes.front();
232 if (!flush_list) {
233 committed_flushes.pop_front();
234 return;
235 }
236 for (MapInterval& map : *flush_list) {
237 if (map->IsRegistered()) {
238 // TODO(Blinkhawk): Replace this for reading the asynchronous flush
239 FlushMap(map);
240 }
241 }
242 committed_flushes.pop_front();
243 }
244
157 virtual BufferType GetEmptyBuffer(std::size_t size) = 0; 245 virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
158 246
159protected: 247protected:
@@ -196,17 +284,30 @@ protected:
196 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; 284 const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
197 mapped_addresses.insert({interval, new_map}); 285 mapped_addresses.insert({interval, new_map});
198 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); 286 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
287 new_map->SetMemoryMarked(true);
199 if (inherit_written) { 288 if (inherit_written) {
200 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); 289 MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
201 new_map->MarkAsWritten(true); 290 new_map->MarkAsWritten(true);
202 } 291 }
203 } 292 }
204 293
205 /// Unregisters an object from the cache 294 void UnmarkMemory(const MapInterval& map) {
206 void Unregister(MapInterval& map) { 295 if (!map->IsMemoryMarked()) {
296 return;
297 }
207 const std::size_t size = map->GetEnd() - map->GetStart(); 298 const std::size_t size = map->GetEnd() - map->GetStart();
208 rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); 299 rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
300 map->SetMemoryMarked(false);
301 }
302
303 /// Unregisters an object from the cache
304 void Unregister(const MapInterval& map) {
305 UnmarkMemory(map);
209 map->MarkAsRegistered(false); 306 map->MarkAsRegistered(false);
307 if (map->IsSyncPending()) {
308 marked_for_unregister.remove(map);
309 map->SetSyncPending(false);
310 }
210 if (map->IsWritten()) { 311 if (map->IsWritten()) {
211 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); 312 UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
212 } 313 }
@@ -264,6 +365,9 @@ private:
264 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); 365 MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
265 if (modified_inheritance) { 366 if (modified_inheritance) {
266 new_map->MarkAsModified(true, GetModifiedTicks()); 367 new_map->MarkAsModified(true, GetModifiedTicks());
368 if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
369 MarkForAsyncFlush(new_map);
370 }
267 } 371 }
268 Register(new_map, write_inheritance); 372 Register(new_map, write_inheritance);
269 return new_map; 373 return new_map;
@@ -450,6 +554,13 @@ private:
450 return false; 554 return false;
451 } 555 }
452 556
557 void MarkForAsyncFlush(MapInterval& map) {
558 if (!uncommitted_flushes) {
559 uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
560 }
561 uncommitted_flushes->insert(map);
562 }
563
453 VideoCore::RasterizerInterface& rasterizer; 564 VideoCore::RasterizerInterface& rasterizer;
454 Core::System& system; 565 Core::System& system;
455 566
@@ -479,6 +590,10 @@ private:
479 u64 modified_ticks = 0; 590 u64 modified_ticks = 0;
480 591
481 std::vector<u8> staging_buffer; 592 std::vector<u8> staging_buffer;
593 std::list<MapInterval> marked_for_unregister;
594
595 std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
596 std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
482 597
483 std::recursive_mutex mutex; 598 std::recursive_mutex mutex;
484}; 599};
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index b0956029d..29d8b26f3 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -46,6 +46,22 @@ public:
46 return is_registered; 46 return is_registered;
47 } 47 }
48 48
49 void SetMemoryMarked(bool is_memory_marked_) {
50 is_memory_marked = is_memory_marked_;
51 }
52
53 bool IsMemoryMarked() const {
54 return is_memory_marked;
55 }
56
57 void SetSyncPending(bool is_sync_pending_) {
58 is_sync_pending = is_sync_pending_;
59 }
60
61 bool IsSyncPending() const {
62 return is_sync_pending;
63 }
64
49 VAddr GetStart() const { 65 VAddr GetStart() const {
50 return start; 66 return start;
51 } 67 }
@@ -83,6 +99,8 @@ private:
83 bool is_written{}; 99 bool is_written{};
84 bool is_modified{}; 100 bool is_modified{};
85 bool is_registered{}; 101 bool is_registered{};
102 bool is_memory_marked{};
103 bool is_sync_pending{};
86 u64 ticks{}; 104 u64 ticks{};
87}; 105};
88 106
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 0b77afc71..324dafdcd 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
21void DmaPusher::DispatchCalls() { 21void DmaPusher::DispatchCalls() {
22 MICROPROFILE_SCOPE(DispatchCalls); 22 MICROPROFILE_SCOPE(DispatchCalls);
23 23
24 gpu.SyncGuestHost();
24 // On entering GPU code, assume all memory may be touched by the ARM core. 25 // On entering GPU code, assume all memory may be touched by the ARM core.
25 gpu.Maxwell3D().OnMemoryWrite(); 26 gpu.Maxwell3D().OnMemoryWrite();
26 27
@@ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() {
32 } 33 }
33 } 34 }
34 gpu.FlushCommands(); 35 gpu.FlushCommands();
36 gpu.SyncGuestHost();
37 gpu.OnCommandListEnd();
35} 38}
36 39
37bool DmaPusher::Step() { 40bool DmaPusher::Step() {
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index baa74ad4c..2824ed707 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -404,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() {
404 404
405 switch (regs.query.query_get.operation) { 405 switch (regs.query.query_get.operation) {
406 case Regs::QueryOperation::Release: 406 case Regs::QueryOperation::Release:
407 StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); 407 if (regs.query.query_get.fence == 1) {
408 rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
409 } else {
410 StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
411 }
408 break; 412 break;
409 case Regs::QueryOperation::Acquire: 413 case Regs::QueryOperation::Acquire:
410 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that 414 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@@ -483,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
483 const u32 increment = regs.sync_info.increment.Value(); 487 const u32 increment = regs.sync_info.increment.Value();
484 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); 488 [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
485 if (increment) { 489 if (increment) {
486 system.GPU().IncrementSyncPoint(sync_point); 490 rasterizer.SignalSyncPoint(sync_point);
487 } 491 }
488} 492}
489 493
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index c2610f992..3bfed6ab8 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
104 write_buffer.resize(dst_size); 104 write_buffer.resize(dst_size);
105 } 105 }
106 106
107 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 107 if (Settings::IsGPULevelExtreme()) {
108 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 108 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
109 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
110 } else {
111 memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
112 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
113 }
109 114
110 Texture::UnswizzleSubrect( 115 Texture::UnswizzleSubrect(
111 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, 116 regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
@@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() {
136 write_buffer.resize(dst_size); 141 write_buffer.resize(dst_size);
137 } 142 }
138 143
139 if (Settings::values.use_accurate_gpu_emulation) { 144 if (Settings::IsGPULevelExtreme()) {
140 memory_manager.ReadBlock(source, read_buffer.data(), src_size); 145 memory_manager.ReadBlock(source, read_buffer.data(), src_size);
141 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); 146 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
142 } else { 147 } else {
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
new file mode 100644
index 000000000..dabd1588c
--- /dev/null
+++ b/src/video_core/fence_manager.h
@@ -0,0 +1,170 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <memory>
10#include <queue>
11
12#include "common/assert.h"
13#include "common/common_types.h"
14#include "core/core.h"
15#include "core/memory.h"
16#include "core/settings.h"
17#include "video_core/gpu.h"
18#include "video_core/memory_manager.h"
19#include "video_core/rasterizer_interface.h"
20
21namespace VideoCommon {
22
23class FenceBase {
24public:
25 FenceBase(u32 payload, bool is_stubbed)
26 : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
27
28 FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
29 : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
30
31 GPUVAddr GetAddress() const {
32 return address;
33 }
34
35 u32 GetPayload() const {
36 return payload;
37 }
38
39 bool IsSemaphore() const {
40 return is_semaphore;
41 }
42
43private:
44 GPUVAddr address;
45 u32 payload;
46 bool is_semaphore;
47
48protected:
49 bool is_stubbed;
50};
51
52template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
53class FenceManager {
54public:
55 void SignalSemaphore(GPUVAddr addr, u32 value) {
56 TryReleasePendingFences();
57 const bool should_flush = ShouldFlush();
58 CommitAsyncFlushes();
59 TFence new_fence = CreateFence(addr, value, !should_flush);
60 fences.push(new_fence);
61 QueueFence(new_fence);
62 if (should_flush) {
63 rasterizer.FlushCommands();
64 }
65 rasterizer.SyncGuestHost();
66 }
67
68 void SignalSyncPoint(u32 value) {
69 TryReleasePendingFences();
70 const bool should_flush = ShouldFlush();
71 CommitAsyncFlushes();
72 TFence new_fence = CreateFence(value, !should_flush);
73 fences.push(new_fence);
74 QueueFence(new_fence);
75 if (should_flush) {
76 rasterizer.FlushCommands();
77 }
78 rasterizer.SyncGuestHost();
79 }
80
81 void WaitPendingFences() {
82 auto& gpu{system.GPU()};
83 auto& memory_manager{gpu.MemoryManager()};
84 while (!fences.empty()) {
85 TFence& current_fence = fences.front();
86 if (ShouldWait()) {
87 WaitFence(current_fence);
88 }
89 PopAsyncFlushes();
90 if (current_fence->IsSemaphore()) {
91 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
92 } else {
93 gpu.IncrementSyncPoint(current_fence->GetPayload());
94 }
95 fences.pop();
96 }
97 }
98
99protected:
100 FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
101 TTextureCache& texture_cache, TTBufferCache& buffer_cache,
102 TQueryCache& query_cache)
103 : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
104 buffer_cache{buffer_cache}, query_cache{query_cache} {}
105
106 virtual ~FenceManager() {}
107
108 /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
109 /// true
110 virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
111 /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
112 virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
113 /// Queues a fence into the backend if the fence isn't stubbed.
114 virtual void QueueFence(TFence& fence) = 0;
115 /// Notifies that the backend fence has been signaled/reached in host GPU.
116 virtual bool IsFenceSignaled(TFence& fence) const = 0;
117 /// Waits until a fence has been signalled by the host GPU.
118 virtual void WaitFence(TFence& fence) = 0;
119
120 Core::System& system;
121 VideoCore::RasterizerInterface& rasterizer;
122 TTextureCache& texture_cache;
123 TTBufferCache& buffer_cache;
124 TQueryCache& query_cache;
125
126private:
127 void TryReleasePendingFences() {
128 auto& gpu{system.GPU()};
129 auto& memory_manager{gpu.MemoryManager()};
130 while (!fences.empty()) {
131 TFence& current_fence = fences.front();
132 if (ShouldWait() && !IsFenceSignaled(current_fence)) {
133 return;
134 }
135 PopAsyncFlushes();
136 if (current_fence->IsSemaphore()) {
137 memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
138 } else {
139 gpu.IncrementSyncPoint(current_fence->GetPayload());
140 }
141 fences.pop();
142 }
143 }
144
145 bool ShouldWait() const {
146 return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
147 query_cache.ShouldWaitAsyncFlushes();
148 }
149
150 bool ShouldFlush() const {
151 return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
152 query_cache.HasUncommittedFlushes();
153 }
154
155 void PopAsyncFlushes() {
156 texture_cache.PopAsyncFlushes();
157 buffer_cache.PopAsyncFlushes();
158 query_cache.PopAsyncFlushes();
159 }
160
161 void CommitAsyncFlushes() {
162 texture_cache.CommitAsyncFlushes();
163 buffer_cache.CommitAsyncFlushes();
164 query_cache.CommitAsyncFlushes();
165 }
166
167 std::queue<TFence> fences;
168};
169
170} // namespace VideoCommon
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index a606f4abd..3b7572d61 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
125 return true; 125 return true;
126} 126}
127 127
128u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
129 std::unique_lock lck{flush_request_mutex};
130 const u64 fence = ++last_flush_fence;
131 flush_requests.emplace_back(fence, addr, size);
132 return fence;
133}
134
135void GPU::TickWork() {
136 std::unique_lock lck{flush_request_mutex};
137 while (!flush_requests.empty()) {
138 auto& request = flush_requests.front();
139 const u64 fence = request.fence;
140 const VAddr addr = request.addr;
141 const std::size_t size = request.size;
142 flush_requests.pop_front();
143 flush_request_mutex.unlock();
144 renderer->Rasterizer().FlushRegion(addr, size);
145 current_flush_fence.store(fence);
146 flush_request_mutex.lock();
147 }
148}
149
128u64 GPU::GetTicks() const { 150u64 GPU::GetTicks() const {
129 // This values were reversed engineered by fincs from NVN 151 // This values were reversed engineered by fincs from NVN
130 // The gpu clock is reported in units of 385/625 nanoseconds 152 // The gpu clock is reported in units of 385/625 nanoseconds
@@ -142,6 +164,13 @@ void GPU::FlushCommands() {
142 renderer->Rasterizer().FlushCommands(); 164 renderer->Rasterizer().FlushCommands();
143} 165}
144 166
167void GPU::SyncGuestHost() {
168 renderer->Rasterizer().SyncGuestHost();
169}
170
171void GPU::OnCommandListEnd() {
172 renderer->Rasterizer().ReleaseFences();
173}
145// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence 174// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
146// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. 175// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
147// So the values you see in docs might be multiplied by 4. 176// So the values you see in docs might be multiplied by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 1a2d747be..5e3eb94e9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -155,7 +155,23 @@ public:
155 /// Calls a GPU method. 155 /// Calls a GPU method.
156 void CallMethod(const MethodCall& method_call); 156 void CallMethod(const MethodCall& method_call);
157 157
158 /// Flush all current written commands into the host GPU for execution.
158 void FlushCommands(); 159 void FlushCommands();
160 /// Synchronizes CPU writes with Host GPU memory.
161 void SyncGuestHost();
162 /// Signal the ending of command list.
163 virtual void OnCommandListEnd();
164
165 /// Request a host GPU memory flush from the CPU.
166 u64 RequestFlush(VAddr addr, std::size_t size);
167
168 /// Obtains current flush request fence id.
169 u64 CurrentFlushRequestFence() const {
170 return current_flush_fence.load(std::memory_order_relaxed);
171 }
172
173 /// Tick pending requests within the GPU.
174 void TickWork();
159 175
160 /// Returns a reference to the Maxwell3D GPU engine. 176 /// Returns a reference to the Maxwell3D GPU engine.
161 Engines::Maxwell3D& Maxwell3D(); 177 Engines::Maxwell3D& Maxwell3D();
@@ -325,6 +341,19 @@ private:
325 341
326 std::condition_variable sync_cv; 342 std::condition_variable sync_cv;
327 343
344 struct FlushRequest {
345 FlushRequest(u64 fence, VAddr addr, std::size_t size)
346 : fence{fence}, addr{addr}, size{size} {}
347 u64 fence;
348 VAddr addr;
349 std::size_t size;
350 };
351
352 std::list<FlushRequest> flush_requests;
353 std::atomic<u64> current_flush_fence{};
354 u64 last_flush_fence{};
355 std::mutex flush_request_mutex;
356
328 const bool is_async; 357 const bool is_async;
329}; 358};
330 359
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 20e73a37e..53305ab43 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
52 gpu_thread.WaitIdle(); 52 gpu_thread.WaitIdle();
53} 53}
54 54
55void GPUAsynch::OnCommandListEnd() {
56 gpu_thread.OnCommandListEnd();
57}
58
55} // namespace VideoCommon 59} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 03fd0eef0..517658612 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -32,6 +32,8 @@ public:
32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 32 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
33 void WaitIdle() const override; 33 void WaitIdle() const override;
34 34
35 void OnCommandListEnd() override;
36
35protected: 37protected:
36 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; 38 void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
37 39
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 10cda686b..c3bb4fe06 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/frontend/emu_window.h" 8#include "core/frontend/emu_window.h"
9#include "core/settings.h"
9#include "video_core/dma_pusher.h" 10#include "video_core/dma_pusher.h"
10#include "video_core/gpu.h" 11#include "video_core/gpu.h"
11#include "video_core/gpu_thread.h" 12#include "video_core/gpu_thread.h"
@@ -14,8 +15,9 @@
14namespace VideoCommon::GPUThread { 15namespace VideoCommon::GPUThread {
15 16
16/// Runs the GPU thread 17/// Runs the GPU thread
17static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, 18static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
18 Tegra::DmaPusher& dma_pusher, SynchState& state) { 19 Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
20 SynchState& state) {
19 MicroProfileOnThreadCreate("GpuThread"); 21 MicroProfileOnThreadCreate("GpuThread");
20 22
21 // Wait for first GPU command before acquiring the window context 23 // Wait for first GPU command before acquiring the window context
@@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
37 dma_pusher.DispatchCalls(); 39 dma_pusher.DispatchCalls();
38 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { 40 } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
39 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); 41 renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
42 } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
43 renderer.Rasterizer().ReleaseFences();
44 } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
45 system.GPU().TickWork();
40 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { 46 } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
41 renderer.Rasterizer().FlushRegion(data->addr, data->size); 47 renderer.Rasterizer().FlushRegion(data->addr, data->size);
42 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { 48 } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
43 renderer.Rasterizer().InvalidateRegion(data->addr, data->size); 49 renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
44 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { 50 } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
45 return; 51 return;
46 } else { 52 } else {
@@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
65void ThreadManager::StartThread(VideoCore::RendererBase& renderer, 71void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
66 Core::Frontend::GraphicsContext& context, 72 Core::Frontend::GraphicsContext& context,
67 Tegra::DmaPusher& dma_pusher) { 73 Tegra::DmaPusher& dma_pusher) {
68 thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), 74 thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
69 std::ref(state)}; 75 std::ref(context), std::ref(dma_pusher), std::ref(state)};
70} 76}
71 77
72void ThreadManager::SubmitList(Tegra::CommandList&& entries) { 78void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
@@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
78} 84}
79 85
80void ThreadManager::FlushRegion(VAddr addr, u64 size) { 86void ThreadManager::FlushRegion(VAddr addr, u64 size) {
81 PushCommand(FlushRegionCommand(addr, size)); 87 if (!Settings::IsGPULevelHigh()) {
88 PushCommand(FlushRegionCommand(addr, size));
89 return;
90 }
91 if (!Settings::IsGPULevelExtreme()) {
92 return;
93 }
94 if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
95 auto& gpu = system.GPU();
96 u64 fence = gpu.RequestFlush(addr, size);
97 PushCommand(GPUTickCommand());
98 while (fence > gpu.CurrentFlushRequestFence()) {
99 }
100 }
82} 101}
83 102
84void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { 103void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
85 system.Renderer().Rasterizer().InvalidateRegion(addr, size); 104 system.Renderer().Rasterizer().OnCPUWrite(addr, size);
86} 105}
87 106
88void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { 107void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
89 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important 108 // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
90 InvalidateRegion(addr, size); 109 system.Renderer().Rasterizer().OnCPUWrite(addr, size);
91} 110}
92 111
93void ThreadManager::WaitIdle() const { 112void ThreadManager::WaitIdle() const {
@@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
95 } 114 }
96} 115}
97 116
117void ThreadManager::OnCommandListEnd() {
118 PushCommand(OnCommandListEndCommand());
119}
120
98u64 ThreadManager::PushCommand(CommandData&& command_data) { 121u64 ThreadManager::PushCommand(CommandData&& command_data) {
99 const u64 fence{++state.last_fence}; 122 const u64 fence{++state.last_fence};
100 state.queue.Push(CommandDataContainer(std::move(command_data), fence)); 123 state.queue.Push(CommandDataContainer(std::move(command_data), fence));
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cd74ad330..5a28335d6 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
70 u64 size; 70 u64 size;
71}; 71};
72 72
73/// Command called within the gpu, to schedule actions after a command list end
74struct OnCommandListEndCommand final {};
75
76/// Command to make the gpu look into pending requests
77struct GPUTickCommand final {};
78
73using CommandData = 79using CommandData =
74 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, 80 std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
75 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; 81 InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
82 GPUTickCommand>;
76 83
77struct CommandDataContainer { 84struct CommandDataContainer {
78 CommandDataContainer() = default; 85 CommandDataContainer() = default;
@@ -122,6 +129,8 @@ public:
122 // Wait until the gpu thread is idle. 129 // Wait until the gpu thread is idle.
123 void WaitIdle() const; 130 void WaitIdle() const;
124 131
132 void OnCommandListEnd();
133
125private: 134private:
126 /// Pushes a command to be executed by the GPU thread 135 /// Pushes a command to be executed by the GPU thread
127 u64 PushCommand(CommandData&& command_data); 136 u64 PushCommand(CommandData&& command_data);
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 5ea2b01f2..2f75f8801 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -12,10 +12,12 @@
12#include <mutex> 12#include <mutex>
13#include <optional> 13#include <optional>
14#include <unordered_map> 14#include <unordered_map>
15#include <unordered_set>
15#include <vector> 16#include <vector>
16 17
17#include "common/assert.h" 18#include "common/assert.h"
18#include "core/core.h" 19#include "core/core.h"
20#include "core/settings.h"
19#include "video_core/engines/maxwell_3d.h" 21#include "video_core/engines/maxwell_3d.h"
20#include "video_core/gpu.h" 22#include "video_core/gpu.h"
21#include "video_core/memory_manager.h" 23#include "video_core/memory_manager.h"
@@ -130,6 +132,9 @@ public:
130 } 132 }
131 133
132 query->BindCounter(Stream(type).Current(), timestamp); 134 query->BindCounter(Stream(type).Current(), timestamp);
135 if (Settings::values.use_asynchronous_gpu_emulation) {
136 AsyncFlushQuery(cpu_addr);
137 }
133 } 138 }
134 139
135 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. 140 /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
@@ -170,6 +175,37 @@ public:
170 return streams[static_cast<std::size_t>(type)]; 175 return streams[static_cast<std::size_t>(type)];
171 } 176 }
172 177
178 void CommitAsyncFlushes() {
179 committed_flushes.push_back(uncommitted_flushes);
180 uncommitted_flushes.reset();
181 }
182
183 bool HasUncommittedFlushes() const {
184 return uncommitted_flushes != nullptr;
185 }
186
187 bool ShouldWaitAsyncFlushes() const {
188 if (committed_flushes.empty()) {
189 return false;
190 }
191 return committed_flushes.front() != nullptr;
192 }
193
194 void PopAsyncFlushes() {
195 if (committed_flushes.empty()) {
196 return;
197 }
198 auto& flush_list = committed_flushes.front();
199 if (!flush_list) {
200 committed_flushes.pop_front();
201 return;
202 }
203 for (VAddr query_address : *flush_list) {
204 FlushAndRemoveRegion(query_address, 4);
205 }
206 committed_flushes.pop_front();
207 }
208
173protected: 209protected:
174 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; 210 std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
175 211
@@ -224,6 +260,13 @@ private:
224 return found != std::end(contents) ? &*found : nullptr; 260 return found != std::end(contents) ? &*found : nullptr;
225 } 261 }
226 262
263 void AsyncFlushQuery(VAddr addr) {
264 if (!uncommitted_flushes) {
265 uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
266 }
267 uncommitted_flushes->insert(addr);
268 }
269
227 static constexpr std::uintptr_t PAGE_SIZE = 4096; 270 static constexpr std::uintptr_t PAGE_SIZE = 4096;
228 static constexpr unsigned PAGE_SHIFT = 12; 271 static constexpr unsigned PAGE_SHIFT = 12;
229 272
@@ -235,6 +278,9 @@ private:
235 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; 278 std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
236 279
237 std::array<CounterStream, VideoCore::NumQueryTypes> streams; 280 std::array<CounterStream, VideoCore::NumQueryTypes> streams;
281
282 std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
283 std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
238}; 284};
239 285
240template <class QueryCache, class HostCounter> 286template <class QueryCache, class HostCounter>
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 8ae5b9c4e..603f61952 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,15 +49,33 @@ public:
49 /// Records a GPU query and caches it 49 /// Records a GPU query and caches it
50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; 50 virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
51 51
52 /// Signal a GPU based semaphore as a fence
53 virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
54
55 /// Signal a GPU based syncpoint as a fence
56 virtual void SignalSyncPoint(u32 value) = 0;
57
58 /// Release all pending fences.
59 virtual void ReleaseFences() = 0;
60
52 /// Notify rasterizer that all caches should be flushed to Switch memory 61 /// Notify rasterizer that all caches should be flushed to Switch memory
53 virtual void FlushAll() = 0; 62 virtual void FlushAll() = 0;
54 63
55 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 64 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
56 virtual void FlushRegion(VAddr addr, u64 size) = 0; 65 virtual void FlushRegion(VAddr addr, u64 size) = 0;
57 66
 67 /// Check if the specified memory area requires flushing to CPU Memory.
68 virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
69
58 /// Notify rasterizer that any caches of the specified region should be invalidated 70 /// Notify rasterizer that any caches of the specified region should be invalidated
59 virtual void InvalidateRegion(VAddr addr, u64 size) = 0; 71 virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
60 72
 73 /// Notify rasterizer that any caches of the specified region are desynchronized with the guest
74 virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
75
76 /// Sync memory between guest and host.
77 virtual void SyncGuestHost() = 0;
78
61 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory 79 /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
62 /// and invalidated 80 /// and invalidated
63 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; 81 virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index cb5792407..4efce0de7 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
52} 52}
53 53
54void OGLBufferCache::WriteBarrier() { 54void OGLBufferCache::WriteBarrier() {
55 glMemoryBarrier(GL_ALL_BARRIER_BITS); 55 glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
56} 56}
57 57
58GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { 58GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
@@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
72void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, 72void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
73 u8* data) { 73 u8* data) {
74 MICROPROFILE_SCOPE(OpenGL_Buffer_Download); 74 MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
75 glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
75 glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), 76 glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
76 static_cast<GLsizeiptr>(size), data); 77 static_cast<GLsizeiptr>(size), data);
77} 78}
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
new file mode 100644
index 000000000..99ddcb3f8
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -0,0 +1,72 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/assert.h"
6
7#include "video_core/renderer_opengl/gl_fence_manager.h"
8
9namespace OpenGL {
10
11GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
12 : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
13
14GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
15 : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
16
17GLInnerFence::~GLInnerFence() = default;
18
19void GLInnerFence::Queue() {
20 if (is_stubbed) {
21 return;
22 }
23 ASSERT(sync_object.handle == 0);
24 sync_object.Create();
25}
26
27bool GLInnerFence::IsSignaled() const {
28 if (is_stubbed) {
29 return true;
30 }
31 ASSERT(sync_object.handle != 0);
32 GLsizei length;
33 GLint sync_status;
34 glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
35 return sync_status == GL_SIGNALED;
36}
37
38void GLInnerFence::Wait() {
39 if (is_stubbed) {
40 return;
41 }
42 ASSERT(sync_object.handle != 0);
43 glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
44}
45
46FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
47 VideoCore::RasterizerInterface& rasterizer,
48 TextureCacheOpenGL& texture_cache,
49 OGLBufferCache& buffer_cache, QueryCache& query_cache)
50 : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
51
52Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
53 return std::make_shared<GLInnerFence>(value, is_stubbed);
54}
55
56Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
57 return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
58}
59
60void FenceManagerOpenGL::QueueFence(Fence& fence) {
61 fence->Queue();
62}
63
64bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
65 return fence->IsSignaled();
66}
67
68void FenceManagerOpenGL::WaitFence(Fence& fence) {
69 fence->Wait();
70}
71
72} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
new file mode 100644
index 000000000..c917b3343
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -0,0 +1,53 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8#include <glad/glad.h>
9
10#include "common/common_types.h"
11#include "video_core/fence_manager.h"
12#include "video_core/renderer_opengl/gl_buffer_cache.h"
13#include "video_core/renderer_opengl/gl_query_cache.h"
14#include "video_core/renderer_opengl/gl_resource_manager.h"
15#include "video_core/renderer_opengl/gl_texture_cache.h"
16
17namespace OpenGL {
18
19class GLInnerFence : public VideoCommon::FenceBase {
20public:
21 GLInnerFence(u32 payload, bool is_stubbed);
22 GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
23 ~GLInnerFence();
24
25 void Queue();
26
27 bool IsSignaled() const;
28
29 void Wait();
30
31private:
32 OGLSync sync_object;
33};
34
35using Fence = std::shared_ptr<GLInnerFence>;
36using GenericFenceManager =
37 VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
38
39class FenceManagerOpenGL final : public GenericFenceManager {
40public:
41 FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
42 TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
43 QueryCache& query_cache);
44
45protected:
46 Fence CreateFence(u32 value, bool is_stubbed) override;
47 Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
48 void QueueFence(Fence& fence) override;
49 bool IsFenceSignaled(Fence& fence) const override;
50 void WaitFence(Fence& fence) override;
51};
52
53} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 175374f0d..4c16c89d2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
99 ScreenInfo& info, GLShader::ProgramManager& program_manager, 99 ScreenInfo& info, GLShader::ProgramManager& program_manager,
100 StateTracker& state_tracker) 100 StateTracker& state_tracker)
101 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, 101 : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
102 shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, 102 shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
103 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, 103 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
104 buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { 104 fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
105 screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
105 CheckExtensions(); 106 CheckExtensions();
106} 107}
107 108
@@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
599 EndTransformFeedback(); 600 EndTransformFeedback();
600 601
601 ++num_queued_commands; 602 ++num_queued_commands;
603
604 system.GPU().TickWork();
602} 605}
603 606
604void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { 607void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
649 query_cache.FlushRegion(addr, size); 652 query_cache.FlushRegion(addr, size);
650} 653}
651 654
655bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
656 if (!Settings::IsGPULevelHigh()) {
657 return buffer_cache.MustFlushRegion(addr, size);
658 }
659 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
660}
661
652void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { 662void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
653 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 663 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
654 if (addr == 0 || size == 0) { 664 if (addr == 0 || size == 0) {
@@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
660 query_cache.InvalidateRegion(addr, size); 670 query_cache.InvalidateRegion(addr, size);
661} 671}
662 672
673void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
674 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
675 if (addr == 0 || size == 0) {
676 return;
677 }
678 texture_cache.OnCPUWrite(addr, size);
679 shader_cache.InvalidateRegion(addr, size);
680 buffer_cache.OnCPUWrite(addr, size);
681 query_cache.InvalidateRegion(addr, size);
682}
683
684void RasterizerOpenGL::SyncGuestHost() {
685 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
686 texture_cache.SyncGuestHost();
687 buffer_cache.SyncGuestHost();
688}
689
690void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
691 auto& gpu{system.GPU()};
692 if (!gpu.IsAsync()) {
693 auto& memory_manager{gpu.MemoryManager()};
694 memory_manager.Write<u32>(addr, value);
695 return;
696 }
697 fence_manager.SignalSemaphore(addr, value);
698}
699
700void RasterizerOpenGL::SignalSyncPoint(u32 value) {
701 auto& gpu{system.GPU()};
702 if (!gpu.IsAsync()) {
703 gpu.IncrementSyncPoint(value);
704 return;
705 }
706 fence_manager.SignalSyncPoint(value);
707}
708
709void RasterizerOpenGL::ReleaseFences() {
710 auto& gpu{system.GPU()};
711 if (!gpu.IsAsync()) {
712 return;
713 }
714 fence_manager.WaitPendingFences();
715}
716
663void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { 717void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
664 if (Settings::values.use_accurate_gpu_emulation) { 718 if (Settings::IsGPULevelExtreme()) {
665 FlushRegion(addr, size); 719 FlushRegion(addr, size);
666 } 720 }
667 InvalidateRegion(addr, size); 721 InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index caea174d2..ebd2173eb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
23#include "video_core/rasterizer_interface.h" 23#include "video_core/rasterizer_interface.h"
24#include "video_core/renderer_opengl/gl_buffer_cache.h" 24#include "video_core/renderer_opengl/gl_buffer_cache.h"
25#include "video_core/renderer_opengl/gl_device.h" 25#include "video_core/renderer_opengl/gl_device.h"
26#include "video_core/renderer_opengl/gl_fence_manager.h"
26#include "video_core/renderer_opengl/gl_framebuffer_cache.h" 27#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
27#include "video_core/renderer_opengl/gl_query_cache.h" 28#include "video_core/renderer_opengl/gl_query_cache.h"
28#include "video_core/renderer_opengl/gl_resource_manager.h" 29#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -66,7 +67,13 @@ public:
66 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 67 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
67 void FlushAll() override; 68 void FlushAll() override;
68 void FlushRegion(VAddr addr, u64 size) override; 69 void FlushRegion(VAddr addr, u64 size) override;
70 bool MustFlushRegion(VAddr addr, u64 size) override;
69 void InvalidateRegion(VAddr addr, u64 size) override; 71 void InvalidateRegion(VAddr addr, u64 size) override;
72 void OnCPUWrite(VAddr addr, u64 size) override;
73 void SyncGuestHost() override;
74 void SignalSemaphore(GPUVAddr addr, u32 value) override;
75 void SignalSyncPoint(u32 value) override;
76 void ReleaseFences() override;
70 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 77 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
71 void FlushCommands() override; 78 void FlushCommands() override;
72 void TickFrame() override; 79 void TickFrame() override;
@@ -222,6 +229,8 @@ private:
222 SamplerCacheOpenGL sampler_cache; 229 SamplerCacheOpenGL sampler_cache;
223 FramebufferCacheOpenGL framebuffer_cache; 230 FramebufferCacheOpenGL framebuffer_cache;
224 QueryCache query_cache; 231 QueryCache query_cache;
232 OGLBufferCache buffer_cache;
233 FenceManagerOpenGL fence_manager;
225 234
226 Core::System& system; 235 Core::System& system;
227 ScreenInfo& screen_info; 236 ScreenInfo& screen_info;
@@ -229,7 +238,6 @@ private:
229 StateTracker& state_tracker; 238 StateTracker& state_tracker;
230 239
231 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; 240 static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
232 OGLBufferCache buffer_cache;
233 241
234 GLint vertex_binding = 0; 242 GLint vertex_binding = 0;
235 243
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 6d2ff20f9..f63156b8d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
448 448
449 // Look up shader in the cache based on address 449 // Look up shader in the cache based on address
450 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; 450 const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
451 Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; 451 Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
452 if (shader) { 452 if (shader) {
453 return last_shaders[static_cast<std::size_t>(program)] = shader; 453 return last_shaders[static_cast<std::size_t>(program)] = shader;
454 } 454 }
@@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
477 const std::size_t size_in_bytes = code.size() * sizeof(u64); 477 const std::size_t size_in_bytes = code.size() * sizeof(u64);
478 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); 478 shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
479 } 479 }
480 Register(shader); 480
481 if (cpu_addr) {
482 Register(shader);
483 } else {
484 null_shader = shader;
485 }
481 486
482 return last_shaders[static_cast<std::size_t>(program)] = shader; 487 return last_shaders[static_cast<std::size_t>(program)] = shader;
483} 488}
@@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
486 auto& memory_manager{system.GPU().MemoryManager()}; 491 auto& memory_manager{system.GPU().MemoryManager()};
487 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; 492 const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
488 493
489 auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; 494 auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
490 if (kernel) { 495 if (kernel) {
491 return kernel; 496 return kernel;
492 } 497 }
@@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
507 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); 512 kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
508 } 513 }
509 514
510 Register(kernel); 515 if (cpu_addr) {
516 Register(kernel);
517 } else {
518 null_kernel = kernel;
519 }
511 return kernel; 520 return kernel;
512} 521}
513 522
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index c836df5bd..91690b470 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -125,6 +125,9 @@ private:
125 ShaderDiskCacheOpenGL disk_cache; 125 ShaderDiskCacheOpenGL disk_cache;
126 std::unordered_map<u64, PrecompiledShader> runtime_cache; 126 std::unordered_map<u64, PrecompiledShader> runtime_cache;
127 127
128 Shader null_shader{};
129 Shader null_kernel{};
130
128 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 131 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
129}; 132};
130 133
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
new file mode 100644
index 000000000..a02be5487
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -0,0 +1,101 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include <memory>
6#include <thread>
7
8#include "video_core/renderer_vulkan/vk_buffer_cache.h"
9#include "video_core/renderer_vulkan/vk_device.h"
10#include "video_core/renderer_vulkan/vk_fence_manager.h"
11#include "video_core/renderer_vulkan/vk_scheduler.h"
12#include "video_core/renderer_vulkan/vk_texture_cache.h"
13#include "video_core/renderer_vulkan/wrapper.h"
14
15namespace Vulkan {
16
17InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
18 : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
19
20InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
21 u32 payload, bool is_stubbed)
22 : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
23
24InnerFence::~InnerFence() = default;
25
26void InnerFence::Queue() {
27 if (is_stubbed) {
28 return;
29 }
30 ASSERT(!event);
31
32 event = device.GetLogical().CreateEvent();
33 ticks = scheduler.Ticks();
34
35 scheduler.RequestOutsideRenderPassOperationContext();
36 scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
37 cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
38 });
39}
40
41bool InnerFence::IsSignaled() const {
42 if (is_stubbed) {
43 return true;
44 }
45 ASSERT(event);
46 return IsEventSignalled();
47}
48
49void InnerFence::Wait() {
50 if (is_stubbed) {
51 return;
52 }
53 ASSERT(event);
54
55 if (ticks >= scheduler.Ticks()) {
56 scheduler.Flush();
57 }
58 while (!IsEventSignalled()) {
59 std::this_thread::yield();
60 }
61}
62
63bool InnerFence::IsEventSignalled() const {
64 switch (const VkResult result = event.GetStatus()) {
65 case VK_EVENT_SET:
66 return true;
67 case VK_EVENT_RESET:
68 return false;
69 default:
70 throw vk::Exception(result);
71 }
72}
73
74VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
75 const VKDevice& device, VKScheduler& scheduler,
76 VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
77 VKQueryCache& query_cache)
78 : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
79 device{device}, scheduler{scheduler} {}
80
81Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
82 return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
83}
84
85Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
86 return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
87}
88
89void VKFenceManager::QueueFence(Fence& fence) {
90 fence->Queue();
91}
92
93bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
94 return fence->IsSignaled();
95}
96
97void VKFenceManager::WaitFence(Fence& fence) {
98 fence->Wait();
99}
100
101} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
new file mode 100644
index 000000000..04d07fe6a
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -0,0 +1,74 @@
1// Copyright 2020 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <memory>
8
9#include "video_core/fence_manager.h"
10#include "video_core/renderer_vulkan/wrapper.h"
11
12namespace Core {
13class System;
14}
15
16namespace VideoCore {
17class RasterizerInterface;
18}
19
20namespace Vulkan {
21
22class VKBufferCache;
23class VKDevice;
24class VKQueryCache;
25class VKScheduler;
26class VKTextureCache;
27
28class InnerFence : public VideoCommon::FenceBase {
29public:
30 explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
31 bool is_stubbed);
32 explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
33 u32 payload, bool is_stubbed);
34 ~InnerFence();
35
36 void Queue();
37
38 bool IsSignaled() const;
39
40 void Wait();
41
42private:
43 bool IsEventSignalled() const;
44
45 const VKDevice& device;
46 VKScheduler& scheduler;
47 vk::Event event;
48 u64 ticks = 0;
49};
50using Fence = std::shared_ptr<InnerFence>;
51
52using GenericFenceManager =
53 VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
54
55class VKFenceManager final : public GenericFenceManager {
56public:
57 explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
58 const VKDevice& device, VKScheduler& scheduler,
59 VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
60 VKQueryCache& query_cache);
61
62protected:
63 Fence CreateFence(u32 value, bool is_stubbed) override;
64 Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
65 void QueueFence(Fence& fence) override;
66 bool IsFenceSignaled(Fence& fence) const override;
67 void WaitFence(Fence& fence) override;
68
69private:
70 const VKDevice& device;
71 VKScheduler& scheduler;
72};
73
74} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a792130fd..91b1b16a5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
207 const GPUVAddr program_addr{GetShaderAddress(system, program)}; 207 const GPUVAddr program_addr{GetShaderAddress(system, program)};
208 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 208 const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
209 ASSERT(cpu_addr); 209 ASSERT(cpu_addr);
210 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; 210 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
211 if (!shader) { 211 if (!shader) {
212 const auto host_ptr{memory_manager.GetPointer(program_addr)}; 212 const auto host_ptr{memory_manager.GetPointer(program_addr)};
213 213
@@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
218 218
219 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, 219 shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
220 std::move(code), stage_offset); 220 std::move(code), stage_offset);
221 Register(shader); 221 if (cpu_addr) {
222 Register(shader);
223 } else {
224 null_shader = shader;
225 }
222 } 226 }
223 shaders[index] = std::move(shader); 227 shaders[index] = std::move(shader);
224 } 228 }
@@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
261 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); 265 const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
262 ASSERT(cpu_addr); 266 ASSERT(cpu_addr);
263 267
264 auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; 268 auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
265 if (!shader) { 269 if (!shader) {
266 // No shader found - create a new one 270 // No shader found - create a new one
267 const auto host_ptr = memory_manager.GetPointer(program_addr); 271 const auto host_ptr = memory_manager.GetPointer(program_addr);
@@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
271 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, 275 shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
272 program_addr, *cpu_addr, std::move(code), 276 program_addr, *cpu_addr, std::move(code),
273 kernel_main_offset); 277 kernel_main_offset);
274 Register(shader); 278 if (cpu_addr) {
279 Register(shader);
280 } else {
281 null_kernel = shader;
282 }
275 } 283 }
276 284
277 Specialization specialization; 285 Specialization specialization;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 7ccdb7083..602a0a340 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -182,6 +182,9 @@ private:
182 VKUpdateDescriptorQueue& update_descriptor_queue; 182 VKUpdateDescriptorQueue& update_descriptor_queue;
183 VKRenderPassCache& renderpass_cache; 183 VKRenderPassCache& renderpass_cache;
184 184
185 Shader null_shader{};
186 Shader null_kernel{};
187
185 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; 188 std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
186 189
187 GraphicsPipelineCacheKey last_graphics_key; 190 GraphicsPipelineCacheKey last_graphics_key;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index b58a88664..8a1f57891 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -17,6 +17,7 @@
17#include "common/microprofile.h" 17#include "common/microprofile.h"
18#include "core/core.h" 18#include "core/core.h"
19#include "core/memory.h" 19#include "core/memory.h"
20#include "core/settings.h"
20#include "video_core/engines/kepler_compute.h" 21#include "video_core/engines/kepler_compute.h"
21#include "video_core/engines/maxwell_3d.h" 22#include "video_core/engines/maxwell_3d.h"
22#include "video_core/renderer_vulkan/fixed_pipeline_state.h" 23#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
@@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
299 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, 300 pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
300 renderpass_cache), 301 renderpass_cache),
301 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), 302 buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
302 sampler_cache(device), query_cache(system, *this, device, scheduler) { 303 sampler_cache(device),
304 fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
305 query_cache(system, *this, device, scheduler) {
303 scheduler.SetQueryCache(query_cache); 306 scheduler.SetQueryCache(query_cache);
304} 307}
305 308
@@ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
360 }); 363 });
361 364
362 EndTransformFeedback(); 365 EndTransformFeedback();
366
367 system.GPU().TickWork();
363} 368}
364 369
365void RasterizerVulkan::Clear() { 370void RasterizerVulkan::Clear() {
@@ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
504 query_cache.FlushRegion(addr, size); 509 query_cache.FlushRegion(addr, size);
505} 510}
506 511
512bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
513 if (!Settings::IsGPULevelHigh()) {
514 return buffer_cache.MustFlushRegion(addr, size);
515 }
516 return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
517}
518
507void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { 519void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
508 if (addr == 0 || size == 0) { 520 if (addr == 0 || size == 0) {
509 return; 521 return;
@@ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
514 query_cache.InvalidateRegion(addr, size); 526 query_cache.InvalidateRegion(addr, size);
515} 527}
516 528
529void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
530 if (addr == 0 || size == 0) {
531 return;
532 }
533 texture_cache.OnCPUWrite(addr, size);
534 pipeline_cache.InvalidateRegion(addr, size);
535 buffer_cache.OnCPUWrite(addr, size);
536 query_cache.InvalidateRegion(addr, size);
537}
538
539void RasterizerVulkan::SyncGuestHost() {
540 texture_cache.SyncGuestHost();
541 buffer_cache.SyncGuestHost();
542}
543
544void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
545 auto& gpu{system.GPU()};
546 if (!gpu.IsAsync()) {
547 gpu.MemoryManager().Write<u32>(addr, value);
548 return;
549 }
550 fence_manager.SignalSemaphore(addr, value);
551}
552
553void RasterizerVulkan::SignalSyncPoint(u32 value) {
554 auto& gpu{system.GPU()};
555 if (!gpu.IsAsync()) {
556 gpu.IncrementSyncPoint(value);
557 return;
558 }
559 fence_manager.SignalSyncPoint(value);
560}
561
562void RasterizerVulkan::ReleaseFences() {
563 auto& gpu{system.GPU()};
564 if (!gpu.IsAsync()) {
565 return;
566 }
567 fence_manager.WaitPendingFences();
568}
569
517void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { 570void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
518 FlushRegion(addr, size); 571 FlushRegion(addr, size);
519 InvalidateRegion(addr, size); 572 InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index d9108f862..2fa46b0cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -21,6 +21,7 @@
21#include "video_core/renderer_vulkan/vk_buffer_cache.h" 21#include "video_core/renderer_vulkan/vk_buffer_cache.h"
22#include "video_core/renderer_vulkan/vk_compute_pass.h" 22#include "video_core/renderer_vulkan/vk_compute_pass.h"
23#include "video_core/renderer_vulkan/vk_descriptor_pool.h" 23#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
24#include "video_core/renderer_vulkan/vk_fence_manager.h"
24#include "video_core/renderer_vulkan/vk_memory_manager.h" 25#include "video_core/renderer_vulkan/vk_memory_manager.h"
25#include "video_core/renderer_vulkan/vk_pipeline_cache.h" 26#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
26#include "video_core/renderer_vulkan/vk_query_cache.h" 27#include "video_core/renderer_vulkan/vk_query_cache.h"
@@ -118,7 +119,13 @@ public:
118 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; 119 void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
119 void FlushAll() override; 120 void FlushAll() override;
120 void FlushRegion(VAddr addr, u64 size) override; 121 void FlushRegion(VAddr addr, u64 size) override;
122 bool MustFlushRegion(VAddr addr, u64 size) override;
121 void InvalidateRegion(VAddr addr, u64 size) override; 123 void InvalidateRegion(VAddr addr, u64 size) override;
124 void OnCPUWrite(VAddr addr, u64 size) override;
125 void SyncGuestHost() override;
126 void SignalSemaphore(GPUVAddr addr, u32 value) override;
127 void SignalSyncPoint(u32 value) override;
128 void ReleaseFences() override;
122 void FlushAndInvalidateRegion(VAddr addr, u64 size) override; 129 void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
123 void FlushCommands() override; 130 void FlushCommands() override;
124 void TickFrame() override; 131 void TickFrame() override;
@@ -261,6 +268,7 @@ private:
261 VKPipelineCache pipeline_cache; 268 VKPipelineCache pipeline_cache;
262 VKBufferCache buffer_cache; 269 VKBufferCache buffer_cache;
263 VKSamplerCache sampler_cache; 270 VKSamplerCache sampler_cache;
271 VKFenceManager fence_manager;
264 VKQueryCache query_cache; 272 VKQueryCache query_cache;
265 273
266 std::array<View, Maxwell::NumRenderTargets> color_attachments; 274 std::array<View, Maxwell::NumRenderTargets> color_attachments;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 3a52a3a6f..539f3c974 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
63 X(vkCmdSetBlendConstants); 63 X(vkCmdSetBlendConstants);
64 X(vkCmdSetDepthBias); 64 X(vkCmdSetDepthBias);
65 X(vkCmdSetDepthBounds); 65 X(vkCmdSetDepthBounds);
66 X(vkCmdSetEvent);
66 X(vkCmdSetScissor); 67 X(vkCmdSetScissor);
67 X(vkCmdSetStencilCompareMask); 68 X(vkCmdSetStencilCompareMask);
68 X(vkCmdSetStencilReference); 69 X(vkCmdSetStencilReference);
@@ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
75 X(vkCreateDescriptorPool); 76 X(vkCreateDescriptorPool);
76 X(vkCreateDescriptorSetLayout); 77 X(vkCreateDescriptorSetLayout);
77 X(vkCreateDescriptorUpdateTemplateKHR); 78 X(vkCreateDescriptorUpdateTemplateKHR);
79 X(vkCreateEvent);
78 X(vkCreateFence); 80 X(vkCreateFence);
79 X(vkCreateFramebuffer); 81 X(vkCreateFramebuffer);
80 X(vkCreateGraphicsPipelines); 82 X(vkCreateGraphicsPipelines);
@@ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
93 X(vkDestroyDescriptorPool); 95 X(vkDestroyDescriptorPool);
94 X(vkDestroyDescriptorSetLayout); 96 X(vkDestroyDescriptorSetLayout);
95 X(vkDestroyDescriptorUpdateTemplateKHR); 97 X(vkDestroyDescriptorUpdateTemplateKHR);
98 X(vkDestroyEvent);
96 X(vkDestroyFence); 99 X(vkDestroyFence);
97 X(vkDestroyFramebuffer); 100 X(vkDestroyFramebuffer);
98 X(vkDestroyImage); 101 X(vkDestroyImage);
@@ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
112 X(vkFreeMemory); 115 X(vkFreeMemory);
113 X(vkGetBufferMemoryRequirements); 116 X(vkGetBufferMemoryRequirements);
114 X(vkGetDeviceQueue); 117 X(vkGetDeviceQueue);
118 X(vkGetEventStatus);
115 X(vkGetFenceStatus); 119 X(vkGetFenceStatus);
116 X(vkGetImageMemoryRequirements); 120 X(vkGetImageMemoryRequirements);
117 X(vkGetQueryPoolResults); 121 X(vkGetQueryPoolResults);
@@ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
269 dld.vkFreeMemory(device, handle, nullptr); 273 dld.vkFreeMemory(device, handle, nullptr);
270} 274}
271 275
276void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
277 dld.vkDestroyEvent(device, handle, nullptr);
278}
279
272void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { 280void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
273 dld.vkDestroyFence(device, handle, nullptr); 281 dld.vkDestroyFence(device, handle, nullptr);
274} 282}
@@ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
599 return ShaderModule(object, handle, *dld); 607 return ShaderModule(object, handle, *dld);
600} 608}
601 609
610Event Device::CreateEvent() const {
611 VkEventCreateInfo ci;
612 ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO;
613 ci.pNext = nullptr;
614 ci.flags = 0;
615 VkEvent object;
616 Check(dld->vkCreateEvent(handle, &ci, nullptr, &object));
617 return Event(object, handle, *dld);
618}
619
602SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { 620SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
603 VkSwapchainKHR object; 621 VkSwapchainKHR object;
604 Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); 622 Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 6fe0294d8..bda16a2cb 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch {
199 PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; 199 PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
200 PFN_vkCmdSetDepthBias vkCmdSetDepthBias; 200 PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
201 PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; 201 PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
202 PFN_vkCmdSetEvent vkCmdSetEvent;
202 PFN_vkCmdSetScissor vkCmdSetScissor; 203 PFN_vkCmdSetScissor vkCmdSetScissor;
203 PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; 204 PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
204 PFN_vkCmdSetStencilReference vkCmdSetStencilReference; 205 PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
@@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
211 PFN_vkCreateDescriptorPool vkCreateDescriptorPool; 212 PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
212 PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; 213 PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
213 PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; 214 PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
215 PFN_vkCreateEvent vkCreateEvent;
214 PFN_vkCreateFence vkCreateFence; 216 PFN_vkCreateFence vkCreateFence;
215 PFN_vkCreateFramebuffer vkCreateFramebuffer; 217 PFN_vkCreateFramebuffer vkCreateFramebuffer;
216 PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; 218 PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
@@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
229 PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; 231 PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
230 PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; 232 PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
231 PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; 233 PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
234 PFN_vkDestroyEvent vkDestroyEvent;
232 PFN_vkDestroyFence vkDestroyFence; 235 PFN_vkDestroyFence vkDestroyFence;
233 PFN_vkDestroyFramebuffer vkDestroyFramebuffer; 236 PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
234 PFN_vkDestroyImage vkDestroyImage; 237 PFN_vkDestroyImage vkDestroyImage;
@@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch {
248 PFN_vkFreeMemory vkFreeMemory; 251 PFN_vkFreeMemory vkFreeMemory;
249 PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; 252 PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
250 PFN_vkGetDeviceQueue vkGetDeviceQueue; 253 PFN_vkGetDeviceQueue vkGetDeviceQueue;
254 PFN_vkGetEventStatus vkGetEventStatus;
251 PFN_vkGetFenceStatus vkGetFenceStatus; 255 PFN_vkGetFenceStatus vkGetFenceStatus;
252 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; 256 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
253 PFN_vkGetQueryPoolResults vkGetQueryPoolResults; 257 PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
@@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
279void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; 283void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
280void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; 284void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
281void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; 285void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
286void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
282void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; 287void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
283void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; 288void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
284void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; 289void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
@@ -648,6 +653,15 @@ public:
648 std::vector<VkImage> GetImages() const; 653 std::vector<VkImage> GetImages() const;
649}; 654};
650 655
656class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
657 using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
658
659public:
660 VkResult GetStatus() const noexcept {
661 return dld->vkGetEventStatus(owner, handle);
662 }
663};
664
651class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { 665class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
652 using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; 666 using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
653 667
@@ -695,6 +709,8 @@ public:
695 709
696 ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; 710 ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
697 711
712 Event CreateEvent() const;
713
698 SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; 714 SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
699 715
700 DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; 716 DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
@@ -938,6 +954,10 @@ public:
938 dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); 954 dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
939 } 955 }
940 956
957 void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
958 dld->vkCmdSetEvent(handle, event, stage_flags);
959 }
960
941 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, 961 void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
942 const VkDeviceSize* offsets, 962 const VkDeviceSize* offsets,
943 const VkDeviceSize* sizes) const noexcept { 963 const VkDeviceSize* sizes) const noexcept {
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index c5ab21f56..79e10ffbb 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -192,6 +192,22 @@ public:
192 index = index_; 192 index = index_;
193 } 193 }
194 194
195 void SetMemoryMarked(bool is_memory_marked_) {
196 is_memory_marked = is_memory_marked_;
197 }
198
199 bool IsMemoryMarked() const {
200 return is_memory_marked;
201 }
202
203 void SetSyncPending(bool is_sync_pending_) {
204 is_sync_pending = is_sync_pending_;
205 }
206
207 bool IsSyncPending() const {
208 return is_sync_pending;
209 }
210
195 void MarkAsPicked(bool is_picked_) { 211 void MarkAsPicked(bool is_picked_) {
196 is_picked = is_picked_; 212 is_picked = is_picked_;
197 } 213 }
@@ -303,6 +319,8 @@ private:
303 bool is_target{}; 319 bool is_target{};
304 bool is_registered{}; 320 bool is_registered{};
305 bool is_picked{}; 321 bool is_picked{};
322 bool is_memory_marked{};
323 bool is_sync_pending{};
306 u32 index{NO_RT}; 324 u32 index{NO_RT};
307 u64 modification_tick{}; 325 u64 modification_tick{};
308}; 326};
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 69ca08fd1..cf6bd005a 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -6,6 +6,7 @@
6 6
7#include <algorithm> 7#include <algorithm>
8#include <array> 8#include <array>
9#include <list>
9#include <memory> 10#include <memory>
10#include <mutex> 11#include <mutex>
11#include <set> 12#include <set>
@@ -62,6 +63,30 @@ public:
62 } 63 }
63 } 64 }
64 65
66 void OnCPUWrite(VAddr addr, std::size_t size) {
67 std::lock_guard lock{mutex};
68
69 for (const auto& surface : GetSurfacesInRegion(addr, size)) {
70 if (surface->IsMemoryMarked()) {
71 UnmarkMemory(surface);
72 surface->SetSyncPending(true);
73 marked_for_unregister.emplace_back(surface);
74 }
75 }
76 }
77
78 void SyncGuestHost() {
79 std::lock_guard lock{mutex};
80
81 for (const auto& surface : marked_for_unregister) {
82 if (surface->IsRegistered()) {
83 surface->SetSyncPending(false);
84 Unregister(surface);
85 }
86 }
87 marked_for_unregister.clear();
88 }
89
65 /** 90 /**
66 * Guarantees that rendertargets don't unregister themselves if the 91 * Guarantees that rendertargets don't unregister themselves if the
67 * collide. Protection is currently only done on 3D slices. 92 * collide. Protection is currently only done on 3D slices.
@@ -85,10 +110,20 @@ public:
85 return a->GetModificationTick() < b->GetModificationTick(); 110 return a->GetModificationTick() < b->GetModificationTick();
86 }); 111 });
87 for (const auto& surface : surfaces) { 112 for (const auto& surface : surfaces) {
113 mutex.unlock();
88 FlushSurface(surface); 114 FlushSurface(surface);
115 mutex.lock();
89 } 116 }
90 } 117 }
91 118
119 bool MustFlushRegion(VAddr addr, std::size_t size) {
120 std::lock_guard lock{mutex};
121
122 const auto surfaces = GetSurfacesInRegion(addr, size);
123 return std::any_of(surfaces.cbegin(), surfaces.cend(),
124 [](const TSurface& surface) { return surface->IsModified(); });
125 }
126
92 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, 127 TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
93 const VideoCommon::Shader::Sampler& entry) { 128 const VideoCommon::Shader::Sampler& entry) {
94 std::lock_guard lock{mutex}; 129 std::lock_guard lock{mutex};
@@ -206,8 +241,14 @@ public:
206 241
207 auto surface_view = GetSurface(gpu_addr, *cpu_addr, 242 auto surface_view = GetSurface(gpu_addr, *cpu_addr,
208 SurfaceParams::CreateForFramebuffer(system, index), true); 243 SurfaceParams::CreateForFramebuffer(system, index), true);
209 if (render_targets[index].target) 244 if (render_targets[index].target) {
210 render_targets[index].target->MarkAsRenderTarget(false, NO_RT); 245 auto& surface = render_targets[index].target;
246 surface->MarkAsRenderTarget(false, NO_RT);
247 const auto& cr_params = surface->GetSurfaceParams();
248 if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
249 AsyncFlushSurface(surface);
250 }
251 }
211 render_targets[index].target = surface_view.first; 252 render_targets[index].target = surface_view.first;
212 render_targets[index].view = surface_view.second; 253 render_targets[index].view = surface_view.second;
213 if (render_targets[index].target) 254 if (render_targets[index].target)
@@ -284,6 +325,34 @@ public:
284 return ++ticks; 325 return ++ticks;
285 } 326 }
286 327
328 void CommitAsyncFlushes() {
329 committed_flushes.push_back(uncommitted_flushes);
330 uncommitted_flushes.reset();
331 }
332
333 bool HasUncommittedFlushes() const {
334 return uncommitted_flushes != nullptr;
335 }
336
337 bool ShouldWaitAsyncFlushes() const {
338 return !committed_flushes.empty() && committed_flushes.front() != nullptr;
339 }
340
341 void PopAsyncFlushes() {
342 if (committed_flushes.empty()) {
343 return;
344 }
345 auto& flush_list = committed_flushes.front();
346 if (!flush_list) {
347 committed_flushes.pop_front();
348 return;
349 }
350 for (TSurface& surface : *flush_list) {
351 FlushSurface(surface);
352 }
353 committed_flushes.pop_front();
354 }
355
287protected: 356protected:
288 explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, 357 explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
289 bool is_astc_supported) 358 bool is_astc_supported)
@@ -345,9 +414,20 @@ protected:
345 surface->SetCpuAddr(*cpu_addr); 414 surface->SetCpuAddr(*cpu_addr);
346 RegisterInnerCache(surface); 415 RegisterInnerCache(surface);
347 surface->MarkAsRegistered(true); 416 surface->MarkAsRegistered(true);
417 surface->SetMemoryMarked(true);
348 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); 418 rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
349 } 419 }
350 420
421 void UnmarkMemory(TSurface surface) {
422 if (!surface->IsMemoryMarked()) {
423 return;
424 }
425 const std::size_t size = surface->GetSizeInBytes();
426 const VAddr cpu_addr = surface->GetCpuAddr();
427 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
428 surface->SetMemoryMarked(false);
429 }
430
351 void Unregister(TSurface surface) { 431 void Unregister(TSurface surface) {
352 if (guard_render_targets && surface->IsProtected()) { 432 if (guard_render_targets && surface->IsProtected()) {
353 return; 433 return;
@@ -355,9 +435,11 @@ protected:
355 if (!guard_render_targets && surface->IsRenderTarget()) { 435 if (!guard_render_targets && surface->IsRenderTarget()) {
356 ManageRenderTargetUnregister(surface); 436 ManageRenderTargetUnregister(surface);
357 } 437 }
358 const std::size_t size = surface->GetSizeInBytes(); 438 UnmarkMemory(surface);
359 const VAddr cpu_addr = surface->GetCpuAddr(); 439 if (surface->IsSyncPending()) {
360 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); 440 marked_for_unregister.remove(surface);
441 surface->SetSyncPending(false);
442 }
361 UnregisterInnerCache(surface); 443 UnregisterInnerCache(surface);
362 surface->MarkAsRegistered(false); 444 surface->MarkAsRegistered(false);
363 ReserveSurface(surface->GetSurfaceParams(), surface); 445 ReserveSurface(surface->GetSurfaceParams(), surface);
@@ -417,7 +499,7 @@ private:
417 **/ 499 **/
418 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 500 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
419 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 501 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
420 if (Settings::values.use_accurate_gpu_emulation) { 502 if (Settings::IsGPULevelExtreme()) {
421 return RecycleStrategy::Flush; 503 return RecycleStrategy::Flush;
422 } 504 }
423 // 3D Textures decision 505 // 3D Textures decision
@@ -461,7 +543,7 @@ private:
461 } 543 }
462 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { 544 switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
463 case RecycleStrategy::Ignore: { 545 case RecycleStrategy::Ignore: {
464 return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); 546 return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
465 } 547 }
466 case RecycleStrategy::Flush: { 548 case RecycleStrategy::Flush: {
467 std::sort(overlaps.begin(), overlaps.end(), 549 std::sort(overlaps.begin(), overlaps.end(),
@@ -509,7 +591,7 @@ private:
509 } 591 }
510 const auto& final_params = new_surface->GetSurfaceParams(); 592 const auto& final_params = new_surface->GetSurfaceParams();
511 if (cr_params.type != final_params.type) { 593 if (cr_params.type != final_params.type) {
512 if (Settings::values.use_accurate_gpu_emulation) { 594 if (Settings::IsGPULevelExtreme()) {
513 BufferCopy(current_surface, new_surface); 595 BufferCopy(current_surface, new_surface);
514 } 596 }
515 } else { 597 } else {
@@ -598,7 +680,7 @@ private:
598 if (passed_tests == 0) { 680 if (passed_tests == 0) {
599 return {}; 681 return {};
600 // In Accurate GPU all tests should pass, else we recycle 682 // In Accurate GPU all tests should pass, else we recycle
601 } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { 683 } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
602 return {}; 684 return {};
603 } 685 }
604 for (const auto& surface : overlaps) { 686 for (const auto& surface : overlaps) {
@@ -668,7 +750,7 @@ private:
668 for (const auto& surface : overlaps) { 750 for (const auto& surface : overlaps) {
669 if (!surface->MatchTarget(params.target)) { 751 if (!surface->MatchTarget(params.target)) {
670 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { 752 if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
671 if (Settings::values.use_accurate_gpu_emulation) { 753 if (Settings::IsGPULevelExtreme()) {
672 return std::nullopt; 754 return std::nullopt;
673 } 755 }
674 Unregister(surface); 756 Unregister(surface);
@@ -1106,6 +1188,13 @@ private:
1106 TView view; 1188 TView view;
1107 }; 1189 };
1108 1190
1191 void AsyncFlushSurface(TSurface& surface) {
1192 if (!uncommitted_flushes) {
1193 uncommitted_flushes = std::make_shared<std::list<TSurface>>();
1194 }
1195 uncommitted_flushes->push_back(surface);
1196 }
1197
1109 VideoCore::RasterizerInterface& rasterizer; 1198 VideoCore::RasterizerInterface& rasterizer;
1110 1199
1111 FormatLookupTable format_lookup_table; 1200 FormatLookupTable format_lookup_table;
@@ -1150,6 +1239,11 @@ private:
1150 std::unordered_map<u32, TSurface> invalid_cache; 1239 std::unordered_map<u32, TSurface> invalid_cache;
1151 std::vector<u8> invalid_memory; 1240 std::vector<u8> invalid_memory;
1152 1241
1242 std::list<TSurface> marked_for_unregister;
1243
1244 std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
1245 std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
1246
1153 StagingCache staging_cache; 1247 StagingCache staging_cache;
1154 std::recursive_mutex mutex; 1248 std::recursive_mutex mutex;
1155}; 1249};
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 7f6dfac84..196a3a116 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -639,8 +639,8 @@ void Config::ReadRendererValues() {
639 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); 639 Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
640 Settings::values.use_disk_shader_cache = 640 Settings::values.use_disk_shader_cache =
641 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); 641 ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
642 Settings::values.use_accurate_gpu_emulation = 642 const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
643 ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); 643 Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
644 Settings::values.use_asynchronous_gpu_emulation = 644 Settings::values.use_asynchronous_gpu_emulation =
645 ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); 645 ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
646 Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); 646 Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
@@ -1080,8 +1080,8 @@ void Config::SaveRendererValues() {
1080 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); 1080 WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
1081 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, 1081 WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
1082 true); 1082 true);
1083 WriteSetting(QStringLiteral("use_accurate_gpu_emulation"), 1083 WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy),
1084 Settings::values.use_accurate_gpu_emulation, false); 1084 0);
1085 WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), 1085 WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
1086 Settings::values.use_asynchronous_gpu_emulation, false); 1086 Settings::values.use_asynchronous_gpu_emulation, false);
1087 WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); 1087 WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index b9f429f84..0a3f47339 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
19 19
20void ConfigureGraphicsAdvanced::SetConfiguration() { 20void ConfigureGraphicsAdvanced::SetConfiguration() {
21 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); 21 const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
22 ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); 22 ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
23 ui->use_vsync->setEnabled(runtime_lock); 23 ui->use_vsync->setEnabled(runtime_lock);
24 ui->use_vsync->setChecked(Settings::values.use_vsync); 24 ui->use_vsync->setChecked(Settings::values.use_vsync);
25 ui->force_30fps_mode->setEnabled(runtime_lock); 25 ui->force_30fps_mode->setEnabled(runtime_lock);
@@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
29} 29}
30 30
31void ConfigureGraphicsAdvanced::ApplyConfiguration() { 31void ConfigureGraphicsAdvanced::ApplyConfiguration() {
32 Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); 32 auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
33 Settings::values.gpu_accuracy = gpu_accuracy;
33 Settings::values.use_vsync = ui->use_vsync->isChecked(); 34 Settings::values.use_vsync = ui->use_vsync->isChecked();
34 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); 35 Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
35 Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); 36 Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 42eec278e..0c7b383e0 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -23,11 +23,34 @@
23 </property> 23 </property>
24 <layout class="QVBoxLayout" name="verticalLayout_3"> 24 <layout class="QVBoxLayout" name="verticalLayout_3">
25 <item> 25 <item>
26 <widget class="QCheckBox" name="use_accurate_gpu_emulation"> 26 <layout class="QHBoxLayout" name="horizontalLayout_2">
27 <property name="text"> 27 <item>
28 <string>Use accurate GPU emulation (slow)</string> 28 <widget class="QLabel" name="label_gpu_accuracy">
29 </property> 29 <property name="text">
30 </widget> 30 <string>Accuracy Level:</string>
31 </property>
32 </widget>
33 </item>
34 <item>
35 <widget class="QComboBox" name="gpu_accuracy">
36 <item>
37 <property name="text">
38 <string notr="true">Normal</string>
39 </property>
40 </item>
41 <item>
42 <property name="text">
43 <string notr="true">High</string>
44 </property>
45 </item>
46 <item>
47 <property name="text">
 48 <string notr="true">Extreme (very slow)</string>
49 </property>
50 </item>
51 </widget>
52 </item>
53 </layout>
31 </item> 54 </item>
32 <item> 55 <item>
33 <widget class="QCheckBox" name="use_vsync"> 56 <widget class="QCheckBox" name="use_vsync">
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 80341747f..d1ac354bf 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -388,8 +388,8 @@ void Config::ReadValues() {
388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); 388 static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
389 Settings::values.use_disk_shader_cache = 389 Settings::values.use_disk_shader_cache =
390 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 390 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
391 Settings::values.use_accurate_gpu_emulation = 391 const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
392 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 392 Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
393 Settings::values.use_asynchronous_gpu_emulation = 393 Settings::values.use_asynchronous_gpu_emulation =
394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); 394 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
395 Settings::values.use_vsync = 395 Settings::values.use_vsync =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 171d16fa0..60b1a62fa 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -146,9 +146,9 @@ frame_limit =
146# 0 (default): Off, 1 : On 146# 0 (default): Off, 1 : On
147use_disk_shader_cache = 147use_disk_shader_cache =
148 148
149# Whether to use accurate GPU emulation 149# Which GPU accuracy level to use
150# 0 (default): Off (fast), 1 : On (slow) 150# 0 (Normal), 1 (High), 2 (Extreme)
151use_accurate_gpu_emulation = 151gpu_accuracy =
152 152
153# Whether to use asynchronous GPU emulation 153# Whether to use asynchronous GPU emulation
154# 0 : Off (slow), 1 (default): On (fast) 154# 0 : Off (slow), 1 (default): On (fast)
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp
index ee2591c8f..c0325cc3c 100644
--- a/src/yuzu_tester/config.cpp
+++ b/src/yuzu_tester/config.cpp
@@ -126,8 +126,8 @@ void Config::ReadValues() {
126 Settings::values.frame_limit = 100; 126 Settings::values.frame_limit = 100;
127 Settings::values.use_disk_shader_cache = 127 Settings::values.use_disk_shader_cache =
128 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); 128 sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
129 Settings::values.use_accurate_gpu_emulation = 129 const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
130 sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); 130 Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
131 Settings::values.use_asynchronous_gpu_emulation = 131 Settings::values.use_asynchronous_gpu_emulation =
132 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); 132 sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
133 133