diff options
41 files changed, 1193 insertions, 63 deletions
diff --git a/src/core/settings.cpp b/src/core/settings.cpp index c1282cb80..cd6c257f5 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp | |||
| @@ -92,7 +92,7 @@ void LogSettings() { | |||
| 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 92 | LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); | 93 | LogSetting("Renderer_FrameLimit", Settings::values.frame_limit); |
| 94 | LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | 94 | LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); |
| 95 | LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); | 95 | LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy); |
| 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", | 96 | LogSetting("Renderer_UseAsynchronousGpuEmulation", |
| 97 | Settings::values.use_asynchronous_gpu_emulation); | 97 | Settings::values.use_asynchronous_gpu_emulation); |
| 98 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); | 98 | LogSetting("Renderer_UseVsync", Settings::values.use_vsync); |
| @@ -109,4 +109,12 @@ void LogSettings() { | |||
| 109 | LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); | 109 | LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local); |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | bool IsGPULevelExtreme() { | ||
| 113 | return values.gpu_accuracy == GPUAccuracy::Extreme; | ||
| 114 | } | ||
| 115 | |||
| 116 | bool IsGPULevelHigh() { | ||
| 117 | return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High; | ||
| 118 | } | ||
| 119 | |||
| 112 | } // namespace Settings | 120 | } // namespace Settings |
diff --git a/src/core/settings.h b/src/core/settings.h index c73d1c596..7d09253f5 100644 --- a/src/core/settings.h +++ b/src/core/settings.h | |||
| @@ -376,6 +376,12 @@ enum class RendererBackend { | |||
| 376 | Vulkan = 1, | 376 | Vulkan = 1, |
| 377 | }; | 377 | }; |
| 378 | 378 | ||
| 379 | enum class GPUAccuracy : u32 { | ||
| 380 | Normal = 0, | ||
| 381 | High = 1, | ||
| 382 | Extreme = 2, | ||
| 383 | }; | ||
| 384 | |||
| 379 | struct Values { | 385 | struct Values { |
| 380 | // System | 386 | // System |
| 381 | bool use_docked_mode; | 387 | bool use_docked_mode; |
| @@ -436,7 +442,7 @@ struct Values { | |||
| 436 | bool use_frame_limit; | 442 | bool use_frame_limit; |
| 437 | u16 frame_limit; | 443 | u16 frame_limit; |
| 438 | bool use_disk_shader_cache; | 444 | bool use_disk_shader_cache; |
| 439 | bool use_accurate_gpu_emulation; | 445 | GPUAccuracy gpu_accuracy; |
| 440 | bool use_asynchronous_gpu_emulation; | 446 | bool use_asynchronous_gpu_emulation; |
| 441 | bool use_vsync; | 447 | bool use_vsync; |
| 442 | bool force_30fps_mode; | 448 | bool force_30fps_mode; |
| @@ -480,6 +486,9 @@ struct Values { | |||
| 480 | std::map<u64, std::vector<std::string>> disabled_addons; | 486 | std::map<u64, std::vector<std::string>> disabled_addons; |
| 481 | } extern values; | 487 | } extern values; |
| 482 | 488 | ||
| 489 | bool IsGPULevelExtreme(); | ||
| 490 | bool IsGPULevelHigh(); | ||
| 491 | |||
| 483 | void Apply(); | 492 | void Apply(); |
| 484 | void LogSettings(); | 493 | void LogSettings(); |
| 485 | } // namespace Settings | 494 | } // namespace Settings |
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index fd5a3ee9f..1c3b03a1c 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp | |||
| @@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) { | |||
| 56 | return "Unknown"; | 56 | return "Unknown"; |
| 57 | } | 57 | } |
| 58 | 58 | ||
| 59 | static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { | ||
| 60 | switch (backend) { | ||
| 61 | case Settings::GPUAccuracy::Normal: | ||
| 62 | return "Normal"; | ||
| 63 | case Settings::GPUAccuracy::High: | ||
| 64 | return "High"; | ||
| 65 | case Settings::GPUAccuracy::Extreme: | ||
| 66 | return "Extreme"; | ||
| 67 | } | ||
| 68 | return "Unknown"; | ||
| 69 | } | ||
| 70 | |||
| 59 | u64 GetTelemetryId() { | 71 | u64 GetTelemetryId() { |
| 60 | u64 telemetry_id{}; | 72 | u64 telemetry_id{}; |
| 61 | const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + | 73 | const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) + |
| @@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) { | |||
| 184 | AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); | 196 | AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit); |
| 185 | AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); | 197 | AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit); |
| 186 | AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); | 198 | AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache); |
| 187 | AddField(field_type, "Renderer_UseAccurateGpuEmulation", | 199 | AddField(field_type, "Renderer_GPUAccuracyLevel", |
| 188 | Settings::values.use_accurate_gpu_emulation); | 200 | TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy)); |
| 189 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", | 201 | AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", |
| 190 | Settings::values.use_asynchronous_gpu_emulation); | 202 | Settings::values.use_asynchronous_gpu_emulation); |
| 191 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); | 203 | AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c0e8f6ab1..8ede4ba9b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -23,6 +23,7 @@ add_library(video_core STATIC | |||
| 23 | engines/shader_bytecode.h | 23 | engines/shader_bytecode.h |
| 24 | engines/shader_header.h | 24 | engines/shader_header.h |
| 25 | engines/shader_type.h | 25 | engines/shader_type.h |
| 26 | fence_manager.h | ||
| 26 | gpu.cpp | 27 | gpu.cpp |
| 27 | gpu.h | 28 | gpu.h |
| 28 | gpu_asynch.cpp | 29 | gpu_asynch.cpp |
| @@ -51,6 +52,8 @@ add_library(video_core STATIC | |||
| 51 | renderer_opengl/gl_buffer_cache.h | 52 | renderer_opengl/gl_buffer_cache.h |
| 52 | renderer_opengl/gl_device.cpp | 53 | renderer_opengl/gl_device.cpp |
| 53 | renderer_opengl/gl_device.h | 54 | renderer_opengl/gl_device.h |
| 55 | renderer_opengl/gl_fence_manager.cpp | ||
| 56 | renderer_opengl/gl_fence_manager.h | ||
| 54 | renderer_opengl/gl_framebuffer_cache.cpp | 57 | renderer_opengl/gl_framebuffer_cache.cpp |
| 55 | renderer_opengl/gl_framebuffer_cache.h | 58 | renderer_opengl/gl_framebuffer_cache.h |
| 56 | renderer_opengl/gl_rasterizer.cpp | 59 | renderer_opengl/gl_rasterizer.cpp |
| @@ -176,6 +179,8 @@ if (ENABLE_VULKAN) | |||
| 176 | renderer_vulkan/vk_descriptor_pool.h | 179 | renderer_vulkan/vk_descriptor_pool.h |
| 177 | renderer_vulkan/vk_device.cpp | 180 | renderer_vulkan/vk_device.cpp |
| 178 | renderer_vulkan/vk_device.h | 181 | renderer_vulkan/vk_device.h |
| 182 | renderer_vulkan/vk_fence_manager.cpp | ||
| 183 | renderer_vulkan/vk_fence_manager.h | ||
| 179 | renderer_vulkan/vk_graphics_pipeline.cpp | 184 | renderer_vulkan/vk_graphics_pipeline.cpp |
| 180 | renderer_vulkan/vk_graphics_pipeline.h | 185 | renderer_vulkan/vk_graphics_pipeline.h |
| 181 | renderer_vulkan/vk_image.cpp | 186 | renderer_vulkan/vk_image.cpp |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 83e7a1cde..510f11089 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <list> | ||
| 8 | #include <memory> | 9 | #include <memory> |
| 9 | #include <mutex> | 10 | #include <mutex> |
| 10 | #include <unordered_map> | 11 | #include <unordered_map> |
| @@ -18,8 +19,10 @@ | |||
| 18 | 19 | ||
| 19 | #include "common/alignment.h" | 20 | #include "common/alignment.h" |
| 20 | #include "common/common_types.h" | 21 | #include "common/common_types.h" |
| 22 | #include "common/logging/log.h" | ||
| 21 | #include "core/core.h" | 23 | #include "core/core.h" |
| 22 | #include "core/memory.h" | 24 | #include "core/memory.h" |
| 25 | #include "core/settings.h" | ||
| 23 | #include "video_core/buffer_cache/buffer_block.h" | 26 | #include "video_core/buffer_cache/buffer_block.h" |
| 24 | #include "video_core/buffer_cache/map_interval.h" | 27 | #include "video_core/buffer_cache/map_interval.h" |
| 25 | #include "video_core/memory_manager.h" | 28 | #include "video_core/memory_manager.h" |
| @@ -79,6 +82,9 @@ public: | |||
| 79 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); | 82 | auto map = MapAddress(block, gpu_addr, cpu_addr, size); |
| 80 | if (is_written) { | 83 | if (is_written) { |
| 81 | map->MarkAsModified(true, GetModifiedTicks()); | 84 | map->MarkAsModified(true, GetModifiedTicks()); |
| 85 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | ||
| 86 | MarkForAsyncFlush(map); | ||
| 87 | } | ||
| 82 | if (!map->IsWritten()) { | 88 | if (!map->IsWritten()) { |
| 83 | map->MarkAsWritten(true); | 89 | map->MarkAsWritten(true); |
| 84 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 90 | MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
| @@ -137,11 +143,22 @@ public: | |||
| 137 | }); | 143 | }); |
| 138 | for (auto& object : objects) { | 144 | for (auto& object : objects) { |
| 139 | if (object->IsModified() && object->IsRegistered()) { | 145 | if (object->IsModified() && object->IsRegistered()) { |
| 146 | mutex.unlock(); | ||
| 140 | FlushMap(object); | 147 | FlushMap(object); |
| 148 | mutex.lock(); | ||
| 141 | } | 149 | } |
| 142 | } | 150 | } |
| 143 | } | 151 | } |
| 144 | 152 | ||
| 153 | bool MustFlushRegion(VAddr addr, std::size_t size) { | ||
| 154 | std::lock_guard lock{mutex}; | ||
| 155 | |||
| 156 | const std::vector<MapInterval> objects = GetMapsInRange(addr, size); | ||
| 157 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { | ||
| 158 | return map->IsModified() && map->IsRegistered(); | ||
| 159 | }); | ||
| 160 | } | ||
| 161 | |||
| 145 | /// Mark the specified region as being invalidated | 162 | /// Mark the specified region as being invalidated |
| 146 | void InvalidateRegion(VAddr addr, u64 size) { | 163 | void InvalidateRegion(VAddr addr, u64 size) { |
| 147 | std::lock_guard lock{mutex}; | 164 | std::lock_guard lock{mutex}; |
| @@ -154,6 +171,77 @@ public: | |||
| 154 | } | 171 | } |
| 155 | } | 172 | } |
| 156 | 173 | ||
| 174 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 175 | std::lock_guard lock{mutex}; | ||
| 176 | |||
| 177 | for (const auto& object : GetMapsInRange(addr, size)) { | ||
| 178 | if (object->IsMemoryMarked() && object->IsRegistered()) { | ||
| 179 | UnmarkMemory(object); | ||
| 180 | object->SetSyncPending(true); | ||
| 181 | marked_for_unregister.emplace_back(object); | ||
| 182 | } | ||
| 183 | } | ||
| 184 | } | ||
| 185 | |||
| 186 | void SyncGuestHost() { | ||
| 187 | std::lock_guard lock{mutex}; | ||
| 188 | |||
| 189 | for (const auto& object : marked_for_unregister) { | ||
| 190 | if (object->IsRegistered()) { | ||
| 191 | object->SetSyncPending(false); | ||
| 192 | Unregister(object); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | marked_for_unregister.clear(); | ||
| 196 | } | ||
| 197 | |||
| 198 | void CommitAsyncFlushes() { | ||
| 199 | if (uncommitted_flushes) { | ||
| 200 | auto commit_list = std::make_shared<std::list<MapInterval>>(); | ||
| 201 | for (auto& map : *uncommitted_flushes) { | ||
| 202 | if (map->IsRegistered() && map->IsModified()) { | ||
| 203 | // TODO(Blinkhawk): Implement backend asynchronous flushing | ||
| 204 | // AsyncFlushMap(map) | ||
| 205 | commit_list->push_back(map); | ||
| 206 | } | ||
| 207 | } | ||
| 208 | if (!commit_list->empty()) { | ||
| 209 | committed_flushes.push_back(commit_list); | ||
| 210 | } else { | ||
| 211 | committed_flushes.emplace_back(); | ||
| 212 | } | ||
| 213 | } else { | ||
| 214 | committed_flushes.emplace_back(); | ||
| 215 | } | ||
| 216 | uncommitted_flushes.reset(); | ||
| 217 | } | ||
| 218 | |||
| 219 | bool ShouldWaitAsyncFlushes() const { | ||
| 220 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | ||
| 221 | } | ||
| 222 | |||
| 223 | bool HasUncommittedFlushes() const { | ||
| 224 | return uncommitted_flushes != nullptr; | ||
| 225 | } | ||
| 226 | |||
| 227 | void PopAsyncFlushes() { | ||
| 228 | if (committed_flushes.empty()) { | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | auto& flush_list = committed_flushes.front(); | ||
| 232 | if (!flush_list) { | ||
| 233 | committed_flushes.pop_front(); | ||
| 234 | return; | ||
| 235 | } | ||
| 236 | for (MapInterval& map : *flush_list) { | ||
| 237 | if (map->IsRegistered()) { | ||
| 238 | // TODO(Blinkhawk): Replace this for reading the asynchronous flush | ||
| 239 | FlushMap(map); | ||
| 240 | } | ||
| 241 | } | ||
| 242 | committed_flushes.pop_front(); | ||
| 243 | } | ||
| 244 | |||
| 157 | virtual BufferType GetEmptyBuffer(std::size_t size) = 0; | 245 | virtual BufferType GetEmptyBuffer(std::size_t size) = 0; |
| 158 | 246 | ||
| 159 | protected: | 247 | protected: |
| @@ -196,17 +284,30 @@ protected: | |||
| 196 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; | 284 | const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; |
| 197 | mapped_addresses.insert({interval, new_map}); | 285 | mapped_addresses.insert({interval, new_map}); |
| 198 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | 286 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); |
| 287 | new_map->SetMemoryMarked(true); | ||
| 199 | if (inherit_written) { | 288 | if (inherit_written) { |
| 200 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); | 289 | MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); |
| 201 | new_map->MarkAsWritten(true); | 290 | new_map->MarkAsWritten(true); |
| 202 | } | 291 | } |
| 203 | } | 292 | } |
| 204 | 293 | ||
| 205 | /// Unregisters an object from the cache | 294 | void UnmarkMemory(const MapInterval& map) { |
| 206 | void Unregister(MapInterval& map) { | 295 | if (!map->IsMemoryMarked()) { |
| 296 | return; | ||
| 297 | } | ||
| 207 | const std::size_t size = map->GetEnd() - map->GetStart(); | 298 | const std::size_t size = map->GetEnd() - map->GetStart(); |
| 208 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); | 299 | rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); |
| 300 | map->SetMemoryMarked(false); | ||
| 301 | } | ||
| 302 | |||
| 303 | /// Unregisters an object from the cache | ||
| 304 | void Unregister(const MapInterval& map) { | ||
| 305 | UnmarkMemory(map); | ||
| 209 | map->MarkAsRegistered(false); | 306 | map->MarkAsRegistered(false); |
| 307 | if (map->IsSyncPending()) { | ||
| 308 | marked_for_unregister.remove(map); | ||
| 309 | map->SetSyncPending(false); | ||
| 310 | } | ||
| 210 | if (map->IsWritten()) { | 311 | if (map->IsWritten()) { |
| 211 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); | 312 | UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); |
| 212 | } | 313 | } |
| @@ -264,6 +365,9 @@ private: | |||
| 264 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); | 365 | MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); |
| 265 | if (modified_inheritance) { | 366 | if (modified_inheritance) { |
| 266 | new_map->MarkAsModified(true, GetModifiedTicks()); | 367 | new_map->MarkAsModified(true, GetModifiedTicks()); |
| 368 | if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { | ||
| 369 | MarkForAsyncFlush(new_map); | ||
| 370 | } | ||
| 267 | } | 371 | } |
| 268 | Register(new_map, write_inheritance); | 372 | Register(new_map, write_inheritance); |
| 269 | return new_map; | 373 | return new_map; |
| @@ -450,6 +554,13 @@ private: | |||
| 450 | return false; | 554 | return false; |
| 451 | } | 555 | } |
| 452 | 556 | ||
| 557 | void MarkForAsyncFlush(MapInterval& map) { | ||
| 558 | if (!uncommitted_flushes) { | ||
| 559 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>(); | ||
| 560 | } | ||
| 561 | uncommitted_flushes->insert(map); | ||
| 562 | } | ||
| 563 | |||
| 453 | VideoCore::RasterizerInterface& rasterizer; | 564 | VideoCore::RasterizerInterface& rasterizer; |
| 454 | Core::System& system; | 565 | Core::System& system; |
| 455 | 566 | ||
| @@ -479,6 +590,10 @@ private: | |||
| 479 | u64 modified_ticks = 0; | 590 | u64 modified_ticks = 0; |
| 480 | 591 | ||
| 481 | std::vector<u8> staging_buffer; | 592 | std::vector<u8> staging_buffer; |
| 593 | std::list<MapInterval> marked_for_unregister; | ||
| 594 | |||
| 595 | std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{}; | ||
| 596 | std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes; | ||
| 482 | 597 | ||
| 483 | std::recursive_mutex mutex; | 598 | std::recursive_mutex mutex; |
| 484 | }; | 599 | }; |
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index b0956029d..29d8b26f3 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h | |||
| @@ -46,6 +46,22 @@ public: | |||
| 46 | return is_registered; | 46 | return is_registered; |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 50 | is_memory_marked = is_memory_marked_; | ||
| 51 | } | ||
| 52 | |||
| 53 | bool IsMemoryMarked() const { | ||
| 54 | return is_memory_marked; | ||
| 55 | } | ||
| 56 | |||
| 57 | void SetSyncPending(bool is_sync_pending_) { | ||
| 58 | is_sync_pending = is_sync_pending_; | ||
| 59 | } | ||
| 60 | |||
| 61 | bool IsSyncPending() const { | ||
| 62 | return is_sync_pending; | ||
| 63 | } | ||
| 64 | |||
| 49 | VAddr GetStart() const { | 65 | VAddr GetStart() const { |
| 50 | return start; | 66 | return start; |
| 51 | } | 67 | } |
| @@ -83,6 +99,8 @@ private: | |||
| 83 | bool is_written{}; | 99 | bool is_written{}; |
| 84 | bool is_modified{}; | 100 | bool is_modified{}; |
| 85 | bool is_registered{}; | 101 | bool is_registered{}; |
| 102 | bool is_memory_marked{}; | ||
| 103 | bool is_sync_pending{}; | ||
| 86 | u64 ticks{}; | 104 | u64 ticks{}; |
| 87 | }; | 105 | }; |
| 88 | 106 | ||
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 0b77afc71..324dafdcd 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, | |||
| 21 | void DmaPusher::DispatchCalls() { | 21 | void DmaPusher::DispatchCalls() { |
| 22 | MICROPROFILE_SCOPE(DispatchCalls); | 22 | MICROPROFILE_SCOPE(DispatchCalls); |
| 23 | 23 | ||
| 24 | gpu.SyncGuestHost(); | ||
| 24 | // On entering GPU code, assume all memory may be touched by the ARM core. | 25 | // On entering GPU code, assume all memory may be touched by the ARM core. |
| 25 | gpu.Maxwell3D().OnMemoryWrite(); | 26 | gpu.Maxwell3D().OnMemoryWrite(); |
| 26 | 27 | ||
| @@ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() { | |||
| 32 | } | 33 | } |
| 33 | } | 34 | } |
| 34 | gpu.FlushCommands(); | 35 | gpu.FlushCommands(); |
| 36 | gpu.SyncGuestHost(); | ||
| 37 | gpu.OnCommandListEnd(); | ||
| 35 | } | 38 | } |
| 36 | 39 | ||
| 37 | bool DmaPusher::Step() { | 40 | bool DmaPusher::Step() { |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index baa74ad4c..2824ed707 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -404,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 404 | 404 | ||
| 405 | switch (regs.query.query_get.operation) { | 405 | switch (regs.query.query_get.operation) { |
| 406 | case Regs::QueryOperation::Release: | 406 | case Regs::QueryOperation::Release: |
| 407 | StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | 407 | if (regs.query.query_get.fence == 1) { |
| 408 | rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); | ||
| 409 | } else { | ||
| 410 | StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | ||
| 411 | } | ||
| 408 | break; | 412 | break; |
| 409 | case Regs::QueryOperation::Acquire: | 413 | case Regs::QueryOperation::Acquire: |
| 410 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that | 414 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that |
| @@ -483,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() { | |||
| 483 | const u32 increment = regs.sync_info.increment.Value(); | 487 | const u32 increment = regs.sync_info.increment.Value(); |
| 484 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); | 488 | [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); |
| 485 | if (increment) { | 489 | if (increment) { |
| 486 | system.GPU().IncrementSyncPoint(sync_point); | 490 | rasterizer.SignalSyncPoint(sync_point); |
| 487 | } | 491 | } |
| 488 | } | 492 | } |
| 489 | 493 | ||
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index c2610f992..3bfed6ab8 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() { | |||
| 104 | write_buffer.resize(dst_size); | 104 | write_buffer.resize(dst_size); |
| 105 | } | 105 | } |
| 106 | 106 | ||
| 107 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 107 | if (Settings::IsGPULevelExtreme()) { |
| 108 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 108 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 109 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||
| 110 | } else { | ||
| 111 | memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size); | ||
| 112 | memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||
| 113 | } | ||
| 109 | 114 | ||
| 110 | Texture::UnswizzleSubrect( | 115 | Texture::UnswizzleSubrect( |
| 111 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, | 116 | regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel, |
| @@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() { | |||
| 136 | write_buffer.resize(dst_size); | 141 | write_buffer.resize(dst_size); |
| 137 | } | 142 | } |
| 138 | 143 | ||
| 139 | if (Settings::values.use_accurate_gpu_emulation) { | 144 | if (Settings::IsGPULevelExtreme()) { |
| 140 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); | 145 | memory_manager.ReadBlock(source, read_buffer.data(), src_size); |
| 141 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | 146 | memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); |
| 142 | } else { | 147 | } else { |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h new file mode 100644 index 000000000..dabd1588c --- /dev/null +++ b/src/video_core/fence_manager.h | |||
| @@ -0,0 +1,170 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <memory> | ||
| 10 | #include <queue> | ||
| 11 | |||
| 12 | #include "common/assert.h" | ||
| 13 | #include "common/common_types.h" | ||
| 14 | #include "core/core.h" | ||
| 15 | #include "core/memory.h" | ||
| 16 | #include "core/settings.h" | ||
| 17 | #include "video_core/gpu.h" | ||
| 18 | #include "video_core/memory_manager.h" | ||
| 19 | #include "video_core/rasterizer_interface.h" | ||
| 20 | |||
| 21 | namespace VideoCommon { | ||
| 22 | |||
| 23 | class FenceBase { | ||
| 24 | public: | ||
| 25 | FenceBase(u32 payload, bool is_stubbed) | ||
| 26 | : address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {} | ||
| 27 | |||
| 28 | FenceBase(GPUVAddr address, u32 payload, bool is_stubbed) | ||
| 29 | : address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {} | ||
| 30 | |||
| 31 | GPUVAddr GetAddress() const { | ||
| 32 | return address; | ||
| 33 | } | ||
| 34 | |||
| 35 | u32 GetPayload() const { | ||
| 36 | return payload; | ||
| 37 | } | ||
| 38 | |||
| 39 | bool IsSemaphore() const { | ||
| 40 | return is_semaphore; | ||
| 41 | } | ||
| 42 | |||
| 43 | private: | ||
| 44 | GPUVAddr address; | ||
| 45 | u32 payload; | ||
| 46 | bool is_semaphore; | ||
| 47 | |||
| 48 | protected: | ||
| 49 | bool is_stubbed; | ||
| 50 | }; | ||
| 51 | |||
| 52 | template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache> | ||
| 53 | class FenceManager { | ||
| 54 | public: | ||
| 55 | void SignalSemaphore(GPUVAddr addr, u32 value) { | ||
| 56 | TryReleasePendingFences(); | ||
| 57 | const bool should_flush = ShouldFlush(); | ||
| 58 | CommitAsyncFlushes(); | ||
| 59 | TFence new_fence = CreateFence(addr, value, !should_flush); | ||
| 60 | fences.push(new_fence); | ||
| 61 | QueueFence(new_fence); | ||
| 62 | if (should_flush) { | ||
| 63 | rasterizer.FlushCommands(); | ||
| 64 | } | ||
| 65 | rasterizer.SyncGuestHost(); | ||
| 66 | } | ||
| 67 | |||
| 68 | void SignalSyncPoint(u32 value) { | ||
| 69 | TryReleasePendingFences(); | ||
| 70 | const bool should_flush = ShouldFlush(); | ||
| 71 | CommitAsyncFlushes(); | ||
| 72 | TFence new_fence = CreateFence(value, !should_flush); | ||
| 73 | fences.push(new_fence); | ||
| 74 | QueueFence(new_fence); | ||
| 75 | if (should_flush) { | ||
| 76 | rasterizer.FlushCommands(); | ||
| 77 | } | ||
| 78 | rasterizer.SyncGuestHost(); | ||
| 79 | } | ||
| 80 | |||
| 81 | void WaitPendingFences() { | ||
| 82 | auto& gpu{system.GPU()}; | ||
| 83 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 84 | while (!fences.empty()) { | ||
| 85 | TFence& current_fence = fences.front(); | ||
| 86 | if (ShouldWait()) { | ||
| 87 | WaitFence(current_fence); | ||
| 88 | } | ||
| 89 | PopAsyncFlushes(); | ||
| 90 | if (current_fence->IsSemaphore()) { | ||
| 91 | memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload()); | ||
| 92 | } else { | ||
| 93 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||
| 94 | } | ||
| 95 | fences.pop(); | ||
| 96 | } | ||
| 97 | } | ||
| 98 | |||
| 99 | protected: | ||
| 100 | FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 101 | TTextureCache& texture_cache, TTBufferCache& buffer_cache, | ||
| 102 | TQueryCache& query_cache) | ||
| 103 | : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, | ||
| 104 | buffer_cache{buffer_cache}, query_cache{query_cache} {} | ||
| 105 | |||
| 106 | virtual ~FenceManager() {} | ||
| 107 | |||
| 108 | /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is | ||
| 109 | /// true | ||
| 110 | virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; | ||
| 111 | /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true | ||
| 112 | virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0; | ||
| 113 | /// Queues a fence into the backend if the fence isn't stubbed. | ||
| 114 | virtual void QueueFence(TFence& fence) = 0; | ||
| 115 | /// Notifies that the backend fence has been signaled/reached in host GPU. | ||
| 116 | virtual bool IsFenceSignaled(TFence& fence) const = 0; | ||
| 117 | /// Waits until a fence has been signalled by the host GPU. | ||
| 118 | virtual void WaitFence(TFence& fence) = 0; | ||
| 119 | |||
| 120 | Core::System& system; | ||
| 121 | VideoCore::RasterizerInterface& rasterizer; | ||
| 122 | TTextureCache& texture_cache; | ||
| 123 | TTBufferCache& buffer_cache; | ||
| 124 | TQueryCache& query_cache; | ||
| 125 | |||
| 126 | private: | ||
| 127 | void TryReleasePendingFences() { | ||
| 128 | auto& gpu{system.GPU()}; | ||
| 129 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 130 | while (!fences.empty()) { | ||
| 131 | TFence& current_fence = fences.front(); | ||
| 132 | if (ShouldWait() && !IsFenceSignaled(current_fence)) { | ||
| 133 | return; | ||
| 134 | } | ||
| 135 | PopAsyncFlushes(); | ||
| 136 | if (current_fence->IsSemaphore()) { | ||
| 137 | memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload()); | ||
| 138 | } else { | ||
| 139 | gpu.IncrementSyncPoint(current_fence->GetPayload()); | ||
| 140 | } | ||
| 141 | fences.pop(); | ||
| 142 | } | ||
| 143 | } | ||
| 144 | |||
| 145 | bool ShouldWait() const { | ||
| 146 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | ||
| 147 | query_cache.ShouldWaitAsyncFlushes(); | ||
| 148 | } | ||
| 149 | |||
| 150 | bool ShouldFlush() const { | ||
| 151 | return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || | ||
| 152 | query_cache.HasUncommittedFlushes(); | ||
| 153 | } | ||
| 154 | |||
| 155 | void PopAsyncFlushes() { | ||
| 156 | texture_cache.PopAsyncFlushes(); | ||
| 157 | buffer_cache.PopAsyncFlushes(); | ||
| 158 | query_cache.PopAsyncFlushes(); | ||
| 159 | } | ||
| 160 | |||
| 161 | void CommitAsyncFlushes() { | ||
| 162 | texture_cache.CommitAsyncFlushes(); | ||
| 163 | buffer_cache.CommitAsyncFlushes(); | ||
| 164 | query_cache.CommitAsyncFlushes(); | ||
| 165 | } | ||
| 166 | |||
| 167 | std::queue<TFence> fences; | ||
| 168 | }; | ||
| 169 | |||
| 170 | } // namespace VideoCommon | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index a606f4abd..3b7572d61 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | |||
| 125 | return true; | 125 | return true; |
| 126 | } | 126 | } |
| 127 | 127 | ||
| 128 | u64 GPU::RequestFlush(VAddr addr, std::size_t size) { | ||
| 129 | std::unique_lock lck{flush_request_mutex}; | ||
| 130 | const u64 fence = ++last_flush_fence; | ||
| 131 | flush_requests.emplace_back(fence, addr, size); | ||
| 132 | return fence; | ||
| 133 | } | ||
| 134 | |||
| 135 | void GPU::TickWork() { | ||
| 136 | std::unique_lock lck{flush_request_mutex}; | ||
| 137 | while (!flush_requests.empty()) { | ||
| 138 | auto& request = flush_requests.front(); | ||
| 139 | const u64 fence = request.fence; | ||
| 140 | const VAddr addr = request.addr; | ||
| 141 | const std::size_t size = request.size; | ||
| 142 | flush_requests.pop_front(); | ||
| 143 | flush_request_mutex.unlock(); | ||
| 144 | renderer->Rasterizer().FlushRegion(addr, size); | ||
| 145 | current_flush_fence.store(fence); | ||
| 146 | flush_request_mutex.lock(); | ||
| 147 | } | ||
| 148 | } | ||
| 149 | |||
| 128 | u64 GPU::GetTicks() const { | 150 | u64 GPU::GetTicks() const { |
| 129 | // This values were reversed engineered by fincs from NVN | 151 | // This values were reversed engineered by fincs from NVN |
| 130 | // The gpu clock is reported in units of 385/625 nanoseconds | 152 | // The gpu clock is reported in units of 385/625 nanoseconds |
| @@ -142,6 +164,13 @@ void GPU::FlushCommands() { | |||
| 142 | renderer->Rasterizer().FlushCommands(); | 164 | renderer->Rasterizer().FlushCommands(); |
| 143 | } | 165 | } |
| 144 | 166 | ||
| 167 | void GPU::SyncGuestHost() { | ||
| 168 | renderer->Rasterizer().SyncGuestHost(); | ||
| 169 | } | ||
| 170 | |||
| 171 | void GPU::OnCommandListEnd() { | ||
| 172 | renderer->Rasterizer().ReleaseFences(); | ||
| 173 | } | ||
| 145 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | 174 | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence |
| 146 | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | 175 | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. |
| 147 | // So the values you see in docs might be multiplied by 4. | 176 | // So the values you see in docs might be multiplied by 4. |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 1a2d747be..5e3eb94e9 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -155,7 +155,23 @@ public: | |||
| 155 | /// Calls a GPU method. | 155 | /// Calls a GPU method. |
| 156 | void CallMethod(const MethodCall& method_call); | 156 | void CallMethod(const MethodCall& method_call); |
| 157 | 157 | ||
| 158 | /// Flush all current written commands into the host GPU for execution. | ||
| 158 | void FlushCommands(); | 159 | void FlushCommands(); |
| 160 | /// Synchronizes CPU writes with Host GPU memory. | ||
| 161 | void SyncGuestHost(); | ||
| 162 | /// Signal the ending of command list. | ||
| 163 | virtual void OnCommandListEnd(); | ||
| 164 | |||
| 165 | /// Request a host GPU memory flush from the CPU. | ||
| 166 | u64 RequestFlush(VAddr addr, std::size_t size); | ||
| 167 | |||
| 168 | /// Obtains current flush request fence id. | ||
| 169 | u64 CurrentFlushRequestFence() const { | ||
| 170 | return current_flush_fence.load(std::memory_order_relaxed); | ||
| 171 | } | ||
| 172 | |||
| 173 | /// Tick pending requests within the GPU. | ||
| 174 | void TickWork(); | ||
| 159 | 175 | ||
| 160 | /// Returns a reference to the Maxwell3D GPU engine. | 176 | /// Returns a reference to the Maxwell3D GPU engine. |
| 161 | Engines::Maxwell3D& Maxwell3D(); | 177 | Engines::Maxwell3D& Maxwell3D(); |
| @@ -325,6 +341,19 @@ private: | |||
| 325 | 341 | ||
| 326 | std::condition_variable sync_cv; | 342 | std::condition_variable sync_cv; |
| 327 | 343 | ||
| 344 | struct FlushRequest { | ||
| 345 | FlushRequest(u64 fence, VAddr addr, std::size_t size) | ||
| 346 | : fence{fence}, addr{addr}, size{size} {} | ||
| 347 | u64 fence; | ||
| 348 | VAddr addr; | ||
| 349 | std::size_t size; | ||
| 350 | }; | ||
| 351 | |||
| 352 | std::list<FlushRequest> flush_requests; | ||
| 353 | std::atomic<u64> current_flush_fence{}; | ||
| 354 | u64 last_flush_fence{}; | ||
| 355 | std::mutex flush_request_mutex; | ||
| 356 | |||
| 328 | const bool is_async; | 357 | const bool is_async; |
| 329 | }; | 358 | }; |
| 330 | 359 | ||
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 20e73a37e..53305ab43 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp | |||
| @@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const { | |||
| 52 | gpu_thread.WaitIdle(); | 52 | gpu_thread.WaitIdle(); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | void GPUAsynch::OnCommandListEnd() { | ||
| 56 | gpu_thread.OnCommandListEnd(); | ||
| 57 | } | ||
| 58 | |||
| 55 | } // namespace VideoCommon | 59 | } // namespace VideoCommon |
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 03fd0eef0..517658612 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h | |||
| @@ -32,6 +32,8 @@ public: | |||
| 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 32 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 33 | void WaitIdle() const override; | 33 | void WaitIdle() const override; |
| 34 | 34 | ||
| 35 | void OnCommandListEnd() override; | ||
| 36 | |||
| 35 | protected: | 37 | protected: |
| 36 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | 38 | void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |
| 37 | 39 | ||
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 10cda686b..c3bb4fe06 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/frontend/emu_window.h" | 8 | #include "core/frontend/emu_window.h" |
| 9 | #include "core/settings.h" | ||
| 9 | #include "video_core/dma_pusher.h" | 10 | #include "video_core/dma_pusher.h" |
| 10 | #include "video_core/gpu.h" | 11 | #include "video_core/gpu.h" |
| 11 | #include "video_core/gpu_thread.h" | 12 | #include "video_core/gpu_thread.h" |
| @@ -14,8 +15,9 @@ | |||
| 14 | namespace VideoCommon::GPUThread { | 15 | namespace VideoCommon::GPUThread { |
| 15 | 16 | ||
| 16 | /// Runs the GPU thread | 17 | /// Runs the GPU thread |
| 17 | static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, | 18 | static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, |
| 18 | Tegra::DmaPusher& dma_pusher, SynchState& state) { | 19 | Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, |
| 20 | SynchState& state) { | ||
| 19 | MicroProfileOnThreadCreate("GpuThread"); | 21 | MicroProfileOnThreadCreate("GpuThread"); |
| 20 | 22 | ||
| 21 | // Wait for first GPU command before acquiring the window context | 23 | // Wait for first GPU command before acquiring the window context |
| @@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic | |||
| 37 | dma_pusher.DispatchCalls(); | 39 | dma_pusher.DispatchCalls(); |
| 38 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | 40 | } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 39 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 41 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 42 | } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { | ||
| 43 | renderer.Rasterizer().ReleaseFences(); | ||
| 44 | } else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) { | ||
| 45 | system.GPU().TickWork(); | ||
| 40 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | 46 | } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |
| 41 | renderer.Rasterizer().FlushRegion(data->addr, data->size); | 47 | renderer.Rasterizer().FlushRegion(data->addr, data->size); |
| 42 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | 48 | } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 43 | renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | 49 | renderer.Rasterizer().OnCPUWrite(data->addr, data->size); |
| 44 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | 50 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |
| 45 | return; | 51 | return; |
| 46 | } else { | 52 | } else { |
| @@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() { | |||
| 65 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 71 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| 66 | Core::Frontend::GraphicsContext& context, | 72 | Core::Frontend::GraphicsContext& context, |
| 67 | Tegra::DmaPusher& dma_pusher) { | 73 | Tegra::DmaPusher& dma_pusher) { |
| 68 | thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher), | 74 | thread = std::thread{RunThread, std::ref(system), std::ref(renderer), |
| 69 | std::ref(state)}; | 75 | std::ref(context), std::ref(dma_pusher), std::ref(state)}; |
| 70 | } | 76 | } |
| 71 | 77 | ||
| 72 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | 78 | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { |
| @@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 78 | } | 84 | } |
| 79 | 85 | ||
| 80 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { | 86 | void ThreadManager::FlushRegion(VAddr addr, u64 size) { |
| 81 | PushCommand(FlushRegionCommand(addr, size)); | 87 | if (!Settings::IsGPULevelHigh()) { |
| 88 | PushCommand(FlushRegionCommand(addr, size)); | ||
| 89 | return; | ||
| 90 | } | ||
| 91 | if (!Settings::IsGPULevelExtreme()) { | ||
| 92 | return; | ||
| 93 | } | ||
| 94 | if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) { | ||
| 95 | auto& gpu = system.GPU(); | ||
| 96 | u64 fence = gpu.RequestFlush(addr, size); | ||
| 97 | PushCommand(GPUTickCommand()); | ||
| 98 | while (fence > gpu.CurrentFlushRequestFence()) { | ||
| 99 | } | ||
| 100 | } | ||
| 82 | } | 101 | } |
| 83 | 102 | ||
| 84 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 103 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
| 85 | system.Renderer().Rasterizer().InvalidateRegion(addr, size); | 104 | system.Renderer().Rasterizer().OnCPUWrite(addr, size); |
| 86 | } | 105 | } |
| 87 | 106 | ||
| 88 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 107 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 89 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | 108 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important |
| 90 | InvalidateRegion(addr, size); | 109 | system.Renderer().Rasterizer().OnCPUWrite(addr, size); |
| 91 | } | 110 | } |
| 92 | 111 | ||
| 93 | void ThreadManager::WaitIdle() const { | 112 | void ThreadManager::WaitIdle() const { |
| @@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const { | |||
| 95 | } | 114 | } |
| 96 | } | 115 | } |
| 97 | 116 | ||
| 117 | void ThreadManager::OnCommandListEnd() { | ||
| 118 | PushCommand(OnCommandListEndCommand()); | ||
| 119 | } | ||
| 120 | |||
| 98 | u64 ThreadManager::PushCommand(CommandData&& command_data) { | 121 | u64 ThreadManager::PushCommand(CommandData&& command_data) { |
| 99 | const u64 fence{++state.last_fence}; | 122 | const u64 fence{++state.last_fence}; |
| 100 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | 123 | state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cd74ad330..5a28335d6 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final { | |||
| 70 | u64 size; | 70 | u64 size; |
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| 73 | /// Command called within the gpu, to schedule actions after a command list end | ||
| 74 | struct OnCommandListEndCommand final {}; | ||
| 75 | |||
| 76 | /// Command to make the gpu look into pending requests | ||
| 77 | struct GPUTickCommand final {}; | ||
| 78 | |||
| 73 | using CommandData = | 79 | using CommandData = |
| 74 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | 80 | std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |
| 75 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | 81 | InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand, |
| 82 | GPUTickCommand>; | ||
| 76 | 83 | ||
| 77 | struct CommandDataContainer { | 84 | struct CommandDataContainer { |
| 78 | CommandDataContainer() = default; | 85 | CommandDataContainer() = default; |
| @@ -122,6 +129,8 @@ public: | |||
| 122 | // Wait until the gpu thread is idle. | 129 | // Wait until the gpu thread is idle. |
| 123 | void WaitIdle() const; | 130 | void WaitIdle() const; |
| 124 | 131 | ||
| 132 | void OnCommandListEnd(); | ||
| 133 | |||
| 125 | private: | 134 | private: |
| 126 | /// Pushes a command to be executed by the GPU thread | 135 | /// Pushes a command to be executed by the GPU thread |
| 127 | u64 PushCommand(CommandData&& command_data); | 136 | u64 PushCommand(CommandData&& command_data); |
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 5ea2b01f2..2f75f8801 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -12,10 +12,12 @@ | |||
| 12 | #include <mutex> | 12 | #include <mutex> |
| 13 | #include <optional> | 13 | #include <optional> |
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <unordered_set> | ||
| 15 | #include <vector> | 16 | #include <vector> |
| 16 | 17 | ||
| 17 | #include "common/assert.h" | 18 | #include "common/assert.h" |
| 18 | #include "core/core.h" | 19 | #include "core/core.h" |
| 20 | #include "core/settings.h" | ||
| 19 | #include "video_core/engines/maxwell_3d.h" | 21 | #include "video_core/engines/maxwell_3d.h" |
| 20 | #include "video_core/gpu.h" | 22 | #include "video_core/gpu.h" |
| 21 | #include "video_core/memory_manager.h" | 23 | #include "video_core/memory_manager.h" |
| @@ -130,6 +132,9 @@ public: | |||
| 130 | } | 132 | } |
| 131 | 133 | ||
| 132 | query->BindCounter(Stream(type).Current(), timestamp); | 134 | query->BindCounter(Stream(type).Current(), timestamp); |
| 135 | if (Settings::values.use_asynchronous_gpu_emulation) { | ||
| 136 | AsyncFlushQuery(cpu_addr); | ||
| 137 | } | ||
| 133 | } | 138 | } |
| 134 | 139 | ||
| 135 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | 140 | /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |
| @@ -170,6 +175,37 @@ public: | |||
| 170 | return streams[static_cast<std::size_t>(type)]; | 175 | return streams[static_cast<std::size_t>(type)]; |
| 171 | } | 176 | } |
| 172 | 177 | ||
| 178 | void CommitAsyncFlushes() { | ||
| 179 | committed_flushes.push_back(uncommitted_flushes); | ||
| 180 | uncommitted_flushes.reset(); | ||
| 181 | } | ||
| 182 | |||
| 183 | bool HasUncommittedFlushes() const { | ||
| 184 | return uncommitted_flushes != nullptr; | ||
| 185 | } | ||
| 186 | |||
| 187 | bool ShouldWaitAsyncFlushes() const { | ||
| 188 | if (committed_flushes.empty()) { | ||
| 189 | return false; | ||
| 190 | } | ||
| 191 | return committed_flushes.front() != nullptr; | ||
| 192 | } | ||
| 193 | |||
| 194 | void PopAsyncFlushes() { | ||
| 195 | if (committed_flushes.empty()) { | ||
| 196 | return; | ||
| 197 | } | ||
| 198 | auto& flush_list = committed_flushes.front(); | ||
| 199 | if (!flush_list) { | ||
| 200 | committed_flushes.pop_front(); | ||
| 201 | return; | ||
| 202 | } | ||
| 203 | for (VAddr query_address : *flush_list) { | ||
| 204 | FlushAndRemoveRegion(query_address, 4); | ||
| 205 | } | ||
| 206 | committed_flushes.pop_front(); | ||
| 207 | } | ||
| 208 | |||
| 173 | protected: | 209 | protected: |
| 174 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | 210 | std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; |
| 175 | 211 | ||
| @@ -224,6 +260,13 @@ private: | |||
| 224 | return found != std::end(contents) ? &*found : nullptr; | 260 | return found != std::end(contents) ? &*found : nullptr; |
| 225 | } | 261 | } |
| 226 | 262 | ||
| 263 | void AsyncFlushQuery(VAddr addr) { | ||
| 264 | if (!uncommitted_flushes) { | ||
| 265 | uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>(); | ||
| 266 | } | ||
| 267 | uncommitted_flushes->insert(addr); | ||
| 268 | } | ||
| 269 | |||
| 227 | static constexpr std::uintptr_t PAGE_SIZE = 4096; | 270 | static constexpr std::uintptr_t PAGE_SIZE = 4096; |
| 228 | static constexpr unsigned PAGE_SHIFT = 12; | 271 | static constexpr unsigned PAGE_SHIFT = 12; |
| 229 | 272 | ||
| @@ -235,6 +278,9 @@ private: | |||
| 235 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; | 278 | std::unordered_map<u64, std::vector<CachedQuery>> cached_queries; |
| 236 | 279 | ||
| 237 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; | 280 | std::array<CounterStream, VideoCore::NumQueryTypes> streams; |
| 281 | |||
| 282 | std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{}; | ||
| 283 | std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes; | ||
| 238 | }; | 284 | }; |
| 239 | 285 | ||
| 240 | template <class QueryCache, class HostCounter> | 286 | template <class QueryCache, class HostCounter> |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8ae5b9c4e..603f61952 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -49,15 +49,33 @@ public: | |||
| 49 | /// Records a GPU query and caches it | 49 | /// Records a GPU query and caches it |
| 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |
| 51 | 51 | ||
| 52 | /// Signal a GPU based semaphore as a fence | ||
| 53 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | ||
| 54 | |||
| 55 | /// Signal a GPU based syncpoint as a fence | ||
| 56 | virtual void SignalSyncPoint(u32 value) = 0; | ||
| 57 | |||
| 58 | /// Release all pending fences. | ||
| 59 | virtual void ReleaseFences() = 0; | ||
| 60 | |||
| 52 | /// Notify rasterizer that all caches should be flushed to Switch memory | 61 | /// Notify rasterizer that all caches should be flushed to Switch memory |
| 53 | virtual void FlushAll() = 0; | 62 | virtual void FlushAll() = 0; |
| 54 | 63 | ||
| 55 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 64 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 56 | virtual void FlushRegion(VAddr addr, u64 size) = 0; | 65 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 57 | 66 | ||
| 67 | /// Check if the specified memory area requires flushing to CPU memory. | ||
| 68 | virtual bool MustFlushRegion(VAddr addr, u64 size) = 0; | ||
| 69 | |||
| 58 | /// Notify rasterizer that any caches of the specified region should be invalidated | 70 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 59 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; | 71 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 60 | 72 | ||
| 73 | /// Notify rasterizer that any caches of the specified region are out of sync with the guest | ||
| 74 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | ||
| 75 | |||
| 76 | /// Sync memory between guest and host. | ||
| 77 | virtual void SyncGuestHost() = 0; | ||
| 78 | |||
| 61 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 79 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 62 | /// and invalidated | 80 | /// and invalidated |
| 63 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; | 81 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index cb5792407..4efce0de7 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | |||
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | void OGLBufferCache::WriteBarrier() { | 54 | void OGLBufferCache::WriteBarrier() { |
| 55 | glMemoryBarrier(GL_ALL_BARRIER_BITS); | 55 | glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { | 58 | GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { |
| @@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s | |||
| 72 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, | 72 | void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, |
| 73 | u8* data) { | 73 | u8* data) { |
| 74 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | 74 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); |
| 75 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 75 | glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), | 76 | glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), |
| 76 | static_cast<GLsizeiptr>(size), data); | 77 | static_cast<GLsizeiptr>(size), data); |
| 77 | } | 78 | } |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp new file mode 100644 index 000000000..99ddcb3f8 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/assert.h" | ||
| 6 | |||
| 7 | #include "video_core/renderer_opengl/gl_fence_manager.h" | ||
| 8 | |||
| 9 | namespace OpenGL { | ||
| 10 | |||
| 11 | GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) | ||
| 12 | : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {} | ||
| 13 | |||
| 14 | GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) | ||
| 15 | : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {} | ||
| 16 | |||
| 17 | GLInnerFence::~GLInnerFence() = default; | ||
| 18 | |||
| 19 | void GLInnerFence::Queue() { | ||
| 20 | if (is_stubbed) { | ||
| 21 | return; | ||
| 22 | } | ||
| 23 | ASSERT(sync_object.handle == 0); | ||
| 24 | sync_object.Create(); | ||
| 25 | } | ||
| 26 | |||
| 27 | bool GLInnerFence::IsSignaled() const { | ||
| 28 | if (is_stubbed) { | ||
| 29 | return true; | ||
| 30 | } | ||
| 31 | ASSERT(sync_object.handle != 0); | ||
| 32 | GLsizei length; | ||
| 33 | GLint sync_status; | ||
| 34 | glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status); | ||
| 35 | return sync_status == GL_SIGNALED; | ||
| 36 | } | ||
| 37 | |||
| 38 | void GLInnerFence::Wait() { | ||
| 39 | if (is_stubbed) { | ||
| 40 | return; | ||
| 41 | } | ||
| 42 | ASSERT(sync_object.handle != 0); | ||
| 43 | glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); | ||
| 44 | } | ||
| 45 | |||
| 46 | FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, | ||
| 47 | VideoCore::RasterizerInterface& rasterizer, | ||
| 48 | TextureCacheOpenGL& texture_cache, | ||
| 49 | OGLBufferCache& buffer_cache, QueryCache& query_cache) | ||
| 50 | : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {} | ||
| 51 | |||
| 52 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | ||
| 53 | return std::make_shared<GLInnerFence>(value, is_stubbed); | ||
| 54 | } | ||
| 55 | |||
| 56 | Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | ||
| 57 | return std::make_shared<GLInnerFence>(addr, value, is_stubbed); | ||
| 58 | } | ||
| 59 | |||
| 60 | void FenceManagerOpenGL::QueueFence(Fence& fence) { | ||
| 61 | fence->Queue(); | ||
| 62 | } | ||
| 63 | |||
| 64 | bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const { | ||
| 65 | return fence->IsSignaled(); | ||
| 66 | } | ||
| 67 | |||
| 68 | void FenceManagerOpenGL::WaitFence(Fence& fence) { | ||
| 69 | fence->Wait(); | ||
| 70 | } | ||
| 71 | |||
| 72 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h new file mode 100644 index 000000000..c917b3343 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -0,0 +1,53 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | #include <glad/glad.h> | ||
| 9 | |||
| 10 | #include "common/common_types.h" | ||
| 11 | #include "video_core/fence_manager.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_query_cache.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 15 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 16 | |||
| 17 | namespace OpenGL { | ||
| 18 | |||
| 19 | class GLInnerFence : public VideoCommon::FenceBase { | ||
| 20 | public: | ||
| 21 | GLInnerFence(u32 payload, bool is_stubbed); | ||
| 22 | GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed); | ||
| 23 | ~GLInnerFence(); | ||
| 24 | |||
| 25 | void Queue(); | ||
| 26 | |||
| 27 | bool IsSignaled() const; | ||
| 28 | |||
| 29 | void Wait(); | ||
| 30 | |||
| 31 | private: | ||
| 32 | OGLSync sync_object; | ||
| 33 | }; | ||
| 34 | |||
| 35 | using Fence = std::shared_ptr<GLInnerFence>; | ||
| 36 | using GenericFenceManager = | ||
| 37 | VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>; | ||
| 38 | |||
| 39 | class FenceManagerOpenGL final : public GenericFenceManager { | ||
| 40 | public: | ||
| 41 | FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 42 | TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, | ||
| 43 | QueryCache& query_cache); | ||
| 44 | |||
| 45 | protected: | ||
| 46 | Fence CreateFence(u32 value, bool is_stubbed) override; | ||
| 47 | Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | ||
| 48 | void QueueFence(Fence& fence) override; | ||
| 49 | bool IsFenceSignaled(Fence& fence) const override; | ||
| 50 | void WaitFence(Fence& fence) override; | ||
| 51 | }; | ||
| 52 | |||
| 53 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 175374f0d..4c16c89d2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind | |||
| 99 | ScreenInfo& info, GLShader::ProgramManager& program_manager, | 99 | ScreenInfo& info, GLShader::ProgramManager& program_manager, |
| 100 | StateTracker& state_tracker) | 100 | StateTracker& state_tracker) |
| 101 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, | 101 | : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, |
| 102 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, | 102 | shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, |
| 103 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, | 103 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, |
| 104 | buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { | 104 | fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, |
| 105 | screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} { | ||
| 105 | CheckExtensions(); | 106 | CheckExtensions(); |
| 106 | } | 107 | } |
| 107 | 108 | ||
| @@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 599 | EndTransformFeedback(); | 600 | EndTransformFeedback(); |
| 600 | 601 | ||
| 601 | ++num_queued_commands; | 602 | ++num_queued_commands; |
| 603 | |||
| 604 | system.GPU().TickWork(); | ||
| 602 | } | 605 | } |
| 603 | 606 | ||
| 604 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 607 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| @@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 649 | query_cache.FlushRegion(addr, size); | 652 | query_cache.FlushRegion(addr, size); |
| 650 | } | 653 | } |
| 651 | 654 | ||
| 655 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | ||
| 656 | if (!Settings::IsGPULevelHigh()) { | ||
| 657 | return buffer_cache.MustFlushRegion(addr, size); | ||
| 658 | } | ||
| 659 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | ||
| 660 | } | ||
| 661 | |||
| 652 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 662 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| 653 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 663 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 654 | if (addr == 0 || size == 0) { | 664 | if (addr == 0 || size == 0) { |
| @@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 660 | query_cache.InvalidateRegion(addr, size); | 670 | query_cache.InvalidateRegion(addr, size); |
| 661 | } | 671 | } |
| 662 | 672 | ||
| 673 | void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||
| 674 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 675 | if (addr == 0 || size == 0) { | ||
| 676 | return; | ||
| 677 | } | ||
| 678 | texture_cache.OnCPUWrite(addr, size); | ||
| 679 | shader_cache.InvalidateRegion(addr, size); | ||
| 680 | buffer_cache.OnCPUWrite(addr, size); | ||
| 681 | query_cache.InvalidateRegion(addr, size); | ||
| 682 | } | ||
| 683 | |||
| 684 | void RasterizerOpenGL::SyncGuestHost() { | ||
| 685 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||
| 686 | texture_cache.SyncGuestHost(); | ||
| 687 | buffer_cache.SyncGuestHost(); | ||
| 688 | } | ||
| 689 | |||
| 690 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | ||
| 691 | auto& gpu{system.GPU()}; | ||
| 692 | if (!gpu.IsAsync()) { | ||
| 693 | auto& memory_manager{gpu.MemoryManager()}; | ||
| 694 | memory_manager.Write<u32>(addr, value); | ||
| 695 | return; | ||
| 696 | } | ||
| 697 | fence_manager.SignalSemaphore(addr, value); | ||
| 698 | } | ||
| 699 | |||
| 700 | void RasterizerOpenGL::SignalSyncPoint(u32 value) { | ||
| 701 | auto& gpu{system.GPU()}; | ||
| 702 | if (!gpu.IsAsync()) { | ||
| 703 | gpu.IncrementSyncPoint(value); | ||
| 704 | return; | ||
| 705 | } | ||
| 706 | fence_manager.SignalSyncPoint(value); | ||
| 707 | } | ||
| 708 | |||
| 709 | void RasterizerOpenGL::ReleaseFences() { | ||
| 710 | auto& gpu{system.GPU()}; | ||
| 711 | if (!gpu.IsAsync()) { | ||
| 712 | return; | ||
| 713 | } | ||
| 714 | fence_manager.WaitPendingFences(); | ||
| 715 | } | ||
| 716 | |||
| 663 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 717 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 664 | if (Settings::values.use_accurate_gpu_emulation) { | 718 | if (Settings::IsGPULevelExtreme()) { |
| 665 | FlushRegion(addr, size); | 719 | FlushRegion(addr, size); |
| 666 | } | 720 | } |
| 667 | InvalidateRegion(addr, size); | 721 | InvalidateRegion(addr, size); |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index caea174d2..ebd2173eb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "video_core/rasterizer_interface.h" | 23 | #include "video_core/rasterizer_interface.h" |
| 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 24 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 25 | #include "video_core/renderer_opengl/gl_device.h" | 25 | #include "video_core/renderer_opengl/gl_device.h" |
| 26 | #include "video_core/renderer_opengl/gl_fence_manager.h" | ||
| 26 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" | 27 | #include "video_core/renderer_opengl/gl_framebuffer_cache.h" |
| 27 | #include "video_core/renderer_opengl/gl_query_cache.h" | 28 | #include "video_core/renderer_opengl/gl_query_cache.h" |
| 28 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 29 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| @@ -66,7 +67,13 @@ public: | |||
| 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 67 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 67 | void FlushAll() override; | 68 | void FlushAll() override; |
| 68 | void FlushRegion(VAddr addr, u64 size) override; | 69 | void FlushRegion(VAddr addr, u64 size) override; |
| 70 | bool MustFlushRegion(VAddr addr, u64 size) override; | ||
| 69 | void InvalidateRegion(VAddr addr, u64 size) override; | 71 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 72 | void OnCPUWrite(VAddr addr, u64 size) override; | ||
| 73 | void SyncGuestHost() override; | ||
| 74 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||
| 75 | void SignalSyncPoint(u32 value) override; | ||
| 76 | void ReleaseFences() override; | ||
| 70 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 77 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 71 | void FlushCommands() override; | 78 | void FlushCommands() override; |
| 72 | void TickFrame() override; | 79 | void TickFrame() override; |
| @@ -222,6 +229,8 @@ private: | |||
| 222 | SamplerCacheOpenGL sampler_cache; | 229 | SamplerCacheOpenGL sampler_cache; |
| 223 | FramebufferCacheOpenGL framebuffer_cache; | 230 | FramebufferCacheOpenGL framebuffer_cache; |
| 224 | QueryCache query_cache; | 231 | QueryCache query_cache; |
| 232 | OGLBufferCache buffer_cache; | ||
| 233 | FenceManagerOpenGL fence_manager; | ||
| 225 | 234 | ||
| 226 | Core::System& system; | 235 | Core::System& system; |
| 227 | ScreenInfo& screen_info; | 236 | ScreenInfo& screen_info; |
| @@ -229,7 +238,6 @@ private: | |||
| 229 | StateTracker& state_tracker; | 238 | StateTracker& state_tracker; |
| 230 | 239 | ||
| 231 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | 240 | static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; |
| 232 | OGLBufferCache buffer_cache; | ||
| 233 | 241 | ||
| 234 | GLint vertex_binding = 0; | 242 | GLint vertex_binding = 0; |
| 235 | 243 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 6d2ff20f9..f63156b8d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 448 | 448 | ||
| 449 | // Look up shader in the cache based on address | 449 | // Look up shader in the cache based on address |
| 450 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; | 450 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 451 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; | 451 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader}; |
| 452 | if (shader) { | 452 | if (shader) { |
| 453 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 453 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 454 | } | 454 | } |
| @@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 477 | const std::size_t size_in_bytes = code.size() * sizeof(u64); | 477 | const std::size_t size_in_bytes = code.size() * sizeof(u64); |
| 478 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | 478 | shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); |
| 479 | } | 479 | } |
| 480 | Register(shader); | 480 | |
| 481 | if (cpu_addr) { | ||
| 482 | Register(shader); | ||
| 483 | } else { | ||
| 484 | null_shader = shader; | ||
| 485 | } | ||
| 481 | 486 | ||
| 482 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 487 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 483 | } | 488 | } |
| @@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 486 | auto& memory_manager{system.GPU().MemoryManager()}; | 491 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 487 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; | 492 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; |
| 488 | 493 | ||
| 489 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; | 494 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel; |
| 490 | if (kernel) { | 495 | if (kernel) { |
| 491 | return kernel; | 496 | return kernel; |
| 492 | } | 497 | } |
| @@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||
| 507 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); | 512 | kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); |
| 508 | } | 513 | } |
| 509 | 514 | ||
| 510 | Register(kernel); | 515 | if (cpu_addr) { |
| 516 | Register(kernel); | ||
| 517 | } else { | ||
| 518 | null_kernel = kernel; | ||
| 519 | } | ||
| 511 | return kernel; | 520 | return kernel; |
| 512 | } | 521 | } |
| 513 | 522 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index c836df5bd..91690b470 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -125,6 +125,9 @@ private: | |||
| 125 | ShaderDiskCacheOpenGL disk_cache; | 125 | ShaderDiskCacheOpenGL disk_cache; |
| 126 | std::unordered_map<u64, PrecompiledShader> runtime_cache; | 126 | std::unordered_map<u64, PrecompiledShader> runtime_cache; |
| 127 | 127 | ||
| 128 | Shader null_shader{}; | ||
| 129 | Shader null_kernel{}; | ||
| 130 | |||
| 128 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 131 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 129 | }; | 132 | }; |
| 130 | 133 | ||
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp new file mode 100644 index 000000000..a02be5487 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -0,0 +1,101 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <memory> | ||
| 6 | #include <thread> | ||
| 7 | |||
| 8 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_device.h" | ||
| 10 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 12 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 13 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 14 | |||
| 15 | namespace Vulkan { | ||
| 16 | |||
| 17 | InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed) | ||
| 18 | : VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {} | ||
| 19 | |||
| 20 | InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, | ||
| 21 | u32 payload, bool is_stubbed) | ||
| 22 | : VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {} | ||
| 23 | |||
| 24 | InnerFence::~InnerFence() = default; | ||
| 25 | |||
| 26 | void InnerFence::Queue() { | ||
| 27 | if (is_stubbed) { | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | ASSERT(!event); | ||
| 31 | |||
| 32 | event = device.GetLogical().CreateEvent(); | ||
| 33 | ticks = scheduler.Ticks(); | ||
| 34 | |||
| 35 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 36 | scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) { | ||
| 37 | cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); | ||
| 38 | }); | ||
| 39 | } | ||
| 40 | |||
| 41 | bool InnerFence::IsSignaled() const { | ||
| 42 | if (is_stubbed) { | ||
| 43 | return true; | ||
| 44 | } | ||
| 45 | ASSERT(event); | ||
| 46 | return IsEventSignalled(); | ||
| 47 | } | ||
| 48 | |||
| 49 | void InnerFence::Wait() { | ||
| 50 | if (is_stubbed) { | ||
| 51 | return; | ||
| 52 | } | ||
| 53 | ASSERT(event); | ||
| 54 | |||
| 55 | if (ticks >= scheduler.Ticks()) { | ||
| 56 | scheduler.Flush(); | ||
| 57 | } | ||
| 58 | while (!IsEventSignalled()) { | ||
| 59 | std::this_thread::yield(); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | bool InnerFence::IsEventSignalled() const { | ||
| 64 | switch (const VkResult result = event.GetStatus()) { | ||
| 65 | case VK_EVENT_SET: | ||
| 66 | return true; | ||
| 67 | case VK_EVENT_RESET: | ||
| 68 | return false; | ||
| 69 | default: | ||
| 70 | throw vk::Exception(result); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 75 | const VKDevice& device, VKScheduler& scheduler, | ||
| 76 | VKTextureCache& texture_cache, VKBufferCache& buffer_cache, | ||
| 77 | VKQueryCache& query_cache) | ||
| 78 | : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), | ||
| 79 | device{device}, scheduler{scheduler} {} | ||
| 80 | |||
| 81 | Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { | ||
| 82 | return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); | ||
| 83 | } | ||
| 84 | |||
| 85 | Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) { | ||
| 86 | return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed); | ||
| 87 | } | ||
| 88 | |||
| 89 | void VKFenceManager::QueueFence(Fence& fence) { | ||
| 90 | fence->Queue(); | ||
| 91 | } | ||
| 92 | |||
| 93 | bool VKFenceManager::IsFenceSignaled(Fence& fence) const { | ||
| 94 | return fence->IsSignaled(); | ||
| 95 | } | ||
| 96 | |||
| 97 | void VKFenceManager::WaitFence(Fence& fence) { | ||
| 98 | fence->Wait(); | ||
| 99 | } | ||
| 100 | |||
| 101 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h new file mode 100644 index 000000000..04d07fe6a --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -0,0 +1,74 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "video_core/fence_manager.h" | ||
| 10 | #include "video_core/renderer_vulkan/wrapper.h" | ||
| 11 | |||
| 12 | namespace Core { | ||
| 13 | class System; | ||
| 14 | } | ||
| 15 | |||
| 16 | namespace VideoCore { | ||
| 17 | class RasterizerInterface; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace Vulkan { | ||
| 21 | |||
| 22 | class VKBufferCache; | ||
| 23 | class VKDevice; | ||
| 24 | class VKQueryCache; | ||
| 25 | class VKScheduler; | ||
| 26 | class VKTextureCache; | ||
| 27 | |||
| 28 | class InnerFence : public VideoCommon::FenceBase { | ||
| 29 | public: | ||
| 30 | explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, | ||
| 31 | bool is_stubbed); | ||
| 32 | explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address, | ||
| 33 | u32 payload, bool is_stubbed); | ||
| 34 | ~InnerFence(); | ||
| 35 | |||
| 36 | void Queue(); | ||
| 37 | |||
| 38 | bool IsSignaled() const; | ||
| 39 | |||
| 40 | void Wait(); | ||
| 41 | |||
| 42 | private: | ||
| 43 | bool IsEventSignalled() const; | ||
| 44 | |||
| 45 | const VKDevice& device; | ||
| 46 | VKScheduler& scheduler; | ||
| 47 | vk::Event event; | ||
| 48 | u64 ticks = 0; | ||
| 49 | }; | ||
| 50 | using Fence = std::shared_ptr<InnerFence>; | ||
| 51 | |||
| 52 | using GenericFenceManager = | ||
| 53 | VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>; | ||
| 54 | |||
| 55 | class VKFenceManager final : public GenericFenceManager { | ||
| 56 | public: | ||
| 57 | explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||
| 58 | const VKDevice& device, VKScheduler& scheduler, | ||
| 59 | VKTextureCache& texture_cache, VKBufferCache& buffer_cache, | ||
| 60 | VKQueryCache& query_cache); | ||
| 61 | |||
| 62 | protected: | ||
| 63 | Fence CreateFence(u32 value, bool is_stubbed) override; | ||
| 64 | Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override; | ||
| 65 | void QueueFence(Fence& fence) override; | ||
| 66 | bool IsFenceSignaled(Fence& fence) const override; | ||
| 67 | void WaitFence(Fence& fence) override; | ||
| 68 | |||
| 69 | private: | ||
| 70 | const VKDevice& device; | ||
| 71 | VKScheduler& scheduler; | ||
| 72 | }; | ||
| 73 | |||
| 74 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a792130fd..91b1b16a5 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 207 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 207 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; |
| 208 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 208 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 209 | ASSERT(cpu_addr); | 209 | ASSERT(cpu_addr); |
| 210 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | 210 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader; |
| 211 | if (!shader) { | 211 | if (!shader) { |
| 212 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 212 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; |
| 213 | 213 | ||
| @@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 218 | 218 | ||
| 219 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | 219 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, |
| 220 | std::move(code), stage_offset); | 220 | std::move(code), stage_offset); |
| 221 | Register(shader); | 221 | if (cpu_addr) { |
| 222 | Register(shader); | ||
| 223 | } else { | ||
| 224 | null_shader = shader; | ||
| 225 | } | ||
| 222 | } | 226 | } |
| 223 | shaders[index] = std::move(shader); | 227 | shaders[index] = std::move(shader); |
| 224 | } | 228 | } |
| @@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 261 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 265 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 262 | ASSERT(cpu_addr); | 266 | ASSERT(cpu_addr); |
| 263 | 267 | ||
| 264 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | 268 | auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel; |
| 265 | if (!shader) { | 269 | if (!shader) { |
| 266 | // No shader found - create a new one | 270 | // No shader found - create a new one |
| 267 | const auto host_ptr = memory_manager.GetPointer(program_addr); | 271 | const auto host_ptr = memory_manager.GetPointer(program_addr); |
| @@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 271 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | 275 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, |
| 272 | program_addr, *cpu_addr, std::move(code), | 276 | program_addr, *cpu_addr, std::move(code), |
| 273 | kernel_main_offset); | 277 | kernel_main_offset); |
| 274 | Register(shader); | 278 | if (cpu_addr) { |
| 279 | Register(shader); | ||
| 280 | } else { | ||
| 281 | null_kernel = shader; | ||
| 282 | } | ||
| 275 | } | 283 | } |
| 276 | 284 | ||
| 277 | Specialization specialization; | 285 | Specialization specialization; |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7ccdb7083..602a0a340 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -182,6 +182,9 @@ private: | |||
| 182 | VKUpdateDescriptorQueue& update_descriptor_queue; | 182 | VKUpdateDescriptorQueue& update_descriptor_queue; |
| 183 | VKRenderPassCache& renderpass_cache; | 183 | VKRenderPassCache& renderpass_cache; |
| 184 | 184 | ||
| 185 | Shader null_shader{}; | ||
| 186 | Shader null_kernel{}; | ||
| 187 | |||
| 185 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | 188 | std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |
| 186 | 189 | ||
| 187 | GraphicsPipelineCacheKey last_graphics_key; | 190 | GraphicsPipelineCacheKey last_graphics_key; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index b58a88664..8a1f57891 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "common/microprofile.h" | 17 | #include "common/microprofile.h" |
| 18 | #include "core/core.h" | 18 | #include "core/core.h" |
| 19 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | #include "core/settings.h" | ||
| 20 | #include "video_core/engines/kepler_compute.h" | 21 | #include "video_core/engines/kepler_compute.h" |
| 21 | #include "video_core/engines/maxwell_3d.h" | 22 | #include "video_core/engines/maxwell_3d.h" |
| 22 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 23 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| @@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||
| 299 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, | 300 | pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, |
| 300 | renderpass_cache), | 301 | renderpass_cache), |
| 301 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | 302 | buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |
| 302 | sampler_cache(device), query_cache(system, *this, device, scheduler) { | 303 | sampler_cache(device), |
| 304 | fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), | ||
| 305 | query_cache(system, *this, device, scheduler) { | ||
| 303 | scheduler.SetQueryCache(query_cache); | 306 | scheduler.SetQueryCache(query_cache); |
| 304 | } | 307 | } |
| 305 | 308 | ||
| @@ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 360 | }); | 363 | }); |
| 361 | 364 | ||
| 362 | EndTransformFeedback(); | 365 | EndTransformFeedback(); |
| 366 | |||
| 367 | system.GPU().TickWork(); | ||
| 363 | } | 368 | } |
| 364 | 369 | ||
| 365 | void RasterizerVulkan::Clear() { | 370 | void RasterizerVulkan::Clear() { |
| @@ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 504 | query_cache.FlushRegion(addr, size); | 509 | query_cache.FlushRegion(addr, size); |
| 505 | } | 510 | } |
| 506 | 511 | ||
| 512 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | ||
| 513 | if (!Settings::IsGPULevelHigh()) { | ||
| 514 | return buffer_cache.MustFlushRegion(addr, size); | ||
| 515 | } | ||
| 516 | return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size); | ||
| 517 | } | ||
| 518 | |||
| 507 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 519 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| 508 | if (addr == 0 || size == 0) { | 520 | if (addr == 0 || size == 0) { |
| 509 | return; | 521 | return; |
| @@ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | |||
| 514 | query_cache.InvalidateRegion(addr, size); | 526 | query_cache.InvalidateRegion(addr, size); |
| 515 | } | 527 | } |
| 516 | 528 | ||
| 529 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||
| 530 | if (addr == 0 || size == 0) { | ||
| 531 | return; | ||
| 532 | } | ||
| 533 | texture_cache.OnCPUWrite(addr, size); | ||
| 534 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 535 | buffer_cache.OnCPUWrite(addr, size); | ||
| 536 | query_cache.InvalidateRegion(addr, size); | ||
| 537 | } | ||
| 538 | |||
| 539 | void RasterizerVulkan::SyncGuestHost() { | ||
| 540 | texture_cache.SyncGuestHost(); | ||
| 541 | buffer_cache.SyncGuestHost(); | ||
| 542 | } | ||
| 543 | |||
| 544 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | ||
| 545 | auto& gpu{system.GPU()}; | ||
| 546 | if (!gpu.IsAsync()) { | ||
| 547 | gpu.MemoryManager().Write<u32>(addr, value); | ||
| 548 | return; | ||
| 549 | } | ||
| 550 | fence_manager.SignalSemaphore(addr, value); | ||
| 551 | } | ||
| 552 | |||
| 553 | void RasterizerVulkan::SignalSyncPoint(u32 value) { | ||
| 554 | auto& gpu{system.GPU()}; | ||
| 555 | if (!gpu.IsAsync()) { | ||
| 556 | gpu.IncrementSyncPoint(value); | ||
| 557 | return; | ||
| 558 | } | ||
| 559 | fence_manager.SignalSyncPoint(value); | ||
| 560 | } | ||
| 561 | |||
| 562 | void RasterizerVulkan::ReleaseFences() { | ||
| 563 | auto& gpu{system.GPU()}; | ||
| 564 | if (!gpu.IsAsync()) { | ||
| 565 | return; | ||
| 566 | } | ||
| 567 | fence_manager.WaitPendingFences(); | ||
| 568 | } | ||
| 569 | |||
| 517 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 570 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 518 | FlushRegion(addr, size); | 571 | FlushRegion(addr, size); |
| 519 | InvalidateRegion(addr, size); | 572 | InvalidateRegion(addr, size); |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index d9108f862..2fa46b0cc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 21 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 22 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 23 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 23 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 24 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | ||
| 24 | #include "video_core/renderer_vulkan/vk_memory_manager.h" | 25 | #include "video_core/renderer_vulkan/vk_memory_manager.h" |
| 25 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 26 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| @@ -118,7 +119,13 @@ public: | |||
| 118 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 119 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 119 | void FlushAll() override; | 120 | void FlushAll() override; |
| 120 | void FlushRegion(VAddr addr, u64 size) override; | 121 | void FlushRegion(VAddr addr, u64 size) override; |
| 122 | bool MustFlushRegion(VAddr addr, u64 size) override; | ||
| 121 | void InvalidateRegion(VAddr addr, u64 size) override; | 123 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 124 | void OnCPUWrite(VAddr addr, u64 size) override; | ||
| 125 | void SyncGuestHost() override; | ||
| 126 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||
| 127 | void SignalSyncPoint(u32 value) override; | ||
| 128 | void ReleaseFences() override; | ||
| 122 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | 129 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 123 | void FlushCommands() override; | 130 | void FlushCommands() override; |
| 124 | void TickFrame() override; | 131 | void TickFrame() override; |
| @@ -261,6 +268,7 @@ private: | |||
| 261 | VKPipelineCache pipeline_cache; | 268 | VKPipelineCache pipeline_cache; |
| 262 | VKBufferCache buffer_cache; | 269 | VKBufferCache buffer_cache; |
| 263 | VKSamplerCache sampler_cache; | 270 | VKSamplerCache sampler_cache; |
| 271 | VKFenceManager fence_manager; | ||
| 264 | VKQueryCache query_cache; | 272 | VKQueryCache query_cache; |
| 265 | 273 | ||
| 266 | std::array<View, Maxwell::NumRenderTargets> color_attachments; | 274 | std::array<View, Maxwell::NumRenderTargets> color_attachments; |
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 3a52a3a6f..539f3c974 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp | |||
| @@ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 63 | X(vkCmdSetBlendConstants); | 63 | X(vkCmdSetBlendConstants); |
| 64 | X(vkCmdSetDepthBias); | 64 | X(vkCmdSetDepthBias); |
| 65 | X(vkCmdSetDepthBounds); | 65 | X(vkCmdSetDepthBounds); |
| 66 | X(vkCmdSetEvent); | ||
| 66 | X(vkCmdSetScissor); | 67 | X(vkCmdSetScissor); |
| 67 | X(vkCmdSetStencilCompareMask); | 68 | X(vkCmdSetStencilCompareMask); |
| 68 | X(vkCmdSetStencilReference); | 69 | X(vkCmdSetStencilReference); |
| @@ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 75 | X(vkCreateDescriptorPool); | 76 | X(vkCreateDescriptorPool); |
| 76 | X(vkCreateDescriptorSetLayout); | 77 | X(vkCreateDescriptorSetLayout); |
| 77 | X(vkCreateDescriptorUpdateTemplateKHR); | 78 | X(vkCreateDescriptorUpdateTemplateKHR); |
| 79 | X(vkCreateEvent); | ||
| 78 | X(vkCreateFence); | 80 | X(vkCreateFence); |
| 79 | X(vkCreateFramebuffer); | 81 | X(vkCreateFramebuffer); |
| 80 | X(vkCreateGraphicsPipelines); | 82 | X(vkCreateGraphicsPipelines); |
| @@ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 93 | X(vkDestroyDescriptorPool); | 95 | X(vkDestroyDescriptorPool); |
| 94 | X(vkDestroyDescriptorSetLayout); | 96 | X(vkDestroyDescriptorSetLayout); |
| 95 | X(vkDestroyDescriptorUpdateTemplateKHR); | 97 | X(vkDestroyDescriptorUpdateTemplateKHR); |
| 98 | X(vkDestroyEvent); | ||
| 96 | X(vkDestroyFence); | 99 | X(vkDestroyFence); |
| 97 | X(vkDestroyFramebuffer); | 100 | X(vkDestroyFramebuffer); |
| 98 | X(vkDestroyImage); | 101 | X(vkDestroyImage); |
| @@ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 112 | X(vkFreeMemory); | 115 | X(vkFreeMemory); |
| 113 | X(vkGetBufferMemoryRequirements); | 116 | X(vkGetBufferMemoryRequirements); |
| 114 | X(vkGetDeviceQueue); | 117 | X(vkGetDeviceQueue); |
| 118 | X(vkGetEventStatus); | ||
| 115 | X(vkGetFenceStatus); | 119 | X(vkGetFenceStatus); |
| 116 | X(vkGetImageMemoryRequirements); | 120 | X(vkGetImageMemoryRequirements); |
| 117 | X(vkGetQueryPoolResults); | 121 | X(vkGetQueryPoolResults); |
| @@ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld) | |||
| 269 | dld.vkFreeMemory(device, handle, nullptr); | 273 | dld.vkFreeMemory(device, handle, nullptr); |
| 270 | } | 274 | } |
| 271 | 275 | ||
| 276 | void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept { | ||
| 277 | dld.vkDestroyEvent(device, handle, nullptr); | ||
| 278 | } | ||
| 279 | |||
| 272 | void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { | 280 | void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept { |
| 273 | dld.vkDestroyFence(device, handle, nullptr); | 281 | dld.vkDestroyFence(device, handle, nullptr); |
| 274 | } | 282 | } |
| @@ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons | |||
| 599 | return ShaderModule(object, handle, *dld); | 607 | return ShaderModule(object, handle, *dld); |
| 600 | } | 608 | } |
| 601 | 609 | ||
| 610 | Event Device::CreateEvent() const { | ||
| 611 | VkEventCreateInfo ci; | ||
| 612 | ci.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; | ||
| 613 | ci.pNext = nullptr; | ||
| 614 | ci.flags = 0; | ||
| 615 | VkEvent object; | ||
| 616 | Check(dld->vkCreateEvent(handle, &ci, nullptr, &object)); | ||
| 617 | return Event(object, handle, *dld); | ||
| 618 | } | ||
| 619 | |||
| 602 | SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { | 620 | SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const { |
| 603 | VkSwapchainKHR object; | 621 | VkSwapchainKHR object; |
| 604 | Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); | 622 | Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object)); |
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 6fe0294d8..bda16a2cb 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h | |||
| @@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 199 | PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; | 199 | PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants; |
| 200 | PFN_vkCmdSetDepthBias vkCmdSetDepthBias; | 200 | PFN_vkCmdSetDepthBias vkCmdSetDepthBias; |
| 201 | PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; | 201 | PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds; |
| 202 | PFN_vkCmdSetEvent vkCmdSetEvent; | ||
| 202 | PFN_vkCmdSetScissor vkCmdSetScissor; | 203 | PFN_vkCmdSetScissor vkCmdSetScissor; |
| 203 | PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; | 204 | PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask; |
| 204 | PFN_vkCmdSetStencilReference vkCmdSetStencilReference; | 205 | PFN_vkCmdSetStencilReference vkCmdSetStencilReference; |
| @@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 211 | PFN_vkCreateDescriptorPool vkCreateDescriptorPool; | 212 | PFN_vkCreateDescriptorPool vkCreateDescriptorPool; |
| 212 | PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; | 213 | PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout; |
| 213 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; | 214 | PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR; |
| 215 | PFN_vkCreateEvent vkCreateEvent; | ||
| 214 | PFN_vkCreateFence vkCreateFence; | 216 | PFN_vkCreateFence vkCreateFence; |
| 215 | PFN_vkCreateFramebuffer vkCreateFramebuffer; | 217 | PFN_vkCreateFramebuffer vkCreateFramebuffer; |
| 216 | PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; | 218 | PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines; |
| @@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 229 | PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; | 231 | PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool; |
| 230 | PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; | 232 | PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout; |
| 231 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; | 233 | PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR; |
| 234 | PFN_vkDestroyEvent vkDestroyEvent; | ||
| 232 | PFN_vkDestroyFence vkDestroyFence; | 235 | PFN_vkDestroyFence vkDestroyFence; |
| 233 | PFN_vkDestroyFramebuffer vkDestroyFramebuffer; | 236 | PFN_vkDestroyFramebuffer vkDestroyFramebuffer; |
| 234 | PFN_vkDestroyImage vkDestroyImage; | 237 | PFN_vkDestroyImage vkDestroyImage; |
| @@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 248 | PFN_vkFreeMemory vkFreeMemory; | 251 | PFN_vkFreeMemory vkFreeMemory; |
| 249 | PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; | 252 | PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements; |
| 250 | PFN_vkGetDeviceQueue vkGetDeviceQueue; | 253 | PFN_vkGetDeviceQueue vkGetDeviceQueue; |
| 254 | PFN_vkGetEventStatus vkGetEventStatus; | ||
| 251 | PFN_vkGetFenceStatus vkGetFenceStatus; | 255 | PFN_vkGetFenceStatus vkGetFenceStatus; |
| 252 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; | 256 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; |
| 253 | PFN_vkGetQueryPoolResults vkGetQueryPoolResults; | 257 | PFN_vkGetQueryPoolResults vkGetQueryPoolResults; |
| @@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept; | |||
| 279 | void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; | 283 | void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept; |
| 280 | void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; | 284 | void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept; |
| 281 | void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; | 285 | void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept; |
| 286 | void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept; | ||
| 282 | void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; | 287 | void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept; |
| 283 | void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; | 288 | void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; |
| 284 | void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; | 289 | void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; |
| @@ -648,6 +653,15 @@ public: | |||
| 648 | std::vector<VkImage> GetImages() const; | 653 | std::vector<VkImage> GetImages() const; |
| 649 | }; | 654 | }; |
| 650 | 655 | ||
| 656 | class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> { | ||
| 657 | using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle; | ||
| 658 | |||
| 659 | public: | ||
| 660 | VkResult GetStatus() const noexcept { | ||
| 661 | return dld->vkGetEventStatus(owner, handle); | ||
| 662 | } | ||
| 663 | }; | ||
| 664 | |||
| 651 | class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { | 665 | class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { |
| 652 | using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; | 666 | using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; |
| 653 | 667 | ||
| @@ -695,6 +709,8 @@ public: | |||
| 695 | 709 | ||
| 696 | ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; | 710 | ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; |
| 697 | 711 | ||
| 712 | Event CreateEvent() const; | ||
| 713 | |||
| 698 | SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; | 714 | SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; |
| 699 | 715 | ||
| 700 | DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; | 716 | DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept; |
| @@ -938,6 +954,10 @@ public: | |||
| 938 | dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); | 954 | dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds); |
| 939 | } | 955 | } |
| 940 | 956 | ||
| 957 | void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept { | ||
| 958 | dld->vkCmdSetEvent(handle, event, stage_flags); | ||
| 959 | } | ||
| 960 | |||
| 941 | void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, | 961 | void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers, |
| 942 | const VkDeviceSize* offsets, | 962 | const VkDeviceSize* offsets, |
| 943 | const VkDeviceSize* sizes) const noexcept { | 963 | const VkDeviceSize* sizes) const noexcept { |
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index c5ab21f56..79e10ffbb 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -192,6 +192,22 @@ public: | |||
| 192 | index = index_; | 192 | index = index_; |
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | void SetMemoryMarked(bool is_memory_marked_) { | ||
| 196 | is_memory_marked = is_memory_marked_; | ||
| 197 | } | ||
| 198 | |||
| 199 | bool IsMemoryMarked() const { | ||
| 200 | return is_memory_marked; | ||
| 201 | } | ||
| 202 | |||
| 203 | void SetSyncPending(bool is_sync_pending_) { | ||
| 204 | is_sync_pending = is_sync_pending_; | ||
| 205 | } | ||
| 206 | |||
| 207 | bool IsSyncPending() const { | ||
| 208 | return is_sync_pending; | ||
| 209 | } | ||
| 210 | |||
| 195 | void MarkAsPicked(bool is_picked_) { | 211 | void MarkAsPicked(bool is_picked_) { |
| 196 | is_picked = is_picked_; | 212 | is_picked = is_picked_; |
| 197 | } | 213 | } |
| @@ -303,6 +319,8 @@ private: | |||
| 303 | bool is_target{}; | 319 | bool is_target{}; |
| 304 | bool is_registered{}; | 320 | bool is_registered{}; |
| 305 | bool is_picked{}; | 321 | bool is_picked{}; |
| 322 | bool is_memory_marked{}; | ||
| 323 | bool is_sync_pending{}; | ||
| 306 | u32 index{NO_RT}; | 324 | u32 index{NO_RT}; |
| 307 | u64 modification_tick{}; | 325 | u64 modification_tick{}; |
| 308 | }; | 326 | }; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 69ca08fd1..cf6bd005a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include <algorithm> |
| 8 | #include <array> | 8 | #include <array> |
| 9 | #include <list> | ||
| 9 | #include <memory> | 10 | #include <memory> |
| 10 | #include <mutex> | 11 | #include <mutex> |
| 11 | #include <set> | 12 | #include <set> |
| @@ -62,6 +63,30 @@ public: | |||
| 62 | } | 63 | } |
| 63 | } | 64 | } |
| 64 | 65 | ||
| 66 | void OnCPUWrite(VAddr addr, std::size_t size) { | ||
| 67 | std::lock_guard lock{mutex}; | ||
| 68 | |||
| 69 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | ||
| 70 | if (surface->IsMemoryMarked()) { | ||
| 71 | UnmarkMemory(surface); | ||
| 72 | surface->SetSyncPending(true); | ||
| 73 | marked_for_unregister.emplace_back(surface); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | } | ||
| 77 | |||
| 78 | void SyncGuestHost() { | ||
| 79 | std::lock_guard lock{mutex}; | ||
| 80 | |||
| 81 | for (const auto& surface : marked_for_unregister) { | ||
| 82 | if (surface->IsRegistered()) { | ||
| 83 | surface->SetSyncPending(false); | ||
| 84 | Unregister(surface); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | marked_for_unregister.clear(); | ||
| 88 | } | ||
| 89 | |||
| 65 | /** | 90 | /** |
| 66 | * Guarantees that rendertargets don't unregister themselves if they | 91 | * Guarantees that rendertargets don't unregister themselves if they |
| 67 | * collide. Protection is currently only done on 3D slices. | 92 | * collide. Protection is currently only done on 3D slices. |
| @@ -85,10 +110,20 @@ public: | |||
| 85 | return a->GetModificationTick() < b->GetModificationTick(); | 110 | return a->GetModificationTick() < b->GetModificationTick(); |
| 86 | }); | 111 | }); |
| 87 | for (const auto& surface : surfaces) { | 112 | for (const auto& surface : surfaces) { |
| 113 | mutex.unlock(); | ||
| 88 | FlushSurface(surface); | 114 | FlushSurface(surface); |
| 115 | mutex.lock(); | ||
| 89 | } | 116 | } |
| 90 | } | 117 | } |
| 91 | 118 | ||
| 119 | bool MustFlushRegion(VAddr addr, std::size_t size) { | ||
| 120 | std::lock_guard lock{mutex}; | ||
| 121 | |||
| 122 | const auto surfaces = GetSurfacesInRegion(addr, size); | ||
| 123 | return std::any_of(surfaces.cbegin(), surfaces.cend(), | ||
| 124 | [](const TSurface& surface) { return surface->IsModified(); }); | ||
| 125 | } | ||
| 126 | |||
| 92 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, | 127 | TView GetTextureSurface(const Tegra::Texture::TICEntry& tic, |
| 93 | const VideoCommon::Shader::Sampler& entry) { | 128 | const VideoCommon::Shader::Sampler& entry) { |
| 94 | std::lock_guard lock{mutex}; | 129 | std::lock_guard lock{mutex}; |
| @@ -206,8 +241,14 @@ public: | |||
| 206 | 241 | ||
| 207 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, | 242 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, |
| 208 | SurfaceParams::CreateForFramebuffer(system, index), true); | 243 | SurfaceParams::CreateForFramebuffer(system, index), true); |
| 209 | if (render_targets[index].target) | 244 | if (render_targets[index].target) { |
| 210 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | 245 | auto& surface = render_targets[index].target; |
| 246 | surface->MarkAsRenderTarget(false, NO_RT); | ||
| 247 | const auto& cr_params = surface->GetSurfaceParams(); | ||
| 248 | if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) { | ||
| 249 | AsyncFlushSurface(surface); | ||
| 250 | } | ||
| 251 | } | ||
| 211 | render_targets[index].target = surface_view.first; | 252 | render_targets[index].target = surface_view.first; |
| 212 | render_targets[index].view = surface_view.second; | 253 | render_targets[index].view = surface_view.second; |
| 213 | if (render_targets[index].target) | 254 | if (render_targets[index].target) |
| @@ -284,6 +325,34 @@ public: | |||
| 284 | return ++ticks; | 325 | return ++ticks; |
| 285 | } | 326 | } |
| 286 | 327 | ||
| 328 | void CommitAsyncFlushes() { | ||
| 329 | committed_flushes.push_back(uncommitted_flushes); | ||
| 330 | uncommitted_flushes.reset(); | ||
| 331 | } | ||
| 332 | |||
| 333 | bool HasUncommittedFlushes() const { | ||
| 334 | return uncommitted_flushes != nullptr; | ||
| 335 | } | ||
| 336 | |||
| 337 | bool ShouldWaitAsyncFlushes() const { | ||
| 338 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | ||
| 339 | } | ||
| 340 | |||
| 341 | void PopAsyncFlushes() { | ||
| 342 | if (committed_flushes.empty()) { | ||
| 343 | return; | ||
| 344 | } | ||
| 345 | auto& flush_list = committed_flushes.front(); | ||
| 346 | if (!flush_list) { | ||
| 347 | committed_flushes.pop_front(); | ||
| 348 | return; | ||
| 349 | } | ||
| 350 | for (TSurface& surface : *flush_list) { | ||
| 351 | FlushSurface(surface); | ||
| 352 | } | ||
| 353 | committed_flushes.pop_front(); | ||
| 354 | } | ||
| 355 | |||
| 287 | protected: | 356 | protected: |
| 288 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | 357 | explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
| 289 | bool is_astc_supported) | 358 | bool is_astc_supported) |
| @@ -345,9 +414,20 @@ protected: | |||
| 345 | surface->SetCpuAddr(*cpu_addr); | 414 | surface->SetCpuAddr(*cpu_addr); |
| 346 | RegisterInnerCache(surface); | 415 | RegisterInnerCache(surface); |
| 347 | surface->MarkAsRegistered(true); | 416 | surface->MarkAsRegistered(true); |
| 417 | surface->SetMemoryMarked(true); | ||
| 348 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); | 418 | rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); |
| 349 | } | 419 | } |
| 350 | 420 | ||
| 421 | void UnmarkMemory(TSurface surface) { | ||
| 422 | if (!surface->IsMemoryMarked()) { | ||
| 423 | return; | ||
| 424 | } | ||
| 425 | const std::size_t size = surface->GetSizeInBytes(); | ||
| 426 | const VAddr cpu_addr = surface->GetCpuAddr(); | ||
| 427 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 428 | surface->SetMemoryMarked(false); | ||
| 429 | } | ||
| 430 | |||
| 351 | void Unregister(TSurface surface) { | 431 | void Unregister(TSurface surface) { |
| 352 | if (guard_render_targets && surface->IsProtected()) { | 432 | if (guard_render_targets && surface->IsProtected()) { |
| 353 | return; | 433 | return; |
| @@ -355,9 +435,11 @@ protected: | |||
| 355 | if (!guard_render_targets && surface->IsRenderTarget()) { | 435 | if (!guard_render_targets && surface->IsRenderTarget()) { |
| 356 | ManageRenderTargetUnregister(surface); | 436 | ManageRenderTargetUnregister(surface); |
| 357 | } | 437 | } |
| 358 | const std::size_t size = surface->GetSizeInBytes(); | 438 | UnmarkMemory(surface); |
| 359 | const VAddr cpu_addr = surface->GetCpuAddr(); | 439 | if (surface->IsSyncPending()) { |
| 360 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | 440 | marked_for_unregister.remove(surface); |
| 441 | surface->SetSyncPending(false); | ||
| 442 | } | ||
| 361 | UnregisterInnerCache(surface); | 443 | UnregisterInnerCache(surface); |
| 362 | surface->MarkAsRegistered(false); | 444 | surface->MarkAsRegistered(false); |
| 363 | ReserveSurface(surface->GetSurfaceParams(), surface); | 445 | ReserveSurface(surface->GetSurfaceParams(), surface); |
| @@ -417,7 +499,7 @@ private: | |||
| 417 | **/ | 499 | **/ |
| 418 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 500 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, |
| 419 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 501 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| 420 | if (Settings::values.use_accurate_gpu_emulation) { | 502 | if (Settings::IsGPULevelExtreme()) { |
| 421 | return RecycleStrategy::Flush; | 503 | return RecycleStrategy::Flush; |
| 422 | } | 504 | } |
| 423 | // 3D Textures decision | 505 | // 3D Textures decision |
| @@ -461,7 +543,7 @@ private: | |||
| 461 | } | 543 | } |
| 462 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { | 544 | switch (PickStrategy(overlaps, params, gpu_addr, untopological)) { |
| 463 | case RecycleStrategy::Ignore: { | 545 | case RecycleStrategy::Ignore: { |
| 464 | return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation); | 546 | return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme()); |
| 465 | } | 547 | } |
| 466 | case RecycleStrategy::Flush: { | 548 | case RecycleStrategy::Flush: { |
| 467 | std::sort(overlaps.begin(), overlaps.end(), | 549 | std::sort(overlaps.begin(), overlaps.end(), |
| @@ -509,7 +591,7 @@ private: | |||
| 509 | } | 591 | } |
| 510 | const auto& final_params = new_surface->GetSurfaceParams(); | 592 | const auto& final_params = new_surface->GetSurfaceParams(); |
| 511 | if (cr_params.type != final_params.type) { | 593 | if (cr_params.type != final_params.type) { |
| 512 | if (Settings::values.use_accurate_gpu_emulation) { | 594 | if (Settings::IsGPULevelExtreme()) { |
| 513 | BufferCopy(current_surface, new_surface); | 595 | BufferCopy(current_surface, new_surface); |
| 514 | } | 596 | } |
| 515 | } else { | 597 | } else { |
| @@ -598,7 +680,7 @@ private: | |||
| 598 | if (passed_tests == 0) { | 680 | if (passed_tests == 0) { |
| 599 | return {}; | 681 | return {}; |
| 600 | // In Accurate GPU all tests should pass, else we recycle | 682 | // In Accurate GPU all tests should pass, else we recycle |
| 601 | } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) { | 683 | } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { |
| 602 | return {}; | 684 | return {}; |
| 603 | } | 685 | } |
| 604 | for (const auto& surface : overlaps) { | 686 | for (const auto& surface : overlaps) { |
| @@ -668,7 +750,7 @@ private: | |||
| 668 | for (const auto& surface : overlaps) { | 750 | for (const auto& surface : overlaps) { |
| 669 | if (!surface->MatchTarget(params.target)) { | 751 | if (!surface->MatchTarget(params.target)) { |
| 670 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | 752 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| 671 | if (Settings::values.use_accurate_gpu_emulation) { | 753 | if (Settings::IsGPULevelExtreme()) { |
| 672 | return std::nullopt; | 754 | return std::nullopt; |
| 673 | } | 755 | } |
| 674 | Unregister(surface); | 756 | Unregister(surface); |
| @@ -1106,6 +1188,13 @@ private: | |||
| 1106 | TView view; | 1188 | TView view; |
| 1107 | }; | 1189 | }; |
| 1108 | 1190 | ||
| 1191 | void AsyncFlushSurface(TSurface& surface) { | ||
| 1192 | if (!uncommitted_flushes) { | ||
| 1193 | uncommitted_flushes = std::make_shared<std::list<TSurface>>(); | ||
| 1194 | } | ||
| 1195 | uncommitted_flushes->push_back(surface); | ||
| 1196 | } | ||
| 1197 | |||
| 1109 | VideoCore::RasterizerInterface& rasterizer; | 1198 | VideoCore::RasterizerInterface& rasterizer; |
| 1110 | 1199 | ||
| 1111 | FormatLookupTable format_lookup_table; | 1200 | FormatLookupTable format_lookup_table; |
| @@ -1150,6 +1239,11 @@ private: | |||
| 1150 | std::unordered_map<u32, TSurface> invalid_cache; | 1239 | std::unordered_map<u32, TSurface> invalid_cache; |
| 1151 | std::vector<u8> invalid_memory; | 1240 | std::vector<u8> invalid_memory; |
| 1152 | 1241 | ||
| 1242 | std::list<TSurface> marked_for_unregister; | ||
| 1243 | |||
| 1244 | std::shared_ptr<std::list<TSurface>> uncommitted_flushes{}; | ||
| 1245 | std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes; | ||
| 1246 | |||
| 1153 | StagingCache staging_cache; | 1247 | StagingCache staging_cache; |
| 1154 | std::recursive_mutex mutex; | 1248 | std::recursive_mutex mutex; |
| 1155 | }; | 1249 | }; |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 7f6dfac84..196a3a116 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -639,8 +639,8 @@ void Config::ReadRendererValues() { | |||
| 639 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); | 639 | Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt(); |
| 640 | Settings::values.use_disk_shader_cache = | 640 | Settings::values.use_disk_shader_cache = |
| 641 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); | 641 | ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool(); |
| 642 | Settings::values.use_accurate_gpu_emulation = | 642 | const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt(); |
| 643 | ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool(); | 643 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 644 | Settings::values.use_asynchronous_gpu_emulation = | 644 | Settings::values.use_asynchronous_gpu_emulation = |
| 645 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); | 645 | ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool(); |
| 646 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); | 646 | Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool(); |
| @@ -1080,8 +1080,8 @@ void Config::SaveRendererValues() { | |||
| 1080 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); | 1080 | WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100); |
| 1081 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, | 1081 | WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache, |
| 1082 | true); | 1082 | true); |
| 1083 | WriteSetting(QStringLiteral("use_accurate_gpu_emulation"), | 1083 | WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy), |
| 1084 | Settings::values.use_accurate_gpu_emulation, false); | 1084 | 0); |
| 1085 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), | 1085 | WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1086 | Settings::values.use_asynchronous_gpu_emulation, false); | 1086 | Settings::values.use_asynchronous_gpu_emulation, false); |
| 1087 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1087 | WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index b9f429f84..0a3f47339 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default; | |||
| 19 | 19 | ||
| 20 | void ConfigureGraphicsAdvanced::SetConfiguration() { | 20 | void ConfigureGraphicsAdvanced::SetConfiguration() { |
| 21 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); | 21 | const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn(); |
| 22 | ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); | 22 | ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy)); |
| 23 | ui->use_vsync->setEnabled(runtime_lock); | 23 | ui->use_vsync->setEnabled(runtime_lock); |
| 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); | 24 | ui->use_vsync->setChecked(Settings::values.use_vsync); |
| 25 | ui->force_30fps_mode->setEnabled(runtime_lock); | 25 | ui->force_30fps_mode->setEnabled(runtime_lock); |
| @@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { | 31 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { |
| 32 | Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); | 32 | auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex()); |
| 33 | Settings::values.gpu_accuracy = gpu_accuracy; | ||
| 33 | Settings::values.use_vsync = ui->use_vsync->isChecked(); | 34 | Settings::values.use_vsync = ui->use_vsync->isChecked(); |
| 34 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); | 35 | Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked(); |
| 35 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); | 36 | Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex(); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 42eec278e..0c7b383e0 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -23,11 +23,34 @@ | |||
| 23 | </property> | 23 | </property> |
| 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> | 24 | <layout class="QVBoxLayout" name="verticalLayout_3"> |
| 25 | <item> | 25 | <item> |
| 26 | <widget class="QCheckBox" name="use_accurate_gpu_emulation"> | 26 | <layout class="QHBoxLayout" name="horizontalLayout_2"> |
| 27 | <property name="text"> | 27 | <item> |
| 28 | <string>Use accurate GPU emulation (slow)</string> | 28 | <widget class="QLabel" name="label_gpu_accuracy"> |
| 29 | </property> | 29 | <property name="text"> |
| 30 | </widget> | 30 | <string>Accuracy Level:</string> |
| 31 | </property> | ||
| 32 | </widget> | ||
| 33 | </item> | ||
| 34 | <item> | ||
| 35 | <widget class="QComboBox" name="gpu_accuracy"> | ||
| 36 | <item> | ||
| 37 | <property name="text"> | ||
| 38 | <string notr="true">Normal</string> | ||
| 39 | </property> | ||
| 40 | </item> | ||
| 41 | <item> | ||
| 42 | <property name="text"> | ||
| 43 | <string notr="true">High</string> | ||
| 44 | </property> | ||
| 45 | </item> | ||
| 46 | <item> | ||
| 47 | <property name="text"> | ||
| 48 | <string notr="true">Extreme(very slow)</string> | ||
| 49 | </property> | ||
| 50 | </item> | ||
| 51 | </widget> | ||
| 52 | </item> | ||
| 53 | </layout> | ||
| 31 | </item> | 54 | </item> |
| 32 | <item> | 55 | <item> |
| 33 | <widget class="QCheckBox" name="use_vsync"> | 56 | <widget class="QCheckBox" name="use_vsync"> |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 80341747f..d1ac354bf 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -388,8 +388,8 @@ void Config::ReadValues() { | |||
| 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); | 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100)); |
| 389 | Settings::values.use_disk_shader_cache = | 389 | Settings::values.use_disk_shader_cache = |
| 390 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | 390 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); |
| 391 | Settings::values.use_accurate_gpu_emulation = | 391 | const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); |
| 392 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 392 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 393 | Settings::values.use_asynchronous_gpu_emulation = | 393 | Settings::values.use_asynchronous_gpu_emulation = |
| 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 395 | Settings::values.use_vsync = | 395 | Settings::values.use_vsync = |
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 171d16fa0..60b1a62fa 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h | |||
| @@ -146,9 +146,9 @@ frame_limit = | |||
| 146 | # 0 (default): Off, 1 : On | 146 | # 0 (default): Off, 1 : On |
| 147 | use_disk_shader_cache = | 147 | use_disk_shader_cache = |
| 148 | 148 | ||
| 149 | # Whether to use accurate GPU emulation | 149 | # Which GPU accuracy level to use |
| 150 | # 0 (default): Off (fast), 1 : On (slow) | 150 | # 0 (default): Normal, 1: High, 2: Extreme |
| 151 | use_accurate_gpu_emulation = | 151 | gpu_accuracy = |
| 152 | 152 | ||
| 153 | # Whether to use asynchronous GPU emulation | 153 | # Whether to use asynchronous GPU emulation |
| 154 | # 0 : Off (slow), 1 (default): On (fast) | 154 | # 0 : Off (slow), 1 (default): On (fast) |
diff --git a/src/yuzu_tester/config.cpp b/src/yuzu_tester/config.cpp index ee2591c8f..c0325cc3c 100644 --- a/src/yuzu_tester/config.cpp +++ b/src/yuzu_tester/config.cpp | |||
| @@ -126,8 +126,8 @@ void Config::ReadValues() { | |||
| 126 | Settings::values.frame_limit = 100; | 126 | Settings::values.frame_limit = 100; |
| 127 | Settings::values.use_disk_shader_cache = | 127 | Settings::values.use_disk_shader_cache = |
| 128 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); | 128 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); |
| 129 | Settings::values.use_accurate_gpu_emulation = | 129 | const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); |
| 130 | sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); | 130 | Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level); |
| 131 | Settings::values.use_asynchronous_gpu_emulation = | 131 | Settings::values.use_asynchronous_gpu_emulation = |
| 132 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); | 132 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); |
| 133 | 133 | ||