summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/common/settings.cpp2
-rw-r--r--src/common/settings.h3
-rw-r--r--src/core/memory.cpp2
-rw-r--r--src/tests/video_core/buffer_base.cpp2
-rw-r--r--src/video_core/CMakeLists.txt3
-rw-r--r--src/video_core/buffer_cache/buffer_base.h14
-rw-r--r--src/video_core/engines/engine_upload.cpp2
-rw-r--r--src/video_core/engines/fermi_2d.cpp6
-rw-r--r--src/video_core/engines/fermi_2d.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp7
-rw-r--r--src/video_core/engines/maxwell_dma.cpp21
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt1
-rw-r--r--src/video_core/host_shaders/vulkan_turbo_mode.comp29
-rw-r--r--src/video_core/invalidation_accumulator.h79
-rw-r--r--src/video_core/macro/macro_hle.cpp42
-rw-r--r--src/video_core/memory_manager.cpp102
-rw-r--r--src/video_core/memory_manager.h18
-rw-r--r--src/video_core/rasterizer_interface.h7
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp8
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp11
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp42
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp51
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.h19
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp128
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h10
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.cpp205
-rw-r--r--src/video_core/renderer_vulkan/vk_turbo_mode.h26
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp5
-rw-r--r--src/video_core/vulkan_common/vulkan_device.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.cpp14
-rw-r--r--src/video_core/vulkan_common/vulkan_instance.h5
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.cpp27
-rw-r--r--src/video_core/vulkan_common/vulkan_wrapper.h24
-rw-r--r--src/yuzu/configuration/config.cpp6
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp21
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.h2
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui20
-rw-r--r--src/yuzu/main.cpp20
-rw-r--r--src/yuzu/main.h1
-rw-r--r--src/yuzu_cmd/config.cpp2
44 files changed, 850 insertions, 176 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 149e621f9..1638b79f5 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) {
185 // Renderer 185 // Renderer
186 values.fsr_sharpening_slider.SetGlobal(true); 186 values.fsr_sharpening_slider.SetGlobal(true);
187 values.renderer_backend.SetGlobal(true); 187 values.renderer_backend.SetGlobal(true);
188 values.renderer_force_max_clock.SetGlobal(true);
188 values.vulkan_device.SetGlobal(true); 189 values.vulkan_device.SetGlobal(true);
189 values.aspect_ratio.SetGlobal(true); 190 values.aspect_ratio.SetGlobal(true);
190 values.max_anisotropy.SetGlobal(true); 191 values.max_anisotropy.SetGlobal(true);
@@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) {
200 values.use_asynchronous_shaders.SetGlobal(true); 201 values.use_asynchronous_shaders.SetGlobal(true);
201 values.use_fast_gpu_time.SetGlobal(true); 202 values.use_fast_gpu_time.SetGlobal(true);
202 values.use_pessimistic_flushes.SetGlobal(true); 203 values.use_pessimistic_flushes.SetGlobal(true);
204 values.use_vulkan_driver_pipeline_cache.SetGlobal(true);
203 values.bg_red.SetGlobal(true); 205 values.bg_red.SetGlobal(true);
204 values.bg_green.SetGlobal(true); 206 values.bg_green.SetGlobal(true);
205 values.bg_blue.SetGlobal(true); 207 values.bg_blue.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 5017951c5..9eb3711ca 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -415,6 +415,7 @@ struct Values {
415 // Renderer 415 // Renderer
416 SwitchableSetting<RendererBackend, true> renderer_backend{ 416 SwitchableSetting<RendererBackend, true> renderer_backend{
417 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; 417 RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"};
418 SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"};
418 Setting<bool> renderer_debug{false, "debug"}; 419 Setting<bool> renderer_debug{false, "debug"};
419 Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; 420 Setting<bool> renderer_shader_feedback{false, "shader_feedback"};
420 Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; 421 Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"};
@@ -451,6 +452,8 @@ struct Values {
451 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; 452 SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
452 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; 453 SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
453 SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"}; 454 SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"};
455 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
456 "use_vulkan_driver_pipeline_cache"};
454 457
455 SwitchableSetting<u8> bg_red{0, "bg_red"}; 458 SwitchableSetting<u8> bg_red{0, "bg_red"};
456 SwitchableSetting<u8> bg_green{0, "bg_green"}; 459 SwitchableSetting<u8> bg_green{0, "bg_green"};
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 26be74df4..a1e41faff 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -436,7 +436,7 @@ struct Memory::Impl {
436 } 436 }
437 437
438 if (Settings::IsFastmemEnabled()) { 438 if (Settings::IsFastmemEnabled()) {
439 const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; 439 const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached;
440 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); 440 system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
441 } 441 }
442 442
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp
index f7236afab..5cd0628f2 100644
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
538 int num = 0; 538 int num = 0;
539 buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); 539 buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); 540 buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
541 REQUIRE(num == 0); 541 REQUIRE(num == 1);
542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); 542 REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); 543 REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
544 buffer.FlushCachedWrites(); 544 buffer.FlushCachedWrites();
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index aa271a377..f617665de 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -85,6 +85,7 @@ add_library(video_core STATIC
85 gpu.h 85 gpu.h
86 gpu_thread.cpp 86 gpu_thread.cpp
87 gpu_thread.h 87 gpu_thread.h
88 invalidation_accumulator.h
88 memory_manager.cpp 89 memory_manager.cpp
89 memory_manager.h 90 memory_manager.h
90 precompiled_headers.h 91 precompiled_headers.h
@@ -190,6 +191,8 @@ add_library(video_core STATIC
190 renderer_vulkan/vk_texture_cache.cpp 191 renderer_vulkan/vk_texture_cache.cpp
191 renderer_vulkan/vk_texture_cache.h 192 renderer_vulkan/vk_texture_cache.h
192 renderer_vulkan/vk_texture_cache_base.cpp 193 renderer_vulkan/vk_texture_cache_base.cpp
194 renderer_vulkan/vk_turbo_mode.cpp
195 renderer_vulkan/vk_turbo_mode.h
193 renderer_vulkan/vk_update_descriptor.cpp 196 renderer_vulkan/vk_update_descriptor.cpp
194 renderer_vulkan/vk_update_descriptor.h 197 renderer_vulkan/vk_update_descriptor.h
195 shader_cache.cpp 198 shader_cache.cpp
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h
index 92d77eef2..c47b7d866 100644
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
430 if (query_begin >= SizeBytes() || size < 0) { 430 if (query_begin >= SizeBytes() || size < 0) {
431 return; 431 return;
432 } 432 }
433 u64* const untracked_words = Array<Type::Untracked>(); 433 [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
434 u64* const state_words = Array<type>(); 434 u64* const state_words = Array<type>();
435 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); 435 const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
436 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; 436 u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
483 NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); 483 NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
484 } 484 }
485 // Exclude CPU modified pages when visiting GPU pages 485 // Exclude CPU modified pages when visiting GPU pages
486 const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); 486 const u64 word = current_word;
487 u64 page = page_begin; 487 u64 page = page_begin;
488 page_begin = 0; 488 page_begin = 0;
489 489
@@ -531,7 +531,7 @@ private:
531 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { 531 [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
532 static_assert(type != Type::Untracked); 532 static_assert(type != Type::Untracked);
533 533
534 const u64* const untracked_words = Array<Type::Untracked>(); 534 [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
535 const u64* const state_words = Array<type>(); 535 const u64* const state_words = Array<type>();
536 const u64 num_query_words = size / BYTES_PER_WORD + 1; 536 const u64 num_query_words = size / BYTES_PER_WORD + 1;
537 const u64 word_begin = offset / BYTES_PER_WORD; 537 const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
539 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); 539 const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
540 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; 540 u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
541 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { 541 for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
542 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; 542 const u64 word = state_words[word_index];
543 const u64 word = state_words[word_index] & ~off_word;
544 if (word == 0) { 543 if (word == 0) {
545 continue; 544 continue;
546 } 545 }
@@ -564,7 +563,7 @@ private:
564 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { 563 [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
565 static_assert(type != Type::Untracked); 564 static_assert(type != Type::Untracked);
566 565
567 const u64* const untracked_words = Array<Type::Untracked>(); 566 [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
568 const u64* const state_words = Array<type>(); 567 const u64* const state_words = Array<type>();
569 const u64 num_query_words = size / BYTES_PER_WORD + 1; 568 const u64 num_query_words = size / BYTES_PER_WORD + 1;
570 const u64 word_begin = offset / BYTES_PER_WORD; 569 const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
574 u64 begin = std::numeric_limits<u64>::max(); 573 u64 begin = std::numeric_limits<u64>::max();
575 u64 end = 0; 574 u64 end = 0;
576 for (u64 word_index = word_begin; word_index < word_end; ++word_index) { 575 for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
577 const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; 576 const u64 word = state_words[word_index];
578 const u64 word = state_words[word_index] & ~off_word;
579 if (word == 0) { 577 if (word == 0) {
580 continue; 578 continue;
581 } 579 }
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index cea1dd8b0..7f5a0c29d 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) {
76 regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, 76 regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
77 x_elements, regs.line_count, regs.dest.BlockHeight(), 77 x_elements, regs.line_count, regs.dest.BlockHeight(),
78 regs.dest.BlockDepth(), regs.line_length_in); 78 regs.dest.BlockDepth(), regs.line_length_in);
79 memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); 79 memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
80 } 80 }
81} 81}
82 82
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index e655e7254..a126c359c 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "video_core/engines/fermi_2d.h" 7#include "video_core/engines/fermi_2d.h"
8#include "video_core/engines/sw_blitter/blitter.h" 8#include "video_core/engines/sw_blitter/blitter.h"
9#include "video_core/memory_manager.h"
9#include "video_core/rasterizer_interface.h" 10#include "video_core/rasterizer_interface.h"
10#include "video_core/surface.h" 11#include "video_core/surface.h"
11#include "video_core/textures/decoders.h" 12#include "video_core/textures/decoders.h"
@@ -20,8 +21,8 @@ namespace Tegra::Engines {
20 21
21using namespace Texture; 22using namespace Texture;
22 23
23Fermi2D::Fermi2D(MemoryManager& memory_manager_) { 24Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} {
24 sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); 25 sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager);
25 // Nvidia's OpenGL driver seems to assume these values 26 // Nvidia's OpenGL driver seems to assume these values
26 regs.src.depth = 1; 27 regs.src.depth = 1;
27 regs.dst.depth = 1; 28 regs.dst.depth = 1;
@@ -104,6 +105,7 @@ void Fermi2D::Blit() {
104 config.src_x0 = 0; 105 config.src_x0 = 0;
105 } 106 }
106 107
108 memory_manager.FlushCaching();
107 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { 109 if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
108 sw_blitter->Blit(src, regs.dst, config); 110 sw_blitter->Blit(src, regs.dst, config);
109 } 111 }
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 523fbdec2..705b323e1 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -305,6 +305,7 @@ public:
305private: 305private:
306 VideoCore::RasterizerInterface* rasterizer = nullptr; 306 VideoCore::RasterizerInterface* rasterizer = nullptr;
307 std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; 307 std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter;
308 MemoryManager& memory_manager;
308 309
309 /// Performs the copy from the source surface to the destination surface as configured in the 310 /// Performs the copy from the source surface to the destination surface as configured in the
310 /// registers. 311 /// registers.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index fbfd1ddd2..97f547789 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
485} 485}
486 486
487void Maxwell3D::ProcessQueryGet() { 487void Maxwell3D::ProcessQueryGet() {
488 // TODO(Subv): Support the other query units.
489 if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) {
490 LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented");
491 }
492
493 switch (regs.report_semaphore.query.operation) { 488 switch (regs.report_semaphore.query.operation) {
494 case Regs::ReportSemaphore::Operation::Release: 489 case Regs::ReportSemaphore::Operation::Release:
495 if (regs.report_semaphore.query.short_query != 0) { 490 if (regs.report_semaphore.query.short_query != 0) {
@@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) {
649 644
650 const GPUVAddr address{buffer_address + regs.const_buffer.offset}; 645 const GPUVAddr address{buffer_address + regs.const_buffer.offset};
651 const size_t copy_size = amount * sizeof(u32); 646 const size_t copy_size = amount * sizeof(u32);
652 memory_manager.WriteBlock(address, start_base, copy_size); 647 memory_manager.WriteBlockCached(address, start_base, copy_size);
653 648
654 // Increment the current buffer position. 649 // Increment the current buffer position.
655 regs.const_buffer.offset += static_cast<u32>(copy_size); 650 regs.const_buffer.offset += static_cast<u32>(copy_size);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 01f70ea9e..7762c7d96 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -69,7 +69,7 @@ void MaxwellDMA::Launch() {
69 if (launch.multi_line_enable) { 69 if (launch.multi_line_enable) {
70 const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; 70 const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH;
71 const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; 71 const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH;
72 72 memory_manager.FlushCaching();
73 if (!is_src_pitch && !is_dst_pitch) { 73 if (!is_src_pitch && !is_dst_pitch) {
74 // If both the source and the destination are in block layout, assert. 74 // If both the source and the destination are in block layout, assert.
75 CopyBlockLinearToBlockLinear(); 75 CopyBlockLinearToBlockLinear();
@@ -104,6 +104,7 @@ void MaxwellDMA::Launch() {
104 reinterpret_cast<u8*>(tmp_buffer.data()), 104 reinterpret_cast<u8*>(tmp_buffer.data()),
105 regs.line_length_in * sizeof(u32)); 105 regs.line_length_in * sizeof(u32));
106 } else { 106 } else {
107 memory_manager.FlushCaching();
107 const auto convert_linear_2_blocklinear_addr = [](u64 address) { 108 const auto convert_linear_2_blocklinear_addr = [](u64 address) {
108 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | 109 return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) |
109 ((address & 0x180) >> 1) | ((address & 0x20) << 3); 110 ((address & 0x180) >> 1) | ((address & 0x20) << 3);
@@ -121,8 +122,8 @@ void MaxwellDMA::Launch() {
121 memory_manager.ReadBlockUnsafe( 122 memory_manager.ReadBlockUnsafe(
122 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 123 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
123 tmp_buffer.data(), tmp_buffer.size()); 124 tmp_buffer.data(), tmp_buffer.size());
124 memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), 125 memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
125 tmp_buffer.size()); 126 tmp_buffer.size());
126 } 127 }
127 } else if (is_src_pitch && !is_dst_pitch) { 128 } else if (is_src_pitch && !is_dst_pitch) {
128 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 129 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -132,7 +133,7 @@ void MaxwellDMA::Launch() {
132 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 133 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
133 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), 134 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
134 tmp_buffer.size()); 135 tmp_buffer.size());
135 memory_manager.WriteBlock( 136 memory_manager.WriteBlockCached(
136 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 137 convert_linear_2_blocklinear_addr(regs.offset_out + offset),
137 tmp_buffer.data(), tmp_buffer.size()); 138 tmp_buffer.data(), tmp_buffer.size());
138 } 139 }
@@ -141,8 +142,8 @@ void MaxwellDMA::Launch() {
141 std::vector<u8> tmp_buffer(regs.line_length_in); 142 std::vector<u8> tmp_buffer(regs.line_length_in);
142 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), 143 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
143 regs.line_length_in); 144 regs.line_length_in);
144 memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), 145 memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
145 regs.line_length_in); 146 regs.line_length_in);
146 } 147 }
147 } 148 }
148 } 149 }
@@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
204 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 205 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
205 regs.pitch_out); 206 regs.pitch_out);
206 207
207 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 208 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
208} 209}
209 210
210void MaxwellDMA::CopyPitchToBlockLinear() { 211void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
256 dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 257 dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
257 regs.pitch_in); 258 regs.pitch_in);
258 259
259 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 260 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
260} 261}
261 262
262void MaxwellDMA::FastCopyBlockLinearToPitch() { 263void MaxwellDMA::FastCopyBlockLinearToPitch() {
@@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() {
287 regs.src_params.block_size.height, regs.src_params.block_size.depth, 288 regs.src_params.block_size.height, regs.src_params.block_size.depth,
288 regs.pitch_out); 289 regs.pitch_out);
289 290
290 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 291 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
291} 292}
292 293
293void MaxwellDMA::CopyBlockLinearToBlockLinear() { 294void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
347 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, 348 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
348 dst.block_size.height, dst.block_size.depth, pitch); 349 dst.block_size.height, dst.block_size.depth, pitch);
349 350
350 memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); 351 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
351} 352}
352 353
353void MaxwellDMA::ReleaseSemaphore() { 354void MaxwellDMA::ReleaseSemaphore() {
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index e6dc24f22..f275b2aa9 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -47,6 +47,7 @@ set(SHADER_FILES
47 vulkan_present_scaleforce_fp16.frag 47 vulkan_present_scaleforce_fp16.frag
48 vulkan_present_scaleforce_fp32.frag 48 vulkan_present_scaleforce_fp32.frag
49 vulkan_quad_indexed.comp 49 vulkan_quad_indexed.comp
50 vulkan_turbo_mode.comp
50 vulkan_uint8.comp 51 vulkan_uint8.comp
51) 52)
52 53
diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp
new file mode 100644
index 000000000..d651001d9
--- /dev/null
+++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp
@@ -0,0 +1,29 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#version 460 core
5
6layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
7
8layout (binding = 0) buffer ThreadData {
9 uint data[];
10};
11
// One round of the 32-bit xorshift generator using the 13 / 17 / 5 shift triple.
// Takes the current state and returns the scrambled next state.
uint xorshift32(uint x) {
    uint state = x;
    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);
    return state;
}
18
// Flattens the 2D global invocation ID into a linear, row-major index.
// X is the fast axis, so one row spans every invocation along X:
// gl_WorkGroupSize.x * gl_NumWorkGroups.x. Using the Y extents as the stride
// makes distinct invocations share an index whenever the X and Y extents differ.
uint getGlobalIndex() {
    uint rowStride = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
    return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * rowStride;
}
22
// Each invocation derives two pseudo-random slots from its global index, reads one
// slot atomically (an add of zero) and adds that value plus one to the other slot.
void main() {
    uint slotCount = uint(data.length());
    uint myIndex = xorshift32(getGlobalIndex());
    uint otherIndex = xorshift32(myIndex);

    // atomicAdd with 0 returns the current contents without modifying them.
    uint observed = atomicAdd(data[otherIndex % slotCount], 0);
    uint otherValue = observed + 1;
    atomicAdd(data[myIndex % slotCount], otherValue);
}
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h
new file mode 100644
index 000000000..2c2aaf7bb
--- /dev/null
+++ b/src/video_core/invalidation_accumulator.h
@@ -0,0 +1,79 @@
1// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <utility>
7#include <vector>
8
9#include "common/common_types.h"
10
11namespace VideoCommon {
12
/// Collects GPU virtual address ranges and merges ranges that directly follow one another,
/// so a run of adjacent additions is reported to the consumer as a single range.
///
/// Every added range is widened to `atomicity_size` (32-byte) boundaries before it is stored.
class InvalidationAccumulator {
public:
    InvalidationAccumulator() = default;
    ~InvalidationAccumulator() = default;

    /// Records the range [address, address + size).
    ///
    /// The range is dropped if it already lies inside the range currently being accumulated,
    /// appended to it if it starts exactly where that range ends, and otherwise starts a new
    /// range (the previous one is moved to the finished list).
    void Add(GPUVAddr address, size_t size) {
        // Finishes the current range (if any) and starts a new one from `address`/`size`.
        // Captures by reference, so it sees the aligned values computed below.
        const auto reset_values = [&]() {
            if (has_collected) {
                buffer.emplace_back(start_address, accumulated_size);
            }
            start_address = address;
            accumulated_size = size;
            last_collection = start_address + size;
        };
        if (address >= start_address && address + size <= last_collection) [[likely]] {
            return;
        }
        // Round the end up and the start down to the atomicity granule. The size must be
        // measured from the *aligned* start; measuring it from the unaligned start shifts the
        // stored range down and leaves the tail of the requested bytes uncovered.
        const GPUVAddr aligned_end = (address + size + atomicity_size_mask) & atomicity_mask;
        address &= atomicity_mask;
        size = static_cast<size_t>(aligned_end - address);
        if (!has_collected) [[unlikely]] {
            reset_values();
            has_collected = true;
            return;
        }
        if (address != last_collection) [[unlikely]] {
            reset_values();
            return;
        }
        accumulated_size += size;
        last_collection += size;
    }

    /// Forgets every finished range and the range currently being accumulated.
    void Clear() {
        buffer.clear();
        start_address = 0;
        last_collection = 0;
        accumulated_size = 0;
        has_collected = false;
    }

    /// Returns true if at least one range has been recorded since the last Clear().
    bool AnyAccumulated() const {
        return has_collected;
    }

    /// Invokes `func(address, size)` for every finished range in insertion order, followed by
    /// the range currently being accumulated. Does nothing when nothing has been recorded.
    ///
    /// The accumulator is left untouched, so calling this repeatedly reports the same ranges
    /// without duplicating the pending one; call Clear() to start over.
    template <typename Func>
    void Callback(Func&& func) {
        if (!has_collected) {
            return;
        }
        for (const auto& [address, size] : buffer) {
            func(address, size);
        }
        func(start_address, accumulated_size);
    }

private:
    static constexpr size_t atomicity_bits = 5;
    static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
    static constexpr size_t atomicity_size_mask = atomicity_size - 1;
    static constexpr size_t atomicity_mask = ~atomicity_size_mask;
    GPUVAddr start_address{};   ///< Start of the range currently being accumulated.
    GPUVAddr last_collection{}; ///< One past the end of the range being accumulated.
    size_t accumulated_size{};  ///< Size in bytes of the range being accumulated.
    bool has_collected{};       ///< Whether any range has been recorded since Clear().
    std::vector<std::pair<GPUVAddr, size_t>> buffer; ///< Finished (address, size) ranges.
};
78
79} // namespace VideoCommon
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index a5476e795..6272a4652 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -50,38 +50,6 @@ protected:
50 Maxwell3D& maxwell3d; 50 Maxwell3D& maxwell3d;
51}; 51};
52 52
53class HLE_DrawArrays final : public HLEMacroImpl {
54public:
55 explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
56
57 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
58 maxwell3d.RefreshParameters();
59
60 auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
61 maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2],
62 maxwell3d.regs.global_base_instance_index, 1);
63 }
64};
65
66class HLE_DrawIndexed final : public HLEMacroImpl {
67public:
68 explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
69
70 void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
71 maxwell3d.RefreshParameters();
72 maxwell3d.regs.index_buffer.start_addr_high = parameters[1];
73 maxwell3d.regs.index_buffer.start_addr_low = parameters[2];
74 maxwell3d.regs.index_buffer.format =
75 static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]);
76 maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
77
78 auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
79 maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4],
80 maxwell3d.regs.global_base_vertex_index,
81 maxwell3d.regs.global_base_instance_index, 1);
82 }
83};
84
85/* 53/*
86 * @note: these macros have two versions, a normal and extended version, with the extended version 54 * @note: these macros have two versions, a normal and extended version, with the extended version
87 * also assigning the base vertex/instance. 55 * also assigning the base vertex/instance.
@@ -497,11 +465,6 @@ public:
497} // Anonymous namespace 465} // Anonymous namespace
498 466
499HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { 467HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
500 builders.emplace(0xDD6A7FA92A7D2674ULL,
501 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
502 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
503 return std::make_unique<HLE_DrawArrays>(maxwell3d__);
504 }));
505 builders.emplace(0x0D61FC9FAAC9FCADULL, 468 builders.emplace(0x0D61FC9FAAC9FCADULL,
506 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( 469 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
507 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { 470 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
@@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {
512 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { 475 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
513 return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); 476 return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__);
514 })); 477 }));
515 builders.emplace(0x2DB33AADB741839CULL,
516 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
517 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
518 return std::make_unique<HLE_DrawIndexed>(maxwell3d__);
519 }));
520 builders.emplace(0x771BB18C62444DA0ULL, 478 builders.emplace(0x771BB18C62444DA0ULL,
521 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( 479 std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>(
522 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { 480 [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 3a5cdeb39..3bcae3503 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -6,11 +6,13 @@
6#include "common/alignment.h" 6#include "common/alignment.h"
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/settings.h"
9#include "core/core.h" 10#include "core/core.h"
10#include "core/device_memory.h" 11#include "core/device_memory.h"
11#include "core/hle/kernel/k_page_table.h" 12#include "core/hle/kernel/k_page_table.h"
12#include "core/hle/kernel/k_process.h" 13#include "core/hle/kernel/k_process.h"
13#include "core/memory.h" 14#include "core/memory.h"
15#include "video_core/invalidation_accumulator.h"
14#include "video_core/memory_manager.h" 16#include "video_core/memory_manager.h"
15#include "video_core/rasterizer_interface.h" 17#include "video_core/rasterizer_interface.h"
16#include "video_core/renderer_base.h" 18#include "video_core/renderer_base.h"
@@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
26 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, 28 entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
27 page_bits != big_page_bits ? page_bits : 0}, 29 page_bits != big_page_bits ? page_bits : 0},
28 kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( 30 kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add(
29 1, std::memory_order_acq_rel)} { 31 1, std::memory_order_acq_rel)},
32 accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} {
30 address_space_size = 1ULL << address_space_bits; 33 address_space_size = 1ULL << address_space_bits;
31 page_size = 1ULL << page_bits; 34 page_size = 1ULL << page_bits;
32 page_mask = page_size - 1ULL; 35 page_mask = page_size - 1ULL;
@@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
43 big_page_table_cpu.resize(big_page_table_size); 46 big_page_table_cpu.resize(big_page_table_size);
44 big_page_continous.resize(big_page_table_size / continous_bits, 0); 47 big_page_continous.resize(big_page_table_size / continous_bits, 0);
45 entries.resize(page_table_size / 32, 0); 48 entries.resize(page_table_size / 32, 0);
49 if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
50 fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
51 } else {
52 fastmem_arena = nullptr;
53 }
46} 54}
47 55
48MemoryManager::~MemoryManager() = default; 56MemoryManager::~MemoryManager() = default;
@@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
185 if (size == 0) { 193 if (size == 0) {
186 return; 194 return;
187 } 195 }
188 const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); 196 GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash);
189
190 for (const auto& [map_addr, map_size] : submapped_ranges) {
191 // Flush and invalidate through the GPU interface, to be asynchronous if possible.
192 const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr);
193 ASSERT(cpu_addr);
194 197
195 rasterizer->UnmapMemory(*cpu_addr, map_size); 198 for (const auto& [map_addr, map_size] : page_stash) {
199 rasterizer->UnmapMemory(map_addr, map_size);
196 } 200 }
201 page_stash.clear();
197 202
198 BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); 203 BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
199 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); 204 PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID);
@@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si
355 } 360 }
356} 361}
357 362
358template <bool is_safe> 363template <bool is_safe, bool use_fastmem>
359void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, 364void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
360 [[maybe_unused]] VideoCommon::CacheType which) const { 365 [[maybe_unused]] VideoCommon::CacheType which) const {
361 auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, 366 auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
@@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
369 if constexpr (is_safe) { 374 if constexpr (is_safe) {
370 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 375 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
371 } 376 }
372 u8* physical = memory.GetPointer(cpu_addr_base); 377 if constexpr (use_fastmem) {
373 std::memcpy(dest_buffer, physical, copy_amount); 378 std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
379 } else {
380 u8* physical = memory.GetPointer(cpu_addr_base);
381 std::memcpy(dest_buffer, physical, copy_amount);
382 }
374 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 383 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
375 }; 384 };
376 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { 385 auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
379 if constexpr (is_safe) { 388 if constexpr (is_safe) {
380 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); 389 rasterizer->FlushRegion(cpu_addr_base, copy_amount, which);
381 } 390 }
382 if (!IsBigPageContinous(page_index)) [[unlikely]] { 391 if constexpr (use_fastmem) {
383 memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); 392 std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount);
384 } else { 393 } else {
385 u8* physical = memory.GetPointer(cpu_addr_base); 394 if (!IsBigPageContinous(page_index)) [[unlikely]] {
386 std::memcpy(dest_buffer, physical, copy_amount); 395 memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
396 } else {
397 u8* physical = memory.GetPointer(cpu_addr_base);
398 std::memcpy(dest_buffer, physical, copy_amount);
399 }
387 } 400 }
388 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; 401 dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
389 }; 402 };
@@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std:
397 410
398void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, 411void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
399 VideoCommon::CacheType which) const { 412 VideoCommon::CacheType which) const {
400 ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); 413 if (fastmem_arena) [[likely]] {
414 ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which);
415 return;
416 }
417 ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which);
401} 418}
402 419
403void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, 420void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
404 const std::size_t size) const { 421 const std::size_t size) const {
405 ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); 422 if (fastmem_arena) [[likely]] {
423 ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
424 return;
425 }
426 ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None);
406} 427}
407 428
408template <bool is_safe> 429template <bool is_safe>
@@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf
454 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); 475 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
455} 476}
456 477
478void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer,
479 std::size_t size) {
480 WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None);
481 accumulator->Add(gpu_dest_addr, size);
482}
483
457void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, 484void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size,
458 VideoCommon::CacheType which) const { 485 VideoCommon::CacheType which) const {
459 auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, 486 auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
@@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
663std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( 690std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
664 GPUVAddr gpu_addr, std::size_t size) const { 691 GPUVAddr gpu_addr, std::size_t size) const {
665 std::vector<std::pair<GPUVAddr, std::size_t>> result{}; 692 std::vector<std::pair<GPUVAddr, std::size_t>> result{};
666 std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; 693 GetSubmappedRangeImpl<true>(gpu_addr, size, result);
694 return result;
695}
696
697template <bool is_gpu_address>
698void MemoryManager::GetSubmappedRangeImpl(
699 GPUVAddr gpu_addr, std::size_t size,
700 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
701 result) const {
702 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
703 last_segment{};
667 std::optional<VAddr> old_page_addr{}; 704 std::optional<VAddr> old_page_addr{};
668 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, 705 const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
669 [[maybe_unused]] std::size_t offset, 706 [[maybe_unused]] std::size_t offset,
@@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
685 } 722 }
686 old_page_addr = {cpu_addr_base + copy_amount}; 723 old_page_addr = {cpu_addr_base + copy_amount};
687 if (!last_segment) { 724 if (!last_segment) {
688 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; 725 if constexpr (is_gpu_address) {
689 last_segment = {new_base_addr, copy_amount}; 726 const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
727 last_segment = {new_base_addr, copy_amount};
728 } else {
729 last_segment = {cpu_addr_base, copy_amount};
730 }
690 } else { 731 } else {
691 last_segment->second += copy_amount; 732 last_segment->second += copy_amount;
692 } 733 }
@@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
703 } 744 }
704 old_page_addr = {cpu_addr_base + copy_amount}; 745 old_page_addr = {cpu_addr_base + copy_amount};
705 if (!last_segment) { 746 if (!last_segment) {
706 const GPUVAddr new_base_addr = (page_index << page_bits) + offset; 747 if constexpr (is_gpu_address) {
707 last_segment = {new_base_addr, copy_amount}; 748 const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
749 last_segment = {new_base_addr, copy_amount};
750 } else {
751 last_segment = {cpu_addr_base, copy_amount};
752 }
708 } else { 753 } else {
709 last_segment->second += copy_amount; 754 last_segment->second += copy_amount;
710 } 755 }
@@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
715 }; 760 };
716 MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); 761 MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
717 split(0, 0, 0); 762 split(0, 0, 0);
718 return result; 763}
764
765void MemoryManager::FlushCaching() {
766 if (!accumulator->AnyAccumulated()) {
767 return;
768 }
769 accumulator->Callback([this](GPUVAddr addr, size_t size) {
770 GetSubmappedRangeImpl<false>(addr, size, page_stash);
771 });
772 rasterizer->InnerInvalidation(page_stash);
773 page_stash.clear();
774 accumulator->Clear();
719} 775}
720 776
721} // namespace Tegra 777} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 828e13439..2936364f0 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -19,6 +19,10 @@ namespace VideoCore {
19class RasterizerInterface; 19class RasterizerInterface;
20} 20}
21 21
22namespace VideoCommon {
23class InvalidationAccumulator;
24}
25
22namespace Core { 26namespace Core {
23class DeviceMemory; 27class DeviceMemory;
24namespace Memory { 28namespace Memory {
@@ -80,6 +84,7 @@ public:
80 */ 84 */
81 void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; 85 void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
82 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); 86 void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
87 void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
83 88
84 /** 89 /**
85 * Checks if a gpu region can be simply read with a pointer. 90 * Checks if a gpu region can be simply read with a pointer.
@@ -129,12 +134,14 @@ public:
129 size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, 134 size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
130 size_t max_size = std::numeric_limits<size_t>::max()) const; 135 size_t max_size = std::numeric_limits<size_t>::max()) const;
131 136
137 void FlushCaching();
138
132private: 139private:
133 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> 140 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
134 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, 141 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
135 FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; 142 FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;
136 143
137 template <bool is_safe> 144 template <bool is_safe, bool use_fastmem>
138 void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, 145 void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
139 VideoCommon::CacheType which) const; 146 VideoCommon::CacheType which) const;
140 147
@@ -154,6 +161,12 @@ private:
154 inline bool IsBigPageContinous(size_t big_page_index) const; 161 inline bool IsBigPageContinous(size_t big_page_index) const;
155 inline void SetBigPageContinous(size_t big_page_index, bool value); 162 inline void SetBigPageContinous(size_t big_page_index, bool value);
156 163
164 template <bool is_gpu_address>
165 void GetSubmappedRangeImpl(
166 GPUVAddr gpu_addr, std::size_t size,
167 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
168 result) const;
169
157 Core::System& system; 170 Core::System& system;
158 Core::Memory::Memory& memory; 171 Core::Memory::Memory& memory;
159 Core::DeviceMemory& device_memory; 172 Core::DeviceMemory& device_memory;
@@ -201,10 +214,13 @@ private:
201 Common::VirtualBuffer<u32> big_page_table_cpu; 214 Common::VirtualBuffer<u32> big_page_table_cpu;
202 215
203 std::vector<u64> big_page_continous; 216 std::vector<u64> big_page_continous;
217 std::vector<std::pair<VAddr, std::size_t>> page_stash{};
218 u8* fastmem_arena{};
204 219
205 constexpr static size_t continous_bits = 64; 220 constexpr static size_t continous_bits = 64;
206 221
207 const size_t unique_identifier; 222 const size_t unique_identifier;
223 std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
208 224
209 static std::atomic<size_t> unique_identifier_generator; 225 static std::atomic<size_t> unique_identifier_generator;
210}; 226};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index f44c7df50..1735b6164 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -6,6 +6,7 @@
6#include <functional> 6#include <functional>
7#include <optional> 7#include <optional>
8#include <span> 8#include <span>
9#include <utility>
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "common/polyfill_thread.h" 11#include "common/polyfill_thread.h"
11#include "video_core/cache_types.h" 12#include "video_core/cache_types.h"
@@ -95,6 +96,12 @@ public:
95 virtual void InvalidateRegion(VAddr addr, u64 size, 96 virtual void InvalidateRegion(VAddr addr, u64 size,
96 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; 97 VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
97 98
99 virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
100 for (const auto& [cpu_addr, size] : sequences) {
101 InvalidateRegion(cpu_addr, size);
102 }
103 }
104
98 /// Notify rasterizer that any caches of the specified region are desync with guest 105 /// Notify rasterizer that any caches of the specified region are desync with guest
99 virtual void OnCPUWrite(VAddr addr, u64 size) = 0; 106 virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
100 107
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index f502a7d09..1578cb206 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -78,6 +78,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
78 return separated_extensions; 78 return separated_extensions;
79} 79}
80 80
81} // Anonymous namespace
82
81Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, 83Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
82 VkSurfaceKHR surface) { 84 VkSurfaceKHR surface) {
83 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); 85 const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices();
@@ -89,7 +91,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl
89 const vk::PhysicalDevice physical_device(devices[device_index], dld); 91 const vk::PhysicalDevice physical_device(devices[device_index], dld);
90 return Device(*instance, physical_device, surface, dld); 92 return Device(*instance, physical_device, surface, dld);
91} 93}
92} // Anonymous namespace
93 94
94RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, 95RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
95 Core::Frontend::EmuWindow& emu_window, 96 Core::Frontend::EmuWindow& emu_window,
@@ -98,7 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
98 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), 99 : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
99 cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), 100 cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
100 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, 101 instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
101 true, Settings::values.renderer_debug.GetValue())), 102 Settings::values.renderer_debug.GetValue())),
102 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), 103 debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
103 surface(CreateSurface(instance, render_window)), 104 surface(CreateSurface(instance, render_window)),
104 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), 105 device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
@@ -109,6 +110,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
109 screen_info), 110 screen_info),
110 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, 111 rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
111 state_tracker, scheduler) { 112 state_tracker, scheduler) {
113 if (Settings::values.renderer_force_max_clock.GetValue()) {
114 turbo_mode.emplace(instance, dld);
115 }
112 Report(); 116 Report();
113} catch (const vk::Exception& exception) { 117} catch (const vk::Exception& exception) {
114 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); 118 LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index e7bfecb20..009e75e0d 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -13,6 +13,7 @@
13#include "video_core/renderer_vulkan/vk_scheduler.h" 13#include "video_core/renderer_vulkan/vk_scheduler.h"
14#include "video_core/renderer_vulkan/vk_state_tracker.h" 14#include "video_core/renderer_vulkan/vk_state_tracker.h"
15#include "video_core/renderer_vulkan/vk_swapchain.h" 15#include "video_core/renderer_vulkan/vk_swapchain.h"
16#include "video_core/renderer_vulkan/vk_turbo_mode.h"
16#include "video_core/vulkan_common/vulkan_device.h" 17#include "video_core/vulkan_common/vulkan_device.h"
17#include "video_core/vulkan_common/vulkan_memory_allocator.h" 18#include "video_core/vulkan_common/vulkan_memory_allocator.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 19#include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -31,6 +32,9 @@ class GPU;
31 32
32namespace Vulkan { 33namespace Vulkan {
33 34
35Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
36 VkSurfaceKHR surface);
37
34class RendererVulkan final : public VideoCore::RendererBase { 38class RendererVulkan final : public VideoCore::RendererBase {
35public: 39public:
36 explicit RendererVulkan(Core::TelemetrySession& telemtry_session, 40 explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
@@ -74,6 +78,7 @@ private:
74 Swapchain swapchain; 78 Swapchain swapchain;
75 BlitScreen blit_screen; 79 BlitScreen blit_screen;
76 RasterizerVulkan rasterizer; 80 RasterizerVulkan rasterizer;
81 std::optional<TurboMode> turbo_mode;
77}; 82};
78 83
79} // namespace Vulkan 84} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 487d8b416..b0153a502 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -336,6 +336,9 @@ void BufferCacheRuntime::Finish() {
336 336
337void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, 337void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
338 std::span<const VideoCommon::BufferCopy> copies, bool barrier) { 338 std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
339 if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) {
340 return;
341 }
339 static constexpr VkMemoryBarrier READ_BARRIER{ 342 static constexpr VkMemoryBarrier READ_BARRIER{
340 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 343 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
341 .pNext = nullptr, 344 .pNext = nullptr,
@@ -394,6 +397,9 @@ void BufferCacheRuntime::PostCopyBarrier() {
394} 397}
395 398
396void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) { 399void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) {
400 if (dest_buffer == VK_NULL_HANDLE) {
401 return;
402 }
397 static constexpr VkMemoryBarrier READ_BARRIER{ 403 static constexpr VkMemoryBarrier READ_BARRIER{
398 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, 404 .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
399 .pNext = nullptr, 405 .pNext = nullptr,
@@ -473,6 +479,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset
473 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); 479 cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride);
474 }); 480 });
475 } else { 481 } else {
482 if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) {
483 ReserveNullBuffer();
484 buffer = *null_buffer;
485 offset = 0;
486 }
476 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { 487 scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) {
477 cmdbuf.BindVertexBuffer(index, buffer, offset); 488 cmdbuf.BindVertexBuffer(index, buffer, offset);
478 }); 489 });
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index 04a3a861e..2a0f0dbf0 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -24,13 +24,15 @@ using Shader::ImageBufferDescriptor;
24using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; 24using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET;
25using Tegra::Texture::TexturePair; 25using Tegra::Texture::TexturePair;
26 26
27ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, 27ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_,
28 DescriptorPool& descriptor_pool,
28 UpdateDescriptorQueue& update_descriptor_queue_, 29 UpdateDescriptorQueue& update_descriptor_queue_,
29 Common::ThreadWorker* thread_worker, 30 Common::ThreadWorker* thread_worker,
30 PipelineStatistics* pipeline_statistics, 31 PipelineStatistics* pipeline_statistics,
31 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, 32 VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_,
32 vk::ShaderModule spv_module_) 33 vk::ShaderModule spv_module_)
33 : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, 34 : device{device_}, pipeline_cache(pipeline_cache_),
35 update_descriptor_queue{update_descriptor_queue_}, info{info_},
34 spv_module(std::move(spv_module_)) { 36 spv_module(std::move(spv_module_)) {
35 if (shader_notify) { 37 if (shader_notify) {
36 shader_notify->MarkShaderBuilding(); 38 shader_notify->MarkShaderBuilding();
@@ -56,23 +58,27 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
56 if (device.IsKhrPipelineExecutablePropertiesEnabled()) { 58 if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
57 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; 59 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
58 } 60 }
59 pipeline = device.GetLogical().CreateComputePipeline({ 61 pipeline = device.GetLogical().CreateComputePipeline(
60 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 62 {
61 .pNext = nullptr, 63 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
62 .flags = flags, 64 .pNext = nullptr,
63 .stage{ 65 .flags = flags,
64 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 66 .stage{
65 .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, 67 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
66 .flags = 0, 68 .pNext =
67 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 69 device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
68 .module = *spv_module, 70 .flags = 0,
69 .pName = "main", 71 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
70 .pSpecializationInfo = nullptr, 72 .module = *spv_module,
73 .pName = "main",
74 .pSpecializationInfo = nullptr,
75 },
76 .layout = *pipeline_layout,
77 .basePipelineHandle = 0,
78 .basePipelineIndex = 0,
71 }, 79 },
72 .layout = *pipeline_layout, 80 *pipeline_cache);
73 .basePipelineHandle = 0, 81
74 .basePipelineIndex = 0,
75 });
76 if (pipeline_statistics) { 82 if (pipeline_statistics) {
77 pipeline_statistics->Collect(*pipeline); 83 pipeline_statistics->Collect(*pipeline);
78 } 84 }
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
index d70837fc5..78d77027f 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@@ -28,7 +28,8 @@ class Scheduler;
28 28
29class ComputePipeline { 29class ComputePipeline {
30public: 30public:
31 explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, 31 explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache,
32 DescriptorPool& descriptor_pool,
32 UpdateDescriptorQueue& update_descriptor_queue, 33 UpdateDescriptorQueue& update_descriptor_queue,
33 Common::ThreadWorker* thread_worker, 34 Common::ThreadWorker* thread_worker,
34 PipelineStatistics* pipeline_statistics, 35 PipelineStatistics* pipeline_statistics,
@@ -46,6 +47,7 @@ public:
46 47
47private: 48private:
48 const Device& device; 49 const Device& device;
50 vk::PipelineCache& pipeline_cache;
49 UpdateDescriptorQueue& update_descriptor_queue; 51 UpdateDescriptorQueue& update_descriptor_queue;
50 Shader::Info info; 52 Shader::Info info;
51 53
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 734c379b9..f91bb5a1d 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -234,13 +234,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m
234 234
235GraphicsPipeline::GraphicsPipeline( 235GraphicsPipeline::GraphicsPipeline(
236 Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, 236 Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_,
237 VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, 237 vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify,
238 const Device& device_, DescriptorPool& descriptor_pool,
238 UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, 239 UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread,
239 PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, 240 PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
240 const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, 241 const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages,
241 const std::array<const Shader::Info*, NUM_STAGES>& infos) 242 const std::array<const Shader::Info*, NUM_STAGES>& infos)
242 : key{key_}, device{device_}, texture_cache{texture_cache_}, 243 : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
243 buffer_cache{buffer_cache_}, scheduler{scheduler_}, 244 pipeline_cache(pipeline_cache_), scheduler{scheduler_},
244 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { 245 update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} {
245 if (shader_notify) { 246 if (shader_notify) {
246 shader_notify->MarkShaderBuilding(); 247 shader_notify->MarkShaderBuilding();
@@ -897,27 +898,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
897 if (device.IsKhrPipelineExecutablePropertiesEnabled()) { 898 if (device.IsKhrPipelineExecutablePropertiesEnabled()) {
898 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; 899 flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
899 } 900 }
900 pipeline = device.GetLogical().CreateGraphicsPipeline({ 901 pipeline = device.GetLogical().CreateGraphicsPipeline(
901 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, 902 {
902 .pNext = nullptr, 903 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
903 .flags = flags, 904 .pNext = nullptr,
904 .stageCount = static_cast<u32>(shader_stages.size()), 905 .flags = flags,
905 .pStages = shader_stages.data(), 906 .stageCount = static_cast<u32>(shader_stages.size()),
906 .pVertexInputState = &vertex_input_ci, 907 .pStages = shader_stages.data(),
907 .pInputAssemblyState = &input_assembly_ci, 908 .pVertexInputState = &vertex_input_ci,
908 .pTessellationState = &tessellation_ci, 909 .pInputAssemblyState = &input_assembly_ci,
909 .pViewportState = &viewport_ci, 910 .pTessellationState = &tessellation_ci,
910 .pRasterizationState = &rasterization_ci, 911 .pViewportState = &viewport_ci,
911 .pMultisampleState = &multisample_ci, 912 .pRasterizationState = &rasterization_ci,
912 .pDepthStencilState = &depth_stencil_ci, 913 .pMultisampleState = &multisample_ci,
913 .pColorBlendState = &color_blend_ci, 914 .pDepthStencilState = &depth_stencil_ci,
914 .pDynamicState = &dynamic_state_ci, 915 .pColorBlendState = &color_blend_ci,
915 .layout = *pipeline_layout, 916 .pDynamicState = &dynamic_state_ci,
916 .renderPass = render_pass, 917 .layout = *pipeline_layout,
917 .subpass = 0, 918 .renderPass = render_pass,
918 .basePipelineHandle = nullptr, 919 .subpass = 0,
919 .basePipelineIndex = 0, 920 .basePipelineHandle = nullptr,
920 }); 921 .basePipelineIndex = 0,
922 },
923 *pipeline_cache);
921} 924}
922 925
923void GraphicsPipeline::Validate() { 926void GraphicsPipeline::Validate() {
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index 1ed2967be..67c657d0e 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -70,16 +70,14 @@ class GraphicsPipeline {
70 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; 70 static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage;
71 71
72public: 72public:
73 explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache, 73 explicit GraphicsPipeline(
74 TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify, 74 Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache,
75 const Device& device, DescriptorPool& descriptor_pool, 75 vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify,
76 UpdateDescriptorQueue& update_descriptor_queue, 76 const Device& device, DescriptorPool& descriptor_pool,
77 Common::ThreadWorker* worker_thread, 77 UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread,
78 PipelineStatistics* pipeline_statistics, 78 PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache,
79 RenderPassCache& render_pass_cache, 79 const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages,
80 const GraphicsPipelineCacheKey& key, 80 const std::array<const Shader::Info*, NUM_STAGES>& infos);
81 std::array<vk::ShaderModule, NUM_STAGES> stages,
82 const std::array<const Shader::Info*, NUM_STAGES>& infos);
83 81
84 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; 82 GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
85 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; 83 GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;
@@ -133,6 +131,7 @@ private:
133 const Device& device; 131 const Device& device;
134 TextureCache& texture_cache; 132 TextureCache& texture_cache;
135 BufferCache& buffer_cache; 133 BufferCache& buffer_cache;
134 vk::PipelineCache& pipeline_cache;
136 Scheduler& scheduler; 135 Scheduler& scheduler;
137 UpdateDescriptorQueue& update_descriptor_queue; 136 UpdateDescriptorQueue& update_descriptor_queue;
138 137
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 3046b72ab..67e5bc648 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -55,6 +55,7 @@ using VideoCommon::GenericEnvironment;
55using VideoCommon::GraphicsEnvironment; 55using VideoCommon::GraphicsEnvironment;
56 56
57constexpr u32 CACHE_VERSION = 10; 57constexpr u32 CACHE_VERSION = 10;
58constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'};
58 59
59template <typename Container> 60template <typename Container>
60auto MakeSpan(Container& container) { 61auto MakeSpan(Container& container) {
@@ -284,6 +285,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
284 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, 285 render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
285 texture_cache{texture_cache_}, shader_notify{shader_notify_}, 286 texture_cache{texture_cache_}, shader_notify{shader_notify_},
286 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, 287 use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
288 use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()},
287 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), 289 workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
288 serialization_thread(1, "VkPipelineSerialization") { 290 serialization_thread(1, "VkPipelineSerialization") {
289 const auto& float_control{device.FloatControlProperties()}; 291 const auto& float_control{device.FloatControlProperties()};
@@ -362,7 +364,12 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
362 }; 364 };
363} 365}
364 366
365PipelineCache::~PipelineCache() = default; 367PipelineCache::~PipelineCache() {
368 if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) {
369 SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
370 CACHE_VERSION);
371 }
372}
366 373
367GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { 374GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
368 MICROPROFILE_SCOPE(Vulkan_PipelineCache); 375 MICROPROFILE_SCOPE(Vulkan_PipelineCache);
@@ -418,6 +425,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
418 } 425 }
419 pipeline_cache_filename = base_dir / "vulkan.bin"; 426 pipeline_cache_filename = base_dir / "vulkan.bin";
420 427
428 if (use_vulkan_pipeline_cache) {
429 vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin";
430 vulkan_pipeline_cache =
431 LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION);
432 }
433
421 struct { 434 struct {
422 std::mutex mutex; 435 std::mutex mutex;
423 size_t total{}; 436 size_t total{};
@@ -496,6 +509,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
496 509
497 workers.WaitForRequests(stop_loading); 510 workers.WaitForRequests(stop_loading);
498 511
512 if (use_vulkan_pipeline_cache) {
513 SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache,
514 CACHE_VERSION);
515 }
516
499 if (state.statistics) { 517 if (state.statistics) {
500 state.statistics->Report(); 518 state.statistics->Report();
501 } 519 }
@@ -616,10 +634,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
616 previous_stage = &program; 634 previous_stage = &program;
617 } 635 }
618 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; 636 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
619 return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache, 637 return std::make_unique<GraphicsPipeline>(
620 &shader_notify, device, descriptor_pool, 638 scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device,
621 update_descriptor_queue, thread_worker, statistics, 639 descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
622 render_pass_cache, key, std::move(modules), infos); 640 std::move(modules), infos);
623 641
624} catch (const Shader::Exception& exception) { 642} catch (const Shader::Exception& exception) {
625 LOG_ERROR(Render_Vulkan, "{}", exception.what()); 643 LOG_ERROR(Render_Vulkan, "{}", exception.what());
@@ -689,13 +707,107 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
689 spv_module.SetObjectNameEXT(name.c_str()); 707 spv_module.SetObjectNameEXT(name.c_str());
690 } 708 }
691 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; 709 Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
692 return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, 710 return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool,
693 thread_worker, statistics, &shader_notify, 711 update_descriptor_queue, thread_worker, statistics,
694 program.info, std::move(spv_module)); 712 &shader_notify, program.info, std::move(spv_module));
695 713
696} catch (const Shader::Exception& exception) { 714} catch (const Shader::Exception& exception) {
697 LOG_ERROR(Render_Vulkan, "{}", exception.what()); 715 LOG_ERROR(Render_Vulkan, "{}", exception.what());
698 return nullptr; 716 return nullptr;
699} 717}
700 718
719void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename,
720 const vk::PipelineCache& pipeline_cache,
721 u32 cache_version) try {
722 std::ofstream file(filename, std::ios::binary);
723 file.exceptions(std::ifstream::failbit);
724 if (!file.is_open()) {
725 LOG_ERROR(Common_Filesystem, "Failed to open Vulkan driver pipeline cache file {}",
726 Common::FS::PathToUTF8String(filename));
727 return;
728 }
729 file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size())
730 .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version));
731
732 size_t cache_size = 0;
733 std::vector<char> cache_data;
734 if (pipeline_cache) {
735 pipeline_cache.Read(&cache_size, nullptr);
736 cache_data.resize(cache_size);
737 pipeline_cache.Read(&cache_size, cache_data.data());
738 }
739 file.write(cache_data.data(), cache_size);
740
741 LOG_INFO(Render_Vulkan, "Vulkan driver pipelines cached at: {}",
742 Common::FS::PathToUTF8String(filename));
743
744} catch (const std::ios_base::failure& e) {
745 LOG_ERROR(Common_Filesystem, "{}", e.what());
746 if (!Common::FS::RemoveFile(filename)) {
747 LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}",
748 Common::FS::PathToUTF8String(filename));
749 }
750}
751
752vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::path& filename,
753 u32 expected_cache_version) {
754 const auto create_pipeline_cache = [this](size_t data_size, const void* data) {
755 VkPipelineCacheCreateInfo pipeline_cache_ci = {
756 .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,
757 .pNext = nullptr,
758 .flags = 0,
759 .initialDataSize = data_size,
760 .pInitialData = data};
761 return device.GetLogical().CreatePipelineCache(pipeline_cache_ci);
762 };
763 try {
764 std::ifstream file(filename, std::ios::binary | std::ios::ate);
765 if (!file.is_open()) {
766 return create_pipeline_cache(0, nullptr);
767 }
768 file.exceptions(std::ifstream::failbit);
769 const auto end{file.tellg()};
770 file.seekg(0, std::ios::beg);
771
772 std::array<char, 8> magic_number;
773 u32 cache_version;
774 file.read(magic_number.data(), magic_number.size())
775 .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
776 if (magic_number != VULKAN_CACHE_MAGIC_NUMBER || cache_version != expected_cache_version) {
777 file.close();
778 if (Common::FS::RemoveFile(filename)) {
779 if (magic_number != VULKAN_CACHE_MAGIC_NUMBER) {
780 LOG_ERROR(Common_Filesystem, "Invalid Vulkan driver pipeline cache file");
781 }
782 if (cache_version != expected_cache_version) {
783 LOG_INFO(Common_Filesystem, "Deleting old Vulkan driver pipeline cache");
784 }
785 } else {
786 LOG_ERROR(Common_Filesystem,
787 "Invalid Vulkan pipeline cache file and failed to delete it in \"{}\"",
788 Common::FS::PathToUTF8String(filename));
789 }
790 return create_pipeline_cache(0, nullptr);
791 }
792
793 const size_t cache_size = static_cast<size_t>(end) - magic_number.size();
794 std::vector<char> cache_data(cache_size);
795 file.read(cache_data.data(), cache_size);
796
797 LOG_INFO(Render_Vulkan,
798 "Loaded Vulkan driver pipeline cache: ", Common::FS::PathToUTF8String(filename));
799
800 return create_pipeline_cache(cache_size, cache_data.data());
801
802 } catch (const std::ios_base::failure& e) {
803 LOG_ERROR(Common_Filesystem, "{}", e.what());
804 if (!Common::FS::RemoveFile(filename)) {
805 LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}",
806 Common::FS::PathToUTF8String(filename));
807 }
808
809 return create_pipeline_cache(0, nullptr);
810 }
811}
812
701} // namespace Vulkan 813} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index b4f593ef5..5171912d7 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -135,6 +135,12 @@ private:
135 PipelineStatistics* statistics, 135 PipelineStatistics* statistics,
136 bool build_in_parallel); 136 bool build_in_parallel);
137 137
138 void SerializeVulkanPipelineCache(const std::filesystem::path& filename,
139 const vk::PipelineCache& pipeline_cache, u32 cache_version);
140
141 vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename,
142 u32 expected_cache_version);
143
138 const Device& device; 144 const Device& device;
139 Scheduler& scheduler; 145 Scheduler& scheduler;
140 DescriptorPool& descriptor_pool; 146 DescriptorPool& descriptor_pool;
@@ -144,6 +150,7 @@ private:
144 TextureCache& texture_cache; 150 TextureCache& texture_cache;
145 VideoCore::ShaderNotify& shader_notify; 151 VideoCore::ShaderNotify& shader_notify;
146 bool use_asynchronous_shaders{}; 152 bool use_asynchronous_shaders{};
153 bool use_vulkan_pipeline_cache{};
147 154
148 GraphicsPipelineCacheKey graphics_key{}; 155 GraphicsPipelineCacheKey graphics_key{};
149 GraphicsPipeline* current_pipeline{}; 156 GraphicsPipeline* current_pipeline{};
@@ -158,6 +165,9 @@ private:
158 165
159 std::filesystem::path pipeline_cache_filename; 166 std::filesystem::path pipeline_cache_filename;
160 167
168 std::filesystem::path vulkan_pipeline_cache_filename;
169 vk::PipelineCache vulkan_pipeline_cache;
170
161 Common::ThreadWorker workers; 171 Common::ThreadWorker workers;
162 Common::ThreadWorker serialization_thread; 172 Common::ThreadWorker serialization_thread;
163 DynamicFeatures dynamic_features; 173 DynamicFeatures dynamic_features;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 242bf9602..ed4a72166 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
186 186
187 SCOPE_EXIT({ gpu.TickWork(); }); 187 SCOPE_EXIT({ gpu.TickWork(); });
188 FlushWork(); 188 FlushWork();
189 gpu_memory->FlushCaching();
189 190
190 query_cache.UpdateCounters(); 191 query_cache.UpdateCounters();
191 192
@@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
393 394
394void RasterizerVulkan::DispatchCompute() { 395void RasterizerVulkan::DispatchCompute() {
395 FlushWork(); 396 FlushWork();
397 gpu_memory->FlushCaching();
396 398
397 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; 399 ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
398 if (!pipeline) { 400 if (!pipeline) {
@@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
481 } 483 }
482} 484}
483 485
486void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
487 {
488 std::scoped_lock lock{texture_cache.mutex};
489 for (const auto& [addr, size] : sequences) {
490 texture_cache.WriteMemory(addr, size);
491 }
492 }
493 {
494 std::scoped_lock lock{buffer_cache.mutex};
495 for (const auto& [addr, size] : sequences) {
496 buffer_cache.WriteMemory(addr, size);
497 }
498 }
499 {
500 for (const auto& [addr, size] : sequences) {
501 query_cache.InvalidateRegion(addr, size);
502 pipeline_cache.InvalidateRegion(addr, size);
503 }
504 }
505}
506
484void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { 507void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
485 if (addr == 0 || size == 0) { 508 if (addr == 0 || size == 0) {
486 return; 509 return;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c661e5b19..472cc64d9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -79,6 +79,7 @@ public:
79 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 79 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
80 void InvalidateRegion(VAddr addr, u64 size, 80 void InvalidateRegion(VAddr addr, u64 size,
81 VideoCommon::CacheType which = VideoCommon::CacheType::All) override; 81 VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
82 void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
82 void OnCPUWrite(VAddr addr, u64 size) override; 83 void OnCPUWrite(VAddr addr, u64 size) override;
83 void InvalidateGPUCache() override; 84 void InvalidateGPUCache() override;
84 void UnmapMemory(VAddr addr, u64 size) override; 85 void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
new file mode 100644
index 000000000..852b86f84
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -0,0 +1,205 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "common/literals.h"
5#include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h"
6#include "video_core/renderer_vulkan/renderer_vulkan.h"
7#include "video_core/renderer_vulkan/vk_shader_util.h"
8#include "video_core/renderer_vulkan/vk_turbo_mode.h"
9#include "video_core/vulkan_common/vulkan_device.h"
10
11namespace Vulkan {
12
13using namespace Common::Literals;
14
15TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
16 : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} {
17 m_thread = std::jthread([&](auto stop_token) { Run(stop_token); });
18}
19
20TurboMode::~TurboMode() = default;
21
22void TurboMode::Run(std::stop_token stop_token) {
23 auto& dld = m_device.GetLogical();
24
25 // Allocate buffer. 2MiB should be sufficient.
26 auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
27 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
28 .pNext = nullptr,
29 .flags = 0,
30 .size = 2_MiB,
31 .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
32 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
33 .queueFamilyIndexCount = 0,
34 .pQueueFamilyIndices = nullptr,
35 });
36
37 // Commit some device local memory for the buffer.
38 auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
39
40 // Create the descriptor pool to contain our descriptor.
41 constexpr VkDescriptorPoolSize pool_size{
42 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
43 .descriptorCount = 1,
44 };
45
46 auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{
47 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
48 .pNext = nullptr,
49 .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
50 .maxSets = 1,
51 .poolSizeCount = 1,
52 .pPoolSizes = &pool_size,
53 });
54
55 // Create the descriptor set layout from the pool.
56 constexpr VkDescriptorSetLayoutBinding layout_binding{
57 .binding = 0,
58 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
59 .descriptorCount = 1,
60 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
61 .pImmutableSamplers = nullptr,
62 };
63
64 auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{
65 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
66 .pNext = nullptr,
67 .flags = 0,
68 .bindingCount = 1,
69 .pBindings = &layout_binding,
70 });
71
72 // Actually create the descriptor set.
73 auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{
74 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
75 .pNext = nullptr,
76 .descriptorPool = *descriptor_pool,
77 .descriptorSetCount = 1,
78 .pSetLayouts = descriptor_set_layout.address(),
79 });
80
81 // Create the shader.
82 auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV);
83
84 // Create the pipeline layout.
85 auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{
86 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
87 .pNext = nullptr,
88 .flags = 0,
89 .setLayoutCount = 1,
90 .pSetLayouts = descriptor_set_layout.address(),
91 .pushConstantRangeCount = 0,
92 .pPushConstantRanges = nullptr,
93 });
94
95 // Actually create the pipeline.
96 const VkPipelineShaderStageCreateInfo shader_stage{
97 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
98 .pNext = nullptr,
99 .flags = 0,
100 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
101 .module = *shader,
102 .pName = "main",
103 .pSpecializationInfo = nullptr,
104 };
105
106 auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{
107 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
108 .pNext = nullptr,
109 .flags = 0,
110 .stage = shader_stage,
111 .layout = *pipeline_layout,
112 .basePipelineHandle = VK_NULL_HANDLE,
113 .basePipelineIndex = 0,
114 });
115
116 // Create a fence to wait on.
117 auto fence = dld.CreateFence(VkFenceCreateInfo{
118 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
119 .pNext = nullptr,
120 .flags = 0,
121 });
122
123 // Create a command pool to allocate a command buffer from.
124 auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{
125 .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
126 .pNext = nullptr,
127 .flags =
128 VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
129 .queueFamilyIndex = m_device.GetGraphicsFamily(),
130 });
131
132 // Create a single command buffer.
133 auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
134 auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()};
135
136 while (!stop_token.stop_requested()) {
137 // Reset the fence.
138 fence.Reset();
139
140 // Update descriptor set.
141 const VkDescriptorBufferInfo buffer_info{
142 .buffer = *buffer,
143 .offset = 0,
144 .range = VK_WHOLE_SIZE,
145 };
146
147 const VkWriteDescriptorSet buffer_write{
148 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
149 .pNext = nullptr,
150 .dstSet = descriptor_set[0],
151 .dstBinding = 0,
152 .dstArrayElement = 0,
153 .descriptorCount = 1,
154 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
155 .pImageInfo = nullptr,
156 .pBufferInfo = &buffer_info,
157 .pTexelBufferView = nullptr,
158 };
159
160 dld.UpdateDescriptorSets(std::array{buffer_write}, {});
161
162 // Set up the command buffer.
163 cmdbuf.Begin(VkCommandBufferBeginInfo{
164 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
165 .pNext = nullptr,
166 .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
167 .pInheritanceInfo = nullptr,
168 });
169
170 // Clear the buffer.
171 cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0);
172
173 // Bind descriptor set.
174 cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0,
175 descriptor_set, {});
176
177 // Bind the pipeline.
178 cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
179
180 // Dispatch.
181 cmdbuf.Dispatch(64, 64, 1);
182
183 // Finish.
184 cmdbuf.End();
185
186 const VkSubmitInfo submit_info{
187 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
188 .pNext = nullptr,
189 .waitSemaphoreCount = 0,
190 .pWaitSemaphores = nullptr,
191 .pWaitDstStageMask = nullptr,
192 .commandBufferCount = 1,
193 .pCommandBuffers = cmdbuf.address(),
194 .signalSemaphoreCount = 0,
195 .pSignalSemaphores = nullptr,
196 };
197
198 m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence);
199
200 // Wait for completion.
201 fence.Wait();
202 }
203}
204
205} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h
new file mode 100644
index 000000000..2060e2395
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h
@@ -0,0 +1,26 @@
1// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/polyfill_thread.h"
7#include "video_core/vulkan_common/vulkan_device.h"
8#include "video_core/vulkan_common/vulkan_memory_allocator.h"
9#include "video_core/vulkan_common/vulkan_wrapper.h"
10
11namespace Vulkan {
12
13class TurboMode {
14public:
15 explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld);
16 ~TurboMode();
17
18private:
19 void Run(std::stop_token stop_token);
20
21 Device m_device;
22 MemoryAllocator m_allocator;
23 std::jthread m_thread;
24};
25
26} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 5c5bfa18d..77aee802d 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1472,7 +1472,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
1472 is_patch_list_restart_supported = 1472 is_patch_list_restart_supported =
1473 primitive_topology_list_restart.primitiveTopologyPatchListRestart; 1473 primitive_topology_list_restart.primitiveTopologyPatchListRestart;
1474 } 1474 }
1475 if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { 1475 if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) {
1476 extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); 1476 extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
1477 extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); 1477 extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME);
1478 khr_swapchain_mutable_format = true; 1478 khr_swapchain_mutable_format = true;
@@ -1487,6 +1487,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
1487 1487
1488 max_push_descriptors = push_descriptor.maxPushDescriptors; 1488 max_push_descriptors = push_descriptor.maxPushDescriptors;
1489 } 1489 }
1490
1491 has_null_descriptor = true;
1492
1490 return extensions; 1493 return extensions;
1491} 1494}
1492 1495
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 920a8f4e3..6042046e1 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -397,6 +397,10 @@ public:
397 return must_emulate_bgr565; 397 return must_emulate_bgr565;
398 } 398 }
399 399
/// Returns the has_null_descriptor flag (documented on the member as support for null
/// descriptors).
/// NOTE(review): LoadExtensions appears to set this flag to true unconditionally - confirm that
/// no feature query is intended.
400 bool HasNullDescriptor() const {
401 return has_null_descriptor;
402 }
403
400 u32 GetMaxVertexInputAttributes() const { 404 u32 GetMaxVertexInputAttributes() const {
401 return max_vertex_input_attributes; 405 return max_vertex_input_attributes;
402 } 406 }
@@ -511,6 +515,7 @@ private:
511 bool supports_d24_depth{}; ///< Supports D24 depth buffers. 515 bool supports_d24_depth{}; ///< Supports D24 depth buffers.
512 bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. 516 bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting.
513 bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. 517 bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
518 bool has_null_descriptor{}; ///< Has support for null descriptors.
514 u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline 519 u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline
515 u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline 520 u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline
516 521
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index 562039b56..b6d83e446 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -32,7 +32,7 @@
32namespace Vulkan { 32namespace Vulkan {
33namespace { 33namespace {
34[[nodiscard]] std::vector<const char*> RequiredExtensions( 34[[nodiscard]] std::vector<const char*> RequiredExtensions(
35 Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { 35 Core::Frontend::WindowSystemType window_type, bool enable_validation) {
36 std::vector<const char*> extensions; 36 std::vector<const char*> extensions;
37 extensions.reserve(6); 37 extensions.reserve(6);
38 switch (window_type) { 38 switch (window_type) {
@@ -65,7 +65,7 @@ namespace {
65 if (window_type != Core::Frontend::WindowSystemType::Headless) { 65 if (window_type != Core::Frontend::WindowSystemType::Headless) {
66 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); 66 extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
67 } 67 }
68 if (enable_debug_utils) { 68 if (enable_validation) {
69 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); 69 extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
70 } 70 }
71 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); 71 extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
@@ -95,9 +95,9 @@ namespace {
95 return true; 95 return true;
96} 96}
97 97
98[[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { 98[[nodiscard]] std::vector<const char*> Layers(bool enable_validation) {
99 std::vector<const char*> layers; 99 std::vector<const char*> layers;
100 if (enable_layers) { 100 if (enable_validation) {
101 layers.push_back("VK_LAYER_KHRONOS_validation"); 101 layers.push_back("VK_LAYER_KHRONOS_validation");
102 } 102 }
103 return layers; 103 return layers;
@@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const
125 125
126vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, 126vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
127 u32 required_version, Core::Frontend::WindowSystemType window_type, 127 u32 required_version, Core::Frontend::WindowSystemType window_type,
128 bool enable_debug_utils, bool enable_layers) { 128 bool enable_validation) {
129 if (!library.IsOpen()) { 129 if (!library.IsOpen()) {
130 LOG_ERROR(Render_Vulkan, "Vulkan library not available"); 130 LOG_ERROR(Render_Vulkan, "Vulkan library not available");
131 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); 131 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
@@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
138 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); 138 LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
139 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); 139 throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
140 } 140 }
141 const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); 141 const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation);
142 if (!AreExtensionsSupported(dld, extensions)) { 142 if (!AreExtensionsSupported(dld, extensions)) {
143 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); 143 throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
144 } 144 }
145 std::vector<const char*> layers = Layers(enable_layers); 145 std::vector<const char*> layers = Layers(enable_validation);
146 RemoveUnavailableLayers(dld, layers); 146 RemoveUnavailableLayers(dld, layers);
147 147
148 const u32 available_version = vk::AvailableVersion(dld); 148 const u32 available_version = vk::AvailableVersion(dld);
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h
index 40419d802..b59b92f83 100644
--- a/src/video_core/vulkan_common/vulkan_instance.h
+++ b/src/video_core/vulkan_common/vulkan_instance.h
@@ -17,8 +17,7 @@ namespace Vulkan {
17 * @param dld Dispatch table to load function pointers into 17 * @param dld Dispatch table to load function pointers into
18 * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) 18 * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1)
19 * @param window_type Window system type's enabled extension 19 * @param window_type Window system type's enabled extension
20 * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not 20 * @param enable_validation Whether to enable Vulkan validation layers and the VK_EXT_debug_utils extension or not
21 * @param enable_layers Whether to enable Vulkan validation layers or not
22 * 21 *
23 * @return A new Vulkan instance 22 * @return A new Vulkan instance
24 * @throw vk::Exception on failure 23 * @throw vk::Exception on failure
@@ -26,6 +25,6 @@ namespace Vulkan {
26[[nodiscard]] vk::Instance CreateInstance( 25[[nodiscard]] vk::Instance CreateInstance(
27 const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, 26 const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version,
28 Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, 27 Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless,
29 bool enable_debug_utils = false, bool enable_layers = false); 28 bool enable_validation = false);
30 29
31} // namespace Vulkan 30} // namespace Vulkan
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 861767c13..61be1fce1 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
152 X(vkCreateGraphicsPipelines); 152 X(vkCreateGraphicsPipelines);
153 X(vkCreateImage); 153 X(vkCreateImage);
154 X(vkCreateImageView); 154 X(vkCreateImageView);
155 X(vkCreatePipelineCache);
155 X(vkCreatePipelineLayout); 156 X(vkCreatePipelineLayout);
156 X(vkCreateQueryPool); 157 X(vkCreateQueryPool);
157 X(vkCreateRenderPass); 158 X(vkCreateRenderPass);
@@ -171,6 +172,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
171 X(vkDestroyImage); 172 X(vkDestroyImage);
172 X(vkDestroyImageView); 173 X(vkDestroyImageView);
173 X(vkDestroyPipeline); 174 X(vkDestroyPipeline);
175 X(vkDestroyPipelineCache);
174 X(vkDestroyPipelineLayout); 176 X(vkDestroyPipelineLayout);
175 X(vkDestroyQueryPool); 177 X(vkDestroyQueryPool);
176 X(vkDestroyRenderPass); 178 X(vkDestroyRenderPass);
@@ -188,6 +190,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
188 X(vkGetEventStatus); 190 X(vkGetEventStatus);
189 X(vkGetFenceStatus); 191 X(vkGetFenceStatus);
190 X(vkGetImageMemoryRequirements); 192 X(vkGetImageMemoryRequirements);
193 X(vkGetPipelineCacheData);
191 X(vkGetMemoryFdKHR); 194 X(vkGetMemoryFdKHR);
192#ifdef _WIN32 195#ifdef _WIN32
193 X(vkGetMemoryWin32HandleKHR); 196 X(vkGetMemoryWin32HandleKHR);
@@ -431,6 +434,10 @@ void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noex
431 dld.vkDestroyPipeline(device, handle, nullptr); 434 dld.vkDestroyPipeline(device, handle, nullptr);
432} 435}
433 436
437void Destroy(VkDevice device, VkPipelineCache handle, const DeviceDispatch& dld) noexcept {
438 dld.vkDestroyPipelineCache(device, handle, nullptr);
439}
440
434void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { 441void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept {
435 dld.vkDestroyPipelineLayout(device, handle, nullptr); 442 dld.vkDestroyPipelineLayout(device, handle, nullptr);
436} 443}
@@ -651,6 +658,10 @@ void ShaderModule::SetObjectNameEXT(const char* name) const {
651 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); 658 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name);
652} 659}
653 660
661void PipelineCache::SetObjectNameEXT(const char* name) const {
662 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_PIPELINE_CACHE, name);
663}
664
654void Semaphore::SetObjectNameEXT(const char* name) const { 665void Semaphore::SetObjectNameEXT(const char* name) const {
655 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); 666 SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name);
656} 667}
@@ -746,21 +757,29 @@ DescriptorSetLayout Device::CreateDescriptorSetLayout(
746 return DescriptorSetLayout(object, handle, *dld); 757 return DescriptorSetLayout(object, handle, *dld);
747} 758}
748 759
760PipelineCache Device::CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const {
761 VkPipelineCache cache;
762 Check(dld->vkCreatePipelineCache(handle, &ci, nullptr, &cache));
763 return PipelineCache(cache, handle, *dld);
764}
765
749PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { 766PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const {
750 VkPipelineLayout object; 767 VkPipelineLayout object;
751 Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); 768 Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object));
752 return PipelineLayout(object, handle, *dld); 769 return PipelineLayout(object, handle, *dld);
753} 770}
754 771
755Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { 772Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci,
773 VkPipelineCache cache) const {
756 VkPipeline object; 774 VkPipeline object;
757 Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); 775 Check(dld->vkCreateGraphicsPipelines(handle, cache, 1, &ci, nullptr, &object));
758 return Pipeline(object, handle, *dld); 776 return Pipeline(object, handle, *dld);
759} 777}
760 778
761Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { 779Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci,
780 VkPipelineCache cache) const {
762 VkPipeline object; 781 VkPipeline object;
763 Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); 782 Check(dld->vkCreateComputePipelines(handle, cache, 1, &ci, nullptr, &object));
764 return Pipeline(object, handle, *dld); 783 return Pipeline(object, handle, *dld);
765} 784}
766 785
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index accfad8c1..412779b51 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -270,6 +270,7 @@ struct DeviceDispatch : InstanceDispatch {
270 PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{}; 270 PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{};
271 PFN_vkCreateImage vkCreateImage{}; 271 PFN_vkCreateImage vkCreateImage{};
272 PFN_vkCreateImageView vkCreateImageView{}; 272 PFN_vkCreateImageView vkCreateImageView{};
273 PFN_vkCreatePipelineCache vkCreatePipelineCache{};
273 PFN_vkCreatePipelineLayout vkCreatePipelineLayout{}; 274 PFN_vkCreatePipelineLayout vkCreatePipelineLayout{};
274 PFN_vkCreateQueryPool vkCreateQueryPool{}; 275 PFN_vkCreateQueryPool vkCreateQueryPool{};
275 PFN_vkCreateRenderPass vkCreateRenderPass{}; 276 PFN_vkCreateRenderPass vkCreateRenderPass{};
@@ -289,6 +290,7 @@ struct DeviceDispatch : InstanceDispatch {
289 PFN_vkDestroyImage vkDestroyImage{}; 290 PFN_vkDestroyImage vkDestroyImage{};
290 PFN_vkDestroyImageView vkDestroyImageView{}; 291 PFN_vkDestroyImageView vkDestroyImageView{};
291 PFN_vkDestroyPipeline vkDestroyPipeline{}; 292 PFN_vkDestroyPipeline vkDestroyPipeline{};
293 PFN_vkDestroyPipelineCache vkDestroyPipelineCache{};
292 PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{}; 294 PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{};
293 PFN_vkDestroyQueryPool vkDestroyQueryPool{}; 295 PFN_vkDestroyQueryPool vkDestroyQueryPool{};
294 PFN_vkDestroyRenderPass vkDestroyRenderPass{}; 296 PFN_vkDestroyRenderPass vkDestroyRenderPass{};
@@ -306,6 +308,7 @@ struct DeviceDispatch : InstanceDispatch {
306 PFN_vkGetEventStatus vkGetEventStatus{}; 308 PFN_vkGetEventStatus vkGetEventStatus{};
307 PFN_vkGetFenceStatus vkGetFenceStatus{}; 309 PFN_vkGetFenceStatus vkGetFenceStatus{};
308 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; 310 PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{};
311 PFN_vkGetPipelineCacheData vkGetPipelineCacheData{};
309 PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; 312 PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{};
310#ifdef _WIN32 313#ifdef _WIN32
311 PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; 314 PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{};
@@ -351,6 +354,7 @@ void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
351void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; 354void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
352void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; 355void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept;
353void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; 356void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept;
357void Destroy(VkDevice, VkPipelineCache, const DeviceDispatch&) noexcept;
354void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; 358void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept;
355void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; 359void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept;
356void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; 360void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept;
@@ -773,6 +777,18 @@ public:
773 void SetObjectNameEXT(const char* name) const; 777 void SetObjectNameEXT(const char* name) const;
774}; 778};
775 779
780class PipelineCache : public Handle<VkPipelineCache, VkDevice, DeviceDispatch> {
781 using Handle<VkPipelineCache, VkDevice, DeviceDispatch>::Handle;
782
783public:
784 /// Set object name.
785 void SetObjectNameEXT(const char* name) const;
786
787 VkResult Read(size_t* size, void* data) const noexcept {
788 return dld->vkGetPipelineCacheData(owner, handle, size, data);
789 }
790};
791
776class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { 792class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
777 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; 793 using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
778 794
@@ -844,11 +860,15 @@ public:
844 860
845 DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; 861 DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const;
846 862
863 PipelineCache CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const;
864
847 PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; 865 PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const;
848 866
849 Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; 867 Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci,
868 VkPipelineCache cache = nullptr) const;
850 869
851 Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; 870 Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci,
871 VkPipelineCache cache = nullptr) const;
852 872
853 Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; 873 Sampler CreateSampler(const VkSamplerCreateInfo& ci) const;
854 874
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index e9425b5bd..0db62baa3 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -690,6 +690,7 @@ void Config::ReadRendererValues() {
690 qt_config->beginGroup(QStringLiteral("Renderer")); 690 qt_config->beginGroup(QStringLiteral("Renderer"));
691 691
692 ReadGlobalSetting(Settings::values.renderer_backend); 692 ReadGlobalSetting(Settings::values.renderer_backend);
693 ReadGlobalSetting(Settings::values.renderer_force_max_clock);
693 ReadGlobalSetting(Settings::values.vulkan_device); 694 ReadGlobalSetting(Settings::values.vulkan_device);
694 ReadGlobalSetting(Settings::values.fullscreen_mode); 695 ReadGlobalSetting(Settings::values.fullscreen_mode);
695 ReadGlobalSetting(Settings::values.aspect_ratio); 696 ReadGlobalSetting(Settings::values.aspect_ratio);
@@ -709,6 +710,7 @@ void Config::ReadRendererValues() {
709 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 710 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
710 ReadGlobalSetting(Settings::values.use_fast_gpu_time); 711 ReadGlobalSetting(Settings::values.use_fast_gpu_time);
711 ReadGlobalSetting(Settings::values.use_pessimistic_flushes); 712 ReadGlobalSetting(Settings::values.use_pessimistic_flushes);
713 ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
712 ReadGlobalSetting(Settings::values.bg_red); 714 ReadGlobalSetting(Settings::values.bg_red);
713 ReadGlobalSetting(Settings::values.bg_green); 715 ReadGlobalSetting(Settings::values.bg_green);
714 ReadGlobalSetting(Settings::values.bg_blue); 716 ReadGlobalSetting(Settings::values.bg_blue);
@@ -1305,6 +1307,9 @@ void Config::SaveRendererValues() {
1305 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), 1307 static_cast<u32>(Settings::values.renderer_backend.GetValue(global)),
1306 static_cast<u32>(Settings::values.renderer_backend.GetDefault()), 1308 static_cast<u32>(Settings::values.renderer_backend.GetDefault()),
1307 Settings::values.renderer_backend.UsingGlobal()); 1309 Settings::values.renderer_backend.UsingGlobal());
1310 WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()),
1311 static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)),
1312 static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault()));
1308 WriteGlobalSetting(Settings::values.vulkan_device); 1313 WriteGlobalSetting(Settings::values.vulkan_device);
1309 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), 1314 WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()),
1310 static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), 1315 static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)),
@@ -1348,6 +1353,7 @@ void Config::SaveRendererValues() {
1348 WriteGlobalSetting(Settings::values.use_asynchronous_shaders); 1353 WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
1349 WriteGlobalSetting(Settings::values.use_fast_gpu_time); 1354 WriteGlobalSetting(Settings::values.use_fast_gpu_time);
1350 WriteGlobalSetting(Settings::values.use_pessimistic_flushes); 1355 WriteGlobalSetting(Settings::values.use_pessimistic_flushes);
1356 WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache);
1351 WriteGlobalSetting(Settings::values.bg_red); 1357 WriteGlobalSetting(Settings::values.bg_red);
1352 WriteGlobalSetting(Settings::values.bg_green); 1358 WriteGlobalSetting(Settings::values.bg_green);
1353 WriteGlobalSetting(Settings::values.bg_blue); 1359 WriteGlobalSetting(Settings::values.bg_blue);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 01f074699..fdf8485ce 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -25,10 +25,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
25 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 25 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
26 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 26 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
27 27
28 ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
28 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); 29 ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
29 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); 30 ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
30 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); 31 ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
31 ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue()); 32 ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue());
33 ui->use_vulkan_driver_pipeline_cache->setChecked(
34 Settings::values.use_vulkan_driver_pipeline_cache.GetValue());
32 35
33 if (Settings::IsConfiguringGlobal()) { 36 if (Settings::IsConfiguringGlobal()) {
34 ui->gpu_accuracy->setCurrentIndex( 37 ui->gpu_accuracy->setCurrentIndex(
@@ -37,6 +40,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
37 Settings::values.max_anisotropy.GetValue()); 40 Settings::values.max_anisotropy.GetValue());
38 } else { 41 } else {
39 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); 42 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
43 ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock,
44 &Settings::values.renderer_force_max_clock);
40 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, 45 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
41 &Settings::values.max_anisotropy); 46 &Settings::values.max_anisotropy);
42 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, 47 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
@@ -48,6 +53,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
48 53
49void ConfigureGraphicsAdvanced::ApplyConfiguration() { 54void ConfigureGraphicsAdvanced::ApplyConfiguration() {
50 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); 55 ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy);
56 ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock,
57 ui->renderer_force_max_clock,
58 renderer_force_max_clock);
51 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, 59 ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
52 ui->anisotropic_filtering_combobox); 60 ui->anisotropic_filtering_combobox);
53 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); 61 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
@@ -58,6 +66,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
58 ui->use_fast_gpu_time, use_fast_gpu_time); 66 ui->use_fast_gpu_time, use_fast_gpu_time);
59 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes, 67 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes,
60 ui->use_pessimistic_flushes, use_pessimistic_flushes); 68 ui->use_pessimistic_flushes, use_pessimistic_flushes);
69 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache,
70 ui->use_vulkan_driver_pipeline_cache,
71 use_vulkan_driver_pipeline_cache);
61} 72}
62 73
63void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { 74void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) {
@@ -76,18 +87,25 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
76 // Disable if not global (only happens during game) 87 // Disable if not global (only happens during game)
77 if (Settings::IsConfiguringGlobal()) { 88 if (Settings::IsConfiguringGlobal()) {
78 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); 89 ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal());
90 ui->renderer_force_max_clock->setEnabled(
91 Settings::values.renderer_force_max_clock.UsingGlobal());
79 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); 92 ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
80 ui->use_asynchronous_shaders->setEnabled( 93 ui->use_asynchronous_shaders->setEnabled(
81 Settings::values.use_asynchronous_shaders.UsingGlobal()); 94 Settings::values.use_asynchronous_shaders.UsingGlobal());
82 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 95 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
83 ui->use_pessimistic_flushes->setEnabled( 96 ui->use_pessimistic_flushes->setEnabled(
84 Settings::values.use_pessimistic_flushes.UsingGlobal()); 97 Settings::values.use_pessimistic_flushes.UsingGlobal());
98 ui->use_vulkan_driver_pipeline_cache->setEnabled(
99 Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal());
85 ui->anisotropic_filtering_combobox->setEnabled( 100 ui->anisotropic_filtering_combobox->setEnabled(
86 Settings::values.max_anisotropy.UsingGlobal()); 101 Settings::values.max_anisotropy.UsingGlobal());
87 102
88 return; 103 return;
89 } 104 }
90 105
106 ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock,
107 Settings::values.renderer_force_max_clock,
108 renderer_force_max_clock);
91 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); 109 ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
92 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, 110 ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
93 Settings::values.use_asynchronous_shaders, 111 Settings::values.use_asynchronous_shaders,
@@ -97,6 +115,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
97 ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes, 115 ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes,
98 Settings::values.use_pessimistic_flushes, 116 Settings::values.use_pessimistic_flushes,
99 use_pessimistic_flushes); 117 use_pessimistic_flushes);
118 ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache,
119 Settings::values.use_vulkan_driver_pipeline_cache,
120 use_vulkan_driver_pipeline_cache);
100 ConfigurationShared::SetColoredComboBox( 121 ConfigurationShared::SetColoredComboBox(
101 ui->gpu_accuracy, ui->label_gpu_accuracy, 122 ui->gpu_accuracy, ui->label_gpu_accuracy,
102 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); 123 static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index 12e816905..df557d585 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -36,10 +36,12 @@ private:
36 36
37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; 37 std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui;
38 38
39 ConfigurationShared::CheckState renderer_force_max_clock;
39 ConfigurationShared::CheckState use_vsync; 40 ConfigurationShared::CheckState use_vsync;
40 ConfigurationShared::CheckState use_asynchronous_shaders; 41 ConfigurationShared::CheckState use_asynchronous_shaders;
41 ConfigurationShared::CheckState use_fast_gpu_time; 42 ConfigurationShared::CheckState use_fast_gpu_time;
42 ConfigurationShared::CheckState use_pessimistic_flushes; 43 ConfigurationShared::CheckState use_pessimistic_flushes;
44 ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache;
43 45
44 const Core::System& system; 46 const Core::System& system;
45}; 47};
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 87a121471..061885e30 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -70,6 +70,16 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QCheckBox" name="renderer_force_max_clock">
74 <property name="toolTip">
75 <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string>
76 </property>
77 <property name="text">
78 <string>Force maximum clocks (Vulkan only)</string>
79 </property>
80 </widget>
81 </item>
82 <item>
73 <widget class="QCheckBox" name="use_vsync"> 83 <widget class="QCheckBox" name="use_vsync">
74 <property name="toolTip"> 84 <property name="toolTip">
75 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> 85 <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string>
@@ -110,6 +120,16 @@
110 </widget> 120 </widget>
111 </item> 121 </item>
112 <item> 122 <item>
123 <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache">
124 <property name="toolTip">
125 <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string>
126 </property>
127 <property name="text">
128 <string>Use Vulkan pipeline cache</string>
129 </property>
130 </widget>
131 </item>
132 <item>
113 <widget class="QWidget" name="af_layout" native="true"> 133 <widget class="QWidget" name="af_layout" native="true">
114 <layout class="QHBoxLayout" name="horizontalLayout_1"> 134 <layout class="QHBoxLayout" name="horizontalLayout_1">
115 <property name="leftMargin"> 135 <property name="leftMargin">
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp
index 524650144..c55f81c2f 100644
--- a/src/yuzu/main.cpp
+++ b/src/yuzu/main.cpp
@@ -2229,8 +2229,10 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ
2229 } 2229 }
2230 2230
2231 switch (target) { 2231 switch (target) {
2232 case GameListRemoveTarget::GlShaderCache:
2233 case GameListRemoveTarget::VkShaderCache: 2232 case GameListRemoveTarget::VkShaderCache:
2233 RemoveVulkanDriverPipelineCache(program_id);
2234 [[fallthrough]];
2235 case GameListRemoveTarget::GlShaderCache:
2234 RemoveTransferableShaderCache(program_id, target); 2236 RemoveTransferableShaderCache(program_id, target);
2235 break; 2237 break;
2236 case GameListRemoveTarget::AllShaderCache: 2238 case GameListRemoveTarget::AllShaderCache:
@@ -2271,6 +2273,22 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTa
2271 } 2273 }
2272} 2274}
2273 2275
2276void GMainWindow::RemoveVulkanDriverPipelineCache(u64 program_id) {
2277 static constexpr std::string_view target_file_name = "vulkan_pipelines.bin";
2278
2279 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
2280 const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id);
2281 const auto target_file = shader_cache_folder_path / target_file_name;
2282
2283 if (!Common::FS::Exists(target_file)) {
2284 return;
2285 }
2286 if (!Common::FS::RemoveFile(target_file)) {
2287 QMessageBox::warning(this, tr("Error Removing Vulkan Driver Pipeline Cache"),
2288 tr("Failed to remove the driver pipeline cache."));
2289 }
2290}
2291
2274void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { 2292void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) {
2275 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); 2293 const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
2276 const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); 2294 const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id);
diff --git a/src/yuzu/main.h b/src/yuzu/main.h
index db318485d..f25ce65a8 100644
--- a/src/yuzu/main.h
+++ b/src/yuzu/main.h
@@ -347,6 +347,7 @@ private:
347 void RemoveUpdateContent(u64 program_id, InstalledEntryType type); 347 void RemoveUpdateContent(u64 program_id, InstalledEntryType type);
348 void RemoveAddOnContent(u64 program_id, InstalledEntryType type); 348 void RemoveAddOnContent(u64 program_id, InstalledEntryType type);
349 void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); 349 void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target);
350 void RemoveVulkanDriverPipelineCache(u64 program_id);
350 void RemoveAllTransferableShaderCaches(u64 program_id); 351 void RemoveAllTransferableShaderCaches(u64 program_id);
351 void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); 352 void RemoveCustomConfiguration(u64 program_id, const std::string& game_path);
352 std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); 353 std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id);
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 1e45e57bc..527017282 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -296,6 +296,7 @@ void Config::ReadValues() {
296 296
297 // Renderer 297 // Renderer
298 ReadSetting("Renderer", Settings::values.renderer_backend); 298 ReadSetting("Renderer", Settings::values.renderer_backend);
299 ReadSetting("Renderer", Settings::values.renderer_force_max_clock);
299 ReadSetting("Renderer", Settings::values.renderer_debug); 300 ReadSetting("Renderer", Settings::values.renderer_debug);
300 ReadSetting("Renderer", Settings::values.renderer_shader_feedback); 301 ReadSetting("Renderer", Settings::values.renderer_shader_feedback);
301 ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); 302 ReadSetting("Renderer", Settings::values.enable_nsight_aftermath);
@@ -321,6 +322,7 @@ void Config::ReadValues() {
321 ReadSetting("Renderer", Settings::values.accelerate_astc); 322 ReadSetting("Renderer", Settings::values.accelerate_astc);
322 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 323 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
323 ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); 324 ReadSetting("Renderer", Settings::values.use_pessimistic_flushes);
325 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
324 326
325 ReadSetting("Renderer", Settings::values.bg_red); 327 ReadSetting("Renderer", Settings::values.bg_red);
326 ReadSetting("Renderer", Settings::values.bg_green); 328 ReadSetting("Renderer", Settings::values.bg_green);