diff options
44 files changed, 850 insertions, 176 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 149e621f9..1638b79f5 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp | |||
| @@ -185,6 +185,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 185 | // Renderer | 185 | // Renderer |
| 186 | values.fsr_sharpening_slider.SetGlobal(true); | 186 | values.fsr_sharpening_slider.SetGlobal(true); |
| 187 | values.renderer_backend.SetGlobal(true); | 187 | values.renderer_backend.SetGlobal(true); |
| 188 | values.renderer_force_max_clock.SetGlobal(true); | ||
| 188 | values.vulkan_device.SetGlobal(true); | 189 | values.vulkan_device.SetGlobal(true); |
| 189 | values.aspect_ratio.SetGlobal(true); | 190 | values.aspect_ratio.SetGlobal(true); |
| 190 | values.max_anisotropy.SetGlobal(true); | 191 | values.max_anisotropy.SetGlobal(true); |
| @@ -200,6 +201,7 @@ void RestoreGlobalState(bool is_powered_on) { | |||
| 200 | values.use_asynchronous_shaders.SetGlobal(true); | 201 | values.use_asynchronous_shaders.SetGlobal(true); |
| 201 | values.use_fast_gpu_time.SetGlobal(true); | 202 | values.use_fast_gpu_time.SetGlobal(true); |
| 202 | values.use_pessimistic_flushes.SetGlobal(true); | 203 | values.use_pessimistic_flushes.SetGlobal(true); |
| 204 | values.use_vulkan_driver_pipeline_cache.SetGlobal(true); | ||
| 203 | values.bg_red.SetGlobal(true); | 205 | values.bg_red.SetGlobal(true); |
| 204 | values.bg_green.SetGlobal(true); | 206 | values.bg_green.SetGlobal(true); |
| 205 | values.bg_blue.SetGlobal(true); | 207 | values.bg_blue.SetGlobal(true); |
diff --git a/src/common/settings.h b/src/common/settings.h index 5017951c5..9eb3711ca 100644 --- a/src/common/settings.h +++ b/src/common/settings.h | |||
| @@ -415,6 +415,7 @@ struct Values { | |||
| 415 | // Renderer | 415 | // Renderer |
| 416 | SwitchableSetting<RendererBackend, true> renderer_backend{ | 416 | SwitchableSetting<RendererBackend, true> renderer_backend{ |
| 417 | RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; | 417 | RendererBackend::Vulkan, RendererBackend::OpenGL, RendererBackend::Null, "backend"}; |
| 418 | SwitchableSetting<bool> renderer_force_max_clock{true, "force_max_clock"}; | ||
| 418 | Setting<bool> renderer_debug{false, "debug"}; | 419 | Setting<bool> renderer_debug{false, "debug"}; |
| 419 | Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; | 420 | Setting<bool> renderer_shader_feedback{false, "shader_feedback"}; |
| 420 | Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; | 421 | Setting<bool> enable_nsight_aftermath{false, "nsight_aftermath"}; |
| @@ -451,6 +452,8 @@ struct Values { | |||
| 451 | SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; | 452 | SwitchableSetting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"}; |
| 452 | SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; | 453 | SwitchableSetting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"}; |
| 453 | SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"}; | 454 | SwitchableSetting<bool> use_pessimistic_flushes{false, "use_pessimistic_flushes"}; |
| 455 | SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, | ||
| 456 | "use_vulkan_driver_pipeline_cache"}; | ||
| 454 | 457 | ||
| 455 | SwitchableSetting<u8> bg_red{0, "bg_red"}; | 458 | SwitchableSetting<u8> bg_red{0, "bg_red"}; |
| 456 | SwitchableSetting<u8> bg_green{0, "bg_green"}; | 459 | SwitchableSetting<u8> bg_green{0, "bg_green"}; |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 26be74df4..a1e41faff 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -436,7 +436,7 @@ struct Memory::Impl { | |||
| 436 | } | 436 | } |
| 437 | 437 | ||
| 438 | if (Settings::IsFastmemEnabled()) { | 438 | if (Settings::IsFastmemEnabled()) { |
| 439 | const bool is_read_enable = Settings::IsGPULevelHigh() || !cached; | 439 | const bool is_read_enable = !Settings::IsGPULevelExtreme() || !cached; |
| 440 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); | 440 | system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached); |
| 441 | } | 441 | } |
| 442 | 442 | ||
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index f7236afab..5cd0628f2 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp | |||
| @@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") { | |||
| 538 | int num = 0; | 538 | int num = 0; |
| 539 | buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); | 539 | buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); |
| 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); |
| 541 | REQUIRE(num == 0); | 541 | REQUIRE(num == 1); |
| 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); |
| 543 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | 543 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); |
| 544 | buffer.FlushCachedWrites(); | 544 | buffer.FlushCachedWrites(); |
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index aa271a377..f617665de 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -85,6 +85,7 @@ add_library(video_core STATIC | |||
| 85 | gpu.h | 85 | gpu.h |
| 86 | gpu_thread.cpp | 86 | gpu_thread.cpp |
| 87 | gpu_thread.h | 87 | gpu_thread.h |
| 88 | invalidation_accumulator.h | ||
| 88 | memory_manager.cpp | 89 | memory_manager.cpp |
| 89 | memory_manager.h | 90 | memory_manager.h |
| 90 | precompiled_headers.h | 91 | precompiled_headers.h |
| @@ -190,6 +191,8 @@ add_library(video_core STATIC | |||
| 190 | renderer_vulkan/vk_texture_cache.cpp | 191 | renderer_vulkan/vk_texture_cache.cpp |
| 191 | renderer_vulkan/vk_texture_cache.h | 192 | renderer_vulkan/vk_texture_cache.h |
| 192 | renderer_vulkan/vk_texture_cache_base.cpp | 193 | renderer_vulkan/vk_texture_cache_base.cpp |
| 194 | renderer_vulkan/vk_turbo_mode.cpp | ||
| 195 | renderer_vulkan/vk_turbo_mode.h | ||
| 193 | renderer_vulkan/vk_update_descriptor.cpp | 196 | renderer_vulkan/vk_update_descriptor.cpp |
| 194 | renderer_vulkan/vk_update_descriptor.h | 197 | renderer_vulkan/vk_update_descriptor.h |
| 195 | shader_cache.cpp | 198 | shader_cache.cpp |
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 92d77eef2..c47b7d866 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -430,7 +430,7 @@ private: | |||
| 430 | if (query_begin >= SizeBytes() || size < 0) { | 430 | if (query_begin >= SizeBytes() || size < 0) { |
| 431 | return; | 431 | return; |
| 432 | } | 432 | } |
| 433 | u64* const untracked_words = Array<Type::Untracked>(); | 433 | [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>(); |
| 434 | u64* const state_words = Array<type>(); | 434 | u64* const state_words = Array<type>(); |
| 435 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | 435 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); |
| 436 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | 436 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; |
| @@ -483,7 +483,7 @@ private: | |||
| 483 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); | 483 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); |
| 484 | } | 484 | } |
| 485 | // Exclude CPU modified pages when visiting GPU pages | 485 | // Exclude CPU modified pages when visiting GPU pages |
| 486 | const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | 486 | const u64 word = current_word; |
| 487 | u64 page = page_begin; | 487 | u64 page = page_begin; |
| 488 | page_begin = 0; | 488 | page_begin = 0; |
| 489 | 489 | ||
| @@ -531,7 +531,7 @@ private: | |||
| 531 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | 531 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { |
| 532 | static_assert(type != Type::Untracked); | 532 | static_assert(type != Type::Untracked); |
| 533 | 533 | ||
| 534 | const u64* const untracked_words = Array<Type::Untracked>(); | 534 | [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>(); |
| 535 | const u64* const state_words = Array<type>(); | 535 | const u64* const state_words = Array<type>(); |
| 536 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | 536 | const u64 num_query_words = size / BYTES_PER_WORD + 1; |
| 537 | const u64 word_begin = offset / BYTES_PER_WORD; | 537 | const u64 word_begin = offset / BYTES_PER_WORD; |
| @@ -539,8 +539,7 @@ private: | |||
| 539 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | 539 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); |
| 540 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | 540 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; |
| 541 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | 541 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { |
| 542 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | 542 | const u64 word = state_words[word_index]; |
| 543 | const u64 word = state_words[word_index] & ~off_word; | ||
| 544 | if (word == 0) { | 543 | if (word == 0) { |
| 545 | continue; | 544 | continue; |
| 546 | } | 545 | } |
| @@ -564,7 +563,7 @@ private: | |||
| 564 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | 563 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { |
| 565 | static_assert(type != Type::Untracked); | 564 | static_assert(type != Type::Untracked); |
| 566 | 565 | ||
| 567 | const u64* const untracked_words = Array<Type::Untracked>(); | 566 | [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>(); |
| 568 | const u64* const state_words = Array<type>(); | 567 | const u64* const state_words = Array<type>(); |
| 569 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | 568 | const u64 num_query_words = size / BYTES_PER_WORD + 1; |
| 570 | const u64 word_begin = offset / BYTES_PER_WORD; | 569 | const u64 word_begin = offset / BYTES_PER_WORD; |
| @@ -574,8 +573,7 @@ private: | |||
| 574 | u64 begin = std::numeric_limits<u64>::max(); | 573 | u64 begin = std::numeric_limits<u64>::max(); |
| 575 | u64 end = 0; | 574 | u64 end = 0; |
| 576 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | 575 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { |
| 577 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | 576 | const u64 word = state_words[word_index]; |
| 578 | const u64 word = state_words[word_index] & ~off_word; | ||
| 579 | if (word == 0) { | 577 | if (word == 0) { |
| 580 | continue; | 578 | continue; |
| 581 | } | 579 | } |
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index cea1dd8b0..7f5a0c29d 100644 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp | |||
| @@ -76,7 +76,7 @@ void State::ProcessData(std::span<const u8> read_buffer) { | |||
| 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, | 76 | regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, |
| 77 | x_elements, regs.line_count, regs.dest.BlockHeight(), | 77 | x_elements, regs.line_count, regs.dest.BlockHeight(), |
| 78 | regs.dest.BlockDepth(), regs.line_length_in); | 78 | regs.dest.BlockDepth(), regs.line_length_in); |
| 79 | memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | 79 | memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); |
| 80 | } | 80 | } |
| 81 | } | 81 | } |
| 82 | 82 | ||
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index e655e7254..a126c359c 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "video_core/engines/fermi_2d.h" | 7 | #include "video_core/engines/fermi_2d.h" |
| 8 | #include "video_core/engines/sw_blitter/blitter.h" | 8 | #include "video_core/engines/sw_blitter/blitter.h" |
| 9 | #include "video_core/memory_manager.h" | ||
| 9 | #include "video_core/rasterizer_interface.h" | 10 | #include "video_core/rasterizer_interface.h" |
| 10 | #include "video_core/surface.h" | 11 | #include "video_core/surface.h" |
| 11 | #include "video_core/textures/decoders.h" | 12 | #include "video_core/textures/decoders.h" |
| @@ -20,8 +21,8 @@ namespace Tegra::Engines { | |||
| 20 | 21 | ||
| 21 | using namespace Texture; | 22 | using namespace Texture; |
| 22 | 23 | ||
| 23 | Fermi2D::Fermi2D(MemoryManager& memory_manager_) { | 24 | Fermi2D::Fermi2D(MemoryManager& memory_manager_) : memory_manager{memory_manager_} { |
| 24 | sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager_); | 25 | sw_blitter = std::make_unique<Blitter::SoftwareBlitEngine>(memory_manager); |
| 25 | // Nvidia's OpenGL driver seems to assume these values | 26 | // Nvidia's OpenGL driver seems to assume these values |
| 26 | regs.src.depth = 1; | 27 | regs.src.depth = 1; |
| 27 | regs.dst.depth = 1; | 28 | regs.dst.depth = 1; |
| @@ -104,6 +105,7 @@ void Fermi2D::Blit() { | |||
| 104 | config.src_x0 = 0; | 105 | config.src_x0 = 0; |
| 105 | } | 106 | } |
| 106 | 107 | ||
| 108 | memory_manager.FlushCaching(); | ||
| 107 | if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { | 109 | if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) { |
| 108 | sw_blitter->Blit(src, regs.dst, config); | 110 | sw_blitter->Blit(src, regs.dst, config); |
| 109 | } | 111 | } |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 523fbdec2..705b323e1 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -305,6 +305,7 @@ public: | |||
| 305 | private: | 305 | private: |
| 306 | VideoCore::RasterizerInterface* rasterizer = nullptr; | 306 | VideoCore::RasterizerInterface* rasterizer = nullptr; |
| 307 | std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; | 307 | std::unique_ptr<Blitter::SoftwareBlitEngine> sw_blitter; |
| 308 | MemoryManager& memory_manager; | ||
| 308 | 309 | ||
| 309 | /// Performs the copy from the source surface to the destination surface as configured in the | 310 | /// Performs the copy from the source surface to the destination surface as configured in the |
| 310 | /// registers. | 311 | /// registers. |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index fbfd1ddd2..97f547789 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -485,11 +485,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 485 | } | 485 | } |
| 486 | 486 | ||
| 487 | void Maxwell3D::ProcessQueryGet() { | 487 | void Maxwell3D::ProcessQueryGet() { |
| 488 | // TODO(Subv): Support the other query units. | ||
| 489 | if (regs.report_semaphore.query.location != Regs::ReportSemaphore::Location::All) { | ||
| 490 | LOG_DEBUG(HW_GPU, "Locations other than ALL are unimplemented"); | ||
| 491 | } | ||
| 492 | |||
| 493 | switch (regs.report_semaphore.query.operation) { | 488 | switch (regs.report_semaphore.query.operation) { |
| 494 | case Regs::ReportSemaphore::Operation::Release: | 489 | case Regs::ReportSemaphore::Operation::Release: |
| 495 | if (regs.report_semaphore.query.short_query != 0) { | 490 | if (regs.report_semaphore.query.short_query != 0) { |
| @@ -649,7 +644,7 @@ void Maxwell3D::ProcessCBMultiData(const u32* start_base, u32 amount) { | |||
| 649 | 644 | ||
| 650 | const GPUVAddr address{buffer_address + regs.const_buffer.offset}; | 645 | const GPUVAddr address{buffer_address + regs.const_buffer.offset}; |
| 651 | const size_t copy_size = amount * sizeof(u32); | 646 | const size_t copy_size = amount * sizeof(u32); |
| 652 | memory_manager.WriteBlock(address, start_base, copy_size); | 647 | memory_manager.WriteBlockCached(address, start_base, copy_size); |
| 653 | 648 | ||
| 654 | // Increment the current buffer position. | 649 | // Increment the current buffer position. |
| 655 | regs.const_buffer.offset += static_cast<u32>(copy_size); | 650 | regs.const_buffer.offset += static_cast<u32>(copy_size); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 01f70ea9e..7762c7d96 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -69,7 +69,7 @@ void MaxwellDMA::Launch() { | |||
| 69 | if (launch.multi_line_enable) { | 69 | if (launch.multi_line_enable) { |
| 70 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 70 | const bool is_src_pitch = launch.src_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| 71 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; | 71 | const bool is_dst_pitch = launch.dst_memory_layout == LaunchDMA::MemoryLayout::PITCH; |
| 72 | 72 | memory_manager.FlushCaching(); | |
| 73 | if (!is_src_pitch && !is_dst_pitch) { | 73 | if (!is_src_pitch && !is_dst_pitch) { |
| 74 | // If both the source and the destination are in block layout, assert. | 74 | // If both the source and the destination are in block layout, assert. |
| 75 | CopyBlockLinearToBlockLinear(); | 75 | CopyBlockLinearToBlockLinear(); |
| @@ -104,6 +104,7 @@ void MaxwellDMA::Launch() { | |||
| 104 | reinterpret_cast<u8*>(tmp_buffer.data()), | 104 | reinterpret_cast<u8*>(tmp_buffer.data()), |
| 105 | regs.line_length_in * sizeof(u32)); | 105 | regs.line_length_in * sizeof(u32)); |
| 106 | } else { | 106 | } else { |
| 107 | memory_manager.FlushCaching(); | ||
| 107 | const auto convert_linear_2_blocklinear_addr = [](u64 address) { | 108 | const auto convert_linear_2_blocklinear_addr = [](u64 address) { |
| 108 | return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | | 109 | return (address & ~0x1f0ULL) | ((address & 0x40) >> 2) | ((address & 0x10) << 1) | |
| 109 | ((address & 0x180) >> 1) | ((address & 0x20) << 3); | 110 | ((address & 0x180) >> 1) | ((address & 0x20) << 3); |
| @@ -121,8 +122,8 @@ void MaxwellDMA::Launch() { | |||
| 121 | memory_manager.ReadBlockUnsafe( | 122 | memory_manager.ReadBlockUnsafe( |
| 122 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 123 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 123 | tmp_buffer.data(), tmp_buffer.size()); | 124 | tmp_buffer.data(), tmp_buffer.size()); |
| 124 | memory_manager.WriteBlock(regs.offset_out + offset, tmp_buffer.data(), | 125 | memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), |
| 125 | tmp_buffer.size()); | 126 | tmp_buffer.size()); |
| 126 | } | 127 | } |
| 127 | } else if (is_src_pitch && !is_dst_pitch) { | 128 | } else if (is_src_pitch && !is_dst_pitch) { |
| 128 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 129 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| @@ -132,7 +133,7 @@ void MaxwellDMA::Launch() { | |||
| 132 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 133 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 133 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), | 134 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), |
| 134 | tmp_buffer.size()); | 135 | tmp_buffer.size()); |
| 135 | memory_manager.WriteBlock( | 136 | memory_manager.WriteBlockCached( |
| 136 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), | 137 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), |
| 137 | tmp_buffer.data(), tmp_buffer.size()); | 138 | tmp_buffer.data(), tmp_buffer.size()); |
| 138 | } | 139 | } |
| @@ -141,8 +142,8 @@ void MaxwellDMA::Launch() { | |||
| 141 | std::vector<u8> tmp_buffer(regs.line_length_in); | 142 | std::vector<u8> tmp_buffer(regs.line_length_in); |
| 142 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), | 143 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), |
| 143 | regs.line_length_in); | 144 | regs.line_length_in); |
| 144 | memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), | 145 | memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), |
| 145 | regs.line_length_in); | 146 | regs.line_length_in); |
| 146 | } | 147 | } |
| 147 | } | 148 | } |
| 148 | } | 149 | } |
| @@ -204,7 +205,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 204 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 205 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 205 | regs.pitch_out); | 206 | regs.pitch_out); |
| 206 | 207 | ||
| 207 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 208 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 208 | } | 209 | } |
| 209 | 210 | ||
| 210 | void MaxwellDMA::CopyPitchToBlockLinear() { | 211 | void MaxwellDMA::CopyPitchToBlockLinear() { |
| @@ -256,7 +257,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | |||
| 256 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 257 | dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 257 | regs.pitch_in); | 258 | regs.pitch_in); |
| 258 | 259 | ||
| 259 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 260 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 260 | } | 261 | } |
| 261 | 262 | ||
| 262 | void MaxwellDMA::FastCopyBlockLinearToPitch() { | 263 | void MaxwellDMA::FastCopyBlockLinearToPitch() { |
| @@ -287,7 +288,7 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { | |||
| 287 | regs.src_params.block_size.height, regs.src_params.block_size.depth, | 288 | regs.src_params.block_size.height, regs.src_params.block_size.depth, |
| 288 | regs.pitch_out); | 289 | regs.pitch_out); |
| 289 | 290 | ||
| 290 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 291 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 291 | } | 292 | } |
| 292 | 293 | ||
| 293 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | 294 | void MaxwellDMA::CopyBlockLinearToBlockLinear() { |
| @@ -347,7 +348,7 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { | |||
| 347 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, | 348 | dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, |
| 348 | dst.block_size.height, dst.block_size.depth, pitch); | 349 | dst.block_size.height, dst.block_size.depth, pitch); |
| 349 | 350 | ||
| 350 | memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); | 351 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 351 | } | 352 | } |
| 352 | 353 | ||
| 353 | void MaxwellDMA::ReleaseSemaphore() { | 354 | void MaxwellDMA::ReleaseSemaphore() { |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e6dc24f22..f275b2aa9 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -47,6 +47,7 @@ set(SHADER_FILES | |||
| 47 | vulkan_present_scaleforce_fp16.frag | 47 | vulkan_present_scaleforce_fp16.frag |
| 48 | vulkan_present_scaleforce_fp32.frag | 48 | vulkan_present_scaleforce_fp32.frag |
| 49 | vulkan_quad_indexed.comp | 49 | vulkan_quad_indexed.comp |
| 50 | vulkan_turbo_mode.comp | ||
| 50 | vulkan_uint8.comp | 51 | vulkan_uint8.comp |
| 51 | ) | 52 | ) |
| 52 | 53 | ||
diff --git a/src/video_core/host_shaders/vulkan_turbo_mode.comp b/src/video_core/host_shaders/vulkan_turbo_mode.comp new file mode 100644 index 000000000..d651001d9 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_turbo_mode.comp | |||
| @@ -0,0 +1,29 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #version 460 core | ||
| 5 | |||
| 6 | layout (local_size_x = 16, local_size_y = 8, local_size_z = 1) in; | ||
| 7 | |||
| 8 | layout (binding = 0) buffer ThreadData { | ||
| 9 | uint data[]; | ||
| 10 | }; | ||
| 11 | |||
// Scrambles a 32-bit value with three xor-shift steps (left 13, right 17, left 5).
// An input of zero yields zero, since every step xors the value with a shifted copy of itself.
uint xorshift32(uint x) {
    uint state = x;
    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);
    return state;
}
| 18 | |||
// Flattens the 2D global invocation ID into a single row-major index.
// The stride of one row is the number of invocations along X for the whole dispatch
// (local size X times work-group count X); using the Y extents here would make
// distinct invocations collide whenever the X and Y extents differ.
uint getGlobalIndex() {
    uint row_stride = gl_WorkGroupSize.x * gl_NumWorkGroups.x;
    return gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * row_stride;
}
| 22 | |||
// Each invocation hashes its global index twice to pick two slots of `data`:
// it atomically reads the second slot (an atomicAdd of 0) and atomically adds
// that value plus one to the first slot.
// NOTE(review): presumably this is synthetic load for the "turbo mode" feature - confirm
// against the host code that dispatches this shader.
void main() {
    uint self_hash = xorshift32(getGlobalIndex());
    uint peer_hash = xorshift32(self_hash);
    uint slot_count = uint(data.length());

    // atomicAdd with 0 returns the current contents without modifying them.
    uint peer_value = atomicAdd(data[peer_hash % slot_count], 0);
    atomicAdd(data[self_hash % slot_count], peer_value + 1);
}
diff --git a/src/video_core/invalidation_accumulator.h b/src/video_core/invalidation_accumulator.h new file mode 100644 index 000000000..2c2aaf7bb --- /dev/null +++ b/src/video_core/invalidation_accumulator.h | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include <utility> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | |||
| 11 | namespace VideoCommon { | ||
| 12 | |||
| 13 | class InvalidationAccumulator { | ||
| 14 | public: | ||
| 15 | InvalidationAccumulator() = default; | ||
| 16 | ~InvalidationAccumulator() = default; | ||
| 17 | |||
| 18 | void Add(GPUVAddr address, size_t size) { | ||
| 19 | const auto reset_values = [&]() { | ||
| 20 | if (has_collected) { | ||
| 21 | buffer.emplace_back(start_address, accumulated_size); | ||
| 22 | } | ||
| 23 | start_address = address; | ||
| 24 | accumulated_size = size; | ||
| 25 | last_collection = start_address + size; | ||
| 26 | }; | ||
| 27 | if (address >= start_address && address + size <= last_collection) [[likely]] { | ||
| 28 | return; | ||
| 29 | } | ||
| 30 | size = ((address + size + atomicity_size_mask) & atomicity_mask) - address; | ||
| 31 | address = address & atomicity_mask; | ||
| 32 | if (!has_collected) [[unlikely]] { | ||
| 33 | reset_values(); | ||
| 34 | has_collected = true; | ||
| 35 | return; | ||
| 36 | } | ||
| 37 | if (address != last_collection) [[unlikely]] { | ||
| 38 | reset_values(); | ||
| 39 | return; | ||
| 40 | } | ||
| 41 | accumulated_size += size; | ||
| 42 | last_collection += size; | ||
| 43 | } | ||
| 44 | |||
| 45 | void Clear() { | ||
| 46 | buffer.clear(); | ||
| 47 | start_address = 0; | ||
| 48 | last_collection = 0; | ||
| 49 | has_collected = false; | ||
| 50 | } | ||
| 51 | |||
| 52 | bool AnyAccumulated() const { | ||
| 53 | return has_collected; | ||
| 54 | } | ||
| 55 | |||
| 56 | template <typename Func> | ||
| 57 | void Callback(Func&& func) { | ||
| 58 | if (!has_collected) { | ||
| 59 | return; | ||
| 60 | } | ||
| 61 | buffer.emplace_back(start_address, accumulated_size); | ||
| 62 | for (auto& [address, size] : buffer) { | ||
| 63 | func(address, size); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | |||
| 67 | private: | ||
| 68 | static constexpr size_t atomicity_bits = 5; | ||
| 69 | static constexpr size_t atomicity_size = 1ULL << atomicity_bits; | ||
| 70 | static constexpr size_t atomicity_size_mask = atomicity_size - 1; | ||
| 71 | static constexpr size_t atomicity_mask = ~atomicity_size_mask; | ||
| 72 | GPUVAddr start_address{}; | ||
| 73 | GPUVAddr last_collection{}; | ||
| 74 | size_t accumulated_size{}; | ||
| 75 | bool has_collected{}; | ||
| 76 | std::vector<std::pair<VAddr, size_t>> buffer; | ||
| 77 | }; | ||
| 78 | |||
| 79 | } // namespace VideoCommon | ||
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index a5476e795..6272a4652 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp | |||
| @@ -50,38 +50,6 @@ protected: | |||
| 50 | Maxwell3D& maxwell3d; | 50 | Maxwell3D& maxwell3d; |
| 51 | }; | 51 | }; |
| 52 | 52 | ||
| 53 | class HLE_DrawArrays final : public HLEMacroImpl { | ||
| 54 | public: | ||
| 55 | explicit HLE_DrawArrays(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 56 | |||
| 57 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 58 | maxwell3d.RefreshParameters(); | ||
| 59 | |||
| 60 | auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); | ||
| 61 | maxwell3d.draw_manager->DrawArray(topology, parameters[1], parameters[2], | ||
| 62 | maxwell3d.regs.global_base_instance_index, 1); | ||
| 63 | } | ||
| 64 | }; | ||
| 65 | |||
| 66 | class HLE_DrawIndexed final : public HLEMacroImpl { | ||
| 67 | public: | ||
| 68 | explicit HLE_DrawIndexed(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||
| 69 | |||
| 70 | void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||
| 71 | maxwell3d.RefreshParameters(); | ||
| 72 | maxwell3d.regs.index_buffer.start_addr_high = parameters[1]; | ||
| 73 | maxwell3d.regs.index_buffer.start_addr_low = parameters[2]; | ||
| 74 | maxwell3d.regs.index_buffer.format = | ||
| 75 | static_cast<Engines::Maxwell3D::Regs::IndexFormat>(parameters[3]); | ||
| 76 | maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||
| 77 | |||
| 78 | auto topology = static_cast<Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); | ||
| 79 | maxwell3d.draw_manager->DrawIndex(topology, 0, parameters[4], | ||
| 80 | maxwell3d.regs.global_base_vertex_index, | ||
| 81 | maxwell3d.regs.global_base_instance_index, 1); | ||
| 82 | } | ||
| 83 | }; | ||
| 84 | |||
| 85 | /* | 53 | /* |
| 86 | * @note: these macros have two versions, a normal and extended version, with the extended version | 54 | * @note: these macros have two versions, a normal and extended version, with the extended version |
| 87 | * also assigning the base vertex/instance. | 55 | * also assigning the base vertex/instance. |
| @@ -497,11 +465,6 @@ public: | |||
| 497 | } // Anonymous namespace | 465 | } // Anonymous namespace |
| 498 | 466 | ||
| 499 | HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | 467 | HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { |
| 500 | builders.emplace(0xDD6A7FA92A7D2674ULL, | ||
| 501 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | ||
| 502 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||
| 503 | return std::make_unique<HLE_DrawArrays>(maxwell3d__); | ||
| 504 | })); | ||
| 505 | builders.emplace(0x0D61FC9FAAC9FCADULL, | 468 | builders.emplace(0x0D61FC9FAAC9FCADULL, |
| 506 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | 469 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
| 507 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | 470 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
| @@ -512,11 +475,6 @@ HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { | |||
| 512 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | 475 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
| 513 | return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); | 476 | return std::make_unique<HLE_DrawArraysIndirect<true>>(maxwell3d__); |
| 514 | })); | 477 | })); |
| 515 | builders.emplace(0x2DB33AADB741839CULL, | ||
| 516 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | ||
| 517 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | ||
| 518 | return std::make_unique<HLE_DrawIndexed>(maxwell3d__); | ||
| 519 | })); | ||
| 520 | builders.emplace(0x771BB18C62444DA0ULL, | 478 | builders.emplace(0x771BB18C62444DA0ULL, |
| 521 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( | 479 | std::function<std::unique_ptr<CachedMacro>(Maxwell3D&)>( |
| 522 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { | 480 | [](Maxwell3D& maxwell3d__) -> std::unique_ptr<CachedMacro> { |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 3a5cdeb39..3bcae3503 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -6,11 +6,13 @@ | |||
| 6 | #include "common/alignment.h" | 6 | #include "common/alignment.h" |
| 7 | #include "common/assert.h" | 7 | #include "common/assert.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "common/settings.h" | ||
| 9 | #include "core/core.h" | 10 | #include "core/core.h" |
| 10 | #include "core/device_memory.h" | 11 | #include "core/device_memory.h" |
| 11 | #include "core/hle/kernel/k_page_table.h" | 12 | #include "core/hle/kernel/k_page_table.h" |
| 12 | #include "core/hle/kernel/k_process.h" | 13 | #include "core/hle/kernel/k_process.h" |
| 13 | #include "core/memory.h" | 14 | #include "core/memory.h" |
| 15 | #include "video_core/invalidation_accumulator.h" | ||
| 14 | #include "video_core/memory_manager.h" | 16 | #include "video_core/memory_manager.h" |
| 15 | #include "video_core/rasterizer_interface.h" | 17 | #include "video_core/rasterizer_interface.h" |
| 16 | #include "video_core/renderer_base.h" | 18 | #include "video_core/renderer_base.h" |
| @@ -26,7 +28,8 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 26 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, | 28 | entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38, |
| 27 | page_bits != big_page_bits ? page_bits : 0}, | 29 | page_bits != big_page_bits ? page_bits : 0}, |
| 28 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( | 30 | kind_map{PTEKind::INVALID}, unique_identifier{unique_identifier_generator.fetch_add( |
| 29 | 1, std::memory_order_acq_rel)} { | 31 | 1, std::memory_order_acq_rel)}, |
| 32 | accumulator{std::make_unique<VideoCommon::InvalidationAccumulator>()} { | ||
| 30 | address_space_size = 1ULL << address_space_bits; | 33 | address_space_size = 1ULL << address_space_bits; |
| 31 | page_size = 1ULL << page_bits; | 34 | page_size = 1ULL << page_bits; |
| 32 | page_mask = page_size - 1ULL; | 35 | page_mask = page_size - 1ULL; |
| @@ -43,6 +46,11 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 | |||
| 43 | big_page_table_cpu.resize(big_page_table_size); | 46 | big_page_table_cpu.resize(big_page_table_size); |
| 44 | big_page_continous.resize(big_page_table_size / continous_bits, 0); | 47 | big_page_continous.resize(big_page_table_size / continous_bits, 0); |
| 45 | entries.resize(page_table_size / 32, 0); | 48 | entries.resize(page_table_size / 32, 0); |
| 49 | if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) { | ||
| 50 | fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer(); | ||
| 51 | } else { | ||
| 52 | fastmem_arena = nullptr; | ||
| 53 | } | ||
| 46 | } | 54 | } |
| 47 | 55 | ||
| 48 | MemoryManager::~MemoryManager() = default; | 56 | MemoryManager::~MemoryManager() = default; |
| @@ -185,15 +193,12 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | |||
| 185 | if (size == 0) { | 193 | if (size == 0) { |
| 186 | return; | 194 | return; |
| 187 | } | 195 | } |
| 188 | const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); | 196 | GetSubmappedRangeImpl<false>(gpu_addr, size, page_stash); |
| 189 | |||
| 190 | for (const auto& [map_addr, map_size] : submapped_ranges) { | ||
| 191 | // Flush and invalidate through the GPU interface, to be asynchronous if possible. | ||
| 192 | const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map_addr); | ||
| 193 | ASSERT(cpu_addr); | ||
| 194 | 197 | ||
| 195 | rasterizer->UnmapMemory(*cpu_addr, map_size); | 198 | for (const auto& [map_addr, map_size] : page_stash) { |
| 199 | rasterizer->UnmapMemory(map_addr, map_size); | ||
| 196 | } | 200 | } |
| 201 | page_stash.clear(); | ||
| 197 | 202 | ||
| 198 | BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | 203 | BigPageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
| 199 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); | 204 | PageTableOp<EntryType::Free>(gpu_addr, 0, size, PTEKind::INVALID); |
| @@ -355,7 +360,7 @@ inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t si | |||
| 355 | } | 360 | } |
| 356 | } | 361 | } |
| 357 | 362 | ||
| 358 | template <bool is_safe> | 363 | template <bool is_safe, bool use_fastmem> |
| 359 | void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, | 364 | void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
| 360 | [[maybe_unused]] VideoCommon::CacheType which) const { | 365 | [[maybe_unused]] VideoCommon::CacheType which) const { |
| 361 | auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, | 366 | auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index, |
| @@ -369,8 +374,12 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | |||
| 369 | if constexpr (is_safe) { | 374 | if constexpr (is_safe) { |
| 370 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 375 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |
| 371 | } | 376 | } |
| 372 | u8* physical = memory.GetPointer(cpu_addr_base); | 377 | if constexpr (use_fastmem) { |
| 373 | std::memcpy(dest_buffer, physical, copy_amount); | 378 | std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); |
| 379 | } else { | ||
| 380 | u8* physical = memory.GetPointer(cpu_addr_base); | ||
| 381 | std::memcpy(dest_buffer, physical, copy_amount); | ||
| 382 | } | ||
| 374 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 383 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| 375 | }; | 384 | }; |
| 376 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | 385 | auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { |
| @@ -379,11 +388,15 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | |||
| 379 | if constexpr (is_safe) { | 388 | if constexpr (is_safe) { |
| 380 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); | 389 | rasterizer->FlushRegion(cpu_addr_base, copy_amount, which); |
| 381 | } | 390 | } |
| 382 | if (!IsBigPageContinous(page_index)) [[unlikely]] { | 391 | if constexpr (use_fastmem) { |
| 383 | memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); | 392 | std::memcpy(dest_buffer, &fastmem_arena[cpu_addr_base], copy_amount); |
| 384 | } else { | 393 | } else { |
| 385 | u8* physical = memory.GetPointer(cpu_addr_base); | 394 | if (!IsBigPageContinous(page_index)) [[unlikely]] { |
| 386 | std::memcpy(dest_buffer, physical, copy_amount); | 395 | memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount); |
| 396 | } else { | ||
| 397 | u8* physical = memory.GetPointer(cpu_addr_base); | ||
| 398 | std::memcpy(dest_buffer, physical, copy_amount); | ||
| 399 | } | ||
| 387 | } | 400 | } |
| 388 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; | 401 | dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount; |
| 389 | }; | 402 | }; |
| @@ -397,12 +410,20 @@ void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std: | |||
| 397 | 410 | ||
| 398 | void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, | 411 | void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
| 399 | VideoCommon::CacheType which) const { | 412 | VideoCommon::CacheType which) const { |
| 400 | ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size, which); | 413 | if (fastmem_arena) [[likely]] { |
| 414 | ReadBlockImpl<true, true>(gpu_src_addr, dest_buffer, size, which); | ||
| 415 | return; | ||
| 416 | } | ||
| 417 | ReadBlockImpl<true, false>(gpu_src_addr, dest_buffer, size, which); | ||
| 401 | } | 418 | } |
| 402 | 419 | ||
| 403 | void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, | 420 | void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, |
| 404 | const std::size_t size) const { | 421 | const std::size_t size) const { |
| 405 | ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); | 422 | if (fastmem_arena) [[likely]] { |
| 423 | ReadBlockImpl<false, true>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); | ||
| 424 | return; | ||
| 425 | } | ||
| 426 | ReadBlockImpl<false, false>(gpu_src_addr, dest_buffer, size, VideoCommon::CacheType::None); | ||
| 406 | } | 427 | } |
| 407 | 428 | ||
| 408 | template <bool is_safe> | 429 | template <bool is_safe> |
| @@ -454,6 +475,12 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buf | |||
| 454 | WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); | 475 | WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); |
| 455 | } | 476 | } |
| 456 | 477 | ||
| 478 | void MemoryManager::WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, | ||
| 479 | std::size_t size) { | ||
| 480 | WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size, VideoCommon::CacheType::None); | ||
| 481 | accumulator->Add(gpu_dest_addr, size); | ||
| 482 | } | ||
| 483 | |||
| 457 | void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, | 484 | void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size, |
| 458 | VideoCommon::CacheType which) const { | 485 | VideoCommon::CacheType which) const { |
| 459 | auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, | 486 | auto do_nothing = [&]([[maybe_unused]] std::size_t page_index, |
| @@ -663,7 +690,17 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons | |||
| 663 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | 690 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( |
| 664 | GPUVAddr gpu_addr, std::size_t size) const { | 691 | GPUVAddr gpu_addr, std::size_t size) const { |
| 665 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; | 692 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; |
| 666 | std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{}; | 693 | GetSubmappedRangeImpl<true>(gpu_addr, size, result); |
| 694 | return result; | ||
| 695 | } | ||
| 696 | |||
| 697 | template <bool is_gpu_address> | ||
| 698 | void MemoryManager::GetSubmappedRangeImpl( | ||
| 699 | GPUVAddr gpu_addr, std::size_t size, | ||
| 700 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | ||
| 701 | result) const { | ||
| 702 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | ||
| 703 | last_segment{}; | ||
| 667 | std::optional<VAddr> old_page_addr{}; | 704 | std::optional<VAddr> old_page_addr{}; |
| 668 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, | 705 | const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index, |
| 669 | [[maybe_unused]] std::size_t offset, | 706 | [[maybe_unused]] std::size_t offset, |
| @@ -685,8 +722,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 685 | } | 722 | } |
| 686 | old_page_addr = {cpu_addr_base + copy_amount}; | 723 | old_page_addr = {cpu_addr_base + copy_amount}; |
| 687 | if (!last_segment) { | 724 | if (!last_segment) { |
| 688 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; | 725 | if constexpr (is_gpu_address) { |
| 689 | last_segment = {new_base_addr, copy_amount}; | 726 | const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset; |
| 727 | last_segment = {new_base_addr, copy_amount}; | ||
| 728 | } else { | ||
| 729 | last_segment = {cpu_addr_base, copy_amount}; | ||
| 730 | } | ||
| 690 | } else { | 731 | } else { |
| 691 | last_segment->second += copy_amount; | 732 | last_segment->second += copy_amount; |
| 692 | } | 733 | } |
| @@ -703,8 +744,12 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 703 | } | 744 | } |
| 704 | old_page_addr = {cpu_addr_base + copy_amount}; | 745 | old_page_addr = {cpu_addr_base + copy_amount}; |
| 705 | if (!last_segment) { | 746 | if (!last_segment) { |
| 706 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; | 747 | if constexpr (is_gpu_address) { |
| 707 | last_segment = {new_base_addr, copy_amount}; | 748 | const GPUVAddr new_base_addr = (page_index << page_bits) + offset; |
| 749 | last_segment = {new_base_addr, copy_amount}; | ||
| 750 | } else { | ||
| 751 | last_segment = {cpu_addr_base, copy_amount}; | ||
| 752 | } | ||
| 708 | } else { | 753 | } else { |
| 709 | last_segment->second += copy_amount; | 754 | last_segment->second += copy_amount; |
| 710 | } | 755 | } |
| @@ -715,7 +760,18 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 715 | }; | 760 | }; |
| 716 | MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); | 761 | MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages); |
| 717 | split(0, 0, 0); | 762 | split(0, 0, 0); |
| 718 | return result; | 763 | } |
| 764 | |||
| 765 | void MemoryManager::FlushCaching() { | ||
| 766 | if (!accumulator->AnyAccumulated()) { | ||
| 767 | return; | ||
| 768 | } | ||
| 769 | accumulator->Callback([this](GPUVAddr addr, size_t size) { | ||
| 770 | GetSubmappedRangeImpl<false>(addr, size, page_stash); | ||
| 771 | }); | ||
| 772 | rasterizer->InnerInvalidation(page_stash); | ||
| 773 | page_stash.clear(); | ||
| 774 | accumulator->Clear(); | ||
| 719 | } | 775 | } |
| 720 | 776 | ||
| 721 | } // namespace Tegra | 777 | } // namespace Tegra |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 828e13439..2936364f0 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -19,6 +19,10 @@ namespace VideoCore { | |||
| 19 | class RasterizerInterface; | 19 | class RasterizerInterface; |
| 20 | } | 20 | } |
| 21 | 21 | ||
| 22 | namespace VideoCommon { | ||
| 23 | class InvalidationAccumulator; | ||
| 24 | } | ||
| 25 | |||
| 22 | namespace Core { | 26 | namespace Core { |
| 23 | class DeviceMemory; | 27 | class DeviceMemory; |
| 24 | namespace Memory { | 28 | namespace Memory { |
| @@ -80,6 +84,7 @@ public: | |||
| 80 | */ | 84 | */ |
| 81 | void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; | 85 | void ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const; |
| 82 | void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); | 86 | void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); |
| 87 | void WriteBlockCached(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); | ||
| 83 | 88 | ||
| 84 | /** | 89 | /** |
| 85 | * Checks if a gpu region can be simply read with a pointer. | 90 | * Checks if a gpu region can be simply read with a pointer. |
| @@ -129,12 +134,14 @@ public: | |||
| 129 | size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, | 134 | size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, |
| 130 | size_t max_size = std::numeric_limits<size_t>::max()) const; | 135 | size_t max_size = std::numeric_limits<size_t>::max()) const; |
| 131 | 136 | ||
| 137 | void FlushCaching(); | ||
| 138 | |||
| 132 | private: | 139 | private: |
| 133 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | 140 | template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> |
| 134 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, | 141 | inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, |
| 135 | FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; | 142 | FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const; |
| 136 | 143 | ||
| 137 | template <bool is_safe> | 144 | template <bool is_safe, bool use_fastmem> |
| 138 | void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, | 145 | void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size, |
| 139 | VideoCommon::CacheType which) const; | 146 | VideoCommon::CacheType which) const; |
| 140 | 147 | ||
| @@ -154,6 +161,12 @@ private: | |||
| 154 | inline bool IsBigPageContinous(size_t big_page_index) const; | 161 | inline bool IsBigPageContinous(size_t big_page_index) const; |
| 155 | inline void SetBigPageContinous(size_t big_page_index, bool value); | 162 | inline void SetBigPageContinous(size_t big_page_index, bool value); |
| 156 | 163 | ||
| 164 | template <bool is_gpu_address> | ||
| 165 | void GetSubmappedRangeImpl( | ||
| 166 | GPUVAddr gpu_addr, std::size_t size, | ||
| 167 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | ||
| 168 | result) const; | ||
| 169 | |||
| 157 | Core::System& system; | 170 | Core::System& system; |
| 158 | Core::Memory::Memory& memory; | 171 | Core::Memory::Memory& memory; |
| 159 | Core::DeviceMemory& device_memory; | 172 | Core::DeviceMemory& device_memory; |
| @@ -201,10 +214,13 @@ private: | |||
| 201 | Common::VirtualBuffer<u32> big_page_table_cpu; | 214 | Common::VirtualBuffer<u32> big_page_table_cpu; |
| 202 | 215 | ||
| 203 | std::vector<u64> big_page_continous; | 216 | std::vector<u64> big_page_continous; |
| 217 | std::vector<std::pair<VAddr, std::size_t>> page_stash{}; | ||
| 218 | u8* fastmem_arena{}; | ||
| 204 | 219 | ||
| 205 | constexpr static size_t continous_bits = 64; | 220 | constexpr static size_t continous_bits = 64; |
| 206 | 221 | ||
| 207 | const size_t unique_identifier; | 222 | const size_t unique_identifier; |
| 223 | std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; | ||
| 208 | 224 | ||
| 209 | static std::atomic<size_t> unique_identifier_generator; | 225 | static std::atomic<size_t> unique_identifier_generator; |
| 210 | }; | 226 | }; |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index f44c7df50..1735b6164 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <functional> | 6 | #include <functional> |
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | #include <utility> | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "common/polyfill_thread.h" | 11 | #include "common/polyfill_thread.h" |
| 11 | #include "video_core/cache_types.h" | 12 | #include "video_core/cache_types.h" |
| @@ -95,6 +96,12 @@ public: | |||
| 95 | virtual void InvalidateRegion(VAddr addr, u64 size, | 96 | virtual void InvalidateRegion(VAddr addr, u64 size, |
| 96 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; | 97 | VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0; |
| 97 | 98 | ||
| 99 | virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||
| 100 | for (const auto& [cpu_addr, size] : sequences) { | ||
| 101 | InvalidateRegion(cpu_addr, size); | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 98 | /// Notify rasterizer that any caches of the specified region are desync with guest | 105 | /// Notify rasterizer that any caches of the specified region are desync with guest |
| 99 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | 106 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; |
| 100 | 107 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index f502a7d09..1578cb206 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -78,6 +78,8 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext | |||
| 78 | return separated_extensions; | 78 | return separated_extensions; |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | } // Anonymous namespace | ||
| 82 | |||
| 81 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | 83 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, |
| 82 | VkSurfaceKHR surface) { | 84 | VkSurfaceKHR surface) { |
| 83 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | 85 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); |
| @@ -89,7 +91,6 @@ Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dl | |||
| 89 | const vk::PhysicalDevice physical_device(devices[device_index], dld); | 91 | const vk::PhysicalDevice physical_device(devices[device_index], dld); |
| 90 | return Device(*instance, physical_device, surface, dld); | 92 | return Device(*instance, physical_device, surface, dld); |
| 91 | } | 93 | } |
| 92 | } // Anonymous namespace | ||
| 93 | 94 | ||
| 94 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | 95 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, |
| 95 | Core::Frontend::EmuWindow& emu_window, | 96 | Core::Frontend::EmuWindow& emu_window, |
| @@ -98,7 +99,7 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 98 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), | 99 | : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_), |
| 99 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), | 100 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()), |
| 100 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 101 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 101 | true, Settings::values.renderer_debug.GetValue())), | 102 | Settings::values.renderer_debug.GetValue())), |
| 102 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | 103 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), |
| 103 | surface(CreateSurface(instance, render_window)), | 104 | surface(CreateSurface(instance, render_window)), |
| 104 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), | 105 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), |
| @@ -109,6 +110,9 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 109 | screen_info), | 110 | screen_info), |
| 110 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, | 111 | rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator, |
| 111 | state_tracker, scheduler) { | 112 | state_tracker, scheduler) { |
| 113 | if (Settings::values.renderer_force_max_clock.GetValue()) { | ||
| 114 | turbo_mode.emplace(instance, dld); | ||
| 115 | } | ||
| 112 | Report(); | 116 | Report(); |
| 113 | } catch (const vk::Exception& exception) { | 117 | } catch (const vk::Exception& exception) { |
| 114 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | 118 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index e7bfecb20..009e75e0d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 13 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 14 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | 14 | #include "video_core/renderer_vulkan/vk_state_tracker.h" |
| 15 | #include "video_core/renderer_vulkan/vk_swapchain.h" | 15 | #include "video_core/renderer_vulkan/vk_swapchain.h" |
| 16 | #include "video_core/renderer_vulkan/vk_turbo_mode.h" | ||
| 16 | #include "video_core/vulkan_common/vulkan_device.h" | 17 | #include "video_core/vulkan_common/vulkan_device.h" |
| 17 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 18 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -31,6 +32,9 @@ class GPU; | |||
| 31 | 32 | ||
| 32 | namespace Vulkan { | 33 | namespace Vulkan { |
| 33 | 34 | ||
| 35 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | ||
| 36 | VkSurfaceKHR surface); | ||
| 37 | |||
| 34 | class RendererVulkan final : public VideoCore::RendererBase { | 38 | class RendererVulkan final : public VideoCore::RendererBase { |
| 35 | public: | 39 | public: |
| 36 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | 40 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, |
| @@ -74,6 +78,7 @@ private: | |||
| 74 | Swapchain swapchain; | 78 | Swapchain swapchain; |
| 75 | BlitScreen blit_screen; | 79 | BlitScreen blit_screen; |
| 76 | RasterizerVulkan rasterizer; | 80 | RasterizerVulkan rasterizer; |
| 81 | std::optional<TurboMode> turbo_mode; | ||
| 77 | }; | 82 | }; |
| 78 | 83 | ||
| 79 | } // namespace Vulkan | 84 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 487d8b416..b0153a502 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -336,6 +336,9 @@ void BufferCacheRuntime::Finish() { | |||
| 336 | 336 | ||
| 337 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | 337 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 338 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { | 338 | std::span<const VideoCommon::BufferCopy> copies, bool barrier) { |
| 339 | if (dst_buffer == VK_NULL_HANDLE || src_buffer == VK_NULL_HANDLE) { | ||
| 340 | return; | ||
| 341 | } | ||
| 339 | static constexpr VkMemoryBarrier READ_BARRIER{ | 342 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 340 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 343 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 341 | .pNext = nullptr, | 344 | .pNext = nullptr, |
| @@ -394,6 +397,9 @@ void BufferCacheRuntime::PostCopyBarrier() { | |||
| 394 | } | 397 | } |
| 395 | 398 | ||
| 396 | void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) { | 399 | void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t size, u32 value) { |
| 400 | if (dest_buffer == VK_NULL_HANDLE) { | ||
| 401 | return; | ||
| 402 | } | ||
| 397 | static constexpr VkMemoryBarrier READ_BARRIER{ | 403 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 398 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | 404 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 399 | .pNext = nullptr, | 405 | .pNext = nullptr, |
| @@ -473,6 +479,11 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset | |||
| 473 | cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); | 479 | cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); |
| 474 | }); | 480 | }); |
| 475 | } else { | 481 | } else { |
| 482 | if (!device.HasNullDescriptor() && buffer == VK_NULL_HANDLE) { | ||
| 483 | ReserveNullBuffer(); | ||
| 484 | buffer = *null_buffer; | ||
| 485 | offset = 0; | ||
| 486 | } | ||
| 476 | scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { | 487 | scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { |
| 477 | cmdbuf.BindVertexBuffer(index, buffer, offset); | 488 | cmdbuf.BindVertexBuffer(index, buffer, offset); |
| 478 | }); | 489 | }); |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 04a3a861e..2a0f0dbf0 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp | |||
| @@ -24,13 +24,15 @@ using Shader::ImageBufferDescriptor; | |||
| 24 | using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; | 24 | using Shader::Backend::SPIRV::RESCALING_LAYOUT_WORDS_OFFSET; |
| 25 | using Tegra::Texture::TexturePair; | 25 | using Tegra::Texture::TexturePair; |
| 26 | 26 | ||
| 27 | ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, | 27 | ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipeline_cache_, |
| 28 | DescriptorPool& descriptor_pool, | ||
| 28 | UpdateDescriptorQueue& update_descriptor_queue_, | 29 | UpdateDescriptorQueue& update_descriptor_queue_, |
| 29 | Common::ThreadWorker* thread_worker, | 30 | Common::ThreadWorker* thread_worker, |
| 30 | PipelineStatistics* pipeline_statistics, | 31 | PipelineStatistics* pipeline_statistics, |
| 31 | VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, | 32 | VideoCore::ShaderNotify* shader_notify, const Shader::Info& info_, |
| 32 | vk::ShaderModule spv_module_) | 33 | vk::ShaderModule spv_module_) |
| 33 | : device{device_}, update_descriptor_queue{update_descriptor_queue_}, info{info_}, | 34 | : device{device_}, pipeline_cache(pipeline_cache_), |
| 35 | update_descriptor_queue{update_descriptor_queue_}, info{info_}, | ||
| 34 | spv_module(std::move(spv_module_)) { | 36 | spv_module(std::move(spv_module_)) { |
| 35 | if (shader_notify) { | 37 | if (shader_notify) { |
| 36 | shader_notify->MarkShaderBuilding(); | 38 | shader_notify->MarkShaderBuilding(); |
| @@ -56,23 +58,27 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript | |||
| 56 | if (device.IsKhrPipelineExecutablePropertiesEnabled()) { | 58 | if (device.IsKhrPipelineExecutablePropertiesEnabled()) { |
| 57 | flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; | 59 | flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; |
| 58 | } | 60 | } |
| 59 | pipeline = device.GetLogical().CreateComputePipeline({ | 61 | pipeline = device.GetLogical().CreateComputePipeline( |
| 60 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | 62 | { |
| 61 | .pNext = nullptr, | 63 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
| 62 | .flags = flags, | 64 | .pNext = nullptr, |
| 63 | .stage{ | 65 | .flags = flags, |
| 64 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | 66 | .stage{ |
| 65 | .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, | 67 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, |
| 66 | .flags = 0, | 68 | .pNext = |
| 67 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | 69 | device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, |
| 68 | .module = *spv_module, | 70 | .flags = 0, |
| 69 | .pName = "main", | 71 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, |
| 70 | .pSpecializationInfo = nullptr, | 72 | .module = *spv_module, |
| 73 | .pName = "main", | ||
| 74 | .pSpecializationInfo = nullptr, | ||
| 75 | }, | ||
| 76 | .layout = *pipeline_layout, | ||
| 77 | .basePipelineHandle = 0, | ||
| 78 | .basePipelineIndex = 0, | ||
| 71 | }, | 79 | }, |
| 72 | .layout = *pipeline_layout, | 80 | *pipeline_cache); |
| 73 | .basePipelineHandle = 0, | 81 | |
| 74 | .basePipelineIndex = 0, | ||
| 75 | }); | ||
| 76 | if (pipeline_statistics) { | 82 | if (pipeline_statistics) { |
| 77 | pipeline_statistics->Collect(*pipeline); | 83 | pipeline_statistics->Collect(*pipeline); |
| 78 | } | 84 | } |
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index d70837fc5..78d77027f 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h | |||
| @@ -28,7 +28,8 @@ class Scheduler; | |||
| 28 | 28 | ||
| 29 | class ComputePipeline { | 29 | class ComputePipeline { |
| 30 | public: | 30 | public: |
| 31 | explicit ComputePipeline(const Device& device, DescriptorPool& descriptor_pool, | 31 | explicit ComputePipeline(const Device& device, vk::PipelineCache& pipeline_cache, |
| 32 | DescriptorPool& descriptor_pool, | ||
| 32 | UpdateDescriptorQueue& update_descriptor_queue, | 33 | UpdateDescriptorQueue& update_descriptor_queue, |
| 33 | Common::ThreadWorker* thread_worker, | 34 | Common::ThreadWorker* thread_worker, |
| 34 | PipelineStatistics* pipeline_statistics, | 35 | PipelineStatistics* pipeline_statistics, |
| @@ -46,6 +47,7 @@ public: | |||
| 46 | 47 | ||
| 47 | private: | 48 | private: |
| 48 | const Device& device; | 49 | const Device& device; |
| 50 | vk::PipelineCache& pipeline_cache; | ||
| 49 | UpdateDescriptorQueue& update_descriptor_queue; | 51 | UpdateDescriptorQueue& update_descriptor_queue; |
| 50 | Shader::Info info; | 52 | Shader::Info info; |
| 51 | 53 | ||
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 734c379b9..f91bb5a1d 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -234,13 +234,14 @@ ConfigureFuncPtr ConfigureFunc(const std::array<vk::ShaderModule, NUM_STAGES>& m | |||
| 234 | 234 | ||
| 235 | GraphicsPipeline::GraphicsPipeline( | 235 | GraphicsPipeline::GraphicsPipeline( |
| 236 | Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, | 236 | Scheduler& scheduler_, BufferCache& buffer_cache_, TextureCache& texture_cache_, |
| 237 | VideoCore::ShaderNotify* shader_notify, const Device& device_, DescriptorPool& descriptor_pool, | 237 | vk::PipelineCache& pipeline_cache_, VideoCore::ShaderNotify* shader_notify, |
| 238 | const Device& device_, DescriptorPool& descriptor_pool, | ||
| 238 | UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, | 239 | UpdateDescriptorQueue& update_descriptor_queue_, Common::ThreadWorker* worker_thread, |
| 239 | PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, | 240 | PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, |
| 240 | const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, | 241 | const GraphicsPipelineCacheKey& key_, std::array<vk::ShaderModule, NUM_STAGES> stages, |
| 241 | const std::array<const Shader::Info*, NUM_STAGES>& infos) | 242 | const std::array<const Shader::Info*, NUM_STAGES>& infos) |
| 242 | : key{key_}, device{device_}, texture_cache{texture_cache_}, | 243 | : key{key_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, |
| 243 | buffer_cache{buffer_cache_}, scheduler{scheduler_}, | 244 | pipeline_cache(pipeline_cache_), scheduler{scheduler_}, |
| 244 | update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { | 245 | update_descriptor_queue{update_descriptor_queue_}, spv_modules{std::move(stages)} { |
| 245 | if (shader_notify) { | 246 | if (shader_notify) { |
| 246 | shader_notify->MarkShaderBuilding(); | 247 | shader_notify->MarkShaderBuilding(); |
| @@ -897,27 +898,29 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { | |||
| 897 | if (device.IsKhrPipelineExecutablePropertiesEnabled()) { | 898 | if (device.IsKhrPipelineExecutablePropertiesEnabled()) { |
| 898 | flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; | 899 | flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; |
| 899 | } | 900 | } |
| 900 | pipeline = device.GetLogical().CreateGraphicsPipeline({ | 901 | pipeline = device.GetLogical().CreateGraphicsPipeline( |
| 901 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, | 902 | { |
| 902 | .pNext = nullptr, | 903 | .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, |
| 903 | .flags = flags, | 904 | .pNext = nullptr, |
| 904 | .stageCount = static_cast<u32>(shader_stages.size()), | 905 | .flags = flags, |
| 905 | .pStages = shader_stages.data(), | 906 | .stageCount = static_cast<u32>(shader_stages.size()), |
| 906 | .pVertexInputState = &vertex_input_ci, | 907 | .pStages = shader_stages.data(), |
| 907 | .pInputAssemblyState = &input_assembly_ci, | 908 | .pVertexInputState = &vertex_input_ci, |
| 908 | .pTessellationState = &tessellation_ci, | 909 | .pInputAssemblyState = &input_assembly_ci, |
| 909 | .pViewportState = &viewport_ci, | 910 | .pTessellationState = &tessellation_ci, |
| 910 | .pRasterizationState = &rasterization_ci, | 911 | .pViewportState = &viewport_ci, |
| 911 | .pMultisampleState = &multisample_ci, | 912 | .pRasterizationState = &rasterization_ci, |
| 912 | .pDepthStencilState = &depth_stencil_ci, | 913 | .pMultisampleState = &multisample_ci, |
| 913 | .pColorBlendState = &color_blend_ci, | 914 | .pDepthStencilState = &depth_stencil_ci, |
| 914 | .pDynamicState = &dynamic_state_ci, | 915 | .pColorBlendState = &color_blend_ci, |
| 915 | .layout = *pipeline_layout, | 916 | .pDynamicState = &dynamic_state_ci, |
| 916 | .renderPass = render_pass, | 917 | .layout = *pipeline_layout, |
| 917 | .subpass = 0, | 918 | .renderPass = render_pass, |
| 918 | .basePipelineHandle = nullptr, | 919 | .subpass = 0, |
| 919 | .basePipelineIndex = 0, | 920 | .basePipelineHandle = nullptr, |
| 920 | }); | 921 | .basePipelineIndex = 0, |
| 922 | }, | ||
| 923 | *pipeline_cache); | ||
| 921 | } | 924 | } |
| 922 | 925 | ||
| 923 | void GraphicsPipeline::Validate() { | 926 | void GraphicsPipeline::Validate() { |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 1ed2967be..67c657d0e 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h | |||
| @@ -70,16 +70,14 @@ class GraphicsPipeline { | |||
| 70 | static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | 70 | static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; |
| 71 | 71 | ||
| 72 | public: | 72 | public: |
| 73 | explicit GraphicsPipeline(Scheduler& scheduler, BufferCache& buffer_cache, | 73 | explicit GraphicsPipeline( |
| 74 | TextureCache& texture_cache, VideoCore::ShaderNotify* shader_notify, | 74 | Scheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache, |
| 75 | const Device& device, DescriptorPool& descriptor_pool, | 75 | vk::PipelineCache& pipeline_cache, VideoCore::ShaderNotify* shader_notify, |
| 76 | UpdateDescriptorQueue& update_descriptor_queue, | 76 | const Device& device, DescriptorPool& descriptor_pool, |
| 77 | Common::ThreadWorker* worker_thread, | 77 | UpdateDescriptorQueue& update_descriptor_queue, Common::ThreadWorker* worker_thread, |
| 78 | PipelineStatistics* pipeline_statistics, | 78 | PipelineStatistics* pipeline_statistics, RenderPassCache& render_pass_cache, |
| 79 | RenderPassCache& render_pass_cache, | 79 | const GraphicsPipelineCacheKey& key, std::array<vk::ShaderModule, NUM_STAGES> stages, |
| 80 | const GraphicsPipelineCacheKey& key, | 80 | const std::array<const Shader::Info*, NUM_STAGES>& infos); |
| 81 | std::array<vk::ShaderModule, NUM_STAGES> stages, | ||
| 82 | const std::array<const Shader::Info*, NUM_STAGES>& infos); | ||
| 83 | 81 | ||
| 84 | GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; | 82 | GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; |
| 85 | GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; | 83 | GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; |
| @@ -133,6 +131,7 @@ private: | |||
| 133 | const Device& device; | 131 | const Device& device; |
| 134 | TextureCache& texture_cache; | 132 | TextureCache& texture_cache; |
| 135 | BufferCache& buffer_cache; | 133 | BufferCache& buffer_cache; |
| 134 | vk::PipelineCache& pipeline_cache; | ||
| 136 | Scheduler& scheduler; | 135 | Scheduler& scheduler; |
| 137 | UpdateDescriptorQueue& update_descriptor_queue; | 136 | UpdateDescriptorQueue& update_descriptor_queue; |
| 138 | 137 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3046b72ab..67e5bc648 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -55,6 +55,7 @@ using VideoCommon::GenericEnvironment; | |||
| 55 | using VideoCommon::GraphicsEnvironment; | 55 | using VideoCommon::GraphicsEnvironment; |
| 56 | 56 | ||
| 57 | constexpr u32 CACHE_VERSION = 10; | 57 | constexpr u32 CACHE_VERSION = 10; |
| 58 | constexpr std::array<char, 8> VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'}; | ||
| 58 | 59 | ||
| 59 | template <typename Container> | 60 | template <typename Container> |
| 60 | auto MakeSpan(Container& container) { | 61 | auto MakeSpan(Container& container) { |
| @@ -284,6 +285,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 284 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, | 285 | render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, |
| 285 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, | 286 | texture_cache{texture_cache_}, shader_notify{shader_notify_}, |
| 286 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, | 287 | use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, |
| 288 | use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, | ||
| 287 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), | 289 | workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), |
| 288 | serialization_thread(1, "VkPipelineSerialization") { | 290 | serialization_thread(1, "VkPipelineSerialization") { |
| 289 | const auto& float_control{device.FloatControlProperties()}; | 291 | const auto& float_control{device.FloatControlProperties()}; |
| @@ -362,7 +364,12 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 362 | }; | 364 | }; |
| 363 | } | 365 | } |
| 364 | 366 | ||
| 365 | PipelineCache::~PipelineCache() = default; | 367 | PipelineCache::~PipelineCache() { |
| 368 | if (use_vulkan_pipeline_cache && !vulkan_pipeline_cache_filename.empty()) { | ||
| 369 | SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache, | ||
| 370 | CACHE_VERSION); | ||
| 371 | } | ||
| 372 | } | ||
| 366 | 373 | ||
| 367 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { | 374 | GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { |
| 368 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); | 375 | MICROPROFILE_SCOPE(Vulkan_PipelineCache); |
| @@ -418,6 +425,12 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading | |||
| 418 | } | 425 | } |
| 419 | pipeline_cache_filename = base_dir / "vulkan.bin"; | 426 | pipeline_cache_filename = base_dir / "vulkan.bin"; |
| 420 | 427 | ||
| 428 | if (use_vulkan_pipeline_cache) { | ||
| 429 | vulkan_pipeline_cache_filename = base_dir / "vulkan_pipelines.bin"; | ||
| 430 | vulkan_pipeline_cache = | ||
| 431 | LoadVulkanPipelineCache(vulkan_pipeline_cache_filename, CACHE_VERSION); | ||
| 432 | } | ||
| 433 | |||
| 421 | struct { | 434 | struct { |
| 422 | std::mutex mutex; | 435 | std::mutex mutex; |
| 423 | size_t total{}; | 436 | size_t total{}; |
| @@ -496,6 +509,11 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading | |||
| 496 | 509 | ||
| 497 | workers.WaitForRequests(stop_loading); | 510 | workers.WaitForRequests(stop_loading); |
| 498 | 511 | ||
| 512 | if (use_vulkan_pipeline_cache) { | ||
| 513 | SerializeVulkanPipelineCache(vulkan_pipeline_cache_filename, vulkan_pipeline_cache, | ||
| 514 | CACHE_VERSION); | ||
| 515 | } | ||
| 516 | |||
| 499 | if (state.statistics) { | 517 | if (state.statistics) { |
| 500 | state.statistics->Report(); | 518 | state.statistics->Report(); |
| 501 | } | 519 | } |
| @@ -616,10 +634,10 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| 616 | previous_stage = &program; | 634 | previous_stage = &program; |
| 617 | } | 635 | } |
| 618 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | 636 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; |
| 619 | return std::make_unique<GraphicsPipeline>(scheduler, buffer_cache, texture_cache, | 637 | return std::make_unique<GraphicsPipeline>( |
| 620 | &shader_notify, device, descriptor_pool, | 638 | scheduler, buffer_cache, texture_cache, vulkan_pipeline_cache, &shader_notify, device, |
| 621 | update_descriptor_queue, thread_worker, statistics, | 639 | descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key, |
| 622 | render_pass_cache, key, std::move(modules), infos); | 640 | std::move(modules), infos); |
| 623 | 641 | ||
| 624 | } catch (const Shader::Exception& exception) { | 642 | } catch (const Shader::Exception& exception) { |
| 625 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); | 643 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); |
| @@ -689,13 +707,107 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 689 | spv_module.SetObjectNameEXT(name.c_str()); | 707 | spv_module.SetObjectNameEXT(name.c_str()); |
| 690 | } | 708 | } |
| 691 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; | 709 | Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; |
| 692 | return std::make_unique<ComputePipeline>(device, descriptor_pool, update_descriptor_queue, | 710 | return std::make_unique<ComputePipeline>(device, vulkan_pipeline_cache, descriptor_pool, |
| 693 | thread_worker, statistics, &shader_notify, | 711 | update_descriptor_queue, thread_worker, statistics, |
| 694 | program.info, std::move(spv_module)); | 712 | &shader_notify, program.info, std::move(spv_module)); |
| 695 | 713 | ||
| 696 | } catch (const Shader::Exception& exception) { | 714 | } catch (const Shader::Exception& exception) { |
| 697 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); | 715 | LOG_ERROR(Render_Vulkan, "{}", exception.what()); |
| 698 | return nullptr; | 716 | return nullptr; |
| 699 | } | 717 | } |
| 700 | 718 | ||
| 719 | void PipelineCache::SerializeVulkanPipelineCache(const std::filesystem::path& filename, | ||
| 720 | const vk::PipelineCache& pipeline_cache, | ||
| 721 | u32 cache_version) try { | ||
| 722 | std::ofstream file(filename, std::ios::binary); | ||
| 723 | file.exceptions(std::ifstream::failbit); | ||
| 724 | if (!file.is_open()) { | ||
| 725 | LOG_ERROR(Common_Filesystem, "Failed to open Vulkan driver pipeline cache file {}", | ||
| 726 | Common::FS::PathToUTF8String(filename)); | ||
| 727 | return; | ||
| 728 | } | ||
| 729 | file.write(VULKAN_CACHE_MAGIC_NUMBER.data(), VULKAN_CACHE_MAGIC_NUMBER.size()) | ||
| 730 | .write(reinterpret_cast<const char*>(&cache_version), sizeof(cache_version)); | ||
| 731 | |||
| 732 | size_t cache_size = 0; | ||
| 733 | std::vector<char> cache_data; | ||
| 734 | if (pipeline_cache) { | ||
| 735 | pipeline_cache.Read(&cache_size, nullptr); | ||
| 736 | cache_data.resize(cache_size); | ||
| 737 | pipeline_cache.Read(&cache_size, cache_data.data()); | ||
| 738 | } | ||
| 739 | file.write(cache_data.data(), cache_size); | ||
| 740 | |||
| 741 | LOG_INFO(Render_Vulkan, "Vulkan driver pipelines cached at: {}", | ||
| 742 | Common::FS::PathToUTF8String(filename)); | ||
| 743 | |||
| 744 | } catch (const std::ios_base::failure& e) { | ||
| 745 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 746 | if (!Common::FS::RemoveFile(filename)) { | ||
| 747 | LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}", | ||
| 748 | Common::FS::PathToUTF8String(filename)); | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | vk::PipelineCache PipelineCache::LoadVulkanPipelineCache(const std::filesystem::path& filename, | ||
| 753 | u32 expected_cache_version) { | ||
| 754 | const auto create_pipeline_cache = [this](size_t data_size, const void* data) { | ||
| 755 | VkPipelineCacheCreateInfo pipeline_cache_ci = { | ||
| 756 | .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, | ||
| 757 | .pNext = nullptr, | ||
| 758 | .flags = 0, | ||
| 759 | .initialDataSize = data_size, | ||
| 760 | .pInitialData = data}; | ||
| 761 | return device.GetLogical().CreatePipelineCache(pipeline_cache_ci); | ||
| 762 | }; | ||
| 763 | try { | ||
| 764 | std::ifstream file(filename, std::ios::binary | std::ios::ate); | ||
| 765 | if (!file.is_open()) { | ||
| 766 | return create_pipeline_cache(0, nullptr); | ||
| 767 | } | ||
| 768 | file.exceptions(std::ifstream::failbit); | ||
| 769 | const auto end{file.tellg()}; | ||
| 770 | file.seekg(0, std::ios::beg); | ||
| 771 | |||
| 772 | std::array<char, 8> magic_number; | ||
| 773 | u32 cache_version; | ||
| 774 | file.read(magic_number.data(), magic_number.size()) | ||
| 775 | .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); | ||
| 776 | if (magic_number != VULKAN_CACHE_MAGIC_NUMBER || cache_version != expected_cache_version) { | ||
| 777 | file.close(); | ||
| 778 | if (Common::FS::RemoveFile(filename)) { | ||
| 779 | if (magic_number != VULKAN_CACHE_MAGIC_NUMBER) { | ||
| 780 | LOG_ERROR(Common_Filesystem, "Invalid Vulkan driver pipeline cache file"); | ||
| 781 | } | ||
| 782 | if (cache_version != expected_cache_version) { | ||
| 783 | LOG_INFO(Common_Filesystem, "Deleting old Vulkan driver pipeline cache"); | ||
| 784 | } | ||
| 785 | } else { | ||
| 786 | LOG_ERROR(Common_Filesystem, | ||
| 787 | "Invalid Vulkan pipeline cache file and failed to delete it in \"{}\"", | ||
| 788 | Common::FS::PathToUTF8String(filename)); | ||
| 789 | } | ||
| 790 | return create_pipeline_cache(0, nullptr); | ||
| 791 | } | ||
| 792 | |||
| 793 | const size_t cache_size = static_cast<size_t>(end) - magic_number.size(); | ||
| 794 | std::vector<char> cache_data(cache_size); | ||
| 795 | file.read(cache_data.data(), cache_size); | ||
| 796 | |||
| 797 | LOG_INFO(Render_Vulkan, | ||
| 798 | "Loaded Vulkan driver pipeline cache: ", Common::FS::PathToUTF8String(filename)); | ||
| 799 | |||
| 800 | return create_pipeline_cache(cache_size, cache_data.data()); | ||
| 801 | |||
| 802 | } catch (const std::ios_base::failure& e) { | ||
| 803 | LOG_ERROR(Common_Filesystem, "{}", e.what()); | ||
| 804 | if (!Common::FS::RemoveFile(filename)) { | ||
| 805 | LOG_ERROR(Common_Filesystem, "Failed to delete Vulkan driver pipeline cache file {}", | ||
| 806 | Common::FS::PathToUTF8String(filename)); | ||
| 807 | } | ||
| 808 | |||
| 809 | return create_pipeline_cache(0, nullptr); | ||
| 810 | } | ||
| 811 | } | ||
| 812 | |||
| 701 | } // namespace Vulkan | 813 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b4f593ef5..5171912d7 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -135,6 +135,12 @@ private: | |||
| 135 | PipelineStatistics* statistics, | 135 | PipelineStatistics* statistics, |
| 136 | bool build_in_parallel); | 136 | bool build_in_parallel); |
| 137 | 137 | ||
| 138 | void SerializeVulkanPipelineCache(const std::filesystem::path& filename, | ||
| 139 | const vk::PipelineCache& pipeline_cache, u32 cache_version); | ||
| 140 | |||
| 141 | vk::PipelineCache LoadVulkanPipelineCache(const std::filesystem::path& filename, | ||
| 142 | u32 expected_cache_version); | ||
| 143 | |||
| 138 | const Device& device; | 144 | const Device& device; |
| 139 | Scheduler& scheduler; | 145 | Scheduler& scheduler; |
| 140 | DescriptorPool& descriptor_pool; | 146 | DescriptorPool& descriptor_pool; |
| @@ -144,6 +150,7 @@ private: | |||
| 144 | TextureCache& texture_cache; | 150 | TextureCache& texture_cache; |
| 145 | VideoCore::ShaderNotify& shader_notify; | 151 | VideoCore::ShaderNotify& shader_notify; |
| 146 | bool use_asynchronous_shaders{}; | 152 | bool use_asynchronous_shaders{}; |
| 153 | bool use_vulkan_pipeline_cache{}; | ||
| 147 | 154 | ||
| 148 | GraphicsPipelineCacheKey graphics_key{}; | 155 | GraphicsPipelineCacheKey graphics_key{}; |
| 149 | GraphicsPipeline* current_pipeline{}; | 156 | GraphicsPipeline* current_pipeline{}; |
| @@ -158,6 +165,9 @@ private: | |||
| 158 | 165 | ||
| 159 | std::filesystem::path pipeline_cache_filename; | 166 | std::filesystem::path pipeline_cache_filename; |
| 160 | 167 | ||
| 168 | std::filesystem::path vulkan_pipeline_cache_filename; | ||
| 169 | vk::PipelineCache vulkan_pipeline_cache; | ||
| 170 | |||
| 161 | Common::ThreadWorker workers; | 171 | Common::ThreadWorker workers; |
| 162 | Common::ThreadWorker serialization_thread; | 172 | Common::ThreadWorker serialization_thread; |
| 163 | DynamicFeatures dynamic_features; | 173 | DynamicFeatures dynamic_features; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 242bf9602..ed4a72166 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -186,6 +186,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 186 | 186 | ||
| 187 | SCOPE_EXIT({ gpu.TickWork(); }); | 187 | SCOPE_EXIT({ gpu.TickWork(); }); |
| 188 | FlushWork(); | 188 | FlushWork(); |
| 189 | gpu_memory->FlushCaching(); | ||
| 189 | 190 | ||
| 190 | query_cache.UpdateCounters(); | 191 | query_cache.UpdateCounters(); |
| 191 | 192 | ||
| @@ -393,6 +394,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 393 | 394 | ||
| 394 | void RasterizerVulkan::DispatchCompute() { | 395 | void RasterizerVulkan::DispatchCompute() { |
| 395 | FlushWork(); | 396 | FlushWork(); |
| 397 | gpu_memory->FlushCaching(); | ||
| 396 | 398 | ||
| 397 | ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; | 399 | ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()}; |
| 398 | if (!pipeline) { | 400 | if (!pipeline) { |
| @@ -481,6 +483,27 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||
| 481 | } | 483 | } |
| 482 | } | 484 | } |
| 483 | 485 | ||
| 486 | void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) { | ||
| 487 | { | ||
| 488 | std::scoped_lock lock{texture_cache.mutex}; | ||
| 489 | for (const auto& [addr, size] : sequences) { | ||
| 490 | texture_cache.WriteMemory(addr, size); | ||
| 491 | } | ||
| 492 | } | ||
| 493 | { | ||
| 494 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 495 | for (const auto& [addr, size] : sequences) { | ||
| 496 | buffer_cache.WriteMemory(addr, size); | ||
| 497 | } | ||
| 498 | } | ||
| 499 | { | ||
| 500 | for (const auto& [addr, size] : sequences) { | ||
| 501 | query_cache.InvalidateRegion(addr, size); | ||
| 502 | pipeline_cache.InvalidateRegion(addr, size); | ||
| 503 | } | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 484 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | 507 | void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { |
| 485 | if (addr == 0 || size == 0) { | 508 | if (addr == 0 || size == 0) { |
| 486 | return; | 509 | return; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c661e5b19..472cc64d9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -79,6 +79,7 @@ public: | |||
| 79 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 79 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 80 | void InvalidateRegion(VAddr addr, u64 size, | 80 | void InvalidateRegion(VAddr addr, u64 size, |
| 81 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | 81 | VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |
| 82 | void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | ||
| 82 | void OnCPUWrite(VAddr addr, u64 size) override; | 83 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 83 | void InvalidateGPUCache() override; | 84 | void InvalidateGPUCache() override; |
| 84 | void UnmapMemory(VAddr addr, u64 size) override; | 85 | void UnmapMemory(VAddr addr, u64 size) override; |
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp new file mode 100644 index 000000000..852b86f84 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp | |||
| @@ -0,0 +1,205 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include "common/literals.h" | ||
| 5 | #include "video_core/host_shaders/vulkan_turbo_mode_comp_spv.h" | ||
| 6 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | ||
| 7 | #include "video_core/renderer_vulkan/vk_shader_util.h" | ||
| 8 | #include "video_core/renderer_vulkan/vk_turbo_mode.h" | ||
| 9 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | using namespace Common::Literals; | ||
| 14 | |||
| 15 | TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) | ||
| 16 | : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} { | ||
| 17 | m_thread = std::jthread([&](auto stop_token) { Run(stop_token); }); | ||
| 18 | } | ||
| 19 | |||
| 20 | TurboMode::~TurboMode() = default; | ||
| 21 | |||
| 22 | void TurboMode::Run(std::stop_token stop_token) { | ||
| 23 | auto& dld = m_device.GetLogical(); | ||
| 24 | |||
| 25 | // Allocate buffer. 2MiB should be sufficient. | ||
| 26 | auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ | ||
| 27 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 28 | .pNext = nullptr, | ||
| 29 | .flags = 0, | ||
| 30 | .size = 2_MiB, | ||
| 31 | .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 32 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 33 | .queueFamilyIndexCount = 0, | ||
| 34 | .pQueueFamilyIndices = nullptr, | ||
| 35 | }); | ||
| 36 | |||
| 37 | // Commit some device local memory for the buffer. | ||
| 38 | auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 39 | |||
| 40 | // Create the descriptor pool to contain our descriptor. | ||
| 41 | constexpr VkDescriptorPoolSize pool_size{ | ||
| 42 | .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 43 | .descriptorCount = 1, | ||
| 44 | }; | ||
| 45 | |||
| 46 | auto descriptor_pool = dld.CreateDescriptorPool(VkDescriptorPoolCreateInfo{ | ||
| 47 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, | ||
| 48 | .pNext = nullptr, | ||
| 49 | .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, | ||
| 50 | .maxSets = 1, | ||
| 51 | .poolSizeCount = 1, | ||
| 52 | .pPoolSizes = &pool_size, | ||
| 53 | }); | ||
| 54 | |||
| 55 | // Create the descriptor set layout from the pool. | ||
| 56 | constexpr VkDescriptorSetLayoutBinding layout_binding{ | ||
| 57 | .binding = 0, | ||
| 58 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 59 | .descriptorCount = 1, | ||
| 60 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 61 | .pImmutableSamplers = nullptr, | ||
| 62 | }; | ||
| 63 | |||
| 64 | auto descriptor_set_layout = dld.CreateDescriptorSetLayout(VkDescriptorSetLayoutCreateInfo{ | ||
| 65 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||
| 66 | .pNext = nullptr, | ||
| 67 | .flags = 0, | ||
| 68 | .bindingCount = 1, | ||
| 69 | .pBindings = &layout_binding, | ||
| 70 | }); | ||
| 71 | |||
| 72 | // Actually create the descriptor set. | ||
| 73 | auto descriptor_set = descriptor_pool.Allocate(VkDescriptorSetAllocateInfo{ | ||
| 74 | .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, | ||
| 75 | .pNext = nullptr, | ||
| 76 | .descriptorPool = *descriptor_pool, | ||
| 77 | .descriptorSetCount = 1, | ||
| 78 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 79 | }); | ||
| 80 | |||
| 81 | // Create the shader. | ||
| 82 | auto shader = BuildShader(m_device, VULKAN_TURBO_MODE_COMP_SPV); | ||
| 83 | |||
| 84 | // Create the pipeline layout. | ||
| 85 | auto pipeline_layout = dld.CreatePipelineLayout(VkPipelineLayoutCreateInfo{ | ||
| 86 | .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||
| 87 | .pNext = nullptr, | ||
| 88 | .flags = 0, | ||
| 89 | .setLayoutCount = 1, | ||
| 90 | .pSetLayouts = descriptor_set_layout.address(), | ||
| 91 | .pushConstantRangeCount = 0, | ||
| 92 | .pPushConstantRanges = nullptr, | ||
| 93 | }); | ||
| 94 | |||
| 95 | // Actually create the pipeline. | ||
| 96 | const VkPipelineShaderStageCreateInfo shader_stage{ | ||
| 97 | .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, | ||
| 98 | .pNext = nullptr, | ||
| 99 | .flags = 0, | ||
| 100 | .stage = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 101 | .module = *shader, | ||
| 102 | .pName = "main", | ||
| 103 | .pSpecializationInfo = nullptr, | ||
| 104 | }; | ||
| 105 | |||
| 106 | auto pipeline = dld.CreateComputePipeline(VkComputePipelineCreateInfo{ | ||
| 107 | .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, | ||
| 108 | .pNext = nullptr, | ||
| 109 | .flags = 0, | ||
| 110 | .stage = shader_stage, | ||
| 111 | .layout = *pipeline_layout, | ||
| 112 | .basePipelineHandle = VK_NULL_HANDLE, | ||
| 113 | .basePipelineIndex = 0, | ||
| 114 | }); | ||
| 115 | |||
| 116 | // Create a fence to wait on. | ||
| 117 | auto fence = dld.CreateFence(VkFenceCreateInfo{ | ||
| 118 | .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, | ||
| 119 | .pNext = nullptr, | ||
| 120 | .flags = 0, | ||
| 121 | }); | ||
| 122 | |||
| 123 | // Create a command pool to allocate a command buffer from. | ||
| 124 | auto command_pool = dld.CreateCommandPool(VkCommandPoolCreateInfo{ | ||
| 125 | .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, | ||
| 126 | .pNext = nullptr, | ||
| 127 | .flags = | ||
| 128 | VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, | ||
| 129 | .queueFamilyIndex = m_device.GetGraphicsFamily(), | ||
| 130 | }); | ||
| 131 | |||
| 132 | // Create a single command buffer. | ||
| 133 | auto cmdbufs = command_pool.Allocate(1, VK_COMMAND_BUFFER_LEVEL_PRIMARY); | ||
| 134 | auto cmdbuf = vk::CommandBuffer{cmdbufs[0], m_device.GetDispatchLoader()}; | ||
| 135 | |||
| 136 | while (!stop_token.stop_requested()) { | ||
| 137 | // Reset the fence. | ||
| 138 | fence.Reset(); | ||
| 139 | |||
| 140 | // Update descriptor set. | ||
| 141 | const VkDescriptorBufferInfo buffer_info{ | ||
| 142 | .buffer = *buffer, | ||
| 143 | .offset = 0, | ||
| 144 | .range = VK_WHOLE_SIZE, | ||
| 145 | }; | ||
| 146 | |||
| 147 | const VkWriteDescriptorSet buffer_write{ | ||
| 148 | .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||
| 149 | .pNext = nullptr, | ||
| 150 | .dstSet = descriptor_set[0], | ||
| 151 | .dstBinding = 0, | ||
| 152 | .dstArrayElement = 0, | ||
| 153 | .descriptorCount = 1, | ||
| 154 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 155 | .pImageInfo = nullptr, | ||
| 156 | .pBufferInfo = &buffer_info, | ||
| 157 | .pTexelBufferView = nullptr, | ||
| 158 | }; | ||
| 159 | |||
| 160 | dld.UpdateDescriptorSets(std::array{buffer_write}, {}); | ||
| 161 | |||
| 162 | // Set up the command buffer. | ||
| 163 | cmdbuf.Begin(VkCommandBufferBeginInfo{ | ||
| 164 | .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, | ||
| 165 | .pNext = nullptr, | ||
| 166 | .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, | ||
| 167 | .pInheritanceInfo = nullptr, | ||
| 168 | }); | ||
| 169 | |||
| 170 | // Clear the buffer. | ||
| 171 | cmdbuf.FillBuffer(*buffer, 0, VK_WHOLE_SIZE, 0); | ||
| 172 | |||
| 173 | // Bind descriptor set. | ||
| 174 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, | ||
| 175 | descriptor_set, {}); | ||
| 176 | |||
| 177 | // Bind the pipeline. | ||
| 178 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||
| 179 | |||
| 180 | // Dispatch. | ||
| 181 | cmdbuf.Dispatch(64, 64, 1); | ||
| 182 | |||
| 183 | // Finish. | ||
| 184 | cmdbuf.End(); | ||
| 185 | |||
| 186 | const VkSubmitInfo submit_info{ | ||
| 187 | .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, | ||
| 188 | .pNext = nullptr, | ||
| 189 | .waitSemaphoreCount = 0, | ||
| 190 | .pWaitSemaphores = nullptr, | ||
| 191 | .pWaitDstStageMask = nullptr, | ||
| 192 | .commandBufferCount = 1, | ||
| 193 | .pCommandBuffers = cmdbuf.address(), | ||
| 194 | .signalSemaphoreCount = 0, | ||
| 195 | .pSignalSemaphores = nullptr, | ||
| 196 | }; | ||
| 197 | |||
| 198 | m_device.GetGraphicsQueue().Submit(std::array{submit_info}, *fence); | ||
| 199 | |||
| 200 | // Wait for completion. | ||
| 201 | fence.Wait(); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | } // namespace Vulkan | ||
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.h b/src/video_core/renderer_vulkan/vk_turbo_mode.h new file mode 100644 index 000000000..2060e2395 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.h | |||
| @@ -0,0 +1,26 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #pragma once | ||
| 5 | |||
| 6 | #include "common/polyfill_thread.h" | ||
| 7 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 8 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 9 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 10 | |||
| 11 | namespace Vulkan { | ||
| 12 | |||
| 13 | class TurboMode { | ||
| 14 | public: | ||
| 15 | explicit TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld); | ||
| 16 | ~TurboMode(); | ||
| 17 | |||
| 18 | private: | ||
| 19 | void Run(std::stop_token stop_token); | ||
| 20 | |||
| 21 | Device m_device; | ||
| 22 | MemoryAllocator m_allocator; | ||
| 23 | std::jthread m_thread; | ||
| 24 | }; | ||
| 25 | |||
| 26 | } // namespace Vulkan | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5c5bfa18d..77aee802d 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -1472,7 +1472,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 1472 | is_patch_list_restart_supported = | 1472 | is_patch_list_restart_supported = |
| 1473 | primitive_topology_list_restart.primitiveTopologyPatchListRestart; | 1473 | primitive_topology_list_restart.primitiveTopologyPatchListRestart; |
| 1474 | } | 1474 | } |
| 1475 | if (has_khr_image_format_list && has_khr_swapchain_mutable_format) { | 1475 | if (requires_surface && has_khr_image_format_list && has_khr_swapchain_mutable_format) { |
| 1476 | extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); | 1476 | extensions.push_back(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); |
| 1477 | extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); | 1477 | extensions.push_back(VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME); |
| 1478 | khr_swapchain_mutable_format = true; | 1478 | khr_swapchain_mutable_format = true; |
| @@ -1487,6 +1487,9 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | |||
| 1487 | 1487 | ||
| 1488 | max_push_descriptors = push_descriptor.maxPushDescriptors; | 1488 | max_push_descriptors = push_descriptor.maxPushDescriptors; |
| 1489 | } | 1489 | } |
| 1490 | |||
| 1491 | has_null_descriptor = true; | ||
| 1492 | |||
| 1490 | return extensions; | 1493 | return extensions; |
| 1491 | } | 1494 | } |
| 1492 | 1495 | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 920a8f4e3..6042046e1 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -397,6 +397,10 @@ public: | |||
| 397 | return must_emulate_bgr565; | 397 | return must_emulate_bgr565; |
| 398 | } | 398 | } |
| 399 | 399 | ||
| 400 | bool HasNullDescriptor() const { | ||
| 401 | return has_null_descriptor; | ||
| 402 | } | ||
| 403 | |||
| 400 | u32 GetMaxVertexInputAttributes() const { | 404 | u32 GetMaxVertexInputAttributes() const { |
| 401 | return max_vertex_input_attributes; | 405 | return max_vertex_input_attributes; |
| 402 | } | 406 | } |
| @@ -511,6 +515,7 @@ private: | |||
| 511 | bool supports_d24_depth{}; ///< Supports D24 depth buffers. | 515 | bool supports_d24_depth{}; ///< Supports D24 depth buffers. |
| 512 | bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. | 516 | bool cant_blit_msaa{}; ///< Does not support MSAA<->MSAA blitting. |
| 513 | bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. | 517 | bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format. |
| 518 | bool has_null_descriptor{}; ///< Has support for null descriptors. | ||
| 514 | u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline | 519 | u32 max_vertex_input_attributes{}; ///< Max vertex input attributes in pipeline |
| 515 | u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline | 520 | u32 max_vertex_input_bindings{}; ///< Max vertex input buffers in pipeline |
| 516 | 521 | ||
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 562039b56..b6d83e446 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp | |||
| @@ -32,7 +32,7 @@ | |||
| 32 | namespace Vulkan { | 32 | namespace Vulkan { |
| 33 | namespace { | 33 | namespace { |
| 34 | [[nodiscard]] std::vector<const char*> RequiredExtensions( | 34 | [[nodiscard]] std::vector<const char*> RequiredExtensions( |
| 35 | Core::Frontend::WindowSystemType window_type, bool enable_debug_utils) { | 35 | Core::Frontend::WindowSystemType window_type, bool enable_validation) { |
| 36 | std::vector<const char*> extensions; | 36 | std::vector<const char*> extensions; |
| 37 | extensions.reserve(6); | 37 | extensions.reserve(6); |
| 38 | switch (window_type) { | 38 | switch (window_type) { |
| @@ -65,7 +65,7 @@ namespace { | |||
| 65 | if (window_type != Core::Frontend::WindowSystemType::Headless) { | 65 | if (window_type != Core::Frontend::WindowSystemType::Headless) { |
| 66 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | 66 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); |
| 67 | } | 67 | } |
| 68 | if (enable_debug_utils) { | 68 | if (enable_validation) { |
| 69 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | 69 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); |
| 70 | } | 70 | } |
| 71 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | 71 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); |
| @@ -95,9 +95,9 @@ namespace { | |||
| 95 | return true; | 95 | return true; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | [[nodiscard]] std::vector<const char*> Layers(bool enable_layers) { | 98 | [[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { |
| 99 | std::vector<const char*> layers; | 99 | std::vector<const char*> layers; |
| 100 | if (enable_layers) { | 100 | if (enable_validation) { |
| 101 | layers.push_back("VK_LAYER_KHRONOS_validation"); | 101 | layers.push_back("VK_LAYER_KHRONOS_validation"); |
| 102 | } | 102 | } |
| 103 | return layers; | 103 | return layers; |
| @@ -125,7 +125,7 @@ void RemoveUnavailableLayers(const vk::InstanceDispatch& dld, std::vector<const | |||
| 125 | 125 | ||
| 126 | vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, | 126 | vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, |
| 127 | u32 required_version, Core::Frontend::WindowSystemType window_type, | 127 | u32 required_version, Core::Frontend::WindowSystemType window_type, |
| 128 | bool enable_debug_utils, bool enable_layers) { | 128 | bool enable_validation) { |
| 129 | if (!library.IsOpen()) { | 129 | if (!library.IsOpen()) { |
| 130 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); | 130 | LOG_ERROR(Render_Vulkan, "Vulkan library not available"); |
| 131 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | 131 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
| @@ -138,11 +138,11 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD | |||
| 138 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); | 138 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); |
| 139 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | 139 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
| 140 | } | 140 | } |
| 141 | const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_debug_utils); | 141 | const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation); |
| 142 | if (!AreExtensionsSupported(dld, extensions)) { | 142 | if (!AreExtensionsSupported(dld, extensions)) { |
| 143 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | 143 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); |
| 144 | } | 144 | } |
| 145 | std::vector<const char*> layers = Layers(enable_layers); | 145 | std::vector<const char*> layers = Layers(enable_validation); |
| 146 | RemoveUnavailableLayers(dld, layers); | 146 | RemoveUnavailableLayers(dld, layers); |
| 147 | 147 | ||
| 148 | const u32 available_version = vk::AvailableVersion(dld); | 148 | const u32 available_version = vk::AvailableVersion(dld); |
diff --git a/src/video_core/vulkan_common/vulkan_instance.h b/src/video_core/vulkan_common/vulkan_instance.h index 40419d802..b59b92f83 100644 --- a/src/video_core/vulkan_common/vulkan_instance.h +++ b/src/video_core/vulkan_common/vulkan_instance.h | |||
| @@ -17,8 +17,7 @@ namespace Vulkan { | |||
| 17 | * @param dld Dispatch table to load function pointers into | 17 | * @param dld Dispatch table to load function pointers into |
| 18 | * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) | 18 | * @param required_version Required Vulkan version (for example, VK_API_VERSION_1_1) |
| 19 | * @param window_type Window system type's enabled extension | 19 | * @param window_type Window system type's enabled extension |
| 20 | * @param enable_debug_utils Whether to enable VK_EXT_debug_utils_extension_name or not | 20 | * @param enable_validation Whether to enable Vulkan validation layers or not |
| 21 | * @param enable_layers Whether to enable Vulkan validation layers or not | ||
| 22 | * | 21 | * |
| 23 | * @return A new Vulkan instance | 22 | * @return A new Vulkan instance |
| 24 | * @throw vk::Exception on failure | 23 | * @throw vk::Exception on failure |
| @@ -26,6 +25,6 @@ namespace Vulkan { | |||
| 26 | [[nodiscard]] vk::Instance CreateInstance( | 25 | [[nodiscard]] vk::Instance CreateInstance( |
| 27 | const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, | 26 | const Common::DynamicLibrary& library, vk::InstanceDispatch& dld, u32 required_version, |
| 28 | Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, | 27 | Core::Frontend::WindowSystemType window_type = Core::Frontend::WindowSystemType::Headless, |
| 29 | bool enable_debug_utils = false, bool enable_layers = false); | 28 | bool enable_validation = false); |
| 30 | 29 | ||
| 31 | } // namespace Vulkan | 30 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 861767c13..61be1fce1 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -152,6 +152,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 152 | X(vkCreateGraphicsPipelines); | 152 | X(vkCreateGraphicsPipelines); |
| 153 | X(vkCreateImage); | 153 | X(vkCreateImage); |
| 154 | X(vkCreateImageView); | 154 | X(vkCreateImageView); |
| 155 | X(vkCreatePipelineCache); | ||
| 155 | X(vkCreatePipelineLayout); | 156 | X(vkCreatePipelineLayout); |
| 156 | X(vkCreateQueryPool); | 157 | X(vkCreateQueryPool); |
| 157 | X(vkCreateRenderPass); | 158 | X(vkCreateRenderPass); |
| @@ -171,6 +172,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 171 | X(vkDestroyImage); | 172 | X(vkDestroyImage); |
| 172 | X(vkDestroyImageView); | 173 | X(vkDestroyImageView); |
| 173 | X(vkDestroyPipeline); | 174 | X(vkDestroyPipeline); |
| 175 | X(vkDestroyPipelineCache); | ||
| 174 | X(vkDestroyPipelineLayout); | 176 | X(vkDestroyPipelineLayout); |
| 175 | X(vkDestroyQueryPool); | 177 | X(vkDestroyQueryPool); |
| 176 | X(vkDestroyRenderPass); | 178 | X(vkDestroyRenderPass); |
| @@ -188,6 +190,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 188 | X(vkGetEventStatus); | 190 | X(vkGetEventStatus); |
| 189 | X(vkGetFenceStatus); | 191 | X(vkGetFenceStatus); |
| 190 | X(vkGetImageMemoryRequirements); | 192 | X(vkGetImageMemoryRequirements); |
| 193 | X(vkGetPipelineCacheData); | ||
| 191 | X(vkGetMemoryFdKHR); | 194 | X(vkGetMemoryFdKHR); |
| 192 | #ifdef _WIN32 | 195 | #ifdef _WIN32 |
| 193 | X(vkGetMemoryWin32HandleKHR); | 196 | X(vkGetMemoryWin32HandleKHR); |
| @@ -431,6 +434,10 @@ void Destroy(VkDevice device, VkPipeline handle, const DeviceDispatch& dld) noex | |||
| 431 | dld.vkDestroyPipeline(device, handle, nullptr); | 434 | dld.vkDestroyPipeline(device, handle, nullptr); |
| 432 | } | 435 | } |
| 433 | 436 | ||
| 437 | void Destroy(VkDevice device, VkPipelineCache handle, const DeviceDispatch& dld) noexcept { | ||
| 438 | dld.vkDestroyPipelineCache(device, handle, nullptr); | ||
| 439 | } | ||
| 440 | |||
| 434 | void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { | 441 | void Destroy(VkDevice device, VkPipelineLayout handle, const DeviceDispatch& dld) noexcept { |
| 435 | dld.vkDestroyPipelineLayout(device, handle, nullptr); | 442 | dld.vkDestroyPipelineLayout(device, handle, nullptr); |
| 436 | } | 443 | } |
| @@ -651,6 +658,10 @@ void ShaderModule::SetObjectNameEXT(const char* name) const { | |||
| 651 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); | 658 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SHADER_MODULE, name); |
| 652 | } | 659 | } |
| 653 | 660 | ||
| 661 | void PipelineCache::SetObjectNameEXT(const char* name) const { | ||
| 662 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_PIPELINE_CACHE, name); | ||
| 663 | } | ||
| 664 | |||
| 654 | void Semaphore::SetObjectNameEXT(const char* name) const { | 665 | void Semaphore::SetObjectNameEXT(const char* name) const { |
| 655 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); | 666 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_SEMAPHORE, name); |
| 656 | } | 667 | } |
| @@ -746,21 +757,29 @@ DescriptorSetLayout Device::CreateDescriptorSetLayout( | |||
| 746 | return DescriptorSetLayout(object, handle, *dld); | 757 | return DescriptorSetLayout(object, handle, *dld); |
| 747 | } | 758 | } |
| 748 | 759 | ||
| 760 | PipelineCache Device::CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const { | ||
| 761 | VkPipelineCache cache; | ||
| 762 | Check(dld->vkCreatePipelineCache(handle, &ci, nullptr, &cache)); | ||
| 763 | return PipelineCache(cache, handle, *dld); | ||
| 764 | } | ||
| 765 | |||
| 749 | PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { | 766 | PipelineLayout Device::CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const { |
| 750 | VkPipelineLayout object; | 767 | VkPipelineLayout object; |
| 751 | Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); | 768 | Check(dld->vkCreatePipelineLayout(handle, &ci, nullptr, &object)); |
| 752 | return PipelineLayout(object, handle, *dld); | 769 | return PipelineLayout(object, handle, *dld); |
| 753 | } | 770 | } |
| 754 | 771 | ||
| 755 | Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const { | 772 | Pipeline Device::CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, |
| 773 | VkPipelineCache cache) const { | ||
| 756 | VkPipeline object; | 774 | VkPipeline object; |
| 757 | Check(dld->vkCreateGraphicsPipelines(handle, nullptr, 1, &ci, nullptr, &object)); | 775 | Check(dld->vkCreateGraphicsPipelines(handle, cache, 1, &ci, nullptr, &object)); |
| 758 | return Pipeline(object, handle, *dld); | 776 | return Pipeline(object, handle, *dld); |
| 759 | } | 777 | } |
| 760 | 778 | ||
| 761 | Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const { | 779 | Pipeline Device::CreateComputePipeline(const VkComputePipelineCreateInfo& ci, |
| 780 | VkPipelineCache cache) const { | ||
| 762 | VkPipeline object; | 781 | VkPipeline object; |
| 763 | Check(dld->vkCreateComputePipelines(handle, nullptr, 1, &ci, nullptr, &object)); | 782 | Check(dld->vkCreateComputePipelines(handle, cache, 1, &ci, nullptr, &object)); |
| 764 | return Pipeline(object, handle, *dld); | 783 | return Pipeline(object, handle, *dld); |
| 765 | } | 784 | } |
| 766 | 785 | ||
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index accfad8c1..412779b51 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -270,6 +270,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 270 | PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{}; | 270 | PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines{}; |
| 271 | PFN_vkCreateImage vkCreateImage{}; | 271 | PFN_vkCreateImage vkCreateImage{}; |
| 272 | PFN_vkCreateImageView vkCreateImageView{}; | 272 | PFN_vkCreateImageView vkCreateImageView{}; |
| 273 | PFN_vkCreatePipelineCache vkCreatePipelineCache{}; | ||
| 273 | PFN_vkCreatePipelineLayout vkCreatePipelineLayout{}; | 274 | PFN_vkCreatePipelineLayout vkCreatePipelineLayout{}; |
| 274 | PFN_vkCreateQueryPool vkCreateQueryPool{}; | 275 | PFN_vkCreateQueryPool vkCreateQueryPool{}; |
| 275 | PFN_vkCreateRenderPass vkCreateRenderPass{}; | 276 | PFN_vkCreateRenderPass vkCreateRenderPass{}; |
| @@ -289,6 +290,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 289 | PFN_vkDestroyImage vkDestroyImage{}; | 290 | PFN_vkDestroyImage vkDestroyImage{}; |
| 290 | PFN_vkDestroyImageView vkDestroyImageView{}; | 291 | PFN_vkDestroyImageView vkDestroyImageView{}; |
| 291 | PFN_vkDestroyPipeline vkDestroyPipeline{}; | 292 | PFN_vkDestroyPipeline vkDestroyPipeline{}; |
| 293 | PFN_vkDestroyPipelineCache vkDestroyPipelineCache{}; | ||
| 292 | PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{}; | 294 | PFN_vkDestroyPipelineLayout vkDestroyPipelineLayout{}; |
| 293 | PFN_vkDestroyQueryPool vkDestroyQueryPool{}; | 295 | PFN_vkDestroyQueryPool vkDestroyQueryPool{}; |
| 294 | PFN_vkDestroyRenderPass vkDestroyRenderPass{}; | 296 | PFN_vkDestroyRenderPass vkDestroyRenderPass{}; |
| @@ -306,6 +308,7 @@ struct DeviceDispatch : InstanceDispatch { | |||
| 306 | PFN_vkGetEventStatus vkGetEventStatus{}; | 308 | PFN_vkGetEventStatus vkGetEventStatus{}; |
| 307 | PFN_vkGetFenceStatus vkGetFenceStatus{}; | 309 | PFN_vkGetFenceStatus vkGetFenceStatus{}; |
| 308 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; | 310 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; |
| 311 | PFN_vkGetPipelineCacheData vkGetPipelineCacheData{}; | ||
| 309 | PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; | 312 | PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; |
| 310 | #ifdef _WIN32 | 313 | #ifdef _WIN32 |
| 311 | PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; | 314 | PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; |
| @@ -351,6 +354,7 @@ void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept; | |||
| 351 | void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; | 354 | void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept; |
| 352 | void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; | 355 | void Destroy(VkDevice, VkImageView, const DeviceDispatch&) noexcept; |
| 353 | void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; | 356 | void Destroy(VkDevice, VkPipeline, const DeviceDispatch&) noexcept; |
| 357 | void Destroy(VkDevice, VkPipelineCache, const DeviceDispatch&) noexcept; | ||
| 354 | void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; | 358 | void Destroy(VkDevice, VkPipelineLayout, const DeviceDispatch&) noexcept; |
| 355 | void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; | 359 | void Destroy(VkDevice, VkQueryPool, const DeviceDispatch&) noexcept; |
| 356 | void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; | 360 | void Destroy(VkDevice, VkRenderPass, const DeviceDispatch&) noexcept; |
| @@ -773,6 +777,18 @@ public: | |||
| 773 | void SetObjectNameEXT(const char* name) const; | 777 | void SetObjectNameEXT(const char* name) const; |
| 774 | }; | 778 | }; |
| 775 | 779 | ||
| 780 | class PipelineCache : public Handle<VkPipelineCache, VkDevice, DeviceDispatch> { | ||
| 781 | using Handle<VkPipelineCache, VkDevice, DeviceDispatch>::Handle; | ||
| 782 | |||
| 783 | public: | ||
| 784 | /// Set object name. | ||
| 785 | void SetObjectNameEXT(const char* name) const; | ||
| 786 | |||
| 787 | VkResult Read(size_t* size, void* data) const noexcept { | ||
| 788 | return dld->vkGetPipelineCacheData(owner, handle, size, data); | ||
| 789 | } | ||
| 790 | }; | ||
| 791 | |||
| 776 | class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { | 792 | class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { |
| 777 | using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; | 793 | using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; |
| 778 | 794 | ||
| @@ -844,11 +860,15 @@ public: | |||
| 844 | 860 | ||
| 845 | DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; | 861 | DescriptorSetLayout CreateDescriptorSetLayout(const VkDescriptorSetLayoutCreateInfo& ci) const; |
| 846 | 862 | ||
| 863 | PipelineCache CreatePipelineCache(const VkPipelineCacheCreateInfo& ci) const; | ||
| 864 | |||
| 847 | PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; | 865 | PipelineLayout CreatePipelineLayout(const VkPipelineLayoutCreateInfo& ci) const; |
| 848 | 866 | ||
| 849 | Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci) const; | 867 | Pipeline CreateGraphicsPipeline(const VkGraphicsPipelineCreateInfo& ci, |
| 868 | VkPipelineCache cache = nullptr) const; | ||
| 850 | 869 | ||
| 851 | Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci) const; | 870 | Pipeline CreateComputePipeline(const VkComputePipelineCreateInfo& ci, |
| 871 | VkPipelineCache cache = nullptr) const; | ||
| 852 | 872 | ||
| 853 | Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; | 873 | Sampler CreateSampler(const VkSamplerCreateInfo& ci) const; |
| 854 | 874 | ||
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9425b5bd..0db62baa3 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -690,6 +690,7 @@ void Config::ReadRendererValues() { | |||
| 690 | qt_config->beginGroup(QStringLiteral("Renderer")); | 690 | qt_config->beginGroup(QStringLiteral("Renderer")); |
| 691 | 691 | ||
| 692 | ReadGlobalSetting(Settings::values.renderer_backend); | 692 | ReadGlobalSetting(Settings::values.renderer_backend); |
| 693 | ReadGlobalSetting(Settings::values.renderer_force_max_clock); | ||
| 693 | ReadGlobalSetting(Settings::values.vulkan_device); | 694 | ReadGlobalSetting(Settings::values.vulkan_device); |
| 694 | ReadGlobalSetting(Settings::values.fullscreen_mode); | 695 | ReadGlobalSetting(Settings::values.fullscreen_mode); |
| 695 | ReadGlobalSetting(Settings::values.aspect_ratio); | 696 | ReadGlobalSetting(Settings::values.aspect_ratio); |
| @@ -709,6 +710,7 @@ void Config::ReadRendererValues() { | |||
| 709 | ReadGlobalSetting(Settings::values.use_asynchronous_shaders); | 710 | ReadGlobalSetting(Settings::values.use_asynchronous_shaders); |
| 710 | ReadGlobalSetting(Settings::values.use_fast_gpu_time); | 711 | ReadGlobalSetting(Settings::values.use_fast_gpu_time); |
| 711 | ReadGlobalSetting(Settings::values.use_pessimistic_flushes); | 712 | ReadGlobalSetting(Settings::values.use_pessimistic_flushes); |
| 713 | ReadGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); | ||
| 712 | ReadGlobalSetting(Settings::values.bg_red); | 714 | ReadGlobalSetting(Settings::values.bg_red); |
| 713 | ReadGlobalSetting(Settings::values.bg_green); | 715 | ReadGlobalSetting(Settings::values.bg_green); |
| 714 | ReadGlobalSetting(Settings::values.bg_blue); | 716 | ReadGlobalSetting(Settings::values.bg_blue); |
| @@ -1305,6 +1307,9 @@ void Config::SaveRendererValues() { | |||
| 1305 | static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), | 1307 | static_cast<u32>(Settings::values.renderer_backend.GetValue(global)), |
| 1306 | static_cast<u32>(Settings::values.renderer_backend.GetDefault()), | 1308 | static_cast<u32>(Settings::values.renderer_backend.GetDefault()), |
| 1307 | Settings::values.renderer_backend.UsingGlobal()); | 1309 | Settings::values.renderer_backend.UsingGlobal()); |
| 1310 | WriteSetting(QString::fromStdString(Settings::values.renderer_force_max_clock.GetLabel()), | ||
| 1311 | static_cast<u32>(Settings::values.renderer_force_max_clock.GetValue(global)), | ||
| 1312 | static_cast<u32>(Settings::values.renderer_force_max_clock.GetDefault())); | ||
| 1308 | WriteGlobalSetting(Settings::values.vulkan_device); | 1313 | WriteGlobalSetting(Settings::values.vulkan_device); |
| 1309 | WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), | 1314 | WriteSetting(QString::fromStdString(Settings::values.fullscreen_mode.GetLabel()), |
| 1310 | static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), | 1315 | static_cast<u32>(Settings::values.fullscreen_mode.GetValue(global)), |
| @@ -1348,6 +1353,7 @@ void Config::SaveRendererValues() { | |||
| 1348 | WriteGlobalSetting(Settings::values.use_asynchronous_shaders); | 1353 | WriteGlobalSetting(Settings::values.use_asynchronous_shaders); |
| 1349 | WriteGlobalSetting(Settings::values.use_fast_gpu_time); | 1354 | WriteGlobalSetting(Settings::values.use_fast_gpu_time); |
| 1350 | WriteGlobalSetting(Settings::values.use_pessimistic_flushes); | 1355 | WriteGlobalSetting(Settings::values.use_pessimistic_flushes); |
| 1356 | WriteGlobalSetting(Settings::values.use_vulkan_driver_pipeline_cache); | ||
| 1351 | WriteGlobalSetting(Settings::values.bg_red); | 1357 | WriteGlobalSetting(Settings::values.bg_red); |
| 1352 | WriteGlobalSetting(Settings::values.bg_green); | 1358 | WriteGlobalSetting(Settings::values.bg_green); |
| 1353 | WriteGlobalSetting(Settings::values.bg_blue); | 1359 | WriteGlobalSetting(Settings::values.bg_blue); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 01f074699..fdf8485ce 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp | |||
| @@ -25,10 +25,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 25 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); | 25 | ui->use_asynchronous_shaders->setEnabled(runtime_lock); |
| 26 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); | 26 | ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); |
| 27 | 27 | ||
| 28 | ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue()); | ||
| 28 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); | 29 | ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); |
| 29 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); | 30 | ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); |
| 30 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); | 31 | ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); |
| 31 | ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue()); | 32 | ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue()); |
| 33 | ui->use_vulkan_driver_pipeline_cache->setChecked( | ||
| 34 | Settings::values.use_vulkan_driver_pipeline_cache.GetValue()); | ||
| 32 | 35 | ||
| 33 | if (Settings::IsConfiguringGlobal()) { | 36 | if (Settings::IsConfiguringGlobal()) { |
| 34 | ui->gpu_accuracy->setCurrentIndex( | 37 | ui->gpu_accuracy->setCurrentIndex( |
| @@ -37,6 +40,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 37 | Settings::values.max_anisotropy.GetValue()); | 40 | Settings::values.max_anisotropy.GetValue()); |
| 38 | } else { | 41 | } else { |
| 39 | ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); | 42 | ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); |
| 43 | ConfigurationShared::SetPerGameSetting(ui->renderer_force_max_clock, | ||
| 44 | &Settings::values.renderer_force_max_clock); | ||
| 40 | ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, | 45 | ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, |
| 41 | &Settings::values.max_anisotropy); | 46 | &Settings::values.max_anisotropy); |
| 42 | ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, | 47 | ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, |
| @@ -48,6 +53,9 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { | |||
| 48 | 53 | ||
| 49 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { | 54 | void ConfigureGraphicsAdvanced::ApplyConfiguration() { |
| 50 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); | 55 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.gpu_accuracy, ui->gpu_accuracy); |
| 56 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.renderer_force_max_clock, | ||
| 57 | ui->renderer_force_max_clock, | ||
| 58 | renderer_force_max_clock); | ||
| 51 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, | 59 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy, |
| 52 | ui->anisotropic_filtering_combobox); | 60 | ui->anisotropic_filtering_combobox); |
| 53 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); | 61 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync); |
| @@ -58,6 +66,9 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { | |||
| 58 | ui->use_fast_gpu_time, use_fast_gpu_time); | 66 | ui->use_fast_gpu_time, use_fast_gpu_time); |
| 59 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes, | 67 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_pessimistic_flushes, |
| 60 | ui->use_pessimistic_flushes, use_pessimistic_flushes); | 68 | ui->use_pessimistic_flushes, use_pessimistic_flushes); |
| 69 | ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vulkan_driver_pipeline_cache, | ||
| 70 | ui->use_vulkan_driver_pipeline_cache, | ||
| 71 | use_vulkan_driver_pipeline_cache); | ||
| 61 | } | 72 | } |
| 62 | 73 | ||
| 63 | void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { | 74 | void ConfigureGraphicsAdvanced::changeEvent(QEvent* event) { |
| @@ -76,18 +87,25 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 76 | // Disable if not global (only happens during game) | 87 | // Disable if not global (only happens during game) |
| 77 | if (Settings::IsConfiguringGlobal()) { | 88 | if (Settings::IsConfiguringGlobal()) { |
| 78 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); | 89 | ui->gpu_accuracy->setEnabled(Settings::values.gpu_accuracy.UsingGlobal()); |
| 90 | ui->renderer_force_max_clock->setEnabled( | ||
| 91 | Settings::values.renderer_force_max_clock.UsingGlobal()); | ||
| 79 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); | 92 | ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal()); |
| 80 | ui->use_asynchronous_shaders->setEnabled( | 93 | ui->use_asynchronous_shaders->setEnabled( |
| 81 | Settings::values.use_asynchronous_shaders.UsingGlobal()); | 94 | Settings::values.use_asynchronous_shaders.UsingGlobal()); |
| 82 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); | 95 | ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); |
| 83 | ui->use_pessimistic_flushes->setEnabled( | 96 | ui->use_pessimistic_flushes->setEnabled( |
| 84 | Settings::values.use_pessimistic_flushes.UsingGlobal()); | 97 | Settings::values.use_pessimistic_flushes.UsingGlobal()); |
| 98 | ui->use_vulkan_driver_pipeline_cache->setEnabled( | ||
| 99 | Settings::values.use_vulkan_driver_pipeline_cache.UsingGlobal()); | ||
| 85 | ui->anisotropic_filtering_combobox->setEnabled( | 100 | ui->anisotropic_filtering_combobox->setEnabled( |
| 86 | Settings::values.max_anisotropy.UsingGlobal()); | 101 | Settings::values.max_anisotropy.UsingGlobal()); |
| 87 | 102 | ||
| 88 | return; | 103 | return; |
| 89 | } | 104 | } |
| 90 | 105 | ||
| 106 | ConfigurationShared::SetColoredTristate(ui->renderer_force_max_clock, | ||
| 107 | Settings::values.renderer_force_max_clock, | ||
| 108 | renderer_force_max_clock); | ||
| 91 | ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); | 109 | ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync); |
| 92 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, | 110 | ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders, |
| 93 | Settings::values.use_asynchronous_shaders, | 111 | Settings::values.use_asynchronous_shaders, |
| @@ -97,6 +115,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { | |||
| 97 | ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes, | 115 | ConfigurationShared::SetColoredTristate(ui->use_pessimistic_flushes, |
| 98 | Settings::values.use_pessimistic_flushes, | 116 | Settings::values.use_pessimistic_flushes, |
| 99 | use_pessimistic_flushes); | 117 | use_pessimistic_flushes); |
| 118 | ConfigurationShared::SetColoredTristate(ui->use_vulkan_driver_pipeline_cache, | ||
| 119 | Settings::values.use_vulkan_driver_pipeline_cache, | ||
| 120 | use_vulkan_driver_pipeline_cache); | ||
| 100 | ConfigurationShared::SetColoredComboBox( | 121 | ConfigurationShared::SetColoredComboBox( |
| 101 | ui->gpu_accuracy, ui->label_gpu_accuracy, | 122 | ui->gpu_accuracy, ui->label_gpu_accuracy, |
| 102 | static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); | 123 | static_cast<int>(Settings::values.gpu_accuracy.GetValue(true))); |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index 12e816905..df557d585 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h | |||
| @@ -36,10 +36,12 @@ private: | |||
| 36 | 36 | ||
| 37 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; | 37 | std::unique_ptr<Ui::ConfigureGraphicsAdvanced> ui; |
| 38 | 38 | ||
| 39 | ConfigurationShared::CheckState renderer_force_max_clock; | ||
| 39 | ConfigurationShared::CheckState use_vsync; | 40 | ConfigurationShared::CheckState use_vsync; |
| 40 | ConfigurationShared::CheckState use_asynchronous_shaders; | 41 | ConfigurationShared::CheckState use_asynchronous_shaders; |
| 41 | ConfigurationShared::CheckState use_fast_gpu_time; | 42 | ConfigurationShared::CheckState use_fast_gpu_time; |
| 42 | ConfigurationShared::CheckState use_pessimistic_flushes; | 43 | ConfigurationShared::CheckState use_pessimistic_flushes; |
| 44 | ConfigurationShared::CheckState use_vulkan_driver_pipeline_cache; | ||
| 43 | 45 | ||
| 44 | const Core::System& system; | 46 | const Core::System& system; |
| 45 | }; | 47 | }; |
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 87a121471..061885e30 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui | |||
| @@ -70,6 +70,16 @@ | |||
| 70 | </widget> | 70 | </widget> |
| 71 | </item> | 71 | </item> |
| 72 | <item> | 72 | <item> |
| 73 | <widget class="QCheckBox" name="renderer_force_max_clock"> | ||
| 74 | <property name="toolTip"> | ||
| 75 | <string>Runs work in the background while waiting for graphics commands to keep the GPU from lowering its clock speed.</string> | ||
| 76 | </property> | ||
| 77 | <property name="text"> | ||
| 78 | <string>Force maximum clocks (Vulkan only)</string> | ||
| 79 | </property> | ||
| 80 | </widget> | ||
| 81 | </item> | ||
| 82 | <item> | ||
| 73 | <widget class="QCheckBox" name="use_vsync"> | 83 | <widget class="QCheckBox" name="use_vsync"> |
| 74 | <property name="toolTip"> | 84 | <property name="toolTip"> |
| 75 | <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> | 85 | <string>VSync prevents the screen from tearing, but some graphics cards have lower performance with VSync enabled. Keep it enabled if you don't notice a performance difference.</string> |
| @@ -110,6 +120,16 @@ | |||
| 110 | </widget> | 120 | </widget> |
| 111 | </item> | 121 | </item> |
| 112 | <item> | 122 | <item> |
| 123 | <widget class="QCheckBox" name="use_vulkan_driver_pipeline_cache"> | ||
| 124 | <property name="toolTip"> | ||
| 125 | <string>Enables GPU vendor-specific pipeline cache. This option can improve shader loading time significantly in cases where the Vulkan driver does not store pipeline cache files internally.</string> | ||
| 126 | </property> | ||
| 127 | <property name="text"> | ||
| 128 | <string>Use Vulkan pipeline cache</string> | ||
| 129 | </property> | ||
| 130 | </widget> | ||
| 131 | </item> | ||
| 132 | <item> | ||
| 113 | <widget class="QWidget" name="af_layout" native="true"> | 133 | <widget class="QWidget" name="af_layout" native="true"> |
| 114 | <layout class="QHBoxLayout" name="horizontalLayout_1"> | 134 | <layout class="QHBoxLayout" name="horizontalLayout_1"> |
| 115 | <property name="leftMargin"> | 135 | <property name="leftMargin"> |
diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 524650144..c55f81c2f 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp | |||
| @@ -2229,8 +2229,10 @@ void GMainWindow::OnGameListRemoveFile(u64 program_id, GameListRemoveTarget targ | |||
| 2229 | } | 2229 | } |
| 2230 | 2230 | ||
| 2231 | switch (target) { | 2231 | switch (target) { |
| 2232 | case GameListRemoveTarget::GlShaderCache: | ||
| 2233 | case GameListRemoveTarget::VkShaderCache: | 2232 | case GameListRemoveTarget::VkShaderCache: |
| 2233 | RemoveVulkanDriverPipelineCache(program_id); | ||
| 2234 | [[fallthrough]]; | ||
| 2235 | case GameListRemoveTarget::GlShaderCache: | ||
| 2234 | RemoveTransferableShaderCache(program_id, target); | 2236 | RemoveTransferableShaderCache(program_id, target); |
| 2235 | break; | 2237 | break; |
| 2236 | case GameListRemoveTarget::AllShaderCache: | 2238 | case GameListRemoveTarget::AllShaderCache: |
| @@ -2271,6 +2273,22 @@ void GMainWindow::RemoveTransferableShaderCache(u64 program_id, GameListRemoveTa | |||
| 2271 | } | 2273 | } |
| 2272 | } | 2274 | } |
| 2273 | 2275 | ||
| 2276 | void GMainWindow::RemoveVulkanDriverPipelineCache(u64 program_id) { | ||
| 2277 | static constexpr std::string_view target_file_name = "vulkan_pipelines.bin"; | ||
| 2278 | |||
| 2279 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); | ||
| 2280 | const auto shader_cache_folder_path = shader_cache_dir / fmt::format("{:016x}", program_id); | ||
| 2281 | const auto target_file = shader_cache_folder_path / target_file_name; | ||
| 2282 | |||
| 2283 | if (!Common::FS::Exists(target_file)) { | ||
| 2284 | return; | ||
| 2285 | } | ||
| 2286 | if (!Common::FS::RemoveFile(target_file)) { | ||
| 2287 | QMessageBox::warning(this, tr("Error Removing Vulkan Driver Pipeline Cache"), | ||
| 2288 | tr("Failed to remove the driver pipeline cache.")); | ||
| 2289 | } | ||
| 2290 | } | ||
| 2291 | |||
| 2274 | void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { | 2292 | void GMainWindow::RemoveAllTransferableShaderCaches(u64 program_id) { |
| 2275 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); | 2293 | const auto shader_cache_dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir); |
| 2276 | const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); | 2294 | const auto program_shader_cache_dir = shader_cache_dir / fmt::format("{:016x}", program_id); |
diff --git a/src/yuzu/main.h b/src/yuzu/main.h index db318485d..f25ce65a8 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h | |||
| @@ -347,6 +347,7 @@ private: | |||
| 347 | void RemoveUpdateContent(u64 program_id, InstalledEntryType type); | 347 | void RemoveUpdateContent(u64 program_id, InstalledEntryType type); |
| 348 | void RemoveAddOnContent(u64 program_id, InstalledEntryType type); | 348 | void RemoveAddOnContent(u64 program_id, InstalledEntryType type); |
| 349 | void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); | 349 | void RemoveTransferableShaderCache(u64 program_id, GameListRemoveTarget target); |
| 350 | void RemoveVulkanDriverPipelineCache(u64 program_id); | ||
| 350 | void RemoveAllTransferableShaderCaches(u64 program_id); | 351 | void RemoveAllTransferableShaderCaches(u64 program_id); |
| 351 | void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); | 352 | void RemoveCustomConfiguration(u64 program_id, const std::string& game_path); |
| 352 | std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); | 353 | std::optional<u64> SelectRomFSDumpTarget(const FileSys::ContentProvider&, u64 program_id); |
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 1e45e57bc..527017282 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -296,6 +296,7 @@ void Config::ReadValues() { | |||
| 296 | 296 | ||
| 297 | // Renderer | 297 | // Renderer |
| 298 | ReadSetting("Renderer", Settings::values.renderer_backend); | 298 | ReadSetting("Renderer", Settings::values.renderer_backend); |
| 299 | ReadSetting("Renderer", Settings::values.renderer_force_max_clock); | ||
| 299 | ReadSetting("Renderer", Settings::values.renderer_debug); | 300 | ReadSetting("Renderer", Settings::values.renderer_debug); |
| 300 | ReadSetting("Renderer", Settings::values.renderer_shader_feedback); | 301 | ReadSetting("Renderer", Settings::values.renderer_shader_feedback); |
| 301 | ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); | 302 | ReadSetting("Renderer", Settings::values.enable_nsight_aftermath); |
| @@ -321,6 +322,7 @@ void Config::ReadValues() { | |||
| 321 | ReadSetting("Renderer", Settings::values.accelerate_astc); | 322 | ReadSetting("Renderer", Settings::values.accelerate_astc); |
| 322 | ReadSetting("Renderer", Settings::values.use_fast_gpu_time); | 323 | ReadSetting("Renderer", Settings::values.use_fast_gpu_time); |
| 323 | ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); | 324 | ReadSetting("Renderer", Settings::values.use_pessimistic_flushes); |
| 325 | ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); | ||
| 324 | 326 | ||
| 325 | ReadSetting("Renderer", Settings::values.bg_red); | 327 | ReadSetting("Renderer", Settings::values.bg_red); |
| 326 | ReadSetting("Renderer", Settings::values.bg_green); | 328 | ReadSetting("Renderer", Settings::values.bg_green); |