diff options
| -rw-r--r-- | src/input_common/input_engine.cpp | 11 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_master_semaphore.cpp | 46 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_master_semaphore.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 2 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 36 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 3 | ||||
| -rw-r--r-- | src/video_core/vulkan_common/vulkan_device.cpp | 2 |
10 files changed, 117 insertions, 9 deletions
diff --git a/src/input_common/input_engine.cpp b/src/input_common/input_engine.cpp index 91aa96aa7..e4c5b5b3c 100644 --- a/src/input_common/input_engine.cpp +++ b/src/input_common/input_engine.cpp | |||
| @@ -380,13 +380,16 @@ void InputEngine::TriggerOnMotionChange(const PadIdentifier& identifier, int mot | |||
| 380 | if (!configuring || !mapping_callback.on_data) { | 380 | if (!configuring || !mapping_callback.on_data) { |
| 381 | return; | 381 | return; |
| 382 | } | 382 | } |
| 383 | const auto old_value = GetMotion(identifier, motion); | ||
| 383 | bool is_active = false; | 384 | bool is_active = false; |
| 384 | if (std::abs(value.accel_x) > 1.5f || std::abs(value.accel_y) > 1.5f || | 385 | if (std::abs(value.accel_x - old_value.accel_x) > 1.5f || |
| 385 | std::abs(value.accel_z) > 1.5f) { | 386 | std::abs(value.accel_y - old_value.accel_y) > 1.5f || |
| 387 | std::abs(value.accel_z - old_value.accel_z) > 1.5f) { | ||
| 386 | is_active = true; | 388 | is_active = true; |
| 387 | } | 389 | } |
| 388 | if (std::abs(value.gyro_x) > 0.6f || std::abs(value.gyro_y) > 0.6f || | 390 | if (std::abs(value.gyro_x - old_value.gyro_x) > 0.6f || |
| 389 | std::abs(value.gyro_z) > 0.6f) { | 391 | std::abs(value.gyro_y - old_value.gyro_y) > 0.6f || |
| 392 | std::abs(value.gyro_z - old_value.gyro_z) > 0.6f) { | ||
| 390 | is_active = true; | 393 | is_active = true; |
| 391 | } | 394 | } |
| 392 | if (!is_active) { | 395 | if (!is_active) { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 1190999a8..3e9b3302b 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -144,6 +144,10 @@ public: | |||
| 144 | return state_tracker; | 144 | return state_tracker; |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | void BarrierFeedbackLoop() const noexcept { | ||
| 148 | // OpenGL does not require a barrier for attachment feedback loops. | ||
| 149 | } | ||
| 150 | |||
| 147 | private: | 151 | private: |
| 148 | struct StagingBuffers { | 152 | struct StagingBuffers { |
| 149 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); | 153 | explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index f1bcd5cd6..506b78f08 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -481,12 +481,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
| 481 | if constexpr (Spec::enabled_stages[4]) { | 481 | if constexpr (Spec::enabled_stages[4]) { |
| 482 | prepare_stage(4); | 482 | prepare_stage(4); |
| 483 | } | 483 | } |
| 484 | texture_cache.UpdateRenderTargets(false); | ||
| 485 | texture_cache.CheckFeedbackLoop(views); | ||
| 484 | ConfigureDraw(rescaling, render_area); | 486 | ConfigureDraw(rescaling, render_area); |
| 485 | } | 487 | } |
| 486 | 488 | ||
| 487 | void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, | 489 | void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, |
| 488 | const RenderAreaPushConstant& render_area) { | 490 | const RenderAreaPushConstant& render_area) { |
| 489 | texture_cache.UpdateRenderTargets(false); | ||
| 490 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); | 491 | scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); |
| 491 | 492 | ||
| 492 | if (!is_built.load(std::memory_order::relaxed)) { | 493 | if (!is_built.load(std::memory_order::relaxed)) { |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 47c74e4d8..8b65aeaeb 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp | |||
| @@ -10,11 +10,16 @@ | |||
| 10 | 10 | ||
| 11 | namespace Vulkan { | 11 | namespace Vulkan { |
| 12 | 12 | ||
| 13 | constexpr u64 FENCE_RESERVE_SIZE = 8; | ||
| 14 | |||
| 13 | MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { | 15 | MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { |
| 14 | if (!device.HasTimelineSemaphore()) { | 16 | if (!device.HasTimelineSemaphore()) { |
| 15 | static constexpr VkFenceCreateInfo fence_ci{ | 17 | static constexpr VkFenceCreateInfo fence_ci{ |
| 16 | .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; | 18 | .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; |
| 17 | fence = device.GetLogical().CreateFence(fence_ci); | 19 | free_queue.resize(FENCE_RESERVE_SIZE); |
| 20 | std::ranges::generate(free_queue, | ||
| 21 | [&] { return device.GetLogical().CreateFence(fence_ci); }); | ||
| 22 | wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); }); | ||
| 18 | return; | 23 | return; |
| 19 | } | 24 | } |
| 20 | 25 | ||
| @@ -167,16 +172,53 @@ VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphor | |||
| 167 | .pSignalSemaphores = &signal_semaphore, | 172 | .pSignalSemaphores = &signal_semaphore, |
| 168 | }; | 173 | }; |
| 169 | 174 | ||
| 175 | auto fence = GetFreeFence(); | ||
| 170 | auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); | 176 | auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); |
| 171 | 177 | ||
| 172 | if (result == VK_SUCCESS) { | 178 | if (result == VK_SUCCESS) { |
| 179 | std::scoped_lock lock{wait_mutex}; | ||
| 180 | wait_queue.emplace(host_tick, std::move(fence)); | ||
| 181 | wait_cv.notify_one(); | ||
| 182 | } | ||
| 183 | |||
| 184 | return result; | ||
| 185 | } | ||
| 186 | |||
| 187 | void MasterSemaphore::WaitThread(std::stop_token token) { | ||
| 188 | while (!token.stop_requested()) { | ||
| 189 | u64 host_tick; | ||
| 190 | vk::Fence fence; | ||
| 191 | { | ||
| 192 | std::unique_lock lock{wait_mutex}; | ||
| 193 | Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); }); | ||
| 194 | if (token.stop_requested()) { | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | std::tie(host_tick, fence) = std::move(wait_queue.front()); | ||
| 198 | wait_queue.pop(); | ||
| 199 | } | ||
| 200 | |||
| 173 | fence.Wait(); | 201 | fence.Wait(); |
| 174 | fence.Reset(); | 202 | fence.Reset(); |
| 175 | gpu_tick.store(host_tick); | 203 | gpu_tick.store(host_tick); |
| 176 | gpu_tick.notify_all(); | 204 | gpu_tick.notify_all(); |
| 205 | |||
| 206 | std::scoped_lock lock{free_mutex}; | ||
| 207 | free_queue.push_front(std::move(fence)); | ||
| 177 | } | 208 | } |
| 209 | } | ||
| 178 | 210 | ||
| 179 | return result; | 211 | vk::Fence MasterSemaphore::GetFreeFence() { |
| 212 | std::scoped_lock lock{free_mutex}; | ||
| 213 | if (free_queue.empty()) { | ||
| 214 | static constexpr VkFenceCreateInfo fence_ci{ | ||
| 215 | .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; | ||
| 216 | return device.GetLogical().CreateFence(fence_ci); | ||
| 217 | } | ||
| 218 | |||
| 219 | auto fence = std::move(free_queue.back()); | ||
| 220 | free_queue.pop_back(); | ||
| 221 | return fence; | ||
| 180 | } | 222 | } |
| 181 | 223 | ||
| 182 | } // namespace Vulkan | 224 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index f2f61f781..1e7c90215 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -5,8 +5,10 @@ | |||
| 5 | 5 | ||
| 6 | #include <atomic> | 6 | #include <atomic> |
| 7 | #include <condition_variable> | 7 | #include <condition_variable> |
| 8 | #include <deque> | ||
| 8 | #include <mutex> | 9 | #include <mutex> |
| 9 | #include <thread> | 10 | #include <thread> |
| 11 | #include <queue> | ||
| 10 | 12 | ||
| 11 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 12 | #include "common/polyfill_thread.h" | 14 | #include "common/polyfill_thread.h" |
| @@ -17,6 +19,8 @@ namespace Vulkan { | |||
| 17 | class Device; | 19 | class Device; |
| 18 | 20 | ||
| 19 | class MasterSemaphore { | 21 | class MasterSemaphore { |
| 22 | using Waitable = std::pair<u64, vk::Fence>; | ||
| 23 | |||
| 20 | public: | 24 | public: |
| 21 | explicit MasterSemaphore(const Device& device); | 25 | explicit MasterSemaphore(const Device& device); |
| 22 | ~MasterSemaphore(); | 26 | ~MasterSemaphore(); |
| @@ -57,13 +61,22 @@ private: | |||
| 57 | VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, | 61 | VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, |
| 58 | VkSemaphore wait_semaphore, u64 host_tick); | 62 | VkSemaphore wait_semaphore, u64 host_tick); |
| 59 | 63 | ||
| 64 | void WaitThread(std::stop_token token); | ||
| 65 | |||
| 66 | vk::Fence GetFreeFence(); | ||
| 67 | |||
| 60 | private: | 68 | private: |
| 61 | const Device& device; ///< Device. | 69 | const Device& device; ///< Device. |
| 62 | vk::Fence fence; ///< Fence. | ||
| 63 | vk::Semaphore semaphore; ///< Timeline semaphore. | 70 | vk::Semaphore semaphore; ///< Timeline semaphore. |
| 64 | std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. | 71 | std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. |
| 65 | std::atomic<u64> current_tick{1}; ///< Current logical tick. | 72 | std::atomic<u64> current_tick{1}; ///< Current logical tick. |
| 73 | std::mutex wait_mutex; | ||
| 74 | std::mutex free_mutex; | ||
| 75 | std::condition_variable_any wait_cv; | ||
| 76 | std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread. | ||
| 77 | std::deque<vk::Fence> free_queue; ///< Holds available fences for submission. | ||
| 66 | std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. | 78 | std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. |
| 79 | std::jthread wait_thread; ///< Helper thread that waits for submitted fences. | ||
| 67 | }; | 80 | }; |
| 68 | 81 | ||
| 69 | } // namespace Vulkan | 82 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 77d72697e..8711e2a87 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -861,6 +861,10 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { | |||
| 861 | return *buffers[level]; | 861 | return *buffers[level]; |
| 862 | } | 862 | } |
| 863 | 863 | ||
| 864 | void TextureCacheRuntime::BarrierFeedbackLoop() { | ||
| 865 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 866 | } | ||
| 867 | |||
| 864 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | 868 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, |
| 865 | std::span<const VideoCommon::ImageCopy> copies) { | 869 | std::span<const VideoCommon::ImageCopy> copies) { |
| 866 | std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); | 870 | std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4166b3d20..0f7a5ffd4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -103,6 +103,8 @@ public: | |||
| 103 | 103 | ||
| 104 | [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); | 104 | [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); |
| 105 | 105 | ||
| 106 | void BarrierFeedbackLoop(); | ||
| 107 | |||
| 106 | const Device& device; | 108 | const Device& device; |
| 107 | Scheduler& scheduler; | 109 | Scheduler& scheduler; |
| 108 | MemoryAllocator& memory_allocator; | 110 | MemoryAllocator& memory_allocator; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index b24086fce..8e62a5f78 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -184,6 +184,42 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { | |||
| 184 | } | 184 | } |
| 185 | 185 | ||
| 186 | template <class P> | 186 | template <class P> |
| 187 | void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { | ||
| 188 | const bool requires_barrier = [&] { | ||
| 189 | for (const auto& view : views) { | ||
| 190 | if (!view.id) { | ||
| 191 | continue; | ||
| 192 | } | ||
| 193 | auto& image_view = slot_image_views[view.id]; | ||
| 194 | |||
| 195 | // Check color targets | ||
| 196 | for (const auto& ct_view_id : render_targets.color_buffer_ids) { | ||
| 197 | if (ct_view_id) { | ||
| 198 | auto& ct_view = slot_image_views[ct_view_id]; | ||
| 199 | if (image_view.image_id == ct_view.image_id) { | ||
| 200 | return true; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | // Check zeta target | ||
| 206 | if (render_targets.depth_buffer_id) { | ||
| 207 | auto& zt_view = slot_image_views[render_targets.depth_buffer_id]; | ||
| 208 | if (image_view.image_id == zt_view.image_id) { | ||
| 209 | return true; | ||
| 210 | } | ||
| 211 | } | ||
| 212 | } | ||
| 213 | |||
| 214 | return false; | ||
| 215 | }(); | ||
| 216 | |||
| 217 | if (requires_barrier) { | ||
| 218 | runtime.BarrierFeedbackLoop(); | ||
| 219 | } | ||
| 220 | } | ||
| 221 | |||
| 222 | template <class P> | ||
| 187 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | 223 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { |
| 188 | if (index > channel_state->graphics_sampler_table.Limit()) { | 224 | if (index > channel_state->graphics_sampler_table.Limit()) { |
| 189 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | 225 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 0720494e5..1a3308e2d 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -148,6 +148,9 @@ public: | |||
| 148 | /// Fill image_view_ids with the compute images in indices | 148 | /// Fill image_view_ids with the compute images in indices |
| 149 | void FillComputeImageViews(std::span<ImageViewInOut> views); | 149 | void FillComputeImageViews(std::span<ImageViewInOut> views); |
| 150 | 150 | ||
| 151 | /// Handle feedback loops during draws. | ||
| 152 | void CheckFeedbackLoop(std::span<const ImageViewInOut> views); | ||
| 153 | |||
| 151 | /// Get the sampler from the graphics descriptor table in the specified index | 154 | /// Get the sampler from the graphics descriptor table in the specified index |
| 152 | Sampler* GetGraphicsSampler(u32 index); | 155 | Sampler* GetGraphicsSampler(u32 index); |
| 153 | 156 | ||
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index b49f78bc9..3a7c2dedf 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -1040,7 +1040,7 @@ void Device::CollectPhysicalMemoryInfo() { | |||
| 1040 | } | 1040 | } |
| 1041 | const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); | 1041 | const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); |
| 1042 | device_access_memory = static_cast<u64>(std::max<s64>( | 1042 | device_access_memory = static_cast<u64>(std::max<s64>( |
| 1043 | std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory))); | 1043 | std::min<s64>(available_memory - 8_GiB, 4_GiB), std::min<s64>(local_memory, 4_GiB))); |
| 1044 | } | 1044 | } |
| 1045 | 1045 | ||
| 1046 | void Device::CollectToolingInfo() { | 1046 | void Device::CollectToolingInfo() { |