diff options
| author | 2022-02-06 01:16:11 +0100 | |
|---|---|---|
| committer | 2022-10-06 21:00:52 +0200 | |
| commit | bc8b3d225eda388f0603830cbff8357893abb0f9 (patch) | |
| tree | 479b41b73913feceeeb0c9c6f3147d6491c0fa04 | |
| parent | MemoryManager: initial multi paging system implementation. (diff) | |
| download | yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.gz yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.tar.xz yuzu-bc8b3d225eda388f0603830cbff8357893abb0f9.zip | |
VideoCore: Refactor fencing system.
20 files changed, 154 insertions, 167 deletions
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index e6a976714..18c5324a9 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | |||
| @@ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} | |||
| 40 | 40 | ||
| 41 | void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, | 41 | void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, |
| 42 | u32 height, u32 stride, android::BufferTransformFlags transform, | 42 | u32 height, u32 stride, android::BufferTransformFlags transform, |
| 43 | const Common::Rectangle<int>& crop_rect) { | 43 | const Common::Rectangle<int>& crop_rect, |
| 44 | std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { | ||
| 44 | const VAddr addr = nvmap.GetHandleAddress(buffer_handle); | 45 | const VAddr addr = nvmap.GetHandleAddress(buffer_handle); |
| 45 | LOG_TRACE(Service, | 46 | LOG_TRACE(Service, |
| 46 | "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", | 47 | "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", |
| @@ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | |||
| 50 | stride, format, transform, crop_rect}; | 51 | stride, format, transform, crop_rect}; |
| 51 | 52 | ||
| 52 | system.GetPerfStats().EndSystemFrame(); | 53 | system.GetPerfStats().EndSystemFrame(); |
| 53 | system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0); | 54 | system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); |
| 54 | system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); | 55 | system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); |
| 55 | system.GetPerfStats().BeginSystemFrame(); | 56 | system.GetPerfStats().BeginSystemFrame(); |
| 56 | } | 57 | } |
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 1ca9b2e74..04217ab12 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h | |||
| @@ -38,7 +38,8 @@ public: | |||
| 38 | /// Performs a screen flip, drawing the buffer pointed to by the handle. | 38 | /// Performs a screen flip, drawing the buffer pointed to by the handle. |
| 39 | void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, | 39 | void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, |
| 40 | u32 stride, android::BufferTransformFlags transform, | 40 | u32 stride, android::BufferTransformFlags transform, |
| 41 | const Common::Rectangle<int>& crop_rect); | 41 | const Common::Rectangle<int>& crop_rect, |
| 42 | std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences); | ||
| 42 | 43 | ||
| 43 | Kernel::KEvent* QueryEvent(u32 event_id) override; | 44 | Kernel::KEvent* QueryEvent(u32 event_id) override; |
| 44 | 45 | ||
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index aa112021d..4658f1e8b 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp | |||
| @@ -269,17 +269,6 @@ void NVFlinger::Compose() { | |||
| 269 | return; // We are likely shutting down | 269 | return; // We are likely shutting down |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | auto& syncpoint_manager = system.Host1x().GetSyncpointManager(); | ||
| 273 | const auto& multi_fence = buffer.fence; | ||
| 274 | guard->unlock(); | ||
| 275 | for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | ||
| 276 | const auto& fence = multi_fence.fences[fence_id]; | ||
| 277 | syncpoint_manager.WaitGuest(fence.id, fence.value); | ||
| 278 | } | ||
| 279 | guard->lock(); | ||
| 280 | |||
| 281 | MicroProfileFlip(); | ||
| 282 | |||
| 283 | // Now send the buffer to the GPU for drawing. | 272 | // Now send the buffer to the GPU for drawing. |
| 284 | // TODO(Subv): Support more than just disp0. The display device selection is probably based | 273 | // TODO(Subv): Support more than just disp0. The display device selection is probably based |
| 285 | // on which display we're drawing (Default, Internal, External, etc) | 274 | // on which display we're drawing (Default, Internal, External, etc) |
| @@ -293,8 +282,10 @@ void NVFlinger::Compose() { | |||
| 293 | 282 | ||
| 294 | nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), | 283 | nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), |
| 295 | igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), | 284 | igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), |
| 296 | static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect); | 285 | static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect, |
| 286 | buffer.fence.fences, buffer.fence.num_fences); | ||
| 297 | 287 | ||
| 288 | MicroProfileFlip(); | ||
| 298 | guard->lock(); | 289 | guard->lock(); |
| 299 | 290 | ||
| 300 | swap_interval = buffer.swap_interval; | 291 | swap_interval = buffer.swap_interval; |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6b6764d72..e55cac0d6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -826,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
| 826 | const bool is_accuracy_normal = | 826 | const bool is_accuracy_normal = |
| 827 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | 827 | Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; |
| 828 | 828 | ||
| 829 | auto it = committed_ranges.begin(); | ||
| 830 | while (it != committed_ranges.end()) { | ||
| 831 | auto& current_intervals = *it; | ||
| 832 | auto next_it = std::next(it); | ||
| 833 | while (next_it != committed_ranges.end()) { | ||
| 834 | for (auto& interval : *next_it) { | ||
| 835 | current_intervals.subtract(interval); | ||
| 836 | } | ||
| 837 | next_it++; | ||
| 838 | } | ||
| 839 | it++; | ||
| 840 | } | ||
| 841 | |||
| 829 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | 842 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; |
| 830 | u64 total_size_bytes = 0; | 843 | u64 total_size_bytes = 0; |
| 831 | u64 largest_copy = 0; | 844 | u64 largest_copy = 0; |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index b01f04d0c..9835e3ac1 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, | |||
| 24 | void DmaPusher::DispatchCalls() { | 24 | void DmaPusher::DispatchCalls() { |
| 25 | MICROPROFILE_SCOPE(DispatchCalls); | 25 | MICROPROFILE_SCOPE(DispatchCalls); |
| 26 | 26 | ||
| 27 | gpu.SyncGuestHost(); | ||
| 28 | |||
| 29 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 30 | 28 | ||
| 31 | dma_state.is_last_call = true; | 29 | dma_state.is_last_call = true; |
| @@ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() { | |||
| 36 | } | 34 | } |
| 37 | } | 35 | } |
| 38 | gpu.FlushCommands(); | 36 | gpu.FlushCommands(); |
| 39 | gpu.SyncGuestHost(); | ||
| 40 | gpu.OnCommandListEnd(); | 37 | gpu.OnCommandListEnd(); |
| 41 | } | 38 | } |
| 42 | 39 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 3a4646289..950c70dcd 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 242 | return; | 242 | return; |
| 243 | case MAXWELL3D_REG_INDEX(fragment_barrier): | 243 | case MAXWELL3D_REG_INDEX(fragment_barrier): |
| 244 | return rasterizer->FragmentBarrier(); | 244 | return rasterizer->FragmentBarrier(); |
| 245 | case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): | ||
| 246 | rasterizer->InvalidateGPUCache(); | ||
| 247 | return rasterizer->WaitForIdle(); | ||
| 245 | case MAXWELL3D_REG_INDEX(tiled_cache_barrier): | 248 | case MAXWELL3D_REG_INDEX(tiled_cache_barrier): |
| 246 | return rasterizer->TiledCacheBarrier(); | 249 | return rasterizer->TiledCacheBarrier(); |
| 247 | } | 250 | } |
| @@ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 472 | 475 | ||
| 473 | switch (regs.query.query_get.operation) { | 476 | switch (regs.query.query_get.operation) { |
| 474 | case Regs::QueryOperation::Release: | 477 | case Regs::QueryOperation::Release: |
| 475 | if (regs.query.query_get.fence == 1) { | 478 | if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) { |
| 476 | rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); | 479 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 480 | const u32 payload = regs.query.query_sequence; | ||
| 481 | std::function<void()> operation([this, sequence_address, payload] { | ||
| 482 | memory_manager.Write<u32>(sequence_address, payload); | ||
| 483 | }); | ||
| 484 | rasterizer->SignalFence(std::move(operation)); | ||
| 477 | } else { | 485 | } else { |
| 478 | StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | 486 | struct LongQueryResult { |
| 487 | u64_le value; | ||
| 488 | u64_le timestamp; | ||
| 489 | }; | ||
| 490 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; | ||
| 491 | const u32 payload = regs.query.query_sequence; | ||
| 492 | std::function<void()> operation([this, sequence_address, payload] { | ||
| 493 | LongQueryResult query_result{payload, system.GPU().GetTicks()}; | ||
| 494 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||
| 495 | }); | ||
| 496 | rasterizer->SignalFence(std::move(operation)); | ||
| 479 | } | 497 | } |
| 480 | break; | 498 | break; |
| 481 | case Regs::QueryOperation::Acquire: | 499 | case Regs::QueryOperation::Acquire: |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 8c17639e4..dd9494efa 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 79 | u64 timestamp; | 79 | u64 timestamp; |
| 80 | }; | 80 | }; |
| 81 | 81 | ||
| 82 | Block block{}; | 82 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 83 | block.sequence = regs.semaphore_sequence; | 83 | const u32 payload = regs.semaphore_sequence; |
| 84 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | 84 | std::function<void()> operation([this, sequence_address, payload] { |
| 85 | // CoreTiming | 85 | Block block{}; |
| 86 | block.timestamp = gpu.GetTicks(); | 86 | block.sequence = payload; |
| 87 | memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); | 87 | block.timestamp = gpu.GetTicks(); |
| 88 | memory_manager.WriteBlock(sequence_address, &block, sizeof(block)); | ||
| 89 | }); | ||
| 90 | rasterizer->SignalFence(std::move(operation)); | ||
| 88 | } else { | 91 | } else { |
| 89 | do { | 92 | do { |
| 90 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | 93 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; |
| @@ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 94 | regs.acquire_active = true; | 97 | regs.acquire_active = true; |
| 95 | regs.acquire_mode = false; | 98 | regs.acquire_mode = false; |
| 96 | if (word != regs.acquire_value) { | 99 | if (word != regs.acquire_value) { |
| 100 | rasterizer->ReleaseFences(); | ||
| 97 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | 101 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); |
| 98 | continue; | 102 | continue; |
| 99 | } | 103 | } |
| @@ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 101 | regs.acquire_active = true; | 105 | regs.acquire_active = true; |
| 102 | regs.acquire_mode = true; | 106 | regs.acquire_mode = true; |
| 103 | if (word < regs.acquire_value) { | 107 | if (word < regs.acquire_value) { |
| 108 | rasterizer->ReleaseFences(); | ||
| 104 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | 109 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); |
| 105 | continue; | 110 | continue; |
| 106 | } | 111 | } |
| 107 | } else if (op == GpuSemaphoreOperation::AcquireMask) { | 112 | } else if (op == GpuSemaphoreOperation::AcquireMask) { |
| 108 | if (word & regs.semaphore_sequence == 0) { | 113 | if (word && regs.semaphore_sequence == 0) { |
| 114 | rasterizer->ReleaseFences(); | ||
| 109 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | 115 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); |
| 110 | continue; | 116 | continue; |
| 111 | } | 117 | } |
| @@ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 117 | } | 123 | } |
| 118 | 124 | ||
| 119 | void Puller::ProcessSemaphoreRelease() { | 125 | void Puller::ProcessSemaphoreRelease() { |
| 120 | rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); | 126 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 127 | const u32 payload = regs.semaphore_release; | ||
| 128 | std::function<void()> operation([this, sequence_address, payload] { | ||
| 129 | memory_manager.Write<u32>(sequence_address, payload); | ||
| 130 | }); | ||
| 131 | rasterizer->SignalFence(std::move(operation)); | ||
| 121 | } | 132 | } |
| 122 | 133 | ||
| 123 | void Puller::ProcessSemaphoreAcquire() { | 134 | void Puller::ProcessSemaphoreAcquire() { |
| 124 | const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | 135 | u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); |
| 125 | const auto value = regs.semaphore_acquire; | 136 | const auto value = regs.semaphore_acquire; |
| 126 | std::this_thread::sleep_for(std::chrono::milliseconds(5)); | 137 | while (word != value) { |
| 127 | if (word != value) { | ||
| 128 | regs.acquire_active = true; | 138 | regs.acquire_active = true; |
| 129 | regs.acquire_value = value; | 139 | regs.acquire_value = value; |
| 140 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 141 | rasterizer->ReleaseFences(); | ||
| 142 | word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||
| 130 | // TODO(kemathe73) figure out how to do the acquire_timeout | 143 | // TODO(kemathe73) figure out how to do the acquire_timeout |
| 131 | regs.acquire_mode = false; | 144 | regs.acquire_mode = false; |
| 132 | regs.acquire_source = false; | 145 | regs.acquire_source = false; |
| @@ -147,9 +160,9 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { | |||
| 147 | case BufferMethods::SemaphoreAddressHigh: | 160 | case BufferMethods::SemaphoreAddressHigh: |
| 148 | case BufferMethods::SemaphoreAddressLow: | 161 | case BufferMethods::SemaphoreAddressLow: |
| 149 | case BufferMethods::SemaphoreSequencePayload: | 162 | case BufferMethods::SemaphoreSequencePayload: |
| 150 | case BufferMethods::WrcacheFlush: | ||
| 151 | case BufferMethods::SyncpointPayload: | 163 | case BufferMethods::SyncpointPayload: |
| 152 | break; | 164 | break; |
| 165 | case BufferMethods::WrcacheFlush: | ||
| 153 | case BufferMethods::RefCnt: | 166 | case BufferMethods::RefCnt: |
| 154 | rasterizer->SignalReference(); | 167 | rasterizer->SignalReference(); |
| 155 | break; | 168 | break; |
| @@ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { | |||
| 173 | } | 186 | } |
| 174 | case BufferMethods::MemOpB: { | 187 | case BufferMethods::MemOpB: { |
| 175 | // Implement this better. | 188 | // Implement this better. |
| 176 | rasterizer->SyncGuestHost(); | 189 | rasterizer->InvalidateGPUCache(); |
| 177 | break; | 190 | break; |
| 178 | } | 191 | } |
| 179 | case BufferMethods::MemOpC: | 192 | case BufferMethods::MemOpC: |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 03a70e5e0..c390ac91b 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | 5 | ||
| 6 | #include <algorithm> | 6 | #include <algorithm> |
| 7 | #include <cstring> | 7 | #include <cstring> |
| 8 | #include <deque> | ||
| 9 | #include <functional> | ||
| 8 | #include <memory> | 10 | #include <memory> |
| 9 | #include <queue> | 11 | #include <queue> |
| 10 | 12 | ||
| @@ -19,28 +21,7 @@ namespace VideoCommon { | |||
| 19 | 21 | ||
| 20 | class FenceBase { | 22 | class FenceBase { |
| 21 | public: | 23 | public: |
| 22 | explicit FenceBase(u32 payload_, bool is_stubbed_) | 24 | explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} |
| 23 | : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} | ||
| 24 | |||
| 25 | explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_) | ||
| 26 | : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} | ||
| 27 | |||
| 28 | u8* GetAddress() const { | ||
| 29 | return address; | ||
| 30 | } | ||
| 31 | |||
| 32 | u32 GetPayload() const { | ||
| 33 | return payload; | ||
| 34 | } | ||
| 35 | |||
| 36 | bool IsSemaphore() const { | ||
| 37 | return is_semaphore; | ||
| 38 | } | ||
| 39 | |||
| 40 | private: | ||
| 41 | u8* address; | ||
| 42 | u32 payload; | ||
| 43 | bool is_semaphore; | ||
| 44 | 25 | ||
| 45 | protected: | 26 | protected: |
| 46 | bool is_stubbed; | 27 | bool is_stubbed; |
| @@ -60,31 +41,28 @@ public: | |||
| 60 | buffer_cache.AccumulateFlushes(); | 41 | buffer_cache.AccumulateFlushes(); |
| 61 | } | 42 | } |
| 62 | 43 | ||
| 63 | void SignalSemaphore(u8* addr, u32 value) { | 44 | void SyncOperation(std::function<void()>&& func) { |
| 45 | uncommitted_operations.emplace_back(std::move(func)); | ||
| 46 | } | ||
| 47 | |||
| 48 | void SignalFence(std::function<void()>&& func) { | ||
| 64 | TryReleasePendingFences(); | 49 | TryReleasePendingFences(); |
| 65 | const bool should_flush = ShouldFlush(); | 50 | const bool should_flush = ShouldFlush(); |
| 66 | CommitAsyncFlushes(); | 51 | CommitAsyncFlushes(); |
| 67 | TFence new_fence = CreateFence(addr, value, !should_flush); | 52 | uncommitted_operations.emplace_back(std::move(func)); |
| 53 | CommitOperations(); | ||
| 54 | TFence new_fence = CreateFence(!should_flush); | ||
| 68 | fences.push(new_fence); | 55 | fences.push(new_fence); |
| 69 | QueueFence(new_fence); | 56 | QueueFence(new_fence); |
| 70 | if (should_flush) { | 57 | if (should_flush) { |
| 71 | rasterizer.FlushCommands(); | 58 | rasterizer.FlushCommands(); |
| 72 | } | 59 | } |
| 73 | rasterizer.SyncGuestHost(); | ||
| 74 | } | 60 | } |
| 75 | 61 | ||
| 76 | void SignalSyncPoint(u32 value) { | 62 | void SignalSyncPoint(u32 value) { |
| 77 | syncpoint_manager.IncrementGuest(value); | 63 | syncpoint_manager.IncrementGuest(value); |
| 78 | TryReleasePendingFences(); | 64 | std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); }); |
| 79 | const bool should_flush = ShouldFlush(); | 65 | SignalFence(std::move(func)); |
| 80 | CommitAsyncFlushes(); | ||
| 81 | TFence new_fence = CreateFence(value, !should_flush); | ||
| 82 | fences.push(new_fence); | ||
| 83 | QueueFence(new_fence); | ||
| 84 | if (should_flush) { | ||
| 85 | rasterizer.FlushCommands(); | ||
| 86 | } | ||
| 87 | rasterizer.SyncGuestHost(); | ||
| 88 | } | 66 | } |
| 89 | 67 | ||
| 90 | void WaitPendingFences() { | 68 | void WaitPendingFences() { |
| @@ -94,12 +72,10 @@ public: | |||
| 94 | WaitFence(current_fence); | 72 | WaitFence(current_fence); |
| 95 | } | 73 | } |
| 96 | PopAsyncFlushes(); | 74 | PopAsyncFlushes(); |
| 97 | if (current_fence->IsSemaphore()) { | 75 | auto operations = std::move(pending_operations.front()); |
| 98 | char* address = reinterpret_cast<char*>(current_fence->GetAddress()); | 76 | pending_operations.pop_front(); |
| 99 | auto payload = current_fence->GetPayload(); | 77 | for (auto& operation : operations) { |
| 100 | std::memcpy(address, &payload, sizeof(payload)); | 78 | operation(); |
| 101 | } else { | ||
| 102 | syncpoint_manager.IncrementHost(current_fence->GetPayload()); | ||
| 103 | } | 79 | } |
| 104 | PopFence(); | 80 | PopFence(); |
| 105 | } | 81 | } |
| @@ -114,11 +90,9 @@ protected: | |||
| 114 | 90 | ||
| 115 | virtual ~FenceManager() = default; | 91 | virtual ~FenceManager() = default; |
| 116 | 92 | ||
| 117 | /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is | 93 | /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is |
| 118 | /// true | 94 | /// true |
| 119 | virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; | 95 | virtual TFence CreateFence(bool is_stubbed) = 0; |
| 120 | /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true | ||
| 121 | virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0; | ||
| 122 | /// Queues a fence into the backend if the fence isn't stubbed. | 96 | /// Queues a fence into the backend if the fence isn't stubbed. |
| 123 | virtual void QueueFence(TFence& fence) = 0; | 97 | virtual void QueueFence(TFence& fence) = 0; |
| 124 | /// Notifies that the backend fence has been signaled/reached in host GPU. | 98 | /// Notifies that the backend fence has been signaled/reached in host GPU. |
| @@ -141,12 +115,10 @@ private: | |||
| 141 | return; | 115 | return; |
| 142 | } | 116 | } |
| 143 | PopAsyncFlushes(); | 117 | PopAsyncFlushes(); |
| 144 | if (current_fence->IsSemaphore()) { | 118 | auto operations = std::move(pending_operations.front()); |
| 145 | char* address = reinterpret_cast<char*>(current_fence->GetAddress()); | 119 | pending_operations.pop_front(); |
| 146 | const auto payload = current_fence->GetPayload(); | 120 | for (auto& operation : operations) { |
| 147 | std::memcpy(address, &payload, sizeof(payload)); | 121 | operation(); |
| 148 | } else { | ||
| 149 | syncpoint_manager.IncrementHost(current_fence->GetPayload()); | ||
| 150 | } | 122 | } |
| 151 | PopFence(); | 123 | PopFence(); |
| 152 | } | 124 | } |
| @@ -165,16 +137,20 @@ private: | |||
| 165 | } | 137 | } |
| 166 | 138 | ||
| 167 | void PopAsyncFlushes() { | 139 | void PopAsyncFlushes() { |
| 168 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 140 | { |
| 169 | texture_cache.PopAsyncFlushes(); | 141 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 170 | buffer_cache.PopAsyncFlushes(); | 142 | texture_cache.PopAsyncFlushes(); |
| 143 | buffer_cache.PopAsyncFlushes(); | ||
| 144 | } | ||
| 171 | query_cache.PopAsyncFlushes(); | 145 | query_cache.PopAsyncFlushes(); |
| 172 | } | 146 | } |
| 173 | 147 | ||
| 174 | void CommitAsyncFlushes() { | 148 | void CommitAsyncFlushes() { |
| 175 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 149 | { |
| 176 | texture_cache.CommitAsyncFlushes(); | 150 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 177 | buffer_cache.CommitAsyncFlushes(); | 151 | texture_cache.CommitAsyncFlushes(); |
| 152 | buffer_cache.CommitAsyncFlushes(); | ||
| 153 | } | ||
| 178 | query_cache.CommitAsyncFlushes(); | 154 | query_cache.CommitAsyncFlushes(); |
| 179 | } | 155 | } |
| 180 | 156 | ||
| @@ -183,7 +159,13 @@ private: | |||
| 183 | fences.pop(); | 159 | fences.pop(); |
| 184 | } | 160 | } |
| 185 | 161 | ||
| 162 | void CommitOperations() { | ||
| 163 | pending_operations.emplace_back(std::move(uncommitted_operations)); | ||
| 164 | } | ||
| 165 | |||
| 186 | std::queue<TFence> fences; | 166 | std::queue<TFence> fences; |
| 167 | std::deque<std::function<void()>> uncommitted_operations; | ||
| 168 | std::deque<std::deque<std::function<void()>>> pending_operations; | ||
| 187 | 169 | ||
| 188 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; | 170 | DelayedDestructionRing<TFence, 6> delayed_destruction_ring; |
| 189 | }; | 171 | }; |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index a1d19b1c8..d7a3dd96b 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -93,16 +93,13 @@ struct GPU::Impl { | |||
| 93 | } | 93 | } |
| 94 | 94 | ||
| 95 | /// Synchronizes CPU writes with Host GPU memory. | 95 | /// Synchronizes CPU writes with Host GPU memory. |
| 96 | void SyncGuestHost() { | 96 | void InvalidateGPUCache() { |
| 97 | rasterizer->SyncGuestHost(); | 97 | rasterizer->InvalidateGPUCache(); |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | /// Signal the ending of command list. | 100 | /// Signal the ending of command list. |
| 101 | void OnCommandListEnd() { | 101 | void OnCommandListEnd() { |
| 102 | if (is_async) { | 102 | gpu_thread.OnCommandListEnd(); |
| 103 | // This command only applies to asynchronous GPU mode | ||
| 104 | gpu_thread.OnCommandListEnd(); | ||
| 105 | } | ||
| 106 | } | 103 | } |
| 107 | 104 | ||
| 108 | /// Request a host GPU memory flush from the CPU. | 105 | /// Request a host GPU memory flush from the CPU. |
| @@ -296,7 +293,7 @@ struct GPU::Impl { | |||
| 296 | } | 293 | } |
| 297 | 294 | ||
| 298 | void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | 295 | void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, |
| 299 | Service::Nvidia::NvFence* fences, size_t num_fences) { | 296 | std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { |
| 300 | size_t current_request_counter{}; | 297 | size_t current_request_counter{}; |
| 301 | { | 298 | { |
| 302 | std::unique_lock<std::mutex> lk(request_swap_mutex); | 299 | std::unique_lock<std::mutex> lk(request_swap_mutex); |
| @@ -412,8 +409,8 @@ void GPU::FlushCommands() { | |||
| 412 | impl->FlushCommands(); | 409 | impl->FlushCommands(); |
| 413 | } | 410 | } |
| 414 | 411 | ||
| 415 | void GPU::SyncGuestHost() { | 412 | void GPU::InvalidateGPUCache() { |
| 416 | impl->SyncGuestHost(); | 413 | impl->InvalidateGPUCache(); |
| 417 | } | 414 | } |
| 418 | 415 | ||
| 419 | void GPU::OnCommandListEnd() { | 416 | void GPU::OnCommandListEnd() { |
| @@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { | |||
| 488 | } | 485 | } |
| 489 | 486 | ||
| 490 | void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | 487 | void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, |
| 491 | Service::Nvidia::NvFence* fences, size_t num_fences) { | 488 | std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { |
| 492 | impl->RequestSwapBuffers(framebuffer, fences, num_fences); | 489 | impl->RequestSwapBuffers(framebuffer, fences, num_fences); |
| 493 | } | 490 | } |
| 494 | 491 | ||
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 655373b33..0a4a8b14f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -110,7 +110,7 @@ public: | |||
| 110 | /// Flush all current written commands into the host GPU for execution. | 110 | /// Flush all current written commands into the host GPU for execution. |
| 111 | void FlushCommands(); | 111 | void FlushCommands(); |
| 112 | /// Synchronizes CPU writes with Host GPU memory. | 112 | /// Synchronizes CPU writes with Host GPU memory. |
| 113 | void SyncGuestHost(); | 113 | void InvalidateGPUCache(); |
| 114 | /// Signal the ending of command list. | 114 | /// Signal the ending of command list. |
| 115 | void OnCommandListEnd(); | 115 | void OnCommandListEnd(); |
| 116 | 116 | ||
| @@ -180,7 +180,7 @@ public: | |||
| 180 | void RendererFrameEndNotify(); | 180 | void RendererFrameEndNotify(); |
| 181 | 181 | ||
| 182 | void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | 182 | void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, |
| 183 | Service::Nvidia::NvFence* fences, size_t num_fences); | 183 | std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences); |
| 184 | 184 | ||
| 185 | /// Performs any additional setup necessary in order to begin GPU emulation. | 185 | /// Performs any additional setup necessary in order to begin GPU emulation. |
| 186 | /// This can be used to launch any necessary threads and register any necessary | 186 | /// This can be used to launch any necessary threads and register any necessary |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 2c03545bf..1bd477011 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | void ThreadManager::TickGPU() { | 100 | void ThreadManager::TickGPU() { |
| 101 | PushCommand(GPUTickCommand(), true); | 101 | PushCommand(GPUTickCommand()); |
| 102 | } | 102 | } |
| 103 | 103 | ||
| 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 104 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5362aafb6..cb07f3d38 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -62,7 +62,10 @@ public: | |||
| 62 | virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0; | 62 | virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0; |
| 63 | 63 | ||
| 64 | /// Signal a GPU based semaphore as a fence | 64 | /// Signal a GPU based semaphore as a fence |
| 65 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | 65 | virtual void SignalFence(std::function<void()>&& func) = 0; |
| 66 | |||
| 67 | /// Send an operation to be done after a certain amount of flushes. | ||
| 68 | virtual void SyncOperation(std::function<void()>&& func) = 0; | ||
| 66 | 69 | ||
| 67 | /// Signal a GPU based syncpoint as a fence | 70 | /// Signal a GPU based syncpoint as a fence |
| 68 | virtual void SignalSyncPoint(u32 value) = 0; | 71 | virtual void SignalSyncPoint(u32 value) = 0; |
| @@ -89,7 +92,7 @@ public: | |||
| 89 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | 92 | virtual void OnCPUWrite(VAddr addr, u64 size) = 0; |
| 90 | 93 | ||
| 91 | /// Sync memory between guest and host. | 94 | /// Sync memory between guest and host. |
| 92 | virtual void SyncGuestHost() = 0; | 95 | virtual void InvalidateGPUCache() = 0; |
| 93 | 96 | ||
| 94 | /// Unmap memory range | 97 | /// Unmap memory range |
| 95 | virtual void UnmapMemory(VAddr addr, u64 size) = 0; | 98 | virtual void UnmapMemory(VAddr addr, u64 size) = 0; |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index c76446b60..91463f854 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -10,10 +10,7 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL { | 11 | namespace OpenGL { |
| 12 | 12 | ||
| 13 | GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} | 13 | GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {} |
| 14 | |||
| 15 | GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_) | ||
| 16 | : FenceBase{address_, payload_, is_stubbed_} {} | ||
| 17 | 14 | ||
| 18 | GLInnerFence::~GLInnerFence() = default; | 15 | GLInnerFence::~GLInnerFence() = default; |
| 19 | 16 | ||
| @@ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize | |||
| 48 | BufferCache& buffer_cache_, QueryCache& query_cache_) | 45 | BufferCache& buffer_cache_, QueryCache& query_cache_) |
| 49 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | 46 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} |
| 50 | 47 | ||
| 51 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | 48 | Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) { |
| 52 | return std::make_shared<GLInnerFence>(value, is_stubbed); | 49 | return std::make_shared<GLInnerFence>(is_stubbed); |
| 53 | } | ||
| 54 | |||
| 55 | Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) { | ||
| 56 | return std::make_shared<GLInnerFence>(addr, value, is_stubbed); | ||
| 57 | } | 50 | } |
| 58 | 51 | ||
| 59 | void FenceManagerOpenGL::QueueFence(Fence& fence) { | 52 | void FenceManagerOpenGL::QueueFence(Fence& fence) { |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index fced8d002..f1446e732 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -16,8 +16,7 @@ namespace OpenGL { | |||
| 16 | 16 | ||
| 17 | class GLInnerFence : public VideoCommon::FenceBase { | 17 | class GLInnerFence : public VideoCommon::FenceBase { |
| 18 | public: | 18 | public: |
| 19 | explicit GLInnerFence(u32 payload_, bool is_stubbed_); | 19 | explicit GLInnerFence(bool is_stubbed_); |
| 20 | explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_); | ||
| 21 | ~GLInnerFence(); | 20 | ~GLInnerFence(); |
| 22 | 21 | ||
| 23 | void Queue(); | 22 | void Queue(); |
| @@ -40,8 +39,7 @@ public: | |||
| 40 | QueryCache& query_cache); | 39 | QueryCache& query_cache); |
| 41 | 40 | ||
| 42 | protected: | 41 | protected: |
| 43 | Fence CreateFence(u32 value, bool is_stubbed) override; | 42 | Fence CreateFence(bool is_stubbed) override; |
| 44 | Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; | ||
| 45 | void QueueFence(Fence& fence) override; | 43 | void QueueFence(Fence& fence) override; |
| 46 | bool IsFenceSignaled(Fence& fence) const override; | 44 | bool IsFenceSignaled(Fence& fence) const override; |
| 47 | void WaitFence(Fence& fence) override; | 45 | void WaitFence(Fence& fence) override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b572950a6..6ebd6cff9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 358 | } | 358 | } |
| 359 | } | 359 | } |
| 360 | 360 | ||
| 361 | void RasterizerOpenGL::SyncGuestHost() { | 361 | void RasterizerOpenGL::InvalidateGPUCache() { |
| 362 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 362 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 363 | shader_cache.SyncGuestHost(); | 363 | shader_cache.SyncGuestHost(); |
| 364 | { | 364 | { |
| @@ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | |||
| 386 | } | 386 | } |
| 387 | } | 387 | } |
| 388 | 388 | ||
| 389 | void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | 389 | void RasterizerOpenGL::SignalFence(std::function<void()>&& func) { |
| 390 | if (!gpu.IsAsync()) { | 390 | fence_manager.SignalFence(std::move(func)); |
| 391 | gpu_memory->Write<u32>(addr, value); | 391 | } |
| 392 | return; | 392 | |
| 393 | } | 393 | void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) { |
| 394 | auto paddr = gpu_memory->GetPointer(addr); | 394 | fence_manager.SyncOperation(std::move(func)); |
| 395 | fence_manager.SignalSemaphore(paddr, value); | ||
| 396 | } | 395 | } |
| 397 | 396 | ||
| 398 | void RasterizerOpenGL::SignalSyncPoint(u32 value) { | 397 | void RasterizerOpenGL::SignalSyncPoint(u32 value) { |
| @@ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { | |||
| 400 | } | 399 | } |
| 401 | 400 | ||
| 402 | void RasterizerOpenGL::SignalReference() { | 401 | void RasterizerOpenGL::SignalReference() { |
| 403 | if (!gpu.IsAsync()) { | ||
| 404 | return; | ||
| 405 | } | ||
| 406 | fence_manager.SignalOrdering(); | 402 | fence_manager.SignalOrdering(); |
| 407 | } | 403 | } |
| 408 | 404 | ||
| 409 | void RasterizerOpenGL::ReleaseFences() { | 405 | void RasterizerOpenGL::ReleaseFences() { |
| 410 | if (!gpu.IsAsync()) { | ||
| 411 | return; | ||
| 412 | } | ||
| 413 | fence_manager.WaitPendingFences(); | 406 | fence_manager.WaitPendingFences(); |
| 414 | } | 407 | } |
| 415 | 408 | ||
| @@ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() { | |||
| 426 | } | 419 | } |
| 427 | 420 | ||
| 428 | void RasterizerOpenGL::FragmentBarrier() { | 421 | void RasterizerOpenGL::FragmentBarrier() { |
| 422 | glTextureBarrier(); | ||
| 429 | glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); | 423 | glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); |
| 430 | } | 424 | } |
| 431 | 425 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d469075a1..fe0ba979a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -80,10 +80,11 @@ public: | |||
| 80 | bool MustFlushRegion(VAddr addr, u64 size) override; | 80 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| 81 | void InvalidateRegion(VAddr addr, u64 size) override; | 81 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 82 | void OnCPUWrite(VAddr addr, u64 size) override; | 82 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 83 | void SyncGuestHost() override; | 83 | void InvalidateGPUCache() override; |
| 84 | void UnmapMemory(VAddr addr, u64 size) override; | 84 | void UnmapMemory(VAddr addr, u64 size) override; |
| 85 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 85 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 86 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 86 | void SignalFence(std::function<void()>&& func) override; |
| 87 | void SyncOperation(std::function<void()>&& func) override; | ||
| 87 | void SignalSyncPoint(u32 value) override; | 88 | void SignalSyncPoint(u32 value) override; |
| 88 | void SignalReference() override; | 89 | void SignalReference() override; |
| 89 | void ReleaseFences() override; | 90 | void ReleaseFences() override; |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 301cbbabe..0214b103a 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -11,11 +11,8 @@ | |||
| 11 | 11 | ||
| 12 | namespace Vulkan { | 12 | namespace Vulkan { |
| 13 | 13 | ||
| 14 | InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) | 14 | InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_) |
| 15 | : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} | 15 | : FenceBase{is_stubbed_}, scheduler{scheduler_} {} |
| 16 | |||
| 17 | InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_) | ||
| 18 | : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} | ||
| 19 | 16 | ||
| 20 | InnerFence::~InnerFence() = default; | 17 | InnerFence::~InnerFence() = default; |
| 21 | 18 | ||
| @@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G | |||
| 48 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | 45 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, |
| 49 | scheduler{scheduler_} {} | 46 | scheduler{scheduler_} {} |
| 50 | 47 | ||
| 51 | Fence FenceManager::CreateFence(u32 value, bool is_stubbed) { | 48 | Fence FenceManager::CreateFence(bool is_stubbed) { |
| 52 | return std::make_shared<InnerFence>(scheduler, value, is_stubbed); | 49 | return std::make_shared<InnerFence>(scheduler, is_stubbed); |
| 53 | } | ||
| 54 | |||
| 55 | Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) { | ||
| 56 | return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); | ||
| 57 | } | 50 | } |
| 58 | 51 | ||
| 59 | void FenceManager::QueueFence(Fence& fence) { | 52 | void FenceManager::QueueFence(Fence& fence) { |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index ea9e88052..7fe2afcd9 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -25,8 +25,7 @@ class Scheduler; | |||
| 25 | 25 | ||
| 26 | class InnerFence : public VideoCommon::FenceBase { | 26 | class InnerFence : public VideoCommon::FenceBase { |
| 27 | public: | 27 | public: |
| 28 | explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); | 28 | explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_); |
| 29 | explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_); | ||
| 30 | ~InnerFence(); | 29 | ~InnerFence(); |
| 31 | 30 | ||
| 32 | void Queue(); | 31 | void Queue(); |
| @@ -50,8 +49,7 @@ public: | |||
| 50 | QueryCache& query_cache, const Device& device, Scheduler& scheduler); | 49 | QueryCache& query_cache, const Device& device, Scheduler& scheduler); |
| 51 | 50 | ||
| 52 | protected: | 51 | protected: |
| 53 | Fence CreateFence(u32 value, bool is_stubbed) override; | 52 | Fence CreateFence(bool is_stubbed) override; |
| 54 | Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; | ||
| 55 | void QueueFence(Fence& fence) override; | 53 | void QueueFence(Fence& fence) override; |
| 56 | bool IsFenceSignaled(Fence& fence) const override; | 54 | bool IsFenceSignaled(Fence& fence) const override; |
| 57 | void WaitFence(Fence& fence) override; | 55 | void WaitFence(Fence& fence) override; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7b57e0f3..a35e41199 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 428 | } | 428 | } |
| 429 | } | 429 | } |
| 430 | 430 | ||
| 431 | void RasterizerVulkan::SyncGuestHost() { | 431 | void RasterizerVulkan::InvalidateGPUCache() { |
| 432 | pipeline_cache.SyncGuestHost(); | 432 | pipeline_cache.SyncGuestHost(); |
| 433 | { | 433 | { |
| 434 | std::scoped_lock lock{buffer_cache.mutex}; | 434 | std::scoped_lock lock{buffer_cache.mutex}; |
| @@ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | |||
| 455 | } | 455 | } |
| 456 | } | 456 | } |
| 457 | 457 | ||
| 458 | void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | 458 | void RasterizerVulkan::SignalFence(std::function<void()>&& func) { |
| 459 | if (!gpu.IsAsync()) { | 459 | fence_manager.SignalFence(std::move(func)); |
| 460 | gpu_memory->Write<u32>(addr, value); | 460 | } |
| 461 | return; | 461 | |
| 462 | } | 462 | void RasterizerVulkan::SyncOperation(std::function<void()>&& func) { |
| 463 | auto paddr = gpu_memory->GetPointer(addr); | 463 | fence_manager.SyncOperation(std::move(func)); |
| 464 | fence_manager.SignalSemaphore(paddr, value); | ||
| 465 | } | 464 | } |
| 466 | 465 | ||
| 467 | void RasterizerVulkan::SignalSyncPoint(u32 value) { | 466 | void RasterizerVulkan::SignalSyncPoint(u32 value) { |
| @@ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { | |||
| 469 | } | 468 | } |
| 470 | 469 | ||
| 471 | void RasterizerVulkan::SignalReference() { | 470 | void RasterizerVulkan::SignalReference() { |
| 472 | if (!gpu.IsAsync()) { | ||
| 473 | return; | ||
| 474 | } | ||
| 475 | fence_manager.SignalOrdering(); | 471 | fence_manager.SignalOrdering(); |
| 476 | } | 472 | } |
| 477 | 473 | ||
| 478 | void RasterizerVulkan::ReleaseFences() { | 474 | void RasterizerVulkan::ReleaseFences() { |
| 479 | if (!gpu.IsAsync()) { | ||
| 480 | return; | ||
| 481 | } | ||
| 482 | fence_manager.WaitPendingFences(); | 475 | fence_manager.WaitPendingFences(); |
| 483 | } | 476 | } |
| 484 | 477 | ||
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c836158b8..fb9e83e8f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -76,10 +76,11 @@ public: | |||
| 76 | bool MustFlushRegion(VAddr addr, u64 size) override; | 76 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| 77 | void InvalidateRegion(VAddr addr, u64 size) override; | 77 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 78 | void OnCPUWrite(VAddr addr, u64 size) override; | 78 | void OnCPUWrite(VAddr addr, u64 size) override; |
| 79 | void SyncGuestHost() override; | 79 | void InvalidateGPUCache() override; |
| 80 | void UnmapMemory(VAddr addr, u64 size) override; | 80 | void UnmapMemory(VAddr addr, u64 size) override; |
| 81 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | 81 | void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |
| 82 | void SignalSemaphore(GPUVAddr addr, u32 value) override; | 82 | void SignalFence(std::function<void()>&& func) override; |
| 83 | void SyncOperation(std::function<void()>&& func) override; | ||
| 83 | void SignalSyncPoint(u32 value) override; | 84 | void SignalSyncPoint(u32 value) override; |
| 84 | void SignalReference() override; | 85 | void SignalReference() override; |
| 85 | void ReleaseFences() override; | 86 | void ReleaseFences() override; |