diff options
| author | 2023-09-25 09:18:29 -0400 | |
|---|---|---|
| committer | 2023-09-25 09:18:29 -0400 | |
| commit | 854457a392b6d38168f7f9d19d1fa8c43fad653c (patch) | |
| tree | 3bc1007b5776f1ce82c057875609105de0a1ca44 /src/video_core/engines | |
| parent | Merge pull request #11569 from german77/lle_applet (diff) | |
| parent | Query Cache: Fix Prefix Sums (diff) | |
| download | yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.tar.gz yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.tar.xz yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.zip | |
Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Y.F.C: Rework the Query Cache.
Diffstat (limited to 'src/video_core/engines')
| -rw-r--r-- | src/video_core/engines/draw_manager.h | 1 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 74 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 3 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_dma.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/engines/puller.cpp | 13 |
5 files changed, 26 insertions, 77 deletions
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h index 7c22c49f1..18d959143 100644 --- a/src/video_core/engines/draw_manager.h +++ b/src/video_core/engines/draw_manager.h | |||
| @@ -46,6 +46,7 @@ public: | |||
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | struct IndirectParams { | 48 | struct IndirectParams { |
| 49 | bool is_byte_count; | ||
| 49 | bool is_indexed; | 50 | bool is_indexed; |
| 50 | bool include_count; | 51 | bool include_count; |
| 51 | GPUVAddr count_start_address; | 52 | GPUVAddr count_start_address; |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 06e349e43..32d767d85 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -20,8 +20,6 @@ | |||
| 20 | 20 | ||
| 21 | namespace Tegra::Engines { | 21 | namespace Tegra::Engines { |
| 22 | 22 | ||
| 23 | using VideoCore::QueryType; | ||
| 24 | |||
| 25 | /// First register id that is actually a Macro call. | 23 | /// First register id that is actually a Macro call. |
| 26 | constexpr u32 MacroRegistersStart = 0xE00; | 24 | constexpr u32 MacroRegistersStart = 0xE00; |
| 27 | 25 | ||
| @@ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||
| 500 | } | 498 | } |
| 501 | 499 | ||
| 502 | void Maxwell3D::ProcessQueryGet() { | 500 | void Maxwell3D::ProcessQueryGet() { |
| 501 | VideoCommon::QueryPropertiesFlags flags{}; | ||
| 502 | if (regs.report_semaphore.query.short_query == 0) { | ||
| 503 | flags |= VideoCommon::QueryPropertiesFlags::HasTimeout; | ||
| 504 | } | ||
| 505 | const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||
| 506 | const VideoCommon::QueryType query_type = | ||
| 507 | static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value()); | ||
| 508 | const u32 payload = regs.report_semaphore.payload; | ||
| 509 | const u32 subreport = regs.report_semaphore.query.sub_report; | ||
| 503 | switch (regs.report_semaphore.query.operation) { | 510 | switch (regs.report_semaphore.query.operation) { |
| 504 | case Regs::ReportSemaphore::Operation::Release: | 511 | case Regs::ReportSemaphore::Operation::Release: |
| 505 | if (regs.report_semaphore.query.short_query != 0) { | 512 | if (regs.report_semaphore.query.short_query != 0) { |
| 506 | const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | 513 | flags |= VideoCommon::QueryPropertiesFlags::IsAFence; |
| 507 | const u32 payload = regs.report_semaphore.payload; | ||
| 508 | std::function<void()> operation([this, sequence_address, payload] { | ||
| 509 | memory_manager.Write<u32>(sequence_address, payload); | ||
| 510 | }); | ||
| 511 | rasterizer->SignalFence(std::move(operation)); | ||
| 512 | } else { | ||
| 513 | struct LongQueryResult { | ||
| 514 | u64_le value; | ||
| 515 | u64_le timestamp; | ||
| 516 | }; | ||
| 517 | const GPUVAddr sequence_address{regs.report_semaphore.Address()}; | ||
| 518 | const u32 payload = regs.report_semaphore.payload; | ||
| 519 | [this, sequence_address, payload] { | ||
| 520 | memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks()); | ||
| 521 | memory_manager.Write<u64>(sequence_address, payload); | ||
| 522 | }(); | ||
| 523 | } | 514 | } |
| 515 | rasterizer->Query(sequence_address, query_type, flags, payload, subreport); | ||
| 524 | break; | 516 | break; |
| 525 | case Regs::ReportSemaphore::Operation::Acquire: | 517 | case Regs::ReportSemaphore::Operation::Acquire: |
| 526 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that | 518 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that |
| @@ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 528 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); | 520 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); |
| 529 | break; | 521 | break; |
| 530 | case Regs::ReportSemaphore::Operation::ReportOnly: | 522 | case Regs::ReportSemaphore::Operation::ReportOnly: |
| 531 | if (const std::optional<u64> result = GetQueryResult()) { | 523 | rasterizer->Query(sequence_address, query_type, flags, payload, subreport); |
| 532 | // If the query returns an empty optional it means it's cached and deferred. | ||
| 533 | // In this case we have a non-empty result, so we stamp it immediately. | ||
| 534 | StampQueryResult(*result, regs.report_semaphore.query.short_query == 0); | ||
| 535 | } | ||
| 536 | break; | 524 | break; |
| 537 | case Regs::ReportSemaphore::Operation::Trap: | 525 | case Regs::ReportSemaphore::Operation::Trap: |
| 538 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | 526 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); |
| @@ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 544 | } | 532 | } |
| 545 | 533 | ||
| 546 | void Maxwell3D::ProcessQueryCondition() { | 534 | void Maxwell3D::ProcessQueryCondition() { |
| 535 | if (rasterizer->AccelerateConditionalRendering()) { | ||
| 536 | execute_on = true; | ||
| 537 | return; | ||
| 538 | } | ||
| 547 | const GPUVAddr condition_address{regs.render_enable.Address()}; | 539 | const GPUVAddr condition_address{regs.render_enable.Address()}; |
| 548 | switch (regs.render_enable_override) { | 540 | switch (regs.render_enable_override) { |
| 549 | case Regs::RenderEnable::Override::AlwaysRender: | 541 | case Regs::RenderEnable::Override::AlwaysRender: |
| @@ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 553 | execute_on = false; | 545 | execute_on = false; |
| 554 | break; | 546 | break; |
| 555 | case Regs::RenderEnable::Override::UseRenderEnable: { | 547 | case Regs::RenderEnable::Override::UseRenderEnable: { |
| 556 | if (rasterizer->AccelerateConditionalRendering()) { | ||
| 557 | execute_on = true; | ||
| 558 | return; | ||
| 559 | } | ||
| 560 | switch (regs.render_enable.mode) { | 548 | switch (regs.render_enable.mode) { |
| 561 | case Regs::RenderEnable::Mode::True: { | 549 | case Regs::RenderEnable::Mode::True: { |
| 562 | execute_on = true; | 550 | execute_on = true; |
| @@ -598,15 +586,9 @@ void Maxwell3D::ProcessQueryCondition() { | |||
| 598 | } | 586 | } |
| 599 | 587 | ||
| 600 | void Maxwell3D::ProcessCounterReset() { | 588 | void Maxwell3D::ProcessCounterReset() { |
| 601 | #if ANDROID | ||
| 602 | if (!Settings::IsGPULevelHigh()) { | ||
| 603 | // This is problematic on Android, disable on GPU Normal. | ||
| 604 | return; | ||
| 605 | } | ||
| 606 | #endif | ||
| 607 | switch (regs.clear_report_value) { | 589 | switch (regs.clear_report_value) { |
| 608 | case Regs::ClearReport::ZPassPixelCount: | 590 | case Regs::ClearReport::ZPassPixelCount: |
| 609 | rasterizer->ResetCounter(QueryType::SamplesPassed); | 591 | rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); |
| 610 | break; | 592 | break; |
| 611 | default: | 593 | default: |
| 612 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); | 594 | LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); |
| @@ -620,28 +602,6 @@ void Maxwell3D::ProcessSyncPoint() { | |||
| 620 | rasterizer->SignalSyncPoint(sync_point); | 602 | rasterizer->SignalSyncPoint(sync_point); |
| 621 | } | 603 | } |
| 622 | 604 | ||
| 623 | std::optional<u64> Maxwell3D::GetQueryResult() { | ||
| 624 | switch (regs.report_semaphore.query.report) { | ||
| 625 | case Regs::ReportSemaphore::Report::Payload: | ||
| 626 | return regs.report_semaphore.payload; | ||
| 627 | case Regs::ReportSemaphore::Report::ZPassPixelCount64: | ||
| 628 | #if ANDROID | ||
| 629 | if (!Settings::IsGPULevelHigh()) { | ||
| 630 | // This is problematic on Android, disable on GPU Normal. | ||
| 631 | return 120; | ||
| 632 | } | ||
| 633 | #endif | ||
| 634 | // Deferred. | ||
| 635 | rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed, | ||
| 636 | system.GPU().GetTicks()); | ||
| 637 | return std::nullopt; | ||
| 638 | default: | ||
| 639 | LOG_DEBUG(HW_GPU, "Unimplemented query report type {}", | ||
| 640 | regs.report_semaphore.query.report.Value()); | ||
| 641 | return 1; | ||
| 642 | } | ||
| 643 | } | ||
| 644 | |||
| 645 | void Maxwell3D::ProcessCBBind(size_t stage_index) { | 605 | void Maxwell3D::ProcessCBBind(size_t stage_index) { |
| 646 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader | 606 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader |
| 647 | // stage. | 607 | // stage. |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 6c19354e1..17faacc37 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -3182,9 +3182,6 @@ private: | |||
| 3182 | /// Handles writes to syncing register. | 3182 | /// Handles writes to syncing register. |
| 3183 | void ProcessSyncPoint(); | 3183 | void ProcessSyncPoint(); |
| 3184 | 3184 | ||
| 3185 | /// Returns a query's value or an empty object if the value will be deferred through a cache. | ||
| 3186 | std::optional<u64> GetQueryResult(); | ||
| 3187 | |||
| 3188 | void RefreshParametersImpl(); | 3185 | void RefreshParametersImpl(); |
| 3189 | 3186 | ||
| 3190 | bool IsMethodExecutable(u32 method); | 3187 | bool IsMethodExecutable(u32 method); |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 279f0daa1..422d4d859 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -362,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() { | |||
| 362 | const auto type = regs.launch_dma.semaphore_type; | 362 | const auto type = regs.launch_dma.semaphore_type; |
| 363 | const GPUVAddr address = regs.semaphore.address; | 363 | const GPUVAddr address = regs.semaphore.address; |
| 364 | const u32 payload = regs.semaphore.payload; | 364 | const u32 payload = regs.semaphore.payload; |
| 365 | VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence}; | ||
| 365 | switch (type) { | 366 | switch (type) { |
| 366 | case LaunchDMA::SemaphoreType::NONE: | 367 | case LaunchDMA::SemaphoreType::NONE: |
| 367 | break; | 368 | break; |
| 368 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { | 369 | case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { |
| 369 | std::function<void()> operation( | 370 | rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0); |
| 370 | [this, address, payload] { memory_manager.Write<u32>(address, payload); }); | ||
| 371 | rasterizer->SignalFence(std::move(operation)); | ||
| 372 | break; | 371 | break; |
| 373 | } | 372 | } |
| 374 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { | 373 | case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { |
| 375 | std::function<void()> operation([this, address, payload] { | 374 | rasterizer->Query(address, VideoCommon::QueryType::Payload, |
| 376 | memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); | 375 | flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); |
| 377 | memory_manager.Write<u64>(address, payload); | ||
| 378 | }); | ||
| 379 | rasterizer->SignalFence(std::move(operation)); | ||
| 380 | break; | 376 | break; |
| 381 | } | 377 | } |
| 382 | default: | 378 | default: |
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 6de2543b7..8dd34c04a 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp | |||
| @@ -82,10 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 82 | if (op == GpuSemaphoreOperation::WriteLong) { | 82 | if (op == GpuSemaphoreOperation::WriteLong) { |
| 83 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 83 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 84 | const u32 payload = regs.semaphore_sequence; | 84 | const u32 payload = regs.semaphore_sequence; |
| 85 | [this, sequence_address, payload] { | 85 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, |
| 86 | memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); | 86 | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); |
| 87 | memory_manager.Write<u64>(sequence_address, payload); | ||
| 88 | }(); | ||
| 89 | } else { | 87 | } else { |
| 90 | do { | 88 | do { |
| 91 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | 89 | const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; |
| @@ -120,10 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| 120 | void Puller::ProcessSemaphoreRelease() { | 118 | void Puller::ProcessSemaphoreRelease() { |
| 121 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | 119 | const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; |
| 122 | const u32 payload = regs.semaphore_release; | 120 | const u32 payload = regs.semaphore_release; |
| 123 | std::function<void()> operation([this, sequence_address, payload] { | 121 | rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, |
| 124 | memory_manager.Write<u32>(sequence_address, payload); | 122 | VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); |
| 125 | }); | ||
| 126 | rasterizer->SignalFence(std::move(operation)); | ||
| 127 | } | 123 | } |
| 128 | 124 | ||
| 129 | void Puller::ProcessSemaphoreAcquire() { | 125 | void Puller::ProcessSemaphoreAcquire() { |
| @@ -132,7 +128,6 @@ void Puller::ProcessSemaphoreAcquire() { | |||
| 132 | while (word != value) { | 128 | while (word != value) { |
| 133 | regs.acquire_active = true; | 129 | regs.acquire_active = true; |
| 134 | regs.acquire_value = value; | 130 | regs.acquire_value = value; |
| 135 | std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||
| 136 | rasterizer->ReleaseFences(); | 131 | rasterizer->ReleaseFences(); |
| 137 | word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | 132 | word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); |
| 138 | // TODO(kemathe73) figure out how to do the acquire_timeout | 133 | // TODO(kemathe73) figure out how to do the acquire_timeout |