summaryrefslogtreecommitdiff
path: root/src/video_core/engines
diff options
context:
space:
mode:
authorGravatar liamwhite2023-09-25 09:18:29 -0400
committerGravatar GitHub2023-09-25 09:18:29 -0400
commit854457a392b6d38168f7f9d19d1fa8c43fad653c (patch)
tree3bc1007b5776f1ce82c057875609105de0a1ca44 /src/video_core/engines
parentMerge pull request #11569 from german77/lle_applet (diff)
parentQuery Cache: Fix Prefix Sums (diff)
downloadyuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.tar.gz
yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.tar.xz
yuzu-854457a392b6d38168f7f9d19d1fa8c43fad653c.zip
Merge pull request #11225 from FernandoS27/no-laxatives-in-santas-cookies
Y.F.C: Rework the Query Cache.
Diffstat (limited to 'src/video_core/engines')
-rw-r--r--src/video_core/engines/draw_manager.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp74
-rw-r--r--src/video_core/engines/maxwell_3d.h3
-rw-r--r--src/video_core/engines/maxwell_dma.cpp12
-rw-r--r--src/video_core/engines/puller.cpp13
5 files changed, 26 insertions, 77 deletions
diff --git a/src/video_core/engines/draw_manager.h b/src/video_core/engines/draw_manager.h
index 7c22c49f1..18d959143 100644
--- a/src/video_core/engines/draw_manager.h
+++ b/src/video_core/engines/draw_manager.h
@@ -46,6 +46,7 @@ public:
46 }; 46 };
47 47
48 struct IndirectParams { 48 struct IndirectParams {
49 bool is_byte_count;
49 bool is_indexed; 50 bool is_indexed;
50 bool include_count; 51 bool include_count;
51 GPUVAddr count_start_address; 52 GPUVAddr count_start_address;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 06e349e43..32d767d85 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -20,8 +20,6 @@
20 20
21namespace Tegra::Engines { 21namespace Tegra::Engines {
22 22
23using VideoCore::QueryType;
24
25/// First register id that is actually a Macro call. 23/// First register id that is actually a Macro call.
26constexpr u32 MacroRegistersStart = 0xE00; 24constexpr u32 MacroRegistersStart = 0xE00;
27 25
@@ -500,27 +498,21 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
500} 498}
501 499
502void Maxwell3D::ProcessQueryGet() { 500void Maxwell3D::ProcessQueryGet() {
501 VideoCommon::QueryPropertiesFlags flags{};
502 if (regs.report_semaphore.query.short_query == 0) {
503 flags |= VideoCommon::QueryPropertiesFlags::HasTimeout;
504 }
505 const GPUVAddr sequence_address{regs.report_semaphore.Address()};
506 const VideoCommon::QueryType query_type =
507 static_cast<VideoCommon::QueryType>(regs.report_semaphore.query.report.Value());
508 const u32 payload = regs.report_semaphore.payload;
509 const u32 subreport = regs.report_semaphore.query.sub_report;
503 switch (regs.report_semaphore.query.operation) { 510 switch (regs.report_semaphore.query.operation) {
504 case Regs::ReportSemaphore::Operation::Release: 511 case Regs::ReportSemaphore::Operation::Release:
505 if (regs.report_semaphore.query.short_query != 0) { 512 if (regs.report_semaphore.query.short_query != 0) {
506 const GPUVAddr sequence_address{regs.report_semaphore.Address()}; 513 flags |= VideoCommon::QueryPropertiesFlags::IsAFence;
507 const u32 payload = regs.report_semaphore.payload;
508 std::function<void()> operation([this, sequence_address, payload] {
509 memory_manager.Write<u32>(sequence_address, payload);
510 });
511 rasterizer->SignalFence(std::move(operation));
512 } else {
513 struct LongQueryResult {
514 u64_le value;
515 u64_le timestamp;
516 };
517 const GPUVAddr sequence_address{regs.report_semaphore.Address()};
518 const u32 payload = regs.report_semaphore.payload;
519 [this, sequence_address, payload] {
520 memory_manager.Write<u64>(sequence_address + sizeof(u64), system.GPU().GetTicks());
521 memory_manager.Write<u64>(sequence_address, payload);
522 }();
523 } 514 }
515 rasterizer->Query(sequence_address, query_type, flags, payload, subreport);
524 break; 516 break;
525 case Regs::ReportSemaphore::Operation::Acquire: 517 case Regs::ReportSemaphore::Operation::Acquire:
526 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that 518 // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
@@ -528,11 +520,7 @@ void Maxwell3D::ProcessQueryGet() {
528 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); 520 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
529 break; 521 break;
530 case Regs::ReportSemaphore::Operation::ReportOnly: 522 case Regs::ReportSemaphore::Operation::ReportOnly:
531 if (const std::optional<u64> result = GetQueryResult()) { 523 rasterizer->Query(sequence_address, query_type, flags, payload, subreport);
532 // If the query returns an empty optional it means it's cached and deferred.
533 // In this case we have a non-empty result, so we stamp it immediately.
534 StampQueryResult(*result, regs.report_semaphore.query.short_query == 0);
535 }
536 break; 524 break;
537 case Regs::ReportSemaphore::Operation::Trap: 525 case Regs::ReportSemaphore::Operation::Trap:
538 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); 526 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
@@ -544,6 +532,10 @@ void Maxwell3D::ProcessQueryGet() {
544} 532}
545 533
546void Maxwell3D::ProcessQueryCondition() { 534void Maxwell3D::ProcessQueryCondition() {
535 if (rasterizer->AccelerateConditionalRendering()) {
536 execute_on = true;
537 return;
538 }
547 const GPUVAddr condition_address{regs.render_enable.Address()}; 539 const GPUVAddr condition_address{regs.render_enable.Address()};
548 switch (regs.render_enable_override) { 540 switch (regs.render_enable_override) {
549 case Regs::RenderEnable::Override::AlwaysRender: 541 case Regs::RenderEnable::Override::AlwaysRender:
@@ -553,10 +545,6 @@ void Maxwell3D::ProcessQueryCondition() {
553 execute_on = false; 545 execute_on = false;
554 break; 546 break;
555 case Regs::RenderEnable::Override::UseRenderEnable: { 547 case Regs::RenderEnable::Override::UseRenderEnable: {
556 if (rasterizer->AccelerateConditionalRendering()) {
557 execute_on = true;
558 return;
559 }
560 switch (regs.render_enable.mode) { 548 switch (regs.render_enable.mode) {
561 case Regs::RenderEnable::Mode::True: { 549 case Regs::RenderEnable::Mode::True: {
562 execute_on = true; 550 execute_on = true;
@@ -598,15 +586,9 @@ void Maxwell3D::ProcessQueryCondition() {
598} 586}
599 587
600void Maxwell3D::ProcessCounterReset() { 588void Maxwell3D::ProcessCounterReset() {
601#if ANDROID
602 if (!Settings::IsGPULevelHigh()) {
603 // This is problematic on Android, disable on GPU Normal.
604 return;
605 }
606#endif
607 switch (regs.clear_report_value) { 589 switch (regs.clear_report_value) {
608 case Regs::ClearReport::ZPassPixelCount: 590 case Regs::ClearReport::ZPassPixelCount:
609 rasterizer->ResetCounter(QueryType::SamplesPassed); 591 rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);
610 break; 592 break;
611 default: 593 default:
612 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); 594 LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value);
@@ -620,28 +602,6 @@ void Maxwell3D::ProcessSyncPoint() {
620 rasterizer->SignalSyncPoint(sync_point); 602 rasterizer->SignalSyncPoint(sync_point);
621} 603}
622 604
623std::optional<u64> Maxwell3D::GetQueryResult() {
624 switch (regs.report_semaphore.query.report) {
625 case Regs::ReportSemaphore::Report::Payload:
626 return regs.report_semaphore.payload;
627 case Regs::ReportSemaphore::Report::ZPassPixelCount64:
628#if ANDROID
629 if (!Settings::IsGPULevelHigh()) {
630 // This is problematic on Android, disable on GPU Normal.
631 return 120;
632 }
633#endif
634 // Deferred.
635 rasterizer->Query(regs.report_semaphore.Address(), QueryType::SamplesPassed,
636 system.GPU().GetTicks());
637 return std::nullopt;
638 default:
639 LOG_DEBUG(HW_GPU, "Unimplemented query report type {}",
640 regs.report_semaphore.query.report.Value());
641 return 1;
642 }
643}
644
645void Maxwell3D::ProcessCBBind(size_t stage_index) { 605void Maxwell3D::ProcessCBBind(size_t stage_index) {
646 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader 606 // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader
647 // stage. 607 // stage.
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 6c19354e1..17faacc37 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3182,9 +3182,6 @@ private:
3182 /// Handles writes to syncing register. 3182 /// Handles writes to syncing register.
3183 void ProcessSyncPoint(); 3183 void ProcessSyncPoint();
3184 3184
3185 /// Returns a query's value or an empty object if the value will be deferred through a cache.
3186 std::optional<u64> GetQueryResult();
3187
3188 void RefreshParametersImpl(); 3185 void RefreshParametersImpl();
3189 3186
3190 bool IsMethodExecutable(u32 method); 3187 bool IsMethodExecutable(u32 method);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 279f0daa1..422d4d859 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -362,21 +362,17 @@ void MaxwellDMA::ReleaseSemaphore() {
362 const auto type = regs.launch_dma.semaphore_type; 362 const auto type = regs.launch_dma.semaphore_type;
363 const GPUVAddr address = regs.semaphore.address; 363 const GPUVAddr address = regs.semaphore.address;
364 const u32 payload = regs.semaphore.payload; 364 const u32 payload = regs.semaphore.payload;
365 VideoCommon::QueryPropertiesFlags flags{VideoCommon::QueryPropertiesFlags::IsAFence};
365 switch (type) { 366 switch (type) {
366 case LaunchDMA::SemaphoreType::NONE: 367 case LaunchDMA::SemaphoreType::NONE:
367 break; 368 break;
368 case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: { 369 case LaunchDMA::SemaphoreType::RELEASE_ONE_WORD_SEMAPHORE: {
369 std::function<void()> operation( 370 rasterizer->Query(address, VideoCommon::QueryType::Payload, flags, payload, 0);
370 [this, address, payload] { memory_manager.Write<u32>(address, payload); });
371 rasterizer->SignalFence(std::move(operation));
372 break; 371 break;
373 } 372 }
374 case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: { 373 case LaunchDMA::SemaphoreType::RELEASE_FOUR_WORD_SEMAPHORE: {
375 std::function<void()> operation([this, address, payload] { 374 rasterizer->Query(address, VideoCommon::QueryType::Payload,
376 memory_manager.Write<u64>(address + sizeof(u64), system.GPU().GetTicks()); 375 flags | VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
377 memory_manager.Write<u64>(address, payload);
378 });
379 rasterizer->SignalFence(std::move(operation));
380 break; 376 break;
381 } 377 }
382 default: 378 default:
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
index 6de2543b7..8dd34c04a 100644
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -82,10 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
82 if (op == GpuSemaphoreOperation::WriteLong) { 82 if (op == GpuSemaphoreOperation::WriteLong) {
83 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; 83 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
84 const u32 payload = regs.semaphore_sequence; 84 const u32 payload = regs.semaphore_sequence;
85 [this, sequence_address, payload] { 85 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
86 memory_manager.Write<u64>(sequence_address + sizeof(u64), gpu.GetTicks()); 86 VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0);
87 memory_manager.Write<u64>(sequence_address, payload);
88 }();
89 } else { 87 } else {
90 do { 88 do {
91 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; 89 const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@@ -120,10 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() {
120void Puller::ProcessSemaphoreRelease() { 118void Puller::ProcessSemaphoreRelease() {
121 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; 119 const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
122 const u32 payload = regs.semaphore_release; 120 const u32 payload = regs.semaphore_release;
123 std::function<void()> operation([this, sequence_address, payload] { 121 rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload,
124 memory_manager.Write<u32>(sequence_address, payload); 122 VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0);
125 });
126 rasterizer->SignalFence(std::move(operation));
127} 123}
128 124
129void Puller::ProcessSemaphoreAcquire() { 125void Puller::ProcessSemaphoreAcquire() {
@@ -132,7 +128,6 @@ void Puller::ProcessSemaphoreAcquire() {
132 while (word != value) { 128 while (word != value) {
133 regs.acquire_active = true; 129 regs.acquire_active = true;
134 regs.acquire_value = value; 130 regs.acquire_value = value;
135 std::this_thread::sleep_for(std::chrono::milliseconds(1));
136 rasterizer->ReleaseFences(); 131 rasterizer->ReleaseFences();
137 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); 132 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
138 // TODO(kemathe73) figure out how to do the acquire_timeout 133 // TODO(kemathe73) figure out how to do the acquire_timeout