summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 91
-rw-r--r-- src/video_core/engines/maxwell_3d.h 16
-rw-r--r-- src/video_core/gpu.cpp 16
-rw-r--r-- src/video_core/gpu.h 2
4 files changed, 73 insertions, 52 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7cea146f0..0b3e8749b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
9#include "core/core_timing.h" 9#include "core/core_timing.h"
10#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/shader_type.h" 11#include "video_core/engines/shader_type.h"
12#include "video_core/gpu.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
13#include "video_core/rasterizer_interface.h" 14#include "video_core/rasterizer_interface.h"
14#include "video_core/textures/texture.h" 15#include "video_core/textures/texture.h"
@@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
519 regs.reg_array[0xd00] = 1; 520 regs.reg_array[0xd00] = 1;
520} 521}
521 522
522void Maxwell3D::ProcessQueryGet() { 523void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
524 struct LongQueryResult {
525 u64_le value;
526 u64_le timestamp;
527 };
528 static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
523 const GPUVAddr sequence_address{regs.query.QueryAddress()}; 529 const GPUVAddr sequence_address{regs.query.QueryAddress()};
524 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 530 if (long_query) {
525 // VAddr before writing. 531 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
532 // GPU, this command may actually take a while to complete in real hardware due to GPU
533 // wait queues.
534 LongQueryResult query_result{payload, system.GPU().GetTicks()};
535 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
536 } else {
537 memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
538 }
539}
526 540
541void Maxwell3D::ProcessQueryGet() {
527 // TODO(Subv): Support the other query units. 542 // TODO(Subv): Support the other query units.
528 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 543 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
529 "Units other than CROP are unimplemented"); 544 "Units other than CROP are unimplemented");
530 545
531 u64 result = 0; 546 switch (regs.query.query_get.operation) {
532 547 case Regs::QueryOperation::Release: {
533 // TODO(Subv): Support the other query variables 548 const u64 result = regs.query.query_sequence;
534 switch (regs.query.query_get.select) { 549 StampQueryResult(result, regs.query.query_get.short_query == 0);
535 case Regs::QuerySelect::Zero:
536 // This seems to actually write the query sequence to the query address.
537 result = regs.query.query_sequence;
538 break; 550 break;
539 default:
540 result = 1;
541 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
542 static_cast<u32>(regs.query.query_get.select.Value()));
543 } 551 }
544 552 case Regs::QueryOperation::Acquire: {
545 // TODO(Subv): Research and implement how query sync conditions work. 553 // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
546 554 // to write a value that matches the current payload.
547 struct LongQueryResult { 555 UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
548 u64_le value; 556 break;
549 u64_le timestamp; 557 }
550 }; 558 case Regs::QueryOperation::Counter: {
551 static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); 559 u64 result{};
552 560 switch (regs.query.query_get.select) {
553 switch (regs.query.query_get.mode) { 561 case Regs::QuerySelect::Zero:
554 case Regs::QueryMode::Write: 562 result = 0;
555 case Regs::QueryMode::Write2: { 563 break;
556 u32 sequence = regs.query.query_sequence; 564 default:
557 if (regs.query.query_get.short_query) { 565 result = 1;
558 // Write the current query sequence to the sequence address. 566 UNIMPLEMENTED_MSG("Unimplemented query select type {}",
559 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 567 static_cast<u32>(regs.query.query_get.select.Value()));
560 // query.
561 memory_manager.Write<u32>(sequence_address, sequence);
562 } else {
563 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
564 // GPU, this command may actually take a while to complete in real hardware due to GPU
565 // wait queues.
566 LongQueryResult query_result{};
567 query_result.value = result;
568 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
569 query_result.timestamp = system.CoreTiming().GetTicks();
570 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
571 } 568 }
569 StampQueryResult(result, regs.query.query_get.short_query == 0);
570 break;
571 }
572 case Regs::QueryOperation::Trap: {
573 UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
574 break;
575 }
576 default: {
577 UNIMPLEMENTED_MSG("Unknown query operation");
572 break; 578 break;
573 } 579 }
574 default:
575 UNIMPLEMENTED_MSG("Query mode {} not implemented",
576 static_cast<u32>(regs.query.query_get.mode.Value()));
577 } 580 }
578} 581}
579 582
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 7b1912a66..0a2af54e5 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -71,12 +71,11 @@ public:
71 static constexpr std::size_t MaxConstBuffers = 18; 71 static constexpr std::size_t MaxConstBuffers = 18;
72 static constexpr std::size_t MaxConstBufferSize = 0x10000; 72 static constexpr std::size_t MaxConstBufferSize = 0x10000;
73 73
74 enum class QueryMode : u32 { 74 enum class QueryOperation : u32 {
75 Write = 0, 75 Release = 0,
76 Sync = 1, 76 Acquire = 1,
77 // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 77 Counter = 2,
78 // is. 78 Trap = 3,
79 Write2 = 2,
80 }; 79 };
81 80
82 enum class QueryUnit : u32 { 81 enum class QueryUnit : u32 {
@@ -1081,7 +1080,7 @@ public:
1081 u32 query_sequence; 1080 u32 query_sequence;
1082 union { 1081 union {
1083 u32 raw; 1082 u32 raw;
1084 BitField<0, 2, QueryMode> mode; 1083 BitField<0, 2, QueryOperation> operation;
1085 BitField<4, 1, u32> fence; 1084 BitField<4, 1, u32> fence;
1086 BitField<12, 4, QueryUnit> unit; 1085 BitField<12, 4, QueryUnit> unit;
1087 BitField<16, 1, QuerySyncCondition> sync_cond; 1086 BitField<16, 1, QuerySyncCondition> sync_cond;
@@ -1413,6 +1412,9 @@ private:
1413 /// Handles a write to the QUERY_GET register. 1412 /// Handles a write to the QUERY_GET register.
1414 void ProcessQueryGet(); 1413 void ProcessQueryGet();
1415 1414
1415 // Writes the query result accordingly
1416 void StampQueryResult(u64 payload, bool long_query);
1417
1416 // Handles Conditional Rendering 1418 // Handles Conditional Rendering
1417 void ProcessQueryCondition(); 1419 void ProcessQueryCondition();
1418 1420
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 062ca83b8..4419ab735 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -6,6 +6,7 @@
6#include "common/microprofile.h" 6#include "common/microprofile.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/core_timing.h" 8#include "core/core_timing.h"
9#include "core/core_timing_util.h"
9#include "core/memory.h" 10#include "core/memory.h"
10#include "video_core/engines/fermi_2d.h" 11#include "video_core/engines/fermi_2d.h"
11#include "video_core/engines/kepler_compute.h" 12#include "video_core/engines/kepler_compute.h"
@@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
122 return true; 123 return true;
123} 124}
124 125
126u64 GPU::GetTicks() const {
127 // These values were reverse engineered by fincs from NVN
128 // The gpu clock advances 384 ticks every 625 nanoseconds (ticks = ns * 384 / 625)
129 constexpr u64 gpu_ticks_num = 384;
130 constexpr u64 gpu_ticks_den = 625;
131
132 const u64 cpu_ticks = system.CoreTiming().GetTicks();
133 const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
134 const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
135 const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
136 return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
137}
138
125void GPU::FlushCommands() { 139void GPU::FlushCommands() {
126 renderer.Rasterizer().FlushCommands(); 140 renderer.Rasterizer().FlushCommands();
127} 141}
@@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
340 block.sequence = regs.semaphore_sequence; 354 block.sequence = regs.semaphore_sequence;
341 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of 355 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
342 // CoreTiming 356 // CoreTiming
343 block.timestamp = system.CoreTiming().GetTicks(); 357 block.timestamp = GetTicks();
344 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, 358 memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
345 sizeof(block)); 359 sizeof(block));
346 } else { 360 } else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b648317bb..07727210c 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -192,6 +192,8 @@ public:
192 192
193 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); 193 bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
194 194
195 u64 GetTicks() const;
196
195 std::unique_lock<std::mutex> LockSync() { 197 std::unique_lock<std::mutex> LockSync() {
196 return std::unique_lock{sync_mutex}; 198 return std::unique_lock{sync_mutex};
197 } 199 }