diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 91 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 16 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 2 |
4 files changed, 73 insertions, 52 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "core/core_timing.h" | 9 | #include "core/core_timing.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/shader_type.h" | 11 | #include "video_core/engines/shader_type.h" |
| 12 | #include "video_core/gpu.h" | ||
| 12 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| 13 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 14 | #include "video_core/textures/texture.h" | 15 | #include "video_core/textures/texture.h" |
| @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() { | |||
| 519 | regs.reg_array[0xd00] = 1; | 520 | regs.reg_array[0xd00] = 1; |
| 520 | } | 521 | } |
| 521 | 522 | ||
| 522 | void Maxwell3D::ProcessQueryGet() { | 523 | void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { |
| 524 | struct LongQueryResult { | ||
| 525 | u64_le value; | ||
| 526 | u64_le timestamp; | ||
| 527 | }; | ||
| 528 | static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); | ||
| 523 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; | 529 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 524 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application | 530 | if (long_query) { |
| 525 | // VAddr before writing. | 531 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast |
| 532 | // GPU, this command may actually take a while to complete in real hardware due to GPU | ||
| 533 | // wait queues. | ||
| 534 | LongQueryResult query_result{payload, system.GPU().GetTicks()}; | ||
| 535 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||
| 536 | } else { | ||
| 537 | memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); | ||
| 538 | } | ||
| 539 | } | ||
| 526 | 540 | ||
| 541 | void Maxwell3D::ProcessQueryGet() { | ||
| 527 | // TODO(Subv): Support the other query units. | 542 | // TODO(Subv): Support the other query units. |
| 528 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 543 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |
| 529 | "Units other than CROP are unimplemented"); | 544 | "Units other than CROP are unimplemented"); |
| 530 | 545 | ||
| 531 | u64 result = 0; | 546 | switch (regs.query.query_get.operation) { |
| 532 | 547 | case Regs::QueryOperation::Release: { | |
| 533 | // TODO(Subv): Support the other query variables | 548 | const u64 result = regs.query.query_sequence; |
| 534 | switch (regs.query.query_get.select) { | 549 | StampQueryResult(result, regs.query.query_get.short_query == 0); |
| 535 | case Regs::QuerySelect::Zero: | ||
| 536 | // This seems to actually write the query sequence to the query address. | ||
| 537 | result = regs.query.query_sequence; | ||
| 538 | break; | 550 | break; |
| 539 | default: | ||
| 540 | result = 1; | ||
| 541 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | ||
| 542 | static_cast<u32>(regs.query.query_get.select.Value())); | ||
| 543 | } | 551 | } |
| 544 | 552 | case Regs::QueryOperation::Acquire: { | |
| 545 | // TODO(Subv): Research and implement how query sync conditions work. | 553 | // TODO(Blinkhawk): Under this operation, the GPU waits for the CPU |
| 546 | 554 | // to write a value that matches the current payload. | |
| 547 | struct LongQueryResult { | 555 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); |
| 548 | u64_le value; | 556 | break; |
| 549 | u64_le timestamp; | 557 | } |
| 550 | }; | 558 | case Regs::QueryOperation::Counter: { |
| 551 | static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); | 559 | u64 result{}; |
| 552 | 560 | switch (regs.query.query_get.select) { | |
| 553 | switch (regs.query.query_get.mode) { | 561 | case Regs::QuerySelect::Zero: |
| 554 | case Regs::QueryMode::Write: | 562 | result = 0; |
| 555 | case Regs::QueryMode::Write2: { | 563 | break; |
| 556 | u32 sequence = regs.query.query_sequence; | 564 | default: |
| 557 | if (regs.query.query_get.short_query) { | 565 | result = 1; |
| 558 | // Write the current query sequence to the sequence address. | 566 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
| 559 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short | 567 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 560 | // query. | ||
| 561 | memory_manager.Write<u32>(sequence_address, sequence); | ||
| 562 | } else { | ||
| 563 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast | ||
| 564 | // GPU, this command may actually take a while to complete in real hardware due to GPU | ||
| 565 | // wait queues. | ||
| 566 | LongQueryResult query_result{}; | ||
| 567 | query_result.value = result; | ||
| 568 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming | ||
| 569 | query_result.timestamp = system.CoreTiming().GetTicks(); | ||
| 570 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||
| 571 | } | 568 | } |
| 569 | StampQueryResult(result, regs.query.query_get.short_query == 0); | ||
| 570 | break; | ||
| 571 | } | ||
| 572 | case Regs::QueryOperation::Trap: { | ||
| 573 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | ||
| 574 | break; | ||
| 575 | } | ||
| 576 | default: { | ||
| 577 | UNIMPLEMENTED_MSG("Unknown query operation"); | ||
| 572 | break; | 578 | break; |
| 573 | } | 579 | } |
| 574 | default: | ||
| 575 | UNIMPLEMENTED_MSG("Query mode {} not implemented", | ||
| 576 | static_cast<u32>(regs.query.query_get.mode.Value())); | ||
| 577 | } | 580 | } |
| 578 | } | 581 | } |
| 579 | 582 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7b1912a66..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -71,12 +71,11 @@ public: | |||
| 71 | static constexpr std::size_t MaxConstBuffers = 18; | 71 | static constexpr std::size_t MaxConstBuffers = 18; |
| 72 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | 72 | static constexpr std::size_t MaxConstBufferSize = 0x10000; |
| 73 | 73 | ||
| 74 | enum class QueryMode : u32 { | 74 | enum class QueryOperation : u32 { |
| 75 | Write = 0, | 75 | Release = 0, |
| 76 | Sync = 1, | 76 | Acquire = 1, |
| 77 | // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 | 77 | Counter = 2, |
| 78 | // is. | 78 | Trap = 3, |
| 79 | Write2 = 2, | ||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | enum class QueryUnit : u32 { | 81 | enum class QueryUnit : u32 { |
| @@ -1081,7 +1080,7 @@ public: | |||
| 1081 | u32 query_sequence; | 1080 | u32 query_sequence; |
| 1082 | union { | 1081 | union { |
| 1083 | u32 raw; | 1082 | u32 raw; |
| 1084 | BitField<0, 2, QueryMode> mode; | 1083 | BitField<0, 2, QueryOperation> operation; |
| 1085 | BitField<4, 1, u32> fence; | 1084 | BitField<4, 1, u32> fence; |
| 1086 | BitField<12, 4, QueryUnit> unit; | 1085 | BitField<12, 4, QueryUnit> unit; |
| 1087 | BitField<16, 1, QuerySyncCondition> sync_cond; | 1086 | BitField<16, 1, QuerySyncCondition> sync_cond; |
| @@ -1413,6 +1412,9 @@ private: | |||
| 1413 | /// Handles a write to the QUERY_GET register. | 1412 | /// Handles a write to the QUERY_GET register. |
| 1414 | void ProcessQueryGet(); | 1413 | void ProcessQueryGet(); |
| 1415 | 1414 | ||
| 1415 | // Writes the payload to the query address: a 128-bit value/timestamp pair for long queries, a 32-bit value otherwise | ||
| 1416 | void StampQueryResult(u64 payload, bool long_query); | ||
| 1417 | |||
| 1416 | // Handles Conditional Rendering | 1418 | // Handles Conditional Rendering |
| 1417 | void ProcessQueryCondition(); | 1419 | void ProcessQueryCondition(); |
| 1418 | 1420 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 9 | #include "core/core_timing_util.h" | ||
| 9 | #include "core/memory.h" | 10 | #include "core/memory.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 11 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/engines/kepler_compute.h" | 12 | #include "video_core/engines/kepler_compute.h" |
| @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | |||
| 122 | return true; | 123 | return true; |
| 123 | } | 124 | } |
| 124 | 125 | ||
| 126 | u64 GPU::GetTicks() const { | ||
| 127 | // These values were reverse engineered by fincs from NVN | ||
| 128 | // The GPU clock advances 384 ticks every 625 nanoseconds (one tick is 625/384 ns) | ||
| 129 | constexpr u64 gpu_ticks_num = 384; | ||
| 130 | constexpr u64 gpu_ticks_den = 625; | ||
| 131 | |||
| 132 | const u64 cpu_ticks = system.CoreTiming().GetTicks(); | ||
| 133 | const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); | ||
| 134 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | ||
| 135 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | ||
| 136 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 137 | } | ||
| 138 | |||
| 125 | void GPU::FlushCommands() { | 139 | void GPU::FlushCommands() { |
| 126 | renderer.Rasterizer().FlushCommands(); | 140 | renderer.Rasterizer().FlushCommands(); |
| 127 | } | 141 | } |
| @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 340 | block.sequence = regs.semaphore_sequence; | 354 | block.sequence = regs.semaphore_sequence; |
| 341 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | 355 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of |
| 342 | // CoreTiming | 356 | // CoreTiming |
| 343 | block.timestamp = system.CoreTiming().GetTicks(); | 357 | block.timestamp = GetTicks(); |
| 344 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | 358 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, |
| 345 | sizeof(block)); | 359 | sizeof(block)); |
| 346 | } else { | 360 | } else { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -192,6 +192,8 @@ public: | |||
| 192 | 192 | ||
| 193 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); | 193 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); |
| 194 | 194 | ||
| 195 | u64 GetTicks() const; | ||
| 196 | |||
| 195 | std::unique_lock<std::mutex> LockSync() { | 197 | std::unique_lock<std::mutex> LockSync() { |
| 196 | return std::unique_lock{sync_mutex}; | 198 | return std::unique_lock{sync_mutex}; |
| 197 | } | 199 | } |