diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/core/hle/kernel/address_arbiter.cpp | 53 | ||||
| -rw-r--r-- | src/core/hle/kernel/address_arbiter.h | 2 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.cpp | 4 | ||||
| -rw-r--r-- | src/core/hle/service/bcat/backend/backend.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 91 | ||||
| -rw-r--r-- | src/video_core/engines/maxwell_3d.h | 16 | ||||
| -rw-r--r-- | src/video_core/gpu.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/gpu.h | 2 |
8 files changed, 103 insertions, 85 deletions
diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 2ea3dcb61..8475b698c 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp | |||
| @@ -201,42 +201,39 @@ void AddressArbiter::HandleWakeupThread(std::shared_ptr<Thread> thread) { | |||
| 201 | void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { | 201 | void AddressArbiter::InsertThread(std::shared_ptr<Thread> thread) { |
| 202 | const VAddr arb_addr = thread->GetArbiterWaitAddress(); | 202 | const VAddr arb_addr = thread->GetArbiterWaitAddress(); |
| 203 | std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; | 203 | std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; |
| 204 | auto it = thread_list.begin(); | 204 | |
| 205 | while (it != thread_list.end()) { | 205 | const auto iter = |
| 206 | const std::shared_ptr<Thread>& current_thread = *it; | 206 | std::find_if(thread_list.cbegin(), thread_list.cend(), [&thread](const auto& entry) { |
| 207 | if (current_thread->GetPriority() >= thread->GetPriority()) { | 207 | return entry->GetPriority() >= thread->GetPriority(); |
| 208 | thread_list.insert(it, thread); | 208 | }); |
| 209 | return; | 209 | |
| 210 | } | 210 | if (iter == thread_list.cend()) { |
| 211 | ++it; | 211 | thread_list.push_back(std::move(thread)); |
| 212 | } else { | ||
| 213 | thread_list.insert(iter, std::move(thread)); | ||
| 212 | } | 214 | } |
| 213 | thread_list.push_back(std::move(thread)); | ||
| 214 | } | 215 | } |
| 215 | 216 | ||
| 216 | void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { | 217 | void AddressArbiter::RemoveThread(std::shared_ptr<Thread> thread) { |
| 217 | const VAddr arb_addr = thread->GetArbiterWaitAddress(); | 218 | const VAddr arb_addr = thread->GetArbiterWaitAddress(); |
| 218 | std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; | 219 | std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[arb_addr]; |
| 219 | auto it = thread_list.begin(); | 220 | |
| 220 | while (it != thread_list.end()) { | 221 | const auto iter = std::find_if(thread_list.cbegin(), thread_list.cend(), |
| 221 | const std::shared_ptr<Thread>& current_thread = *it; | 222 | [&thread](const auto& entry) { return thread == entry; }); |
| 222 | if (current_thread.get() == thread.get()) { | 223 | |
| 223 | thread_list.erase(it); | 224 | ASSERT(iter != thread_list.cend()); |
| 224 | return; | 225 | |
| 225 | } | 226 | thread_list.erase(iter); |
| 226 | ++it; | ||
| 227 | } | ||
| 228 | UNREACHABLE(); | ||
| 229 | } | 227 | } |
| 230 | 228 | ||
| 231 | std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) { | 229 | std::vector<std::shared_ptr<Thread>> AddressArbiter::GetThreadsWaitingOnAddress( |
| 232 | std::vector<std::shared_ptr<Thread>> result; | 230 | VAddr address) const { |
| 233 | std::list<std::shared_ptr<Thread>>& thread_list = arb_threads[address]; | 231 | const auto iter = arb_threads.find(address); |
| 234 | auto it = thread_list.begin(); | 232 | if (iter == arb_threads.cend()) { |
| 235 | while (it != thread_list.end()) { | 233 | return {}; |
| 236 | std::shared_ptr<Thread> current_thread = *it; | ||
| 237 | result.push_back(std::move(current_thread)); | ||
| 238 | ++it; | ||
| 239 | } | 234 | } |
| 240 | return result; | 235 | |
| 236 | const std::list<std::shared_ptr<Thread>>& thread_list = iter->second; | ||
| 237 | return {thread_list.cbegin(), thread_list.cend()}; | ||
| 241 | } | 238 | } |
| 242 | } // namespace Kernel | 239 | } // namespace Kernel |
diff --git a/src/core/hle/kernel/address_arbiter.h b/src/core/hle/kernel/address_arbiter.h index 386983e54..f958eee5a 100644 --- a/src/core/hle/kernel/address_arbiter.h +++ b/src/core/hle/kernel/address_arbiter.h | |||
| @@ -86,7 +86,7 @@ private: | |||
| 86 | void RemoveThread(std::shared_ptr<Thread> thread); | 86 | void RemoveThread(std::shared_ptr<Thread> thread); |
| 87 | 87 | ||
| 88 | // Gets the threads waiting on an address. | 88 | // Gets the threads waiting on an address. |
| 89 | std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address); | 89 | std::vector<std::shared_ptr<Thread>> GetThreadsWaitingOnAddress(VAddr address) const; |
| 90 | 90 | ||
| 91 | /// List of threads waiting for an address arbiter | 91 | /// List of threads waiting for an address arbiter |
| 92 | std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; | 92 | std::unordered_map<VAddr, std::list<std::shared_ptr<Thread>>> arb_threads; |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index e965b5b04..ad464e03b 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -474,7 +474,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { | |||
| 474 | if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { | 474 | if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { |
| 475 | return; | 475 | return; |
| 476 | } | 476 | } |
| 477 | auto& scheduler = Core::System::GetInstance().GlobalScheduler(); | 477 | auto& scheduler = kernel.GlobalScheduler(); |
| 478 | if (processor_id >= 0) { | 478 | if (processor_id >= 0) { |
| 479 | scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); | 479 | scheduler.Unschedule(old_priority, static_cast<u32>(processor_id), this); |
| 480 | } | 480 | } |
| @@ -506,7 +506,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { | |||
| 506 | } | 506 | } |
| 507 | 507 | ||
| 508 | void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { | 508 | void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { |
| 509 | auto& scheduler = Core::System::GetInstance().GlobalScheduler(); | 509 | auto& scheduler = kernel.GlobalScheduler(); |
| 510 | if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || | 510 | if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || |
| 511 | current_priority >= THREADPRIO_COUNT) { | 511 | current_priority >= THREADPRIO_COUNT) { |
| 512 | return; | 512 | return; |
diff --git a/src/core/hle/service/bcat/backend/backend.cpp b/src/core/hle/service/bcat/backend/backend.cpp index 6f5ea095a..def3410cc 100644 --- a/src/core/hle/service/bcat/backend/backend.cpp +++ b/src/core/hle/service/bcat/backend/backend.cpp | |||
| @@ -117,13 +117,13 @@ bool NullBackend::SynchronizeDirectory(TitleIDVersion title, std::string name, | |||
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | bool NullBackend::Clear(u64 title_id) { | 119 | bool NullBackend::Clear(u64 title_id) { |
| 120 | LOG_DEBUG(Service_BCAT, "called, title_id={:016X}"); | 120 | LOG_DEBUG(Service_BCAT, "called, title_id={:016X}", title_id); |
| 121 | 121 | ||
| 122 | return true; | 122 | return true; |
| 123 | } | 123 | } |
| 124 | 124 | ||
| 125 | void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { | 125 | void NullBackend::SetPassphrase(u64 title_id, const Passphrase& passphrase) { |
| 126 | LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase = {}", title_id, | 126 | LOG_DEBUG(Service_BCAT, "called, title_id={:016X}, passphrase={}", title_id, |
| 127 | Common::HexToString(passphrase)); | 127 | Common::HexToString(passphrase)); |
| 128 | } | 128 | } |
| 129 | 129 | ||
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7cea146f0..0b3e8749b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | #include "core/core_timing.h" | 9 | #include "core/core_timing.h" |
| 10 | #include "video_core/engines/maxwell_3d.h" | 10 | #include "video_core/engines/maxwell_3d.h" |
| 11 | #include "video_core/engines/shader_type.h" | 11 | #include "video_core/engines/shader_type.h" |
| 12 | #include "video_core/gpu.h" | ||
| 12 | #include "video_core/memory_manager.h" | 13 | #include "video_core/memory_manager.h" |
| 13 | #include "video_core/rasterizer_interface.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 14 | #include "video_core/textures/texture.h" | 15 | #include "video_core/textures/texture.h" |
| @@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() { | |||
| 519 | regs.reg_array[0xd00] = 1; | 520 | regs.reg_array[0xd00] = 1; |
| 520 | } | 521 | } |
| 521 | 522 | ||
| 522 | void Maxwell3D::ProcessQueryGet() { | 523 | void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { |
| 524 | struct LongQueryResult { | ||
| 525 | u64_le value; | ||
| 526 | u64_le timestamp; | ||
| 527 | }; | ||
| 528 | static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); | ||
| 523 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; | 529 | const GPUVAddr sequence_address{regs.query.QueryAddress()}; |
| 524 | // Since the sequence address is given as a GPU VAddr, we have to convert it to an application | 530 | if (long_query) { |
| 525 | // VAddr before writing. | 531 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast |
| 532 | // GPU, this command may actually take a while to complete in real hardware due to GPU | ||
| 533 | // wait queues. | ||
| 534 | LongQueryResult query_result{payload, system.GPU().GetTicks()}; | ||
| 535 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||
| 536 | } else { | ||
| 537 | memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload)); | ||
| 538 | } | ||
| 539 | } | ||
| 526 | 540 | ||
| 541 | void Maxwell3D::ProcessQueryGet() { | ||
| 527 | // TODO(Subv): Support the other query units. | 542 | // TODO(Subv): Support the other query units. |
| 528 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | 543 | ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |
| 529 | "Units other than CROP are unimplemented"); | 544 | "Units other than CROP are unimplemented"); |
| 530 | 545 | ||
| 531 | u64 result = 0; | 546 | switch (regs.query.query_get.operation) { |
| 532 | 547 | case Regs::QueryOperation::Release: { | |
| 533 | // TODO(Subv): Support the other query variables | 548 | const u64 result = regs.query.query_sequence; |
| 534 | switch (regs.query.query_get.select) { | 549 | StampQueryResult(result, regs.query.query_get.short_query == 0); |
| 535 | case Regs::QuerySelect::Zero: | ||
| 536 | // This seems to actually write the query sequence to the query address. | ||
| 537 | result = regs.query.query_sequence; | ||
| 538 | break; | 550 | break; |
| 539 | default: | ||
| 540 | result = 1; | ||
| 541 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", | ||
| 542 | static_cast<u32>(regs.query.query_get.select.Value())); | ||
| 543 | } | 551 | } |
| 544 | 552 | case Regs::QueryOperation::Acquire: { | |
| 545 | // TODO(Subv): Research and implement how query sync conditions work. | 553 | // Todo(Blinkhawk): Under this operation, the GPU waits for the CPU |
| 546 | 554 | // to write a value that matches the current payload. | |
| 547 | struct LongQueryResult { | 555 | UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE"); |
| 548 | u64_le value; | 556 | break; |
| 549 | u64_le timestamp; | 557 | } |
| 550 | }; | 558 | case Regs::QueryOperation::Counter: { |
| 551 | static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size"); | 559 | u64 result{}; |
| 552 | 560 | switch (regs.query.query_get.select) { | |
| 553 | switch (regs.query.query_get.mode) { | 561 | case Regs::QuerySelect::Zero: |
| 554 | case Regs::QueryMode::Write: | 562 | result = 0; |
| 555 | case Regs::QueryMode::Write2: { | 563 | break; |
| 556 | u32 sequence = regs.query.query_sequence; | 564 | default: |
| 557 | if (regs.query.query_get.short_query) { | 565 | result = 1; |
| 558 | // Write the current query sequence to the sequence address. | 566 | UNIMPLEMENTED_MSG("Unimplemented query select type {}", |
| 559 | // TODO(Subv): Find out what happens if you use a long query type but mark it as a short | 567 | static_cast<u32>(regs.query.query_get.select.Value())); |
| 560 | // query. | ||
| 561 | memory_manager.Write<u32>(sequence_address, sequence); | ||
| 562 | } else { | ||
| 563 | // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast | ||
| 564 | // GPU, this command may actually take a while to complete in real hardware due to GPU | ||
| 565 | // wait queues. | ||
| 566 | LongQueryResult query_result{}; | ||
| 567 | query_result.value = result; | ||
| 568 | // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming | ||
| 569 | query_result.timestamp = system.CoreTiming().GetTicks(); | ||
| 570 | memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||
| 571 | } | 568 | } |
| 569 | StampQueryResult(result, regs.query.query_get.short_query == 0); | ||
| 570 | break; | ||
| 571 | } | ||
| 572 | case Regs::QueryOperation::Trap: { | ||
| 573 | UNIMPLEMENTED_MSG("Unimplemented query operation TRAP"); | ||
| 574 | break; | ||
| 575 | } | ||
| 576 | default: { | ||
| 577 | UNIMPLEMENTED_MSG("Unknown query operation"); | ||
| 572 | break; | 578 | break; |
| 573 | } | 579 | } |
| 574 | default: | ||
| 575 | UNIMPLEMENTED_MSG("Query mode {} not implemented", | ||
| 576 | static_cast<u32>(regs.query.query_get.mode.Value())); | ||
| 577 | } | 580 | } |
| 578 | } | 581 | } |
| 579 | 582 | ||
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 7b1912a66..0a2af54e5 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -71,12 +71,11 @@ public: | |||
| 71 | static constexpr std::size_t MaxConstBuffers = 18; | 71 | static constexpr std::size_t MaxConstBuffers = 18; |
| 72 | static constexpr std::size_t MaxConstBufferSize = 0x10000; | 72 | static constexpr std::size_t MaxConstBufferSize = 0x10000; |
| 73 | 73 | ||
| 74 | enum class QueryMode : u32 { | 74 | enum class QueryOperation : u32 { |
| 75 | Write = 0, | 75 | Release = 0, |
| 76 | Sync = 1, | 76 | Acquire = 1, |
| 77 | // TODO(Subv): It is currently unknown what the difference between method 2 and method 0 | 77 | Counter = 2, |
| 78 | // is. | 78 | Trap = 3, |
| 79 | Write2 = 2, | ||
| 80 | }; | 79 | }; |
| 81 | 80 | ||
| 82 | enum class QueryUnit : u32 { | 81 | enum class QueryUnit : u32 { |
| @@ -1081,7 +1080,7 @@ public: | |||
| 1081 | u32 query_sequence; | 1080 | u32 query_sequence; |
| 1082 | union { | 1081 | union { |
| 1083 | u32 raw; | 1082 | u32 raw; |
| 1084 | BitField<0, 2, QueryMode> mode; | 1083 | BitField<0, 2, QueryOperation> operation; |
| 1085 | BitField<4, 1, u32> fence; | 1084 | BitField<4, 1, u32> fence; |
| 1086 | BitField<12, 4, QueryUnit> unit; | 1085 | BitField<12, 4, QueryUnit> unit; |
| 1087 | BitField<16, 1, QuerySyncCondition> sync_cond; | 1086 | BitField<16, 1, QuerySyncCondition> sync_cond; |
| @@ -1413,6 +1412,9 @@ private: | |||
| 1413 | /// Handles a write to the QUERY_GET register. | 1412 | /// Handles a write to the QUERY_GET register. |
| 1414 | void ProcessQueryGet(); | 1413 | void ProcessQueryGet(); |
| 1415 | 1414 | ||
| 1415 | // Writes the payload to the query address: a 128-bit value/timestamp pair for long queries, a 32-bit value otherwise | ||
| 1416 | void StampQueryResult(u64 payload, bool long_query); | ||
| 1417 | |||
| 1416 | // Handles Conditional Rendering | 1418 | // Handles Conditional Rendering |
| 1417 | void ProcessQueryCondition(); | 1419 | void ProcessQueryCondition(); |
| 1418 | 1420 | ||
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 062ca83b8..4419ab735 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "common/microprofile.h" | 6 | #include "common/microprofile.h" |
| 7 | #include "core/core.h" | 7 | #include "core/core.h" |
| 8 | #include "core/core_timing.h" | 8 | #include "core/core_timing.h" |
| 9 | #include "core/core_timing_util.h" | ||
| 9 | #include "core/memory.h" | 10 | #include "core/memory.h" |
| 10 | #include "video_core/engines/fermi_2d.h" | 11 | #include "video_core/engines/fermi_2d.h" |
| 11 | #include "video_core/engines/kepler_compute.h" | 12 | #include "video_core/engines/kepler_compute.h" |
| @@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { | |||
| 122 | return true; | 123 | return true; |
| 123 | } | 124 | } |
| 124 | 125 | ||
| 126 | u64 GPU::GetTicks() const { | ||
| 127 | // These values were reverse engineered by fincs from NVN | ||
| 128 | // The GPU clock advances 384 ticks every 625 nanoseconds | ||
| 129 | constexpr u64 gpu_ticks_num = 384; | ||
| 130 | constexpr u64 gpu_ticks_den = 625; | ||
| 131 | |||
| 132 | const u64 cpu_ticks = system.CoreTiming().GetTicks(); | ||
| 133 | const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); | ||
| 134 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | ||
| 135 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | ||
| 136 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 137 | } | ||
| 138 | |||
| 125 | void GPU::FlushCommands() { | 139 | void GPU::FlushCommands() { |
| 126 | renderer.Rasterizer().FlushCommands(); | 140 | renderer.Rasterizer().FlushCommands(); |
| 127 | } | 141 | } |
| @@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() { | |||
| 340 | block.sequence = regs.semaphore_sequence; | 354 | block.sequence = regs.semaphore_sequence; |
| 341 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of | 355 | // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of |
| 342 | // CoreTiming | 356 | // CoreTiming |
| 343 | block.timestamp = system.CoreTiming().GetTicks(); | 357 | block.timestamp = GetTicks(); |
| 344 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, | 358 | memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, |
| 345 | sizeof(block)); | 359 | sizeof(block)); |
| 346 | } else { | 360 | } else { |
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b648317bb..07727210c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -192,6 +192,8 @@ public: | |||
| 192 | 192 | ||
| 193 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); | 193 | bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); |
| 194 | 194 | ||
| 195 | u64 GetTicks() const; | ||
| 196 | |||
| 195 | std::unique_lock<std::mutex> LockSync() { | 197 | std::unique_lock<std::mutex> LockSync() { |
| 196 | return std::unique_lock{sync_mutex}; | 198 | return std::unique_lock{sync_mutex}; |
| 197 | } | 199 | } |