122 files changed, 3817 insertions, 3445 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 987076956..1662ec63d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -148,7 +148,7 @@ add_library(core STATIC
 hle/kernel/client_session.h
 hle/kernel/code_set.cpp
 hle/kernel/code_set.h
-hle/kernel/errors.h
+hle/kernel/svc_results.h
 hle/kernel/global_scheduler_context.cpp
 hle/kernel/global_scheduler_context.h
 hle/kernel/handle_table.cpp
@@ -174,6 +174,7 @@ add_library(core STATIC
 hle/kernel/k_scheduler.h
 hle/kernel/k_scheduler_lock.h
 hle/kernel/k_scoped_lock.h
+hle/kernel/k_scoped_resource_reservation.h
 hle/kernel/k_scoped_scheduler_lock_and_sleep.h
 hle/kernel/k_synchronization_object.cpp
 hle/kernel/k_synchronization_object.h
@@ -223,7 +224,6 @@ add_library(core STATIC
 hle/kernel/svc.cpp
 hle/kernel/svc.h
 hle/kernel/svc_common.h
-hle/kernel/svc_results.h
 hle/kernel/svc_types.h
 hle/kernel/svc_wrap.h
 hle/kernel/time_manager.cpp
diff --git a/src/core/hle/kernel/client_port.cpp b/src/core/hle/kernel/client_port.cpp
index f8f005f15..0b6957e31 100644
--- a/src/core/hle/kernel/client_port.cpp
+++ b/src/core/hle/kernel/client_port.cpp
@@ -4,11 +4,11 @@

 #include "core/hle/kernel/client_port.h"
 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/server_port.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel {

@@ -21,7 +21,7 @@ std::shared_ptr<ServerPort> ClientPort::GetServerPort() const {

 ResultVal<std::shared_ptr<ClientSession>> ClientPort::Connect() {
 if (active_sessions >= max_sessions) {
-return ERR_MAX_CONNECTIONS_REACHED;
+return ResultMaxConnectionsReached;
 }
 active_sessions++;

diff --git a/src/core/hle/kernel/client_session.cpp b/src/core/hle/kernel/client_session.cpp
index a2be1a8f6..e230f365a 100644
--- a/src/core/hle/kernel/client_session.cpp
+++ b/src/core/hle/kernel/client_session.cpp
@@ -3,11 +3,11 @@
 // Refer to the license.txt file included.

 #include "core/hle/kernel/client_session.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/server_session.h"
 #include "core/hle/kernel/session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/result.h"

 namespace Kernel {
@@ -43,7 +43,7 @@ ResultCode ClientSession::SendSyncRequest(std::shared_ptr<KThread> thread,
 Core::Timing::CoreTiming& core_timing) {
 // Keep ServerSession alive until we're done working with it.
 if (!parent->Server()) {
-return ERR_SESSION_CLOSED_BY_REMOTE;
+return ResultSessionClosedByRemote;
 }

 // Signal the server session that new data is available
diff --git a/src/core/hle/kernel/errors.h b/src/core/hle/kernel/errors.h
deleted file mode 100644
index 7d32a39f0..000000000
--- a/src/core/hle/kernel/errors.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2018 yuzu emulator team
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include "core/hle/result.h"
-
-namespace Kernel {
-
-// Confirmed Switch kernel error codes
-
-constexpr ResultCode ERR_MAX_CONNECTIONS_REACHED{ErrorModule::Kernel, 7};
-constexpr ResultCode ERR_INVALID_CAPABILITY_DESCRIPTOR{ErrorModule::Kernel, 14};
-constexpr ResultCode ERR_THREAD_TERMINATING{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_TERMINATION_REQUESTED{ErrorModule::Kernel, 59};
-constexpr ResultCode ERR_INVALID_SIZE{ErrorModule::Kernel, 101};
-constexpr ResultCode ERR_INVALID_ADDRESS{ErrorModule::Kernel, 102};
-constexpr ResultCode ERR_OUT_OF_RESOURCES{ErrorModule::Kernel, 103};
-constexpr ResultCode ERR_OUT_OF_MEMORY{ErrorModule::Kernel, 104};
-constexpr ResultCode ERR_HANDLE_TABLE_FULL{ErrorModule::Kernel, 105};
-constexpr ResultCode ERR_INVALID_ADDRESS_STATE{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_CURRENT_MEMORY{ErrorModule::Kernel, 106};
-constexpr ResultCode ERR_INVALID_MEMORY_PERMISSIONS{ErrorModule::Kernel, 108};
-constexpr ResultCode ERR_INVALID_MEMORY_RANGE{ErrorModule::Kernel, 110};
-constexpr ResultCode ERR_INVALID_PROCESSOR_ID{ErrorModule::Kernel, 113};
-constexpr ResultCode ERR_INVALID_THREAD_PRIORITY{ErrorModule::Kernel, 112};
-constexpr ResultCode ERR_INVALID_HANDLE{ErrorModule::Kernel, 114};
-constexpr ResultCode ERR_INVALID_POINTER{ErrorModule::Kernel, 115};
-constexpr ResultCode ERR_INVALID_COMBINATION{ErrorModule::Kernel, 116};
-constexpr ResultCode RESULT_TIMEOUT{ErrorModule::Kernel, 117};
-constexpr ResultCode ERR_SYNCHRONIZATION_CANCELED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_CANCELLED{ErrorModule::Kernel, 118};
-constexpr ResultCode ERR_OUT_OF_RANGE{ErrorModule::Kernel, 119};
-constexpr ResultCode ERR_INVALID_ENUM_VALUE{ErrorModule::Kernel, 120};
-constexpr ResultCode ERR_NOT_FOUND{ErrorModule::Kernel, 121};
-constexpr ResultCode ERR_BUSY{ErrorModule::Kernel, 122};
-constexpr ResultCode ERR_SESSION_CLOSED_BY_REMOTE{ErrorModule::Kernel, 123};
-constexpr ResultCode ERR_INVALID_STATE{ErrorModule::Kernel, 125};
-constexpr ResultCode ERR_RESERVED_VALUE{ErrorModule::Kernel, 126};
-constexpr ResultCode ERR_RESOURCE_LIMIT_EXCEEDED{ErrorModule::Kernel, 132};
-
-} // namespace Kernel
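Note: the kernel error constants removed above now live in svc_results.h under Switch-style names. As a minimal sketch of the corresponding declarations, assuming svc_results.h keeps the same ErrorModule::Kernel code numbers as errors.h and showing only names that actually appear elsewhere in this diff:

    // Illustrative only; see svc_results.h in the tree for the authoritative list.
    constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7};
    constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59};
    constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104};
    constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105};
    constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
    constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110};
    constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};
    constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116};
    constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117};
    constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118};
    constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123};
    constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125};
    constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132};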
diff --git a/src/core/hle/kernel/handle_table.cpp b/src/core/hle/kernel/handle_table.cpp
index 1a2fa9cd8..f96d34078 100644
--- a/src/core/hle/kernel/handle_table.cpp
+++ b/src/core/hle/kernel/handle_table.cpp
@@ -6,12 +6,12 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel {
 namespace {
@@ -33,7 +33,7 @@ HandleTable::~HandleTable() = default;
 ResultCode HandleTable::SetSize(s32 handle_table_size) {
 if (static_cast<u32>(handle_table_size) > MAX_COUNT) {
 LOG_ERROR(Kernel, "Handle table size {} is greater than {}", handle_table_size, MAX_COUNT);
-return ERR_OUT_OF_MEMORY;
+return ResultOutOfMemory;
 }

 // Values less than or equal to zero indicate to use the maximum allowable
@@ -53,7 +53,7 @@ ResultVal<Handle> HandleTable::Create(std::shared_ptr<Object> obj) {
 const u16 slot = next_free_slot;
 if (slot >= table_size) {
 LOG_ERROR(Kernel, "Unable to allocate Handle, too many slots in use.");
-return ERR_HANDLE_TABLE_FULL;
+return ResultHandleTableFull;
 }
 next_free_slot = generations[slot];

@@ -76,7 +76,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 std::shared_ptr<Object> object = GetGeneric(handle);
 if (object == nullptr) {
 LOG_ERROR(Kernel, "Tried to duplicate invalid handle: {:08X}", handle);
-return ERR_INVALID_HANDLE;
+return ResultInvalidHandle;
 }
 return Create(std::move(object));
 }
@@ -84,7 +84,7 @@ ResultVal<Handle> HandleTable::Duplicate(Handle handle) {
 ResultCode HandleTable::Close(Handle handle) {
 if (!IsValid(handle)) {
 LOG_ERROR(Kernel, "Handle is not valid! handle={:08X}", handle);
-return ERR_INVALID_HANDLE;
+return ResultInvalidHandle;
 }

 const u16 slot = GetSlot(handle);
diff --git a/src/core/hle/kernel/hle_ipc.cpp b/src/core/hle/kernel/hle_ipc.cpp
index 7ec62cf18..161d9f782 100644
--- a/src/core/hle/kernel/hle_ipc.cpp
+++ b/src/core/hle/kernel/hle_ipc.cpp
@@ -14,7 +14,6 @@
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/hle/ipc_helpers.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/hle_ipc.h"
 #include "core/hle/kernel/k_readable_event.h"
@@ -26,6 +25,7 @@
 #include "core/hle/kernel/object.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/server_session.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/memory.h"

diff --git a/src/core/hle/kernel/k_address_arbiter.cpp b/src/core/hle/kernel/k_address_arbiter.cpp
index d0e90fd60..7018f56da 100644
--- a/src/core/hle/kernel/k_address_arbiter.cpp
+++ b/src/core/hle/kernel/k_address_arbiter.cpp
@@ -120,10 +120,10 @@ ResultCode KAddressArbiter::SignalAndIncrementIfEqual(VAddr addr, s32 value, s32
 s32 user_value{};
 if (!UpdateIfEqual(system, &user_value, addr, value, value + 1)) {
 LOG_ERROR(Kernel, "Invalid current memory!");
-return Svc::ResultInvalidCurrentMemory;
+return ResultInvalidCurrentMemory;
 }
 if (user_value != value) {
-return Svc::ResultInvalidState;
+return ResultInvalidState;
 }

 auto it = thread_tree.nfind_light({addr, -1});
@@ -189,10 +189,10 @@ ResultCode KAddressArbiter::SignalAndModifyByWaitingCountIfEqual(VAddr addr, s32

 if (!succeeded) {
 LOG_ERROR(Kernel, "Invalid current memory!");
-return Svc::ResultInvalidCurrentMemory;
+return ResultInvalidCurrentMemory;
 }
 if (user_value != value) {
-return Svc::ResultInvalidState;
+return ResultInvalidState;
 }

 while ((it != thread_tree.end()) && (count <= 0 || num_waiters < count) &&
@@ -221,11 +221,11 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement
 // Check that the thread isn't terminating.
 if (cur_thread->IsTerminationRequested()) {
 slp.CancelSleep();
-return Svc::ResultTerminationRequested;
+return ResultTerminationRequested;
 }

 // Set the synced object.
-cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+cur_thread->SetSyncedObject(nullptr, ResultTimedOut);

 // Read the value from userspace.
 s32 user_value{};
@@ -238,19 +238,19 @@ ResultCode KAddressArbiter::WaitIfLessThan(VAddr addr, s32 value, bool decrement

 if (!succeeded) {
 slp.CancelSleep();
-return Svc::ResultInvalidCurrentMemory;
+return ResultInvalidCurrentMemory;
 }

 // Check that the value is less than the specified one.
 if (user_value >= value) {
 slp.CancelSleep();
-return Svc::ResultInvalidState;
+return ResultInvalidState;
 }

 // Check that the timeout is non-zero.
 if (timeout == 0) {
 slp.CancelSleep();
-return Svc::ResultTimedOut;
+return ResultTimedOut;
 }

 // Set the arbiter.
@@ -288,29 +288,29 @@ ResultCode KAddressArbiter::WaitIfEqual(VAddr addr, s32 value, s64 timeout) {
 // Check that the thread isn't terminating.
 if (cur_thread->IsTerminationRequested()) {
 slp.CancelSleep();
-return Svc::ResultTerminationRequested;
+return ResultTerminationRequested;
 }

 // Set the synced object.
-cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+cur_thread->SetSyncedObject(nullptr, ResultTimedOut);

 // Read the value from userspace.
 s32 user_value{};
 if (!ReadFromUser(system, &user_value, addr)) {
 slp.CancelSleep();
-return Svc::ResultInvalidCurrentMemory;
+return ResultInvalidCurrentMemory;
 }

 // Check that the value is equal.
 if (value != user_value) {
 slp.CancelSleep();
-return Svc::ResultInvalidState;
+return ResultInvalidState;
 }

 // Check that the timeout is non-zero.
 if (timeout == 0) {
 slp.CancelSleep();
-return Svc::ResultTimedOut;
+return ResultTimedOut;
 }

 // Set the arbiter.
diff --git a/src/core/hle/kernel/k_condition_variable.cpp b/src/core/hle/kernel/k_condition_variable.cpp
index f0ad8b390..170d8fa0d 100644
--- a/src/core/hle/kernel/k_condition_variable.cpp
+++ b/src/core/hle/kernel/k_condition_variable.cpp
@@ -92,10 +92,10 @@ ResultCode KConditionVariable::SignalToAddress(VAddr addr) {
 // Write the value to userspace.
 if (!WriteToUser(system, addr, std::addressof(next_value))) {
 if (next_owner_thread) {
-next_owner_thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+next_owner_thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
 }

-return Svc::ResultInvalidCurrentMemory;
+return ResultInvalidCurrentMemory;
 }
 }

@@ -114,20 +114,20 @@ ResultCode KConditionVariable::WaitForAddress(Handle handle, VAddr addr, u32 val
 cur_thread->SetSyncedObject(nullptr, RESULT_SUCCESS);

 // Check if the thread should terminate.
-R_UNLESS(!cur_thread->IsTerminationRequested(), Svc::ResultTerminationRequested);
+R_UNLESS(!cur_thread->IsTerminationRequested(), ResultTerminationRequested);

 {
 // Read the tag from userspace.
 u32 test_tag{};
 R_UNLESS(ReadFromUser(system, std::addressof(test_tag), addr),
-Svc::ResultInvalidCurrentMemory);
+ResultInvalidCurrentMemory);

 // If the tag isn't the handle (with wait mask), we're done.
 R_UNLESS(test_tag == (handle | Svc::HandleWaitMask), RESULT_SUCCESS);

 // Get the lock owner thread.
 owner_thread = kernel.CurrentProcess()->GetHandleTable().Get<KThread>(handle);
-R_UNLESS(owner_thread, Svc::ResultInvalidHandle);
+R_UNLESS(owner_thread, ResultInvalidHandle);

 // Update the lock.
 cur_thread->SetAddressKey(addr, value);
@@ -191,13 +191,13 @@ KThread* KConditionVariable::SignalImpl(KThread* thread) {
 thread_to_close = owner_thread.get();
 } else {
 // The lock was tagged with a thread that doesn't exist.
-thread->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+thread->SetSyncedObject(nullptr, ResultInvalidState);
 thread->Wakeup();
 }
 }
 } else {
 // If the address wasn't accessible, note so.
-thread->SetSyncedObject(nullptr, Svc::ResultInvalidCurrentMemory);
+thread->SetSyncedObject(nullptr, ResultInvalidCurrentMemory);
 thread->Wakeup();
 }

@@ -263,12 +263,12 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
 KScopedSchedulerLockAndSleep slp{kernel, cur_thread, timeout};

 // Set the synced object.
-cur_thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+cur_thread->SetSyncedObject(nullptr, ResultTimedOut);

 // Check that the thread isn't terminating.
 if (cur_thread->IsTerminationRequested()) {
 slp.CancelSleep();
-return Svc::ResultTerminationRequested;
+return ResultTerminationRequested;
 }

 // Update the value and process for the next owner.
@@ -302,7 +302,7 @@ ResultCode KConditionVariable::Wait(VAddr addr, u64 key, u32 value, s64 timeout)
 // Write the value to userspace.
 if (!WriteToUser(system, addr, std::addressof(next_value))) {
 slp.CancelSleep();
-return Svc::ResultInvalidCurrentMemory;
+return ResultInvalidCurrentMemory;
 }
 }

diff --git a/src/core/hle/kernel/k_readable_event.cpp b/src/core/hle/kernel/k_readable_event.cpp
index d8a42dbaf..4b4d34857 100644
--- a/src/core/hle/kernel/k_readable_event.cpp
+++ b/src/core/hle/kernel/k_readable_event.cpp
@@ -6,7 +6,6 @@
 #include "common/assert.h"
 #include "common/common_funcs.h"
 #include "common/logging/log.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/k_readable_event.h"
 #include "core/hle/kernel/k_scheduler.h"
 #include "core/hle/kernel/k_thread.h"
@@ -47,7 +46,7 @@ ResultCode KReadableEvent::Reset() {
 KScopedSchedulerLock lk{kernel};

 if (!is_signaled) {
-return Svc::ResultInvalidState;
+return ResultInvalidState;
 }

 is_signaled = false;
diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp
index ab2ab683f..d7a4a38e6 100644
--- a/src/core/hle/kernel/k_resource_limit.cpp
+++ b/src/core/hle/kernel/k_resource_limit.cpp
@@ -75,7 +75,7 @@ s64 KResourceLimit::GetFreeValue(LimitableResource which) const {
 ResultCode KResourceLimit::SetLimitValue(LimitableResource which, s64 value) {
 const auto index = static_cast<std::size_t>(which);
 KScopedLightLock lk(lock);
-R_UNLESS(current_values[index] <= value, Svc::ResultInvalidState);
+R_UNLESS(current_values[index] <= value, ResultInvalidState);

 limit_values[index] = value;

diff --git a/src/core/hle/kernel/k_scoped_resource_reservation.h b/src/core/hle/kernel/k_scoped_resource_reservation.h
new file mode 100644
index 000000000..c5deca00b
--- /dev/null
+++ b/src/core/hle/kernel/k_scoped_resource_reservation.h
@@ -0,0 +1,67 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+// This file references various implementation details from Atmosphere, an open-source firmware for
+// the Nintendo Switch. Copyright 2018-2020 Atmosphere-NX.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "core/hle/kernel/k_resource_limit.h"
+#include "core/hle/kernel/process.h"
+
+namespace Kernel {
+
+class KScopedResourceReservation {
+public:
+explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+s64 v, s64 timeout)
+: resource_limit(std::move(l)), value(v), resource(r) {
+if (resource_limit && value) {
+success = resource_limit->Reserve(resource, value, timeout);
+} else {
+success = true;
+}
+}
+
+explicit KScopedResourceReservation(std::shared_ptr<KResourceLimit> l, LimitableResource r,
+s64 v = 1)
+: resource_limit(std::move(l)), value(v), resource(r) {
+if (resource_limit && value) {
+success = resource_limit->Reserve(resource, value);
+} else {
+success = true;
+}
+}
+
+explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v, s64 t)
+: KScopedResourceReservation(p->GetResourceLimit(), r, v, t) {}
+
+explicit KScopedResourceReservation(const Process* p, LimitableResource r, s64 v = 1)
+: KScopedResourceReservation(p->GetResourceLimit(), r, v) {}
+
+~KScopedResourceReservation() noexcept {
+if (resource_limit && value && success) {
+// resource was not committed, release the reservation.
+resource_limit->Release(resource, value);
+}
+}
+
+/// Commit the resource reservation, destruction of this object does not release the resource
+void Commit() {
+resource_limit = nullptr;
+}
+
+[[nodiscard]] bool Succeeded() const {
+return success;
+}
+
+private:
+std::shared_ptr<KResourceLimit> resource_limit;
+s64 value;
+LimitableResource resource;
+bool success;
+};
+
+} // namespace Kernel
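For context, the intended usage is reserve, check, then commit only after the work that consumes the reservation has succeeded; on any early return the destructor releases the reservation. A minimal caller sketch mirroring the pattern adopted in page_table.cpp further down in this diff (the function name and body are hypothetical):

    // Hypothetical caller; only the KScopedResourceReservation API is taken from this diff.
    ResultCode MapSomething(const Process* process, std::size_t size) {
        // Reserve physical memory against the process resource limit.
        KScopedResourceReservation memory_reservation(process, LimitableResource::PhysicalMemory,
                                                      size);
        if (!memory_reservation.Succeeded()) {
            return ResultResourceLimitedExceeded;
        }

        // ... allocate and map the memory that the reservation accounts for ...

        // Commit so the destructor does not release the reservation.
        memory_reservation.Commit();
        return RESULT_SUCCESS;
    }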
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index 140cc46a7..82f72a0fe 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -40,20 +40,20 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
 // Check if the timeout is zero.
 if (timeout == 0) {
 slp.CancelSleep();
-return Svc::ResultTimedOut;
+return ResultTimedOut;
 }

 // Check if the thread should terminate.
 if (thread->IsTerminationRequested()) {
 slp.CancelSleep();
-return Svc::ResultTerminationRequested;
+return ResultTerminationRequested;
 }

 // Check if waiting was canceled.
 if (thread->IsWaitCancelled()) {
 slp.CancelSleep();
 thread->ClearWaitCancelled();
-return Svc::ResultCancelled;
+return ResultCancelled;
 }

 // Add the waiters.
@@ -75,7 +75,7 @@ ResultCode KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,

 // Mark the thread as waiting.
 thread->SetCancellable();
-thread->SetSyncedObject(nullptr, Svc::ResultTimedOut);
+thread->SetSyncedObject(nullptr, ResultTimedOut);
 thread->SetState(ThreadState::Waiting);
 thread->SetWaitReasonForDebugging(ThreadWaitReasonForDebugging::Synchronization);
 }
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index b59259c4f..e5620da5a 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -18,7 +18,6 @@
 #include "core/core.h"
 #include "core/cpu_manager.h"
 #include "core/hardware_properties.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_condition_variable.h"
 #include "core/hle/kernel/k_resource_limit.h"
@@ -127,7 +126,7 @@ ResultCode KThread::Initialize(KThreadFunction func, uintptr_t arg, VAddr user_s

 // Set core ID and wait result.
 core_id = phys_core;
-wait_result = Svc::ResultNoSynchronizationObject;
+wait_result = ResultNoSynchronizationObject;

 // Set priorities.
 priority = prio;
@@ -238,7 +237,7 @@ void KThread::Finalize() {
 while (it != waiter_list.end()) {
 // The thread shouldn't be a kernel waiter.
 it->SetLockOwner(nullptr);
-it->SetSyncedObject(nullptr, Svc::ResultInvalidState);
+it->SetSyncedObject(nullptr, ResultInvalidState);
 it->Wakeup();
 it = waiter_list.erase(it);
 }
@@ -447,7 +446,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
 // If the core id is no-update magic, preserve the ideal core id.
 if (core_id == Svc::IdealCoreNoUpdate) {
 core_id = virtual_ideal_core_id;
-R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, Svc::ResultInvalidCombination);
+R_UNLESS(((1ULL << core_id) & v_affinity_mask) != 0, ResultInvalidCombination);
 }

 // Set the virtual core/affinity mask.
@@ -526,7 +525,7 @@ ResultCode KThread::SetCoreMask(s32 core_id, u64 v_affinity_mask) {
 if (GetStackParameters().is_pinned) {
 // Verify that the current thread isn't terminating.
 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-Svc::ResultTerminationRequested);
+ResultTerminationRequested);

 // Note that the thread was pinned.
 thread_is_pinned = true;
@@ -604,7 +603,7 @@ void KThread::WaitCancel() {
 sleeping_queue->WakeupThread(this);
 wait_cancelled = true;
 } else {
-SetSyncedObject(nullptr, Svc::ResultCancelled);
+SetSyncedObject(nullptr, ResultCancelled);
 SetState(ThreadState::Runnable);
 wait_cancelled = false;
 }
@@ -663,12 +662,12 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
 // Verify our state.
 const auto cur_state = GetState();
 R_UNLESS((cur_state == ThreadState::Waiting || cur_state == ThreadState::Runnable),
-Svc::ResultInvalidState);
+ResultInvalidState);

 // Either pause or resume.
 if (activity == Svc::ThreadActivity::Paused) {
 // Verify that we're not suspended.
-R_UNLESS(!IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+R_UNLESS(!IsSuspendRequested(SuspendType::Thread), ResultInvalidState);

 // Suspend.
 RequestSuspend(SuspendType::Thread);
@@ -676,7 +675,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
 ASSERT(activity == Svc::ThreadActivity::Runnable);

 // Verify that we're suspended.
-R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);

 // Resume.
 Resume(SuspendType::Thread);
@@ -698,7 +697,7 @@ ResultCode KThread::SetActivity(Svc::ThreadActivity activity) {
 if (GetStackParameters().is_pinned) {
 // Verify that the current thread isn't terminating.
 R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-Svc::ResultTerminationRequested);
+ResultTerminationRequested);

 // Note that the thread was pinned and not current.
 thread_is_pinned = true;
@@ -745,7 +744,7 @@ ResultCode KThread::GetThreadContext3(std::vector<u8>& out) {
 KScopedSchedulerLock sl{kernel};

 // Verify that we're suspended.
-R_UNLESS(IsSuspendRequested(SuspendType::Thread), Svc::ResultInvalidState);
+R_UNLESS(IsSuspendRequested(SuspendType::Thread), ResultInvalidState);

 // If we're not terminating, get the thread's user context.
 if (!IsTerminationRequested()) {
@@ -905,12 +904,11 @@ ResultCode KThread::Run() {
 KScopedSchedulerLock lk{kernel};

 // If either this thread or the current thread are requesting termination, note it.
-R_UNLESS(!IsTerminationRequested(), Svc::ResultTerminationRequested);
-R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(),
-Svc::ResultTerminationRequested);
+R_UNLESS(!IsTerminationRequested(), ResultTerminationRequested);
+R_UNLESS(!GetCurrentThread(kernel).IsTerminationRequested(), ResultTerminationRequested);

 // Ensure our thread state is correct.
-R_UNLESS(GetState() == ThreadState::Initialized, Svc::ResultInvalidState);
+R_UNLESS(GetState() == ThreadState::Initialized, ResultInvalidState);

 // If the current thread has been asked to suspend, suspend it and retry.
 if (GetCurrentThread(kernel).IsSuspended()) {
@@ -962,7 +960,7 @@ ResultCode KThread::Sleep(s64 timeout) {
 // Check if the thread should terminate.
 if (IsTerminationRequested()) {
 slp.CancelSleep();
-return Svc::ResultTerminationRequested;
+return ResultTerminationRequested;
 }

 // Mark the thread as waiting.
diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp
index b20c2d13a..b6e6f115e 100644
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@@ -26,7 +26,6 @@
 #include "core/device_memory.h"
 #include "core/hardware_properties.h"
 #include "core/hle/kernel/client_port.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/handle_table.h"
 #include "core/hle/kernel/k_resource_limit.h"
 #include "core/hle/kernel/k_scheduler.h"
@@ -39,6 +38,7 @@
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/service_thread.h"
 #include "core/hle/kernel/shared_memory.h"
+#include "core/hle/kernel/svc_results.h"
 #include "core/hle/kernel/time_manager.h"
 #include "core/hle/lock.h"
 #include "core/hle/result.h"
@@ -141,11 +141,17 @@ struct KernelCore::Impl {
 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Events, 700).IsSuccess());
 ASSERT(system_resource_limit->SetLimitValue(LimitableResource::TransferMemory, 200)
 .IsSuccess());
-ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 900).IsSuccess());
+ASSERT(system_resource_limit->SetLimitValue(LimitableResource::Sessions, 933).IsSuccess());

-if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, 0x60000)) {
+// Derived from recent software updates. The kernel reserves 27MB
+constexpr u64 kernel_size{0x1b00000};
+if (!system_resource_limit->Reserve(LimitableResource::PhysicalMemory, kernel_size)) {
 UNREACHABLE();
 }
+// Reserve secure applet memory, introduced in firmware 5.0.0
+constexpr u64 secure_applet_memory_size{0x400000};
+ASSERT(system_resource_limit->Reserve(LimitableResource::PhysicalMemory,
+secure_applet_memory_size));
 }

 void InitializePreemption(KernelCore& kernel) {
@@ -302,8 +308,11 @@ struct KernelCore::Impl {
 // Allocate slab heaps
 user_slab_heap_pages = std::make_unique<Memory::SlabHeap<Memory::Page>>();

+constexpr u64 user_slab_heap_size{0x1ef000};
+// Reserve slab heaps
+ASSERT(
+system_resource_limit->Reserve(LimitableResource::PhysicalMemory, user_slab_heap_size));
 // Initialize slab heaps
-constexpr u64 user_slab_heap_size{0x3de000};
 user_slab_heap_pages->Initialize(
 system.DeviceMemory().GetPointer(Core::DramMemoryMap::SlabHeapBase),
 user_slab_heap_size);
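As an illustrative sanity check on the hard-coded sizes introduced above (not part of the change itself): 0x1b00000 bytes is exactly 27 MiB, matching the comment; 0x400000 is 4 MiB of secure applet memory; and the 0x1ef000-byte user slab heap is 495 pages of 4 KiB.

    // Illustrative arithmetic only; these asserts do not appear in the commit.
    static_assert(0x1b00000 == 27 * 1024 * 1024); // kernel physical memory reservation
    static_assert(0x400000 == 4 * 1024 * 1024);   // secure applet memory
    static_assert(0x1ef000 == 495 * 4096);        // user slab heap, in 4 KiB pages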
diff --git a/src/core/hle/kernel/memory/memory_manager.cpp b/src/core/hle/kernel/memory/memory_manager.cpp
index acf13585c..77f135cdc 100644
--- a/src/core/hle/kernel/memory/memory_manager.cpp
+++ b/src/core/hle/kernel/memory/memory_manager.cpp
@@ -8,9 +8,9 @@
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/scope_exit.h"
-#include "core/hle/kernel/errors.h"
 #include "core/hle/kernel/memory/memory_manager.h"
 #include "core/hle/kernel/memory/page_linked_list.h"
+#include "core/hle/kernel/svc_results.h"

 namespace Kernel::Memory {

@@ -95,7 +95,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa
 // Choose a heap based on our page size request
 const s32 heap_index{PageHeap::GetBlockIndex(num_pages)};
 if (heap_index < 0) {
-return ERR_OUT_OF_MEMORY;
+return ResultOutOfMemory;
 }

 // TODO (bunnei): Support multiple managers
@@ -140,7 +140,7 @@ ResultCode MemoryManager::Allocate(PageLinkedList& page_list, std::size_t num_pa

 // Only succeed if we allocated as many pages as we wanted
 if (num_pages) {
-return ERR_OUT_OF_MEMORY;
+return ResultOutOfMemory;
 }

 // We succeeded!
diff --git a/src/core/hle/kernel/memory/page_table.cpp b/src/core/hle/kernel/memory/page_table.cpp index 7de91c768..00ed9b881 100644 --- a/src/core/hle/kernel/memory/page_table.cpp +++ b/src/core/hle/kernel/memory/page_table.cpp | |||
| @@ -6,8 +6,7 @@ | |||
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "common/scope_exit.h" | 7 | #include "common/scope_exit.h" |
| 8 | #include "core/core.h" | 8 | #include "core/core.h" |
| 9 | #include "core/hle/kernel/errors.h" | 9 | #include "core/hle/kernel/k_scoped_resource_reservation.h" |
| 10 | #include "core/hle/kernel/k_resource_limit.h" | ||
| 11 | #include "core/hle/kernel/kernel.h" | 10 | #include "core/hle/kernel/kernel.h" |
| 12 | #include "core/hle/kernel/memory/address_space_info.h" | 11 | #include "core/hle/kernel/memory/address_space_info.h" |
| 13 | #include "core/hle/kernel/memory/memory_block.h" | 12 | #include "core/hle/kernel/memory/memory_block.h" |
| @@ -16,6 +15,7 @@ | |||
| 16 | #include "core/hle/kernel/memory/page_table.h" | 15 | #include "core/hle/kernel/memory/page_table.h" |
| 17 | #include "core/hle/kernel/memory/system_control.h" | 16 | #include "core/hle/kernel/memory/system_control.h" |
| 18 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| 18 | #include "core/hle/kernel/svc_results.h" | ||
| 19 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | 20 | ||
| 21 | namespace Kernel::Memory { | 21 | namespace Kernel::Memory { |
| @@ -141,7 +141,7 @@ ResultCode PageTable::InitializeForProcess(FileSys::ProgramAddressSpaceType as_t | |||
| 141 | (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)}; | 141 | (alias_region_size + heap_region_size + stack_region_size + kernel_map_region_size)}; |
| 142 | if (alloc_size < needed_size) { | 142 | if (alloc_size < needed_size) { |
| 143 | UNREACHABLE(); | 143 | UNREACHABLE(); |
| 144 | return ERR_OUT_OF_MEMORY; | 144 | return ResultOutOfMemory; |
| 145 | } | 145 | } |
| 146 | 146 | ||
| 147 | const std::size_t remaining_size{alloc_size - needed_size}; | 147 | const std::size_t remaining_size{alloc_size - needed_size}; |
| @@ -277,11 +277,11 @@ ResultCode PageTable::MapProcessCode(VAddr addr, std::size_t num_pages, MemorySt | |||
| 277 | const u64 size{num_pages * PageSize}; | 277 | const u64 size{num_pages * PageSize}; |
| 278 | 278 | ||
| 279 | if (!CanContain(addr, size, state)) { | 279 | if (!CanContain(addr, size, state)) { |
| 280 | return ERR_INVALID_ADDRESS_STATE; | 280 | return ResultInvalidCurrentMemory; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | if (IsRegionMapped(addr, size)) { | 283 | if (IsRegionMapped(addr, size)) { |
| 284 | return ERR_INVALID_ADDRESS_STATE; | 284 | return ResultInvalidCurrentMemory; |
| 285 | } | 285 | } |
| 286 | 286 | ||
| 287 | PageLinkedList page_linked_list; | 287 | PageLinkedList page_linked_list; |
| @@ -307,7 +307,7 @@ ResultCode PageTable::MapProcessCodeMemory(VAddr dst_addr, VAddr src_addr, std:: | |||
| 307 | MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); | 307 | MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); |
| 308 | 308 | ||
| 309 | if (IsRegionMapped(dst_addr, size)) { | 309 | if (IsRegionMapped(dst_addr, size)) { |
| 310 | return ERR_INVALID_ADDRESS_STATE; | 310 | return ResultInvalidCurrentMemory; |
| 311 | } | 311 | } |
| 312 | 312 | ||
| 313 | PageLinkedList page_linked_list; | 313 | PageLinkedList page_linked_list; |
| @@ -409,27 +409,25 @@ ResultCode PageTable::MapPhysicalMemory(VAddr addr, std::size_t size) { | |||
| 409 | return RESULT_SUCCESS; | 409 | return RESULT_SUCCESS; |
| 410 | } | 410 | } |
| 411 | 411 | ||
| 412 | auto process{system.Kernel().CurrentProcess()}; | ||
| 413 | const std::size_t remaining_size{size - mapped_size}; | 412 | const std::size_t remaining_size{size - mapped_size}; |
| 414 | const std::size_t remaining_pages{remaining_size / PageSize}; | 413 | const std::size_t remaining_pages{remaining_size / PageSize}; |
| 415 | 414 | ||
| 416 | if (process->GetResourceLimit() && | 415 | // Reserve the memory from the process resource limit. |
| 417 | !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, remaining_size)) { | 416 | KScopedResourceReservation memory_reservation( |
| 418 | return ERR_RESOURCE_LIMIT_EXCEEDED; | 417 | system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory, |
| 418 | remaining_size); | ||
| 419 | if (!memory_reservation.Succeeded()) { | ||
| 420 | LOG_ERROR(Kernel, "Could not reserve remaining {:X} bytes", remaining_size); | ||
| 421 | return ResultResourceLimitedExceeded; | ||
| 419 | } | 422 | } |
| 420 | 423 | ||
| 421 | PageLinkedList page_linked_list; | 424 | PageLinkedList page_linked_list; |
| 422 | { | ||
| 423 | auto block_guard = detail::ScopeExit([&] { | ||
| 424 | system.Kernel().MemoryManager().Free(page_linked_list, remaining_pages, memory_pool); | ||
| 425 | process->GetResourceLimit()->Release(LimitableResource::PhysicalMemory, remaining_size); | ||
| 426 | }); | ||
| 427 | 425 | ||
| 428 | CASCADE_CODE(system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, | 426 | CASCADE_CODE( |
| 429 | memory_pool)); | 427 | system.Kernel().MemoryManager().Allocate(page_linked_list, remaining_pages, memory_pool)); |
| 430 | 428 | ||
| 431 | block_guard.Cancel(); | 429 | // We succeeded, so commit the memory reservation. |
| 432 | } | 430 | memory_reservation.Commit(); |
| 433 | 431 | ||
| 434 | MapPhysicalMemory(page_linked_list, addr, end_addr); | 432 | MapPhysicalMemory(page_linked_list, addr, end_addr); |
| 435 | 433 | ||
| @@ -454,12 +452,12 @@ ResultCode PageTable::UnmapPhysicalMemory(VAddr addr, std::size_t size) { | |||
| 454 | block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { | 452 | block_manager->IterateForRange(addr, end_addr, [&](const MemoryInfo& info) { |
| 455 | if (info.state == MemoryState::Normal) { | 453 | if (info.state == MemoryState::Normal) { |
| 456 | if (info.attribute != MemoryAttribute::None) { | 454 | if (info.attribute != MemoryAttribute::None) { |
| 457 | result = ERR_INVALID_ADDRESS_STATE; | 455 | result = ResultInvalidCurrentMemory; |
| 458 | return; | 456 | return; |
| 459 | } | 457 | } |
| 460 | mapped_size += GetSizeInRange(info, addr, end_addr); | 458 | mapped_size += GetSizeInRange(info, addr, end_addr); |
| 461 | } else if (info.state != MemoryState::Free) { | 459 | } else if (info.state != MemoryState::Free) { |
| 462 | result = ERR_INVALID_ADDRESS_STATE; | 460 | result = ResultInvalidCurrentMemory; |
| 463 | } | 461 | } |
| 464 | }); | 462 | }); |
| 465 | 463 | ||
| @@ -526,7 +524,7 @@ ResultCode PageTable::Map(VAddr dst_addr, VAddr src_addr, std::size_t size) { | |||
| 526 | MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); | 524 | MemoryAttribute::Mask, MemoryAttribute::None, MemoryAttribute::IpcAndDeviceMapped)); |
| 527 | 525 | ||
| 528 | if (IsRegionMapped(dst_addr, size)) { | 526 | if (IsRegionMapped(dst_addr, size)) { |
| 529 | return ERR_INVALID_ADDRESS_STATE; | 527 | return ResultInvalidCurrentMemory; |
| 530 | } | 528 | } |
| 531 | 529 | ||
| 532 | PageLinkedList page_linked_list; | 530 | PageLinkedList page_linked_list; |
| @@ -577,7 +575,7 @@ ResultCode PageTable::Unmap(VAddr dst_addr, VAddr src_addr, std::size_t size) { | |||
| 577 | AddRegionToPages(dst_addr, num_pages, dst_pages); | 575 | AddRegionToPages(dst_addr, num_pages, dst_pages); |
| 578 | 576 | ||
| 579 | if (!dst_pages.IsEqual(src_pages)) { | 577 | if (!dst_pages.IsEqual(src_pages)) { |
| 580 | return ERR_INVALID_MEMORY_RANGE; | 578 | return ResultInvalidMemoryRange; |
| 581 | } | 579 | } |
| 582 | 580 | ||
| 583 | { | 581 | { |
| @@ -626,11 +624,11 @@ ResultCode PageTable::MapPages(VAddr addr, PageLinkedList& page_linked_list, Mem | |||
| 626 | const std::size_t size{num_pages * PageSize}; | 624 | const std::size_t size{num_pages * PageSize}; |
| 627 | 625 | ||
| 628 | if (!CanContain(addr, size, state)) { | 626 | if (!CanContain(addr, size, state)) { |
| 629 | return ERR_INVALID_ADDRESS_STATE; | 627 | return ResultInvalidCurrentMemory; |
| 630 | } | 628 | } |
| 631 | 629 | ||
| 632 | if (IsRegionMapped(addr, num_pages * PageSize)) { | 630 | if (IsRegionMapped(addr, num_pages * PageSize)) { |
| 633 | return ERR_INVALID_ADDRESS_STATE; | 631 | return ResultInvalidCurrentMemory; |
| 634 | } | 632 | } |
| 635 | 633 | ||
| 636 | CASCADE_CODE(MapPages(addr, page_linked_list, perm)); | 634 | CASCADE_CODE(MapPages(addr, page_linked_list, perm)); |
| @@ -768,7 +766,7 @@ ResultCode PageTable::SetHeapCapacity(std::size_t new_heap_capacity) { | |||
| 768 | ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { | 766 | ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { |
| 769 | 767 | ||
| 770 | if (size > heap_region_end - heap_region_start) { | 768 | if (size > heap_region_end - heap_region_start) { |
| 771 | return ERR_OUT_OF_MEMORY; | 769 | return ResultOutOfMemory; |
| 772 | } | 770 | } |
| 773 | 771 | ||
| 774 | const u64 previous_heap_size{GetHeapSize()}; | 772 | const u64 previous_heap_size{GetHeapSize()}; |
| @@ -781,10 +779,14 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { | |||
| 781 | 779 | ||
| 782 | const u64 delta{size - previous_heap_size}; | 780 | const u64 delta{size - previous_heap_size}; |
| 783 | 781 | ||
| 784 | auto process{system.Kernel().CurrentProcess()}; | 782 | // Reserve memory for the heap extension. |
| 785 | if (process->GetResourceLimit() && delta != 0 && | 783 | KScopedResourceReservation memory_reservation( |
| 786 | !process->GetResourceLimit()->Reserve(LimitableResource::PhysicalMemory, delta)) { | 784 | system.Kernel().CurrentProcess()->GetResourceLimit(), LimitableResource::PhysicalMemory, |
| 787 | return ERR_RESOURCE_LIMIT_EXCEEDED; | 785 | delta); |
| 786 | |||
| 787 | if (!memory_reservation.Succeeded()) { | ||
| 788 | LOG_ERROR(Kernel, "Could not reserve heap extension of size {:X} bytes", delta); | ||
| 789 | return ResultResourceLimitedExceeded; | ||
| 788 | } | 790 | } |
| 789 | 791 | ||
| 790 | PageLinkedList page_linked_list; | 792 | PageLinkedList page_linked_list; |
| @@ -794,12 +796,15 @@ ResultVal<VAddr> PageTable::SetHeapSize(std::size_t size) { | |||
| 794 | system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); | 796 | system.Kernel().MemoryManager().Allocate(page_linked_list, num_pages, memory_pool)); |
| 795 | 797 | ||
| 796 | if (IsRegionMapped(current_heap_addr, delta)) { | 798 | if (IsRegionMapped(current_heap_addr, delta)) { |
| 797 | return ERR_INVALID_ADDRESS_STATE; | 799 | return ResultInvalidCurrentMemory; |
| 798 | } | 800 | } |
| 799 | 801 | ||
| 800 | CASCADE_CODE( | 802 | CASCADE_CODE( |
| 801 | Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup)); | 803 | Operate(current_heap_addr, num_pages, page_linked_list, OperationType::MapGroup)); |
| 802 | 804 | ||
| 805 | // Allocation succeeded; commit the resource reservation | ||
| 806 | memory_reservation.Commit(); | ||
| 807 | |||
| 803 | block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal, | 808 | block_manager->Update(current_heap_addr, num_pages, MemoryState::Normal, |
| 804 | MemoryPermission::ReadAndWrite); | 809 | MemoryPermission::ReadAndWrite); |
| 805 | 810 | ||
| @@ -816,17 +821,17 @@ ResultVal<VAddr> PageTable::AllocateAndMapMemory(std::size_t needed_num_pages, s | |||
| 816 | std::lock_guard lock{page_table_lock}; | 821 | std::lock_guard lock{page_table_lock}; |
| 817 | 822 | ||
| 818 | if (!CanContain(region_start, region_num_pages * PageSize, state)) { | 823 | if (!CanContain(region_start, region_num_pages * PageSize, state)) { |
| 819 | return ERR_INVALID_ADDRESS_STATE; | 824 | return ResultInvalidCurrentMemory; |
| 820 | } | 825 | } |
| 821 | 826 | ||
| 822 | if (region_num_pages <= needed_num_pages) { | 827 | if (region_num_pages <= needed_num_pages) { |
| 823 | return ERR_OUT_OF_MEMORY; | 828 | return ResultOutOfMemory; |
| 824 | } | 829 | } |
| 825 | 830 | ||
| 826 | const VAddr addr{ | 831 | const VAddr addr{ |
| 827 | AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)}; | 832 | AllocateVirtualMemory(region_start, region_num_pages, needed_num_pages, align)}; |
| 828 | if (!addr) { | 833 | if (!addr) { |
| 829 | return ERR_OUT_OF_MEMORY; | 834 | return ResultOutOfMemory; |
| 830 | } | 835 | } |
| 831 | 836 | ||
| 832 | if (is_map_only) { | 837 | if (is_map_only) { |
| @@ -1105,13 +1110,13 @@ constexpr ResultCode PageTable::CheckMemoryState(const MemoryInfo& info, MemoryS | |||
| 1105 | MemoryAttribute attr) const { | 1110 | MemoryAttribute attr) const { |
| 1106 | // Validate the states match expectation | 1111 | // Validate the states match expectation |
| 1107 | if ((info.state & state_mask) != state) { | 1112 | if ((info.state & state_mask) != state) { |
| 1108 | return ERR_INVALID_ADDRESS_STATE; | 1113 | return ResultInvalidCurrentMemory; |
| 1109 | } | 1114 | } |
| 1110 | if ((info.perm & perm_mask) != perm) { | 1115 | if ((info.perm & perm_mask) != perm) { |
| 1111 | return ERR_INVALID_ADDRESS_STATE; | 1116 | return ResultInvalidCurrentMemory; |
| 1112 | } | 1117 | } |
| 1113 | if ((info.attribute & attr_mask) != attr) { | 1118 | if ((info.attribute & attr_mask) != attr) { |
| 1114 | return ERR_INVALID_ADDRESS_STATE; | 1119 | return ResultInvalidCurrentMemory; |
| 1115 | } | 1120 | } |
| 1116 | 1121 | ||
| 1117 | return RESULT_SUCCESS; | 1122 | return RESULT_SUCCESS; |
| @@ -1138,14 +1143,14 @@ ResultCode PageTable::CheckMemoryState(MemoryState* out_state, MemoryPermission* | |||
| 1138 | while (true) { | 1143 | while (true) { |
| 1139 | // Validate the current block | 1144 | // Validate the current block |
| 1140 | if (!(info.state == first_state)) { | 1145 | if (!(info.state == first_state)) { |
| 1141 | return ERR_INVALID_ADDRESS_STATE; | 1146 | return ResultInvalidCurrentMemory; |
| 1142 | } | 1147 | } |
| 1143 | if (!(info.perm == first_perm)) { | 1148 | if (!(info.perm == first_perm)) { |
| 1144 | return ERR_INVALID_ADDRESS_STATE; | 1149 | return ResultInvalidCurrentMemory; |
| 1145 | } | 1150 | } |
| 1146 | if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) == | 1151 | if (!((info.attribute | static_cast<MemoryAttribute>(ignore_attr)) == |
| 1147 | (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) { | 1152 | (first_attr | static_cast<MemoryAttribute>(ignore_attr)))) { |
| 1148 | return ERR_INVALID_ADDRESS_STATE; | 1153 | return ResultInvalidCurrentMemory; |
| 1149 | } | 1154 | } |
| 1150 | 1155 | ||
| 1151 | // Validate against the provided masks | 1156 | // Validate against the provided masks |
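Several hunks in this commit follow the same reserve / check / commit shape around KScopedResourceReservation (SetHeapSize above, and Process::LoadFromMetadata, Session::Create, and SharedMemory::Create below). The following is a minimal, self-contained sketch of that RAII pattern, not the actual k_scoped_resource_reservation.h: the ResourceLimit type here is a simplified stand-in, and only the method names (Reserve, Release, Succeeded, Commit) are taken from identifiers visible in the diff.

```cpp
#include <cstddef>
#include <cstdio>
#include <memory>

// Simplified stand-in for a kernel resource limit; not yuzu's KResourceLimit API.
class ResourceLimit {
public:
    explicit ResourceLimit(std::size_t limit) : limit_{limit} {}

    bool Reserve(std::size_t amount) {
        if (used_ + amount > limit_) {
            return false;
        }
        used_ += amount;
        return true;
    }

    void Release(std::size_t amount) {
        used_ -= amount;
    }

private:
    std::size_t limit_;
    std::size_t used_{};
};

// RAII guard: reserves on construction, rolls back on destruction unless committed.
class ScopedResourceReservation {
public:
    ScopedResourceReservation(std::shared_ptr<ResourceLimit> limit, std::size_t amount)
        : limit_{std::move(limit)}, amount_{amount},
          succeeded_{limit_ != nullptr && limit_->Reserve(amount_)} {}

    ~ScopedResourceReservation() {
        // An early return before Commit() hands the reservation back automatically.
        if (succeeded_ && !committed_) {
            limit_->Release(amount_);
        }
    }

    bool Succeeded() const {
        return succeeded_;
    }

    void Commit() {
        committed_ = true;
    }

private:
    std::shared_ptr<ResourceLimit> limit_;
    std::size_t amount_;
    bool succeeded_;
    bool committed_{false};
};

int main() {
    auto limit = std::make_shared<ResourceLimit>(0x1000);

    // Reserve memory for a (hypothetical) heap extension.
    ScopedResourceReservation memory_reservation(limit, 0x800);
    if (!memory_reservation.Succeeded()) {
        std::puts("could not reserve heap extension");
        return 1;
    }

    // ... allocate and map the extension here; any failure path that returns
    // before Commit() gives the reserved amount back to the limit ...

    // The operation can no longer fail, so keep the reservation.
    memory_reservation.Commit();
    return 0;
}
```

The point of the guard is that every error path between the reservation and Commit() releases the reserved amount automatically, which is what replaces the old manual Reserve()-then-return-ERR_RESOURCE_LIMIT_EXCEEDED handling shown in the removed lines.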
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 2286b292d..47b3ac57b 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp | |||
| @@ -14,9 +14,9 @@ | |||
| 14 | #include "core/device_memory.h" | 14 | #include "core/device_memory.h" |
| 15 | #include "core/file_sys/program_metadata.h" | 15 | #include "core/file_sys/program_metadata.h" |
| 16 | #include "core/hle/kernel/code_set.h" | 16 | #include "core/hle/kernel/code_set.h" |
| 17 | #include "core/hle/kernel/errors.h" | ||
| 18 | #include "core/hle/kernel/k_resource_limit.h" | 17 | #include "core/hle/kernel/k_resource_limit.h" |
| 19 | #include "core/hle/kernel/k_scheduler.h" | 18 | #include "core/hle/kernel/k_scheduler.h" |
| 19 | #include "core/hle/kernel/k_scoped_resource_reservation.h" | ||
| 20 | #include "core/hle/kernel/k_thread.h" | 20 | #include "core/hle/kernel/k_thread.h" |
| 21 | #include "core/hle/kernel/kernel.h" | 21 | #include "core/hle/kernel/kernel.h" |
| 22 | #include "core/hle/kernel/memory/memory_block_manager.h" | 22 | #include "core/hle/kernel/memory/memory_block_manager.h" |
| @@ -39,6 +39,7 @@ namespace { | |||
| 39 | */ | 39 | */ |
| 40 | void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) { | 40 | void SetupMainThread(Core::System& system, Process& owner_process, u32 priority, VAddr stack_top) { |
| 41 | const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); | 41 | const VAddr entry_point = owner_process.PageTable().GetCodeRegionStart(); |
| 42 | ASSERT(owner_process.GetResourceLimit()->Reserve(LimitableResource::Threads, 1)); | ||
| 42 | auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0, | 43 | auto thread_res = KThread::Create(system, ThreadType::User, "main", entry_point, priority, 0, |
| 43 | owner_process.GetIdealCoreId(), stack_top, &owner_process); | 44 | owner_process.GetIdealCoreId(), stack_top, &owner_process); |
| 44 | 45 | ||
| @@ -117,6 +118,9 @@ std::shared_ptr<Process> Process::Create(Core::System& system, std::string name, | |||
| 117 | 118 | ||
| 118 | std::shared_ptr<Process> process = std::make_shared<Process>(system); | 119 | std::shared_ptr<Process> process = std::make_shared<Process>(system); |
| 119 | process->name = std::move(name); | 120 | process->name = std::move(name); |
| 121 | |||
| 122 | // TODO: This is inaccurate | ||
| 123 | // The process should hold a reference to the kernel-wide resource limit. | ||
| 120 | process->resource_limit = std::make_shared<KResourceLimit>(kernel, system); | 124 | process->resource_limit = std::make_shared<KResourceLimit>(kernel, system); |
| 121 | process->status = ProcessStatus::Created; | 125 | process->status = ProcessStatus::Created; |
| 122 | process->program_id = 0; | 126 | process->program_id = 0; |
| @@ -155,6 +159,9 @@ void Process::DecrementThreadCount() { | |||
| 155 | } | 159 | } |
| 156 | 160 | ||
| 157 | u64 Process::GetTotalPhysicalMemoryAvailable() const { | 161 | u64 Process::GetTotalPhysicalMemoryAvailable() const { |
| 162 | // TODO: This is expected to always return the application memory pool size after accurately | ||
| 163 | // reserving kernel resources. The current workaround uses a process-local resource limit of | ||
| 164 | // application memory pool size, which is inaccurate. | ||
| 158 | const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) + | 165 | const u64 capacity{resource_limit->GetFreeValue(LimitableResource::PhysicalMemory) + |
| 159 | page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size + | 166 | page_table->GetTotalHeapSize() + GetSystemResourceSize() + image_size + |
| 160 | main_thread_stack_size}; | 167 | main_thread_stack_size}; |
| @@ -248,8 +255,8 @@ ResultCode Process::Reset() { | |||
| 248 | KScopedSchedulerLock sl{kernel}; | 255 | KScopedSchedulerLock sl{kernel}; |
| 249 | 256 | ||
| 250 | // Validate that we're in a state that we can reset. | 257 | // Validate that we're in a state that we can reset. |
| 251 | R_UNLESS(status != ProcessStatus::Exited, Svc::ResultInvalidState); | 258 | R_UNLESS(status != ProcessStatus::Exited, ResultInvalidState); |
| 252 | R_UNLESS(is_signaled, Svc::ResultInvalidState); | 259 | R_UNLESS(is_signaled, ResultInvalidState); |
| 253 | 260 | ||
| 254 | // Clear signaled. | 261 | // Clear signaled. |
| 255 | is_signaled = false; | 262 | is_signaled = false; |
| @@ -264,6 +271,17 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, | |||
| 264 | system_resource_size = metadata.GetSystemResourceSize(); | 271 | system_resource_size = metadata.GetSystemResourceSize(); |
| 265 | image_size = code_size; | 272 | image_size = code_size; |
| 266 | 273 | ||
| 274 | // Set initial resource limits | ||
| 275 | resource_limit->SetLimitValue( | ||
| 276 | LimitableResource::PhysicalMemory, | ||
| 277 | kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application)); | ||
| 278 | KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory, | ||
| 279 | code_size + system_resource_size); | ||
| 280 | if (!memory_reservation.Succeeded()) { | ||
| 281 | LOG_ERROR(Kernel, "Could not reserve process memory requirements of size {:X} bytes", | ||
| 282 | code_size + system_resource_size); | ||
| 283 | return ResultResourceLimitedExceeded; | ||
| 284 | } | ||
| 267 | // Initialize process address space | 285 | // Initialize process address space |
| 268 | if (const ResultCode result{ | 286 | if (const ResultCode result{ |
| 269 | page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000, | 287 | page_table->InitializeForProcess(metadata.GetAddressSpaceType(), false, 0x8000000, |
| @@ -305,24 +323,22 @@ ResultCode Process::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, | |||
| 305 | UNREACHABLE(); | 323 | UNREACHABLE(); |
| 306 | } | 324 | } |
| 307 | 325 | ||
| 308 | // Set initial resource limits | ||
| 309 | resource_limit->SetLimitValue( | ||
| 310 | LimitableResource::PhysicalMemory, | ||
| 311 | kernel.MemoryManager().GetSize(Memory::MemoryManager::Pool::Application)); | ||
| 312 | resource_limit->SetLimitValue(LimitableResource::Threads, 608); | 326 | resource_limit->SetLimitValue(LimitableResource::Threads, 608); |
| 313 | resource_limit->SetLimitValue(LimitableResource::Events, 700); | 327 | resource_limit->SetLimitValue(LimitableResource::Events, 700); |
| 314 | resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128); | 328 | resource_limit->SetLimitValue(LimitableResource::TransferMemory, 128); |
| 315 | resource_limit->SetLimitValue(LimitableResource::Sessions, 894); | 329 | resource_limit->SetLimitValue(LimitableResource::Sessions, 894); |
| 316 | ASSERT(resource_limit->Reserve(LimitableResource::PhysicalMemory, code_size)); | ||
| 317 | 330 | ||
| 318 | // Create TLS region | 331 | // Create TLS region |
| 319 | tls_region_address = CreateTLSRegion(); | 332 | tls_region_address = CreateTLSRegion(); |
| 333 | memory_reservation.Commit(); | ||
| 320 | 334 | ||
| 321 | return handle_table.SetSize(capabilities.GetHandleTableSize()); | 335 | return handle_table.SetSize(capabilities.GetHandleTableSize()); |
| 322 | } | 336 | } |
| 323 | 337 | ||
| 324 | void Process::Run(s32 main_thread_priority, u64 stack_size) { | 338 | void Process::Run(s32 main_thread_priority, u64 stack_size) { |
| 325 | AllocateMainThreadStack(stack_size); | 339 | AllocateMainThreadStack(stack_size); |
| 340 | resource_limit->Reserve(LimitableResource::Threads, 1); | ||
| 341 | resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size); | ||
| 326 | 342 | ||
| 327 | const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size}; | 343 | const std::size_t heap_capacity{memory_usage_capacity - main_thread_stack_size - image_size}; |
| 328 | ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError()); | 344 | ASSERT(!page_table->SetHeapCapacity(heap_capacity).IsError()); |
| @@ -330,8 +346,6 @@ void Process::Run(s32 main_thread_priority, u64 stack_size) { | |||
| 330 | ChangeStatus(ProcessStatus::Running); | 346 | ChangeStatus(ProcessStatus::Running); |
| 331 | 347 | ||
| 332 | SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top); | 348 | SetupMainThread(system, *this, main_thread_priority, main_thread_stack_top); |
| 333 | resource_limit->Reserve(LimitableResource::Threads, 1); | ||
| 334 | resource_limit->Reserve(LimitableResource::PhysicalMemory, main_thread_stack_size); | ||
| 335 | } | 349 | } |
| 336 | 350 | ||
| 337 | void Process::PrepareForTermination() { | 351 | void Process::PrepareForTermination() { |
| @@ -358,6 +372,11 @@ void Process::PrepareForTermination() { | |||
| 358 | FreeTLSRegion(tls_region_address); | 372 | FreeTLSRegion(tls_region_address); |
| 359 | tls_region_address = 0; | 373 | tls_region_address = 0; |
| 360 | 374 | ||
| 375 | if (resource_limit) { | ||
| 376 | resource_limit->Release(LimitableResource::PhysicalMemory, | ||
| 377 | main_thread_stack_size + image_size); | ||
| 378 | } | ||
| 379 | |||
| 361 | ChangeStatus(ProcessStatus::Exited); | 380 | ChangeStatus(ProcessStatus::Exited); |
| 362 | } | 381 | } |
| 363 | 382 | ||
diff --git a/src/core/hle/kernel/process_capability.cpp b/src/core/hle/kernel/process_capability.cpp index 0566311b6..7c567049e 100644 --- a/src/core/hle/kernel/process_capability.cpp +++ b/src/core/hle/kernel/process_capability.cpp | |||
| @@ -6,10 +6,10 @@ | |||
| 6 | 6 | ||
| 7 | #include "common/bit_util.h" | 7 | #include "common/bit_util.h" |
| 8 | #include "common/logging/log.h" | 8 | #include "common/logging/log.h" |
| 9 | #include "core/hle/kernel/errors.h" | ||
| 10 | #include "core/hle/kernel/handle_table.h" | 9 | #include "core/hle/kernel/handle_table.h" |
| 11 | #include "core/hle/kernel/memory/page_table.h" | 10 | #include "core/hle/kernel/memory/page_table.h" |
| 12 | #include "core/hle/kernel/process_capability.h" | 11 | #include "core/hle/kernel/process_capability.h" |
| 12 | #include "core/hle/kernel/svc_results.h" | ||
| 13 | 13 | ||
| 14 | namespace Kernel { | 14 | namespace Kernel { |
| 15 | namespace { | 15 | namespace { |
| @@ -123,13 +123,13 @@ ResultCode ProcessCapabilities::ParseCapabilities(const u32* capabilities, | |||
| 123 | // If there's only one, then there's a problem. | 123 | // If there's only one, then there's a problem. |
| 124 | if (i >= num_capabilities) { | 124 | if (i >= num_capabilities) { |
| 125 | LOG_ERROR(Kernel, "Invalid combination! i={}", i); | 125 | LOG_ERROR(Kernel, "Invalid combination! i={}", i); |
| 126 | return ERR_INVALID_COMBINATION; | 126 | return ResultInvalidCombination; |
| 127 | } | 127 | } |
| 128 | 128 | ||
| 129 | const auto size_flags = capabilities[i]; | 129 | const auto size_flags = capabilities[i]; |
| 130 | if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) { | 130 | if (GetCapabilityType(size_flags) != CapabilityType::MapPhysical) { |
| 131 | LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags); | 131 | LOG_ERROR(Kernel, "Invalid capability type! size_flags={}", size_flags); |
| 132 | return ERR_INVALID_COMBINATION; | 132 | return ResultInvalidCombination; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table); | 135 | const auto result = HandleMapPhysicalFlags(descriptor, size_flags, page_table); |
| @@ -159,7 +159,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s | |||
| 159 | const auto type = GetCapabilityType(flag); | 159 | const auto type = GetCapabilityType(flag); |
| 160 | 160 | ||
| 161 | if (type == CapabilityType::Unset) { | 161 | if (type == CapabilityType::Unset) { |
| 162 | return ERR_INVALID_CAPABILITY_DESCRIPTOR; | 162 | return ResultInvalidCapabilityDescriptor; |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | // Bail early on ignorable entries, as one would expect, | 165 | // Bail early on ignorable entries, as one would expect, |
| @@ -176,7 +176,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s | |||
| 176 | LOG_ERROR(Kernel, | 176 | LOG_ERROR(Kernel, |
| 177 | "Attempted to initialize flags that may only be initialized once. set_flags={}", | 177 | "Attempted to initialize flags that may only be initialized once. set_flags={}", |
| 178 | set_flags); | 178 | set_flags); |
| 179 | return ERR_INVALID_COMBINATION; | 179 | return ResultInvalidCombination; |
| 180 | } | 180 | } |
| 181 | set_flags |= set_flag; | 181 | set_flags |= set_flag; |
| 182 | 182 | ||
| @@ -202,7 +202,7 @@ ResultCode ProcessCapabilities::ParseSingleFlagCapability(u32& set_flags, u32& s | |||
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | LOG_ERROR(Kernel, "Invalid capability type! type={}", type); | 204 | LOG_ERROR(Kernel, "Invalid capability type! type={}", type); |
| 205 | return ERR_INVALID_CAPABILITY_DESCRIPTOR; | 205 | return ResultInvalidCapabilityDescriptor; |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | void ProcessCapabilities::Clear() { | 208 | void ProcessCapabilities::Clear() { |
| @@ -225,7 +225,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) { | |||
| 225 | if (priority_mask != 0 || core_mask != 0) { | 225 | if (priority_mask != 0 || core_mask != 0) { |
| 226 | LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}", | 226 | LOG_ERROR(Kernel, "Core or priority mask are not zero! priority_mask={}, core_mask={}", |
| 227 | priority_mask, core_mask); | 227 | priority_mask, core_mask); |
| 228 | return ERR_INVALID_CAPABILITY_DESCRIPTOR; | 228 | return ResultInvalidCapabilityDescriptor; |
| 229 | } | 229 | } |
| 230 | 230 | ||
| 231 | const u32 core_num_min = (flags >> 16) & 0xFF; | 231 | const u32 core_num_min = (flags >> 16) & 0xFF; |
| @@ -233,7 +233,7 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) { | |||
| 233 | if (core_num_min > core_num_max) { | 233 | if (core_num_min > core_num_max) { |
| 234 | LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}", | 234 | LOG_ERROR(Kernel, "Core min is greater than core max! core_num_min={}, core_num_max={}", |
| 235 | core_num_min, core_num_max); | 235 | core_num_min, core_num_max); |
| 236 | return ERR_INVALID_COMBINATION; | 236 | return ResultInvalidCombination; |
| 237 | } | 237 | } |
| 238 | 238 | ||
| 239 | const u32 priority_min = (flags >> 10) & 0x3F; | 239 | const u32 priority_min = (flags >> 10) & 0x3F; |
| @@ -242,13 +242,13 @@ ResultCode ProcessCapabilities::HandlePriorityCoreNumFlags(u32 flags) { | |||
| 242 | LOG_ERROR(Kernel, | 242 | LOG_ERROR(Kernel, |
| 243 | "Priority min is greater than priority max! priority_min={}, priority_max={}", | 243 | "Priority min is greater than priority max! priority_min={}, priority_max={}", |
| 244 | core_num_min, priority_max); | 244 | core_num_min, priority_max); |
| 245 | return ERR_INVALID_COMBINATION; | 245 | return ResultInvalidCombination; |
| 246 | } | 246 | } |
| 247 | 247 | ||
| 248 | // The switch only has 4 usable cores. | 248 | // The switch only has 4 usable cores. |
| 249 | if (core_num_max >= 4) { | 249 | if (core_num_max >= 4) { |
| 250 | LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max); | 250 | LOG_ERROR(Kernel, "Invalid max cores specified! core_num_max={}", core_num_max); |
| 251 | return ERR_INVALID_PROCESSOR_ID; | 251 | return ResultInvalidCoreId; |
| 252 | } | 252 | } |
| 253 | 253 | ||
| 254 | const auto make_mask = [](u64 min, u64 max) { | 254 | const auto make_mask = [](u64 min, u64 max) { |
| @@ -269,7 +269,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags) | |||
| 269 | 269 | ||
| 270 | // If we've already set this svc before, bail. | 270 | // If we've already set this svc before, bail. |
| 271 | if ((set_svc_bits & svc_bit) != 0) { | 271 | if ((set_svc_bits & svc_bit) != 0) { |
| 272 | return ERR_INVALID_COMBINATION; | 272 | return ResultInvalidCombination; |
| 273 | } | 273 | } |
| 274 | set_svc_bits |= svc_bit; | 274 | set_svc_bits |= svc_bit; |
| 275 | 275 | ||
| @@ -283,7 +283,7 @@ ResultCode ProcessCapabilities::HandleSyscallFlags(u32& set_svc_bits, u32 flags) | |||
| 283 | 283 | ||
| 284 | if (svc_number >= svc_capabilities.size()) { | 284 | if (svc_number >= svc_capabilities.size()) { |
| 285 | LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number); | 285 | LOG_ERROR(Kernel, "Process svc capability is out of range! svc_number={}", svc_number); |
| 286 | return ERR_OUT_OF_RANGE; | 286 | return ResultOutOfRange; |
| 287 | } | 287 | } |
| 288 | 288 | ||
| 289 | svc_capabilities[svc_number] = true; | 289 | svc_capabilities[svc_number] = true; |
| @@ -321,7 +321,7 @@ ResultCode ProcessCapabilities::HandleInterruptFlags(u32 flags) { | |||
| 321 | if (interrupt >= interrupt_capabilities.size()) { | 321 | if (interrupt >= interrupt_capabilities.size()) { |
| 322 | LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}", | 322 | LOG_ERROR(Kernel, "Process interrupt capability is out of range! svc_number={}", |
| 323 | interrupt); | 323 | interrupt); |
| 324 | return ERR_OUT_OF_RANGE; | 324 | return ResultOutOfRange; |
| 325 | } | 325 | } |
| 326 | 326 | ||
| 327 | interrupt_capabilities[interrupt] = true; | 327 | interrupt_capabilities[interrupt] = true; |
| @@ -334,7 +334,7 @@ ResultCode ProcessCapabilities::HandleProgramTypeFlags(u32 flags) { | |||
| 334 | const u32 reserved = flags >> 17; | 334 | const u32 reserved = flags >> 17; |
| 335 | if (reserved != 0) { | 335 | if (reserved != 0) { |
| 336 | LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); | 336 | LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); |
| 337 | return ERR_RESERVED_VALUE; | 337 | return ResultReservedValue; |
| 338 | } | 338 | } |
| 339 | 339 | ||
| 340 | program_type = static_cast<ProgramType>((flags >> 14) & 0b111); | 340 | program_type = static_cast<ProgramType>((flags >> 14) & 0b111); |
| @@ -354,7 +354,7 @@ ResultCode ProcessCapabilities::HandleKernelVersionFlags(u32 flags) { | |||
| 354 | LOG_ERROR(Kernel, | 354 | LOG_ERROR(Kernel, |
| 355 | "Kernel version is non zero or flags are too small! major_version={}, flags={}", | 355 | "Kernel version is non zero or flags are too small! major_version={}, flags={}", |
| 356 | major_version, flags); | 356 | major_version, flags); |
| 357 | return ERR_INVALID_CAPABILITY_DESCRIPTOR; | 357 | return ResultInvalidCapabilityDescriptor; |
| 358 | } | 358 | } |
| 359 | 359 | ||
| 360 | kernel_version = flags; | 360 | kernel_version = flags; |
| @@ -365,7 +365,7 @@ ResultCode ProcessCapabilities::HandleHandleTableFlags(u32 flags) { | |||
| 365 | const u32 reserved = flags >> 26; | 365 | const u32 reserved = flags >> 26; |
| 366 | if (reserved != 0) { | 366 | if (reserved != 0) { |
| 367 | LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); | 367 | LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); |
| 368 | return ERR_RESERVED_VALUE; | 368 | return ResultReservedValue; |
| 369 | } | 369 | } |
| 370 | 370 | ||
| 371 | handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF); | 371 | handle_table_size = static_cast<s32>((flags >> 16) & 0x3FF); |
| @@ -376,7 +376,7 @@ ResultCode ProcessCapabilities::HandleDebugFlags(u32 flags) { | |||
| 376 | const u32 reserved = flags >> 19; | 376 | const u32 reserved = flags >> 19; |
| 377 | if (reserved != 0) { | 377 | if (reserved != 0) { |
| 378 | LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); | 378 | LOG_ERROR(Kernel, "Reserved value is non-zero! reserved={}", reserved); |
| 379 | return ERR_RESERVED_VALUE; | 379 | return ResultReservedValue; |
| 380 | } | 380 | } |
| 381 | 381 | ||
| 382 | is_debuggable = (flags & 0x20000) != 0; | 382 | is_debuggable = (flags & 0x20000) != 0; |
diff --git a/src/core/hle/kernel/server_port.cpp b/src/core/hle/kernel/server_port.cpp index fe7a483c4..5d17346ad 100644 --- a/src/core/hle/kernel/server_port.cpp +++ b/src/core/hle/kernel/server_port.cpp | |||
| @@ -5,11 +5,11 @@ | |||
| 5 | #include <tuple> | 5 | #include <tuple> |
| 6 | #include "common/assert.h" | 6 | #include "common/assert.h" |
| 7 | #include "core/hle/kernel/client_port.h" | 7 | #include "core/hle/kernel/client_port.h" |
| 8 | #include "core/hle/kernel/errors.h" | ||
| 9 | #include "core/hle/kernel/k_thread.h" | 8 | #include "core/hle/kernel/k_thread.h" |
| 10 | #include "core/hle/kernel/object.h" | 9 | #include "core/hle/kernel/object.h" |
| 11 | #include "core/hle/kernel/server_port.h" | 10 | #include "core/hle/kernel/server_port.h" |
| 12 | #include "core/hle/kernel/server_session.h" | 11 | #include "core/hle/kernel/server_session.h" |
| 12 | #include "core/hle/kernel/svc_results.h" | ||
| 13 | 13 | ||
| 14 | namespace Kernel { | 14 | namespace Kernel { |
| 15 | 15 | ||
| @@ -18,7 +18,7 @@ ServerPort::~ServerPort() = default; | |||
| 18 | 18 | ||
| 19 | ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { | 19 | ResultVal<std::shared_ptr<ServerSession>> ServerPort::Accept() { |
| 20 | if (pending_sessions.empty()) { | 20 | if (pending_sessions.empty()) { |
| 21 | return ERR_NOT_FOUND; | 21 | return ResultNotFound; |
| 22 | } | 22 | } |
| 23 | 23 | ||
| 24 | auto session = std::move(pending_sessions.back()); | 24 | auto session = std::move(pending_sessions.back()); |
diff --git a/src/core/hle/kernel/session.cpp b/src/core/hle/kernel/session.cpp index 75304b961..8830d4e91 100644 --- a/src/core/hle/kernel/session.cpp +++ b/src/core/hle/kernel/session.cpp | |||
| @@ -4,15 +4,23 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "core/hle/kernel/client_session.h" | 6 | #include "core/hle/kernel/client_session.h" |
| 7 | #include "core/hle/kernel/k_scoped_resource_reservation.h" | ||
| 7 | #include "core/hle/kernel/server_session.h" | 8 | #include "core/hle/kernel/server_session.h" |
| 8 | #include "core/hle/kernel/session.h" | 9 | #include "core/hle/kernel/session.h" |
| 9 | 10 | ||
| 10 | namespace Kernel { | 11 | namespace Kernel { |
| 11 | 12 | ||
| 12 | Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {} | 13 | Session::Session(KernelCore& kernel) : KSynchronizationObject{kernel} {} |
| 13 | Session::~Session() = default; | 14 | Session::~Session() { |
| 15 | // Release the session resource that was reserved when the Session pair was created. | ||
| 16 | kernel.GetSystemResourceLimit()->Release(LimitableResource::Sessions, 1); | ||
| 17 | } | ||
| 14 | 18 | ||
| 15 | Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { | 19 | Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { |
| 20 | // Reserve a new session from the resource limit. | ||
| 21 | KScopedResourceReservation session_reservation(kernel.GetSystemResourceLimit(), | ||
| 22 | LimitableResource::Sessions); | ||
| 23 | ASSERT(session_reservation.Succeeded()); | ||
| 16 | auto session{std::make_shared<Session>(kernel)}; | 24 | auto session{std::make_shared<Session>(kernel)}; |
| 17 | auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()}; | 25 | auto client_session{Kernel::ClientSession::Create(kernel, session, name + "_Client").Unwrap()}; |
| 18 | auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()}; | 26 | auto server_session{Kernel::ServerSession::Create(kernel, session, name + "_Server").Unwrap()}; |
| @@ -21,6 +29,7 @@ Session::SessionPair Session::Create(KernelCore& kernel, std::string name) { | |||
| 21 | session->client = client_session; | 29 | session->client = client_session; |
| 22 | session->server = server_session; | 30 | session->server = server_session; |
| 23 | 31 | ||
| 32 | session_reservation.Commit(); | ||
| 24 | return std::make_pair(std::move(client_session), std::move(server_session)); | 33 | return std::make_pair(std::move(client_session), std::move(server_session)); |
| 25 | } | 34 | } |
| 26 | 35 | ||
diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index 0cd467110..2eadd51d7 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include "common/assert.h" | 5 | #include "common/assert.h" |
| 6 | #include "core/core.h" | 6 | #include "core/core.h" |
| 7 | #include "core/hle/kernel/k_scoped_resource_reservation.h" | ||
| 7 | #include "core/hle/kernel/kernel.h" | 8 | #include "core/hle/kernel/kernel.h" |
| 8 | #include "core/hle/kernel/memory/page_table.h" | 9 | #include "core/hle/kernel/memory/page_table.h" |
| 9 | #include "core/hle/kernel/shared_memory.h" | 10 | #include "core/hle/kernel/shared_memory.h" |
| @@ -13,7 +14,9 @@ namespace Kernel { | |||
| 13 | SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) | 14 | SharedMemory::SharedMemory(KernelCore& kernel, Core::DeviceMemory& device_memory) |
| 14 | : Object{kernel}, device_memory{device_memory} {} | 15 | : Object{kernel}, device_memory{device_memory} {} |
| 15 | 16 | ||
| 16 | SharedMemory::~SharedMemory() = default; | 17 | SharedMemory::~SharedMemory() { |
| 18 | kernel.GetSystemResourceLimit()->Release(LimitableResource::PhysicalMemory, size); | ||
| 19 | } | ||
| 17 | 20 | ||
| 18 | std::shared_ptr<SharedMemory> SharedMemory::Create( | 21 | std::shared_ptr<SharedMemory> SharedMemory::Create( |
| 19 | KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, | 22 | KernelCore& kernel, Core::DeviceMemory& device_memory, Process* owner_process, |
| @@ -21,6 +24,11 @@ std::shared_ptr<SharedMemory> SharedMemory::Create( | |||
| 21 | Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, | 24 | Memory::MemoryPermission user_permission, PAddr physical_address, std::size_t size, |
| 22 | std::string name) { | 25 | std::string name) { |
| 23 | 26 | ||
| 27 | const auto resource_limit = kernel.GetSystemResourceLimit(); | ||
| 28 | KScopedResourceReservation memory_reservation(resource_limit, LimitableResource::PhysicalMemory, | ||
| 29 | size); | ||
| 30 | ASSERT(memory_reservation.Succeeded()); | ||
| 31 | |||
| 24 | std::shared_ptr<SharedMemory> shared_memory{ | 32 | std::shared_ptr<SharedMemory> shared_memory{ |
| 25 | std::make_shared<SharedMemory>(kernel, device_memory)}; | 33 | std::make_shared<SharedMemory>(kernel, device_memory)}; |
| 26 | 34 | ||
| @@ -32,6 +40,7 @@ std::shared_ptr<SharedMemory> SharedMemory::Create( | |||
| 32 | shared_memory->size = size; | 40 | shared_memory->size = size; |
| 33 | shared_memory->name = name; | 41 | shared_memory->name = name; |
| 34 | 42 | ||
| 43 | memory_reservation.Commit(); | ||
| 35 | return shared_memory; | 44 | return shared_memory; |
| 36 | } | 45 | } |
| 37 | 46 | ||
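In session.cpp and shared_memory.cpp the reservation committed in Create() is paired with a matching Release() in the destructor, so the system resource limit gets back exactly what was taken when the object is destroyed. A compact sketch of that lifecycle, assuming a bare stand-in Limit type rather than KResourceLimit:

```cpp
#include <cassert>
#include <memory>

// Bare stand-in for a resource limit; not the KResourceLimit API.
struct Limit {
    int free;
    bool Reserve(int n) {
        if (free < n) {
            return false;
        }
        free -= n;
        return true;
    }
    void Release(int n) {
        free += n;
    }
};

class Session {
public:
    static std::shared_ptr<Session> Create(std::shared_ptr<Limit> limit) {
        // Reserve a session slot before building the pair (the kernel asserts on failure).
        if (!limit->Reserve(1)) {
            return nullptr;
        }
        return std::make_shared<Session>(std::move(limit));
    }

    explicit Session(std::shared_ptr<Limit> limit) : limit_{std::move(limit)} {}

    // Return the slot that Create() reserved, as Session::~Session now does.
    ~Session() {
        limit_->Release(1);
    }

private:
    std::shared_ptr<Limit> limit_;
};

int main() {
    auto limit = std::make_shared<Limit>(Limit{894}); // session limit value from the diff
    auto session = Session::Create(limit);
    assert(session != nullptr);
    assert(limit->free == 893);

    session.reset(); // destructor hands the slot back
    assert(limit->free == 894);
    return 0;
}
```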
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 26650a513..31d899e06 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -23,7 +23,6 @@ | |||
| 23 | #include "core/cpu_manager.h" | 23 | #include "core/cpu_manager.h" |
| 24 | #include "core/hle/kernel/client_port.h" | 24 | #include "core/hle/kernel/client_port.h" |
| 25 | #include "core/hle/kernel/client_session.h" | 25 | #include "core/hle/kernel/client_session.h" |
| 26 | #include "core/hle/kernel/errors.h" | ||
| 27 | #include "core/hle/kernel/handle_table.h" | 26 | #include "core/hle/kernel/handle_table.h" |
| 28 | #include "core/hle/kernel/k_address_arbiter.h" | 27 | #include "core/hle/kernel/k_address_arbiter.h" |
| 29 | #include "core/hle/kernel/k_condition_variable.h" | 28 | #include "core/hle/kernel/k_condition_variable.h" |
| @@ -31,6 +30,7 @@ | |||
| 31 | #include "core/hle/kernel/k_readable_event.h" | 30 | #include "core/hle/kernel/k_readable_event.h" |
| 32 | #include "core/hle/kernel/k_resource_limit.h" | 31 | #include "core/hle/kernel/k_resource_limit.h" |
| 33 | #include "core/hle/kernel/k_scheduler.h" | 32 | #include "core/hle/kernel/k_scheduler.h" |
| 33 | #include "core/hle/kernel/k_scoped_resource_reservation.h" | ||
| 34 | #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" | 34 | #include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" |
| 35 | #include "core/hle/kernel/k_synchronization_object.h" | 35 | #include "core/hle/kernel/k_synchronization_object.h" |
| 36 | #include "core/hle/kernel/k_thread.h" | 36 | #include "core/hle/kernel/k_thread.h" |
| @@ -71,49 +71,49 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds | |||
| 71 | VAddr src_addr, u64 size) { | 71 | VAddr src_addr, u64 size) { |
| 72 | if (!Common::Is4KBAligned(dst_addr)) { | 72 | if (!Common::Is4KBAligned(dst_addr)) { |
| 73 | LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr); | 73 | LOG_ERROR(Kernel_SVC, "Destination address is not aligned to 4KB, 0x{:016X}", dst_addr); |
| 74 | return ERR_INVALID_ADDRESS; | 74 | return ResultInvalidAddress; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | if (!Common::Is4KBAligned(src_addr)) { | 77 | if (!Common::Is4KBAligned(src_addr)) { |
| 78 | LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr); | 78 | LOG_ERROR(Kernel_SVC, "Source address is not aligned to 4KB, 0x{:016X}", src_addr); |
| 79 | return ERR_INVALID_SIZE; | 79 | return ResultInvalidSize; |
| 80 | } | 80 | } |
| 81 | 81 | ||
| 82 | if (size == 0) { | 82 | if (size == 0) { |
| 83 | LOG_ERROR(Kernel_SVC, "Size is 0"); | 83 | LOG_ERROR(Kernel_SVC, "Size is 0"); |
| 84 | return ERR_INVALID_SIZE; | 84 | return ResultInvalidSize; |
| 85 | } | 85 | } |
| 86 | 86 | ||
| 87 | if (!Common::Is4KBAligned(size)) { | 87 | if (!Common::Is4KBAligned(size)) { |
| 88 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size); | 88 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:016X}", size); |
| 89 | return ERR_INVALID_SIZE; | 89 | return ResultInvalidSize; |
| 90 | } | 90 | } |
| 91 | 91 | ||
| 92 | if (!IsValidAddressRange(dst_addr, size)) { | 92 | if (!IsValidAddressRange(dst_addr, size)) { |
| 93 | LOG_ERROR(Kernel_SVC, | 93 | LOG_ERROR(Kernel_SVC, |
| 94 | "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}", | 94 | "Destination is not a valid address range, addr=0x{:016X}, size=0x{:016X}", |
| 95 | dst_addr, size); | 95 | dst_addr, size); |
| 96 | return ERR_INVALID_ADDRESS_STATE; | 96 | return ResultInvalidCurrentMemory; |
| 97 | } | 97 | } |
| 98 | 98 | ||
| 99 | if (!IsValidAddressRange(src_addr, size)) { | 99 | if (!IsValidAddressRange(src_addr, size)) { |
| 100 | LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}", | 100 | LOG_ERROR(Kernel_SVC, "Source is not a valid address range, addr=0x{:016X}, size=0x{:016X}", |
| 101 | src_addr, size); | 101 | src_addr, size); |
| 102 | return ERR_INVALID_ADDRESS_STATE; | 102 | return ResultInvalidCurrentMemory; |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | if (!manager.IsInsideAddressSpace(src_addr, size)) { | 105 | if (!manager.IsInsideAddressSpace(src_addr, size)) { |
| 106 | LOG_ERROR(Kernel_SVC, | 106 | LOG_ERROR(Kernel_SVC, |
| 107 | "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", | 107 | "Source is not within the address space, addr=0x{:016X}, size=0x{:016X}", |
| 108 | src_addr, size); | 108 | src_addr, size); |
| 109 | return ERR_INVALID_ADDRESS_STATE; | 109 | return ResultInvalidCurrentMemory; |
| 110 | } | 110 | } |
| 111 | 111 | ||
| 112 | if (manager.IsOutsideStackRegion(dst_addr, size)) { | 112 | if (manager.IsOutsideStackRegion(dst_addr, size)) { |
| 113 | LOG_ERROR(Kernel_SVC, | 113 | LOG_ERROR(Kernel_SVC, |
| 114 | "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", | 114 | "Destination is not within the stack region, addr=0x{:016X}, size=0x{:016X}", |
| 115 | dst_addr, size); | 115 | dst_addr, size); |
| 116 | return ERR_INVALID_MEMORY_RANGE; | 116 | return ResultInvalidMemoryRange; |
| 117 | } | 117 | } |
| 118 | 118 | ||
| 119 | if (manager.IsInsideHeapRegion(dst_addr, size)) { | 119 | if (manager.IsInsideHeapRegion(dst_addr, size)) { |
| @@ -121,7 +121,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds | |||
| 121 | "Destination does not fit within the heap region, addr=0x{:016X}, " | 121 | "Destination does not fit within the heap region, addr=0x{:016X}, " |
| 122 | "size=0x{:016X}", | 122 | "size=0x{:016X}", |
| 123 | dst_addr, size); | 123 | dst_addr, size); |
| 124 | return ERR_INVALID_MEMORY_RANGE; | 124 | return ResultInvalidMemoryRange; |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | if (manager.IsInsideAliasRegion(dst_addr, size)) { | 127 | if (manager.IsInsideAliasRegion(dst_addr, size)) { |
| @@ -129,7 +129,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds | |||
| 129 | "Destination does not fit within the map region, addr=0x{:016X}, " | 129 | "Destination does not fit within the map region, addr=0x{:016X}, " |
| 130 | "size=0x{:016X}", | 130 | "size=0x{:016X}", |
| 131 | dst_addr, size); | 131 | dst_addr, size); |
| 132 | return ERR_INVALID_MEMORY_RANGE; | 132 | return ResultInvalidMemoryRange; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | return RESULT_SUCCESS; | 135 | return RESULT_SUCCESS; |
| @@ -138,6 +138,7 @@ ResultCode MapUnmapMemorySanityChecks(const Memory::PageTable& manager, VAddr ds | |||
| 138 | enum class ResourceLimitValueType { | 138 | enum class ResourceLimitValueType { |
| 139 | CurrentValue, | 139 | CurrentValue, |
| 140 | LimitValue, | 140 | LimitValue, |
| 141 | PeakValue, | ||
| 141 | }; | 142 | }; |
| 142 | 143 | ||
| 143 | ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, | 144 | ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_limit, |
| @@ -146,7 +147,7 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_ | |||
| 146 | const auto type = static_cast<LimitableResource>(resource_type); | 147 | const auto type = static_cast<LimitableResource>(resource_type); |
| 147 | if (!IsValidResourceType(type)) { | 148 | if (!IsValidResourceType(type)) { |
| 148 | LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); | 149 | LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); |
| 149 | return ERR_INVALID_ENUM_VALUE; | 150 | return ResultInvalidEnumValue; |
| 150 | } | 151 | } |
| 151 | 152 | ||
| 152 | const auto* const current_process = system.Kernel().CurrentProcess(); | 153 | const auto* const current_process = system.Kernel().CurrentProcess(); |
| @@ -157,14 +158,20 @@ ResultVal<s64> RetrieveResourceLimitValue(Core::System& system, Handle resource_ | |||
| 157 | if (!resource_limit_object) { | 158 | if (!resource_limit_object) { |
| 158 | LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", | 159 | LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", |
| 159 | resource_limit); | 160 | resource_limit); |
| 160 | return ERR_INVALID_HANDLE; | 161 | return ResultInvalidHandle; |
| 161 | } | 162 | } |
| 162 | 163 | ||
| 163 | if (value_type == ResourceLimitValueType::CurrentValue) { | 164 | switch (value_type) { |
| 165 | case ResourceLimitValueType::CurrentValue: | ||
| 164 | return MakeResult(resource_limit_object->GetCurrentValue(type)); | 166 | return MakeResult(resource_limit_object->GetCurrentValue(type)); |
| 167 | case ResourceLimitValueType::LimitValue: | ||
| 168 | return MakeResult(resource_limit_object->GetLimitValue(type)); | ||
| 169 | case ResourceLimitValueType::PeakValue: | ||
| 170 | return MakeResult(resource_limit_object->GetPeakValue(type)); | ||
| 171 | default: | ||
| 172 | LOG_ERROR(Kernel_SVC, "Invalid resource value_type: '{}'", value_type); | ||
| 173 | return ResultInvalidEnumValue; | ||
| 165 | } | 174 | } |
| 166 | |||
| 167 | return MakeResult(resource_limit_object->GetLimitValue(type)); | ||
| 168 | } | 175 | } |
| 169 | } // Anonymous namespace | 176 | } // Anonymous namespace |
| 170 | 177 | ||
| @@ -177,12 +184,12 @@ static ResultCode SetHeapSize(Core::System& system, VAddr* heap_addr, u64 heap_s | |||
| 177 | if ((heap_size % 0x200000) != 0) { | 184 | if ((heap_size % 0x200000) != 0) { |
| 178 | LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}", | 185 | LOG_ERROR(Kernel_SVC, "The heap size is not a multiple of 2MB, heap_size=0x{:016X}", |
| 179 | heap_size); | 186 | heap_size); |
| 180 | return ERR_INVALID_SIZE; | 187 | return ResultInvalidSize; |
| 181 | } | 188 | } |
| 182 | 189 | ||
| 183 | if (heap_size >= 0x200000000) { | 190 | if (heap_size >= 0x200000000) { |
| 184 | LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size); | 191 | LOG_ERROR(Kernel_SVC, "The heap size is not less than 8GB, heap_size=0x{:016X}", heap_size); |
| 185 | return ERR_INVALID_SIZE; | 192 | return ResultInvalidSize; |
| 186 | } | 193 | } |
| 187 | 194 | ||
| 188 | auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; | 195 | auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; |
| @@ -208,19 +215,19 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si | |||
| 208 | 215 | ||
| 209 | if (!Common::Is4KBAligned(address)) { | 216 | if (!Common::Is4KBAligned(address)) { |
| 210 | LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address); | 217 | LOG_ERROR(Kernel_SVC, "Address not page aligned (0x{:016X})", address); |
| 211 | return ERR_INVALID_ADDRESS; | 218 | return ResultInvalidAddress; |
| 212 | } | 219 | } |
| 213 | 220 | ||
| 214 | if (size == 0 || !Common::Is4KBAligned(size)) { | 221 | if (size == 0 || !Common::Is4KBAligned(size)) { |
| 215 | LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.", | 222 | LOG_ERROR(Kernel_SVC, "Invalid size (0x{:X}). Size must be non-zero and page aligned.", |
| 216 | size); | 223 | size); |
| 217 | return ERR_INVALID_ADDRESS; | 224 | return ResultInvalidAddress; |
| 218 | } | 225 | } |
| 219 | 226 | ||
| 220 | if (!IsValidAddressRange(address, size)) { | 227 | if (!IsValidAddressRange(address, size)) { |
| 221 | LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})", | 228 | LOG_ERROR(Kernel_SVC, "Address range overflowed (Address: 0x{:016X}, Size: 0x{:016X})", |
| 222 | address, size); | 229 | address, size); |
| 223 | return ERR_INVALID_ADDRESS_STATE; | 230 | return ResultInvalidCurrentMemory; |
| 224 | } | 231 | } |
| 225 | 232 | ||
| 226 | const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)}; | 233 | const auto attributes{static_cast<Memory::MemoryAttribute>(mask | attribute)}; |
| @@ -229,7 +236,7 @@ static ResultCode SetMemoryAttribute(Core::System& system, VAddr address, u64 si | |||
| 229 | LOG_ERROR(Kernel_SVC, | 236 | LOG_ERROR(Kernel_SVC, |
| 230 | "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}", | 237 | "Memory attribute doesn't match the given mask (Attribute: 0x{:X}, Mask: {:X}", |
| 231 | attribute, mask); | 238 | attribute, mask); |
| 232 | return ERR_INVALID_COMBINATION; | 239 | return ResultInvalidCombination; |
| 233 | } | 240 | } |
| 234 | 241 | ||
| 235 | auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; | 242 | auto& page_table{system.Kernel().CurrentProcess()->PageTable()}; |
| @@ -293,7 +300,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, | |||
| 293 | LOG_ERROR(Kernel_SVC, | 300 | LOG_ERROR(Kernel_SVC, |
| 294 | "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", | 301 | "Port Name Address is not a valid virtual address, port_name_address=0x{:016X}", |
| 295 | port_name_address); | 302 | port_name_address); |
| 296 | return ERR_NOT_FOUND; | 303 | return ResultNotFound; |
| 297 | } | 304 | } |
| 298 | 305 | ||
| 299 | static constexpr std::size_t PortNameMaxLength = 11; | 306 | static constexpr std::size_t PortNameMaxLength = 11; |
| @@ -302,7 +309,7 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, | |||
| 302 | if (port_name.size() > PortNameMaxLength) { | 309 | if (port_name.size() > PortNameMaxLength) { |
| 303 | LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength, | 310 | LOG_ERROR(Kernel_SVC, "Port name is too long, expected {} but got {}", PortNameMaxLength, |
| 304 | port_name.size()); | 311 | port_name.size()); |
| 305 | return ERR_OUT_OF_RANGE; | 312 | return ResultOutOfRange; |
| 306 | } | 313 | } |
| 307 | 314 | ||
| 308 | LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); | 315 | LOG_TRACE(Kernel_SVC, "called port_name={}", port_name); |
| @@ -311,11 +318,9 @@ static ResultCode ConnectToNamedPort(Core::System& system, Handle* out_handle, | |||
| 311 | const auto it = kernel.FindNamedPort(port_name); | 318 | const auto it = kernel.FindNamedPort(port_name); |
| 312 | if (!kernel.IsValidNamedPort(it)) { | 319 | if (!kernel.IsValidNamedPort(it)) { |
| 313 | LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); | 320 | LOG_WARNING(Kernel_SVC, "tried to connect to unknown port: {}", port_name); |
| 314 | return ERR_NOT_FOUND; | 321 | return ResultNotFound; |
| 315 | } | 322 | } |
| 316 | 323 | ||
| 317 | ASSERT(kernel.CurrentProcess()->GetResourceLimit()->Reserve(LimitableResource::Sessions, 1)); | ||
| 318 | |||
| 319 | auto client_port = it->second; | 324 | auto client_port = it->second; |
| 320 | 325 | ||
| 321 | std::shared_ptr<ClientSession> client_session; | 326 | std::shared_ptr<ClientSession> client_session; |
| @@ -340,7 +345,7 @@ static ResultCode SendSyncRequest(Core::System& system, Handle handle) { | |||
| 340 | std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); | 345 | std::shared_ptr<ClientSession> session = handle_table.Get<ClientSession>(handle); |
| 341 | if (!session) { | 346 | if (!session) { |
| 342 | LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); | 347 | LOG_ERROR(Kernel_SVC, "called with invalid handle=0x{:08X}", handle); |
| 343 | return ERR_INVALID_HANDLE; | 348 | return ResultInvalidHandle; |
| 344 | } | 349 | } |
| 345 | 350 | ||
| 346 | LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); | 351 | LOG_TRACE(Kernel_SVC, "called handle=0x{:08X}({})", handle, session->GetName()); |
| @@ -405,7 +410,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han | |||
| 405 | const Process* const owner_process = thread->GetOwnerProcess(); | 410 | const Process* const owner_process = thread->GetOwnerProcess(); |
| 406 | if (!owner_process) { | 411 | if (!owner_process) { |
| 407 | LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered."); | 412 | LOG_ERROR(Kernel_SVC, "Non-existent owning process encountered."); |
| 408 | return ERR_INVALID_HANDLE; | 413 | return ResultInvalidHandle; |
| 409 | } | 414 | } |
| 410 | 415 | ||
| 411 | *process_id = owner_process->GetProcessID(); | 416 | *process_id = owner_process->GetProcessID(); |
| @@ -415,7 +420,7 @@ static ResultCode GetProcessId(Core::System& system, u64* process_id, Handle han | |||
| 415 | // NOTE: This should also handle debug objects before returning. | 420 | // NOTE: This should also handle debug objects before returning. |
| 416 | 421 | ||
| 417 | LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle); | 422 | LOG_ERROR(Kernel_SVC, "Handle does not exist, handle=0x{:08X}", handle); |
| 418 | return ERR_INVALID_HANDLE; | 423 | return ResultInvalidHandle; |
| 419 | } | 424 | } |
| 420 | 425 | ||
| 421 | static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, | 426 | static ResultCode GetProcessId32(Core::System& system, u32* process_id_low, u32* process_id_high, |
| @@ -438,7 +443,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha | |||
| 438 | LOG_ERROR(Kernel_SVC, | 443 | LOG_ERROR(Kernel_SVC, |
| 439 | "Handle address is not a valid virtual address, handle_address=0x{:016X}", | 444 | "Handle address is not a valid virtual address, handle_address=0x{:016X}", |
| 440 | handles_address); | 445 | handles_address); |
| 441 | return ERR_INVALID_POINTER; | 446 | return ResultInvalidPointer; |
| 442 | } | 447 | } |
| 443 | 448 | ||
| 444 | static constexpr u64 MaxHandles = 0x40; | 449 | static constexpr u64 MaxHandles = 0x40; |
| @@ -446,7 +451,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha | |||
| 446 | if (handle_count > MaxHandles) { | 451 | if (handle_count > MaxHandles) { |
| 447 | LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}", | 452 | LOG_ERROR(Kernel_SVC, "Handle count specified is too large, expected {} but got {}", |
| 448 | MaxHandles, handle_count); | 453 | MaxHandles, handle_count); |
| 449 | return ERR_OUT_OF_RANGE; | 454 | return ResultOutOfRange; |
| 450 | } | 455 | } |
| 451 | 456 | ||
| 452 | auto& kernel = system.Kernel(); | 457 | auto& kernel = system.Kernel(); |
| @@ -459,7 +464,7 @@ static ResultCode WaitSynchronization(Core::System& system, s32* index, VAddr ha | |||
| 459 | 464 | ||
| 460 | if (object == nullptr) { | 465 | if (object == nullptr) { |
| 461 | LOG_ERROR(Kernel_SVC, "Object is a nullptr"); | 466 | LOG_ERROR(Kernel_SVC, "Object is a nullptr"); |
| 462 | return ERR_INVALID_HANDLE; | 467 | return ResultInvalidHandle; |
| 463 | } | 468 | } |
| 464 | 469 | ||
| 465 | objects[i] = object.get(); | 470 | objects[i] = object.get(); |
| @@ -481,6 +486,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand | |||
| 481 | // Get the thread from its handle. | 486 | // Get the thread from its handle. |
| 482 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); | 487 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); |
| 483 | std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle); | 488 | std::shared_ptr<KThread> thread = handle_table.Get<KThread>(thread_handle); |
| 489 | |||
| 484 | if (!thread) { | 490 | if (!thread) { |
| 485 | LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle); | 491 | LOG_ERROR(Kernel_SVC, "Invalid thread handle provided (handle={:08X})", thread_handle); |
| 486 | return ResultInvalidHandle; | 492 | return ResultInvalidHandle; |
| @@ -525,6 +531,7 @@ static ResultCode ArbitrateUnlock(Core::System& system, VAddr address) { | |||
| 525 | LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address); | 531 | LOG_TRACE(Kernel_SVC, "called address=0x{:X}", address); |
| 526 | 532 | ||
| 527 | // Validate the input address. | 533 | // Validate the input address. |
| 534 | |||
| 528 | if (Memory::IsKernelAddress(address)) { | 535 | if (Memory::IsKernelAddress(address)) { |
| 529 | LOG_ERROR(Kernel_SVC, | 536 | LOG_ERROR(Kernel_SVC, |
| 530 | "Attempting to arbitrate an unlock on a kernel address (address={:08X})", | 537 | "Attempting to arbitrate an unlock on a kernel address (address={:08X})", |
| @@ -735,7 +742,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 735 | if (info_sub_id != 0) { | 742 | if (info_sub_id != 0) { |
| 736 | LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, | 743 | LOG_ERROR(Kernel_SVC, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, |
| 737 | info_sub_id); | 744 | info_sub_id); |
| 738 | return ERR_INVALID_ENUM_VALUE; | 745 | return ResultInvalidEnumValue; |
| 739 | } | 746 | } |
| 740 | 747 | ||
| 741 | const auto& current_process_handle_table = | 748 | const auto& current_process_handle_table = |
| @@ -744,7 +751,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 744 | if (!process) { | 751 | if (!process) { |
| 745 | LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}", | 752 | LOG_ERROR(Kernel_SVC, "Process is not valid! info_id={}, info_sub_id={}, handle={:08X}", |
| 746 | info_id, info_sub_id, handle); | 753 | info_id, info_sub_id, handle); |
| 747 | return ERR_INVALID_HANDLE; | 754 | return ResultInvalidHandle; |
| 748 | } | 755 | } |
| 749 | 756 | ||
| 750 | switch (info_id_type) { | 757 | switch (info_id_type) { |
| @@ -826,7 +833,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 826 | } | 833 | } |
| 827 | 834 | ||
| 828 | LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); | 835 | LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); |
| 829 | return ERR_INVALID_ENUM_VALUE; | 836 | return ResultInvalidEnumValue; |
| 830 | } | 837 | } |
| 831 | 838 | ||
| 832 | case GetInfoType::IsCurrentProcessBeingDebugged: | 839 | case GetInfoType::IsCurrentProcessBeingDebugged: |
| @@ -836,13 +843,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 836 | case GetInfoType::RegisterResourceLimit: { | 843 | case GetInfoType::RegisterResourceLimit: { |
| 837 | if (handle != 0) { | 844 | if (handle != 0) { |
| 838 | LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle); | 845 | LOG_ERROR(Kernel, "Handle is non zero! handle={:08X}", handle); |
| 839 | return ERR_INVALID_HANDLE; | 846 | return ResultInvalidHandle; |
| 840 | } | 847 | } |
| 841 | 848 | ||
| 842 | if (info_sub_id != 0) { | 849 | if (info_sub_id != 0) { |
| 843 | LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, | 850 | LOG_ERROR(Kernel, "Info sub id is non zero! info_id={}, info_sub_id={}", info_id, |
| 844 | info_sub_id); | 851 | info_sub_id); |
| 845 | return ERR_INVALID_COMBINATION; | 852 | return ResultInvalidCombination; |
| 846 | } | 853 | } |
| 847 | 854 | ||
| 848 | Process* const current_process = system.Kernel().CurrentProcess(); | 855 | Process* const current_process = system.Kernel().CurrentProcess(); |
| @@ -867,13 +874,13 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 867 | if (handle != 0) { | 874 | if (handle != 0) { |
| 868 | LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}", | 875 | LOG_ERROR(Kernel_SVC, "Process Handle is non zero, expected 0 result but got {:016X}", |
| 869 | handle); | 876 | handle); |
| 870 | return ERR_INVALID_HANDLE; | 877 | return ResultInvalidHandle; |
| 871 | } | 878 | } |
| 872 | 879 | ||
| 873 | if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) { | 880 | if (info_sub_id >= Process::RANDOM_ENTROPY_SIZE) { |
| 874 | LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}", | 881 | LOG_ERROR(Kernel_SVC, "Entropy size is out of range, expected {} but got {}", |
| 875 | Process::RANDOM_ENTROPY_SIZE, info_sub_id); | 882 | Process::RANDOM_ENTROPY_SIZE, info_sub_id); |
| 876 | return ERR_INVALID_COMBINATION; | 883 | return ResultInvalidCombination; |
| 877 | } | 884 | } |
| 878 | 885 | ||
| 879 | *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id); | 886 | *result = system.Kernel().CurrentProcess()->GetRandomEntropy(info_sub_id); |
| @@ -890,7 +897,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 890 | if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) { | 897 | if (info_sub_id != 0xFFFFFFFFFFFFFFFF && info_sub_id >= num_cpus) { |
| 891 | LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus, | 898 | LOG_ERROR(Kernel_SVC, "Core count is out of range, expected {} but got {}", num_cpus, |
| 892 | info_sub_id); | 899 | info_sub_id); |
| 893 | return ERR_INVALID_COMBINATION; | 900 | return ResultInvalidCombination; |
| 894 | } | 901 | } |
| 895 | 902 | ||
| 896 | const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>( | 903 | const auto thread = system.Kernel().CurrentProcess()->GetHandleTable().Get<KThread>( |
| @@ -898,7 +905,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 898 | if (!thread) { | 905 | if (!thread) { |
| 899 | LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", | 906 | LOG_ERROR(Kernel_SVC, "Thread handle does not exist, handle=0x{:08X}", |
| 900 | static_cast<Handle>(handle)); | 907 | static_cast<Handle>(handle)); |
| 901 | return ERR_INVALID_HANDLE; | 908 | return ResultInvalidHandle; |
| 902 | } | 909 | } |
| 903 | 910 | ||
| 904 | const auto& core_timing = system.CoreTiming(); | 911 | const auto& core_timing = system.CoreTiming(); |
| @@ -922,7 +929,7 @@ static ResultCode GetInfo(Core::System& system, u64* result, u64 info_id, u64 ha | |||
| 922 | 929 | ||
| 923 | default: | 930 | default: |
| 924 | LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); | 931 | LOG_ERROR(Kernel_SVC, "Unimplemented svcGetInfo id=0x{:016X}", info_id); |
| 925 | return ERR_INVALID_ENUM_VALUE; | 932 | return ResultInvalidEnumValue; |
| 926 | } | 933 | } |
| 927 | } | 934 | } |
| 928 | 935 | ||
| @@ -945,22 +952,22 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) | |||
| 945 | 952 | ||
| 946 | if (!Common::Is4KBAligned(addr)) { | 953 | if (!Common::Is4KBAligned(addr)) { |
| 947 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | 954 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); |
| 948 | return ERR_INVALID_ADDRESS; | 955 | return ResultInvalidAddress; |
| 949 | } | 956 | } |
| 950 | 957 | ||
| 951 | if (!Common::Is4KBAligned(size)) { | 958 | if (!Common::Is4KBAligned(size)) { |
| 952 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | 959 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); |
| 953 | return ERR_INVALID_SIZE; | 960 | return ResultInvalidSize; |
| 954 | } | 961 | } |
| 955 | 962 | ||
| 956 | if (size == 0) { | 963 | if (size == 0) { |
| 957 | LOG_ERROR(Kernel_SVC, "Size is zero"); | 964 | LOG_ERROR(Kernel_SVC, "Size is zero"); |
| 958 | return ERR_INVALID_SIZE; | 965 | return ResultInvalidSize; |
| 959 | } | 966 | } |
| 960 | 967 | ||
| 961 | if (!(addr < addr + size)) { | 968 | if (!(addr < addr + size)) { |
| 962 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | 969 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); |
| 963 | return ERR_INVALID_MEMORY_RANGE; | 970 | return ResultInvalidMemoryRange; |
| 964 | } | 971 | } |
| 965 | 972 | ||
| 966 | Process* const current_process{system.Kernel().CurrentProcess()}; | 973 | Process* const current_process{system.Kernel().CurrentProcess()}; |
| @@ -968,21 +975,21 @@ static ResultCode MapPhysicalMemory(Core::System& system, VAddr addr, u64 size) | |||
| 968 | 975 | ||
| 969 | if (current_process->GetSystemResourceSize() == 0) { | 976 | if (current_process->GetSystemResourceSize() == 0) { |
| 970 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | 977 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); |
| 971 | return ERR_INVALID_STATE; | 978 | return ResultInvalidState; |
| 972 | } | 979 | } |
| 973 | 980 | ||
| 974 | if (!page_table.IsInsideAddressSpace(addr, size)) { | 981 | if (!page_table.IsInsideAddressSpace(addr, size)) { |
| 975 | LOG_ERROR(Kernel_SVC, | 982 | LOG_ERROR(Kernel_SVC, |
| 976 | "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, | 983 | "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, |
| 977 | size); | 984 | size); |
| 978 | return ERR_INVALID_MEMORY_RANGE; | 985 | return ResultInvalidMemoryRange; |
| 979 | } | 986 | } |
| 980 | 987 | ||
| 981 | if (page_table.IsOutsideAliasRegion(addr, size)) { | 988 | if (page_table.IsOutsideAliasRegion(addr, size)) { |
| 982 | LOG_ERROR(Kernel_SVC, | 989 | LOG_ERROR(Kernel_SVC, |
| 983 | "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, | 990 | "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, |
| 984 | size); | 991 | size); |
| 985 | return ERR_INVALID_MEMORY_RANGE; | 992 | return ResultInvalidMemoryRange; |
| 986 | } | 993 | } |
| 987 | 994 | ||
| 988 | return page_table.MapPhysicalMemory(addr, size); | 995 | return page_table.MapPhysicalMemory(addr, size); |
| @@ -999,22 +1006,22 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size | |||
| 999 | 1006 | ||
| 1000 | if (!Common::Is4KBAligned(addr)) { | 1007 | if (!Common::Is4KBAligned(addr)) { |
| 1001 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); | 1008 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, 0x{:016X}", addr); |
| 1002 | return ERR_INVALID_ADDRESS; | 1009 | return ResultInvalidAddress; |
| 1003 | } | 1010 | } |
| 1004 | 1011 | ||
| 1005 | if (!Common::Is4KBAligned(size)) { | 1012 | if (!Common::Is4KBAligned(size)) { |
| 1006 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); | 1013 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, 0x{:X}", size); |
| 1007 | return ERR_INVALID_SIZE; | 1014 | return ResultInvalidSize; |
| 1008 | } | 1015 | } |
| 1009 | 1016 | ||
| 1010 | if (size == 0) { | 1017 | if (size == 0) { |
| 1011 | LOG_ERROR(Kernel_SVC, "Size is zero"); | 1018 | LOG_ERROR(Kernel_SVC, "Size is zero"); |
| 1012 | return ERR_INVALID_SIZE; | 1019 | return ResultInvalidSize; |
| 1013 | } | 1020 | } |
| 1014 | 1021 | ||
| 1015 | if (!(addr < addr + size)) { | 1022 | if (!(addr < addr + size)) { |
| 1016 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); | 1023 | LOG_ERROR(Kernel_SVC, "Size causes 64-bit overflow of address"); |
| 1017 | return ERR_INVALID_MEMORY_RANGE; | 1024 | return ResultInvalidMemoryRange; |
| 1018 | } | 1025 | } |
| 1019 | 1026 | ||
| 1020 | Process* const current_process{system.Kernel().CurrentProcess()}; | 1027 | Process* const current_process{system.Kernel().CurrentProcess()}; |
| @@ -1022,21 +1029,21 @@ static ResultCode UnmapPhysicalMemory(Core::System& system, VAddr addr, u64 size | |||
| 1022 | 1029 | ||
| 1023 | if (current_process->GetSystemResourceSize() == 0) { | 1030 | if (current_process->GetSystemResourceSize() == 0) { |
| 1024 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); | 1031 | LOG_ERROR(Kernel_SVC, "System Resource Size is zero"); |
| 1025 | return ERR_INVALID_STATE; | 1032 | return ResultInvalidState; |
| 1026 | } | 1033 | } |
| 1027 | 1034 | ||
| 1028 | if (!page_table.IsInsideAddressSpace(addr, size)) { | 1035 | if (!page_table.IsInsideAddressSpace(addr, size)) { |
| 1029 | LOG_ERROR(Kernel_SVC, | 1036 | LOG_ERROR(Kernel_SVC, |
| 1030 | "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, | 1037 | "Address is not within the address space, addr=0x{:016X}, size=0x{:016X}", addr, |
| 1031 | size); | 1038 | size); |
| 1032 | return ERR_INVALID_MEMORY_RANGE; | 1039 | return ResultInvalidMemoryRange; |
| 1033 | } | 1040 | } |
| 1034 | 1041 | ||
| 1035 | if (page_table.IsOutsideAliasRegion(addr, size)) { | 1042 | if (page_table.IsOutsideAliasRegion(addr, size)) { |
| 1036 | LOG_ERROR(Kernel_SVC, | 1043 | LOG_ERROR(Kernel_SVC, |
| 1037 | "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, | 1044 | "Address is not within the alias region, addr=0x{:016X}, size=0x{:016X}", addr, |
| 1038 | size); | 1045 | size); |
| 1039 | return ERR_INVALID_MEMORY_RANGE; | 1046 | return ResultInvalidMemoryRange; |
| 1040 | } | 1047 | } |
| 1041 | 1048 | ||
| 1042 | return page_table.UnmapPhysicalMemory(addr, size); | 1049 | return page_table.UnmapPhysicalMemory(addr, size); |
| @@ -1206,23 +1213,23 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han | |||
| 1206 | 1213 | ||
| 1207 | if (!Common::Is4KBAligned(addr)) { | 1214 | if (!Common::Is4KBAligned(addr)) { |
| 1208 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); | 1215 | LOG_ERROR(Kernel_SVC, "Address is not aligned to 4KB, addr=0x{:016X}", addr); |
| 1209 | return ERR_INVALID_ADDRESS; | 1216 | return ResultInvalidAddress; |
| 1210 | } | 1217 | } |
| 1211 | 1218 | ||
| 1212 | if (size == 0) { | 1219 | if (size == 0) { |
| 1213 | LOG_ERROR(Kernel_SVC, "Size is 0"); | 1220 | LOG_ERROR(Kernel_SVC, "Size is 0"); |
| 1214 | return ERR_INVALID_SIZE; | 1221 | return ResultInvalidSize; |
| 1215 | } | 1222 | } |
| 1216 | 1223 | ||
| 1217 | if (!Common::Is4KBAligned(size)) { | 1224 | if (!Common::Is4KBAligned(size)) { |
| 1218 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); | 1225 | LOG_ERROR(Kernel_SVC, "Size is not aligned to 4KB, size=0x{:016X}", size); |
| 1219 | return ERR_INVALID_SIZE; | 1226 | return ResultInvalidSize; |
| 1220 | } | 1227 | } |
| 1221 | 1228 | ||
| 1222 | if (!IsValidAddressRange(addr, size)) { | 1229 | if (!IsValidAddressRange(addr, size)) { |
| 1223 | LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", | 1230 | LOG_ERROR(Kernel_SVC, "Region is not a valid address range, addr=0x{:016X}, size=0x{:016X}", |
| 1224 | addr, size); | 1231 | addr, size); |
| 1225 | return ERR_INVALID_ADDRESS_STATE; | 1232 | return ResultInvalidCurrentMemory; |
| 1226 | } | 1233 | } |
| 1227 | 1234 | ||
| 1228 | const auto permission_type = static_cast<Memory::MemoryPermission>(permissions); | 1235 | const auto permission_type = static_cast<Memory::MemoryPermission>(permissions); |
| @@ -1230,7 +1237,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han | |||
| 1230 | Memory::MemoryPermission::ReadAndWrite) { | 1237 | Memory::MemoryPermission::ReadAndWrite) { |
| 1231 | LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}", | 1238 | LOG_ERROR(Kernel_SVC, "Expected Read or ReadWrite permission but got permissions=0x{:08X}", |
| 1232 | permissions); | 1239 | permissions); |
| 1233 | return ERR_INVALID_MEMORY_PERMISSIONS; | 1240 | return ResultInvalidMemoryPermissions; |
| 1234 | } | 1241 | } |
| 1235 | 1242 | ||
| 1236 | auto* const current_process{system.Kernel().CurrentProcess()}; | 1243 | auto* const current_process{system.Kernel().CurrentProcess()}; |
| @@ -1241,7 +1248,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han | |||
| 1241 | "Addr does not fit within the valid region, addr=0x{:016X}, " | 1248 | "Addr does not fit within the valid region, addr=0x{:016X}, " |
| 1242 | "size=0x{:016X}", | 1249 | "size=0x{:016X}", |
| 1243 | addr, size); | 1250 | addr, size); |
| 1244 | return ERR_INVALID_MEMORY_RANGE; | 1251 | return ResultInvalidMemoryRange; |
| 1245 | } | 1252 | } |
| 1246 | 1253 | ||
| 1247 | if (page_table.IsInsideHeapRegion(addr, size)) { | 1254 | if (page_table.IsInsideHeapRegion(addr, size)) { |
| @@ -1249,7 +1256,7 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han | |||
| 1249 | "Addr does not fit within the heap region, addr=0x{:016X}, " | 1256 | "Addr does not fit within the heap region, addr=0x{:016X}, " |
| 1250 | "size=0x{:016X}", | 1257 | "size=0x{:016X}", |
| 1251 | addr, size); | 1258 | addr, size); |
| 1252 | return ERR_INVALID_MEMORY_RANGE; | 1259 | return ResultInvalidMemoryRange; |
| 1253 | } | 1260 | } |
| 1254 | 1261 | ||
| 1255 | if (page_table.IsInsideAliasRegion(addr, size)) { | 1262 | if (page_table.IsInsideAliasRegion(addr, size)) { |
| @@ -1257,14 +1264,14 @@ static ResultCode MapSharedMemory(Core::System& system, Handle shared_memory_han | |||
| 1257 | "Address does not fit within the map region, addr=0x{:016X}, " | 1264 | "Address does not fit within the map region, addr=0x{:016X}, " |
| 1258 | "size=0x{:016X}", | 1265 | "size=0x{:016X}", |
| 1259 | addr, size); | 1266 | addr, size); |
| 1260 | return ERR_INVALID_MEMORY_RANGE; | 1267 | return ResultInvalidMemoryRange; |
| 1261 | } | 1268 | } |
| 1262 | 1269 | ||
| 1263 | auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)}; | 1270 | auto shared_memory{current_process->GetHandleTable().Get<SharedMemory>(shared_memory_handle)}; |
| 1264 | if (!shared_memory) { | 1271 | if (!shared_memory) { |
| 1265 | LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", | 1272 | LOG_ERROR(Kernel_SVC, "Shared memory does not exist, shared_memory_handle=0x{:08X}", |
| 1266 | shared_memory_handle); | 1273 | shared_memory_handle); |
| 1267 | return ERR_INVALID_HANDLE; | 1274 | return ResultInvalidHandle; |
| 1268 | } | 1275 | } |
| 1269 | 1276 | ||
| 1270 | return shared_memory->Map(*current_process, addr, size, permission_type); | 1277 | return shared_memory->Map(*current_process, addr, size, permission_type); |
| @@ -1285,7 +1292,7 @@ static ResultCode QueryProcessMemory(Core::System& system, VAddr memory_info_add | |||
| 1285 | if (!process) { | 1292 | if (!process) { |
| 1286 | LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", | 1293 | LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", |
| 1287 | process_handle); | 1294 | process_handle); |
| 1288 | return ERR_INVALID_HANDLE; | 1295 | return ResultInvalidHandle; |
| 1289 | } | 1296 | } |
| 1290 | 1297 | ||
| 1291 | auto& memory{system.Memory()}; | 1298 | auto& memory{system.Memory()}; |
| @@ -1332,18 +1339,18 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand | |||
| 1332 | if (!Common::Is4KBAligned(src_address)) { | 1339 | if (!Common::Is4KBAligned(src_address)) { |
| 1333 | LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", | 1340 | LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", |
| 1334 | src_address); | 1341 | src_address); |
| 1335 | return ERR_INVALID_ADDRESS; | 1342 | return ResultInvalidAddress; |
| 1336 | } | 1343 | } |
| 1337 | 1344 | ||
| 1338 | if (!Common::Is4KBAligned(dst_address)) { | 1345 | if (!Common::Is4KBAligned(dst_address)) { |
| 1339 | LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", | 1346 | LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", |
| 1340 | dst_address); | 1347 | dst_address); |
| 1341 | return ERR_INVALID_ADDRESS; | 1348 | return ResultInvalidAddress; |
| 1342 | } | 1349 | } |
| 1343 | 1350 | ||
| 1344 | if (size == 0 || !Common::Is4KBAligned(size)) { | 1351 | if (size == 0 || !Common::Is4KBAligned(size)) { |
| 1345 | LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); | 1352 | LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X})", size); |
| 1346 | return ERR_INVALID_SIZE; | 1353 | return ResultInvalidSize; |
| 1347 | } | 1354 | } |
| 1348 | 1355 | ||
| 1349 | if (!IsValidAddressRange(dst_address, size)) { | 1356 | if (!IsValidAddressRange(dst_address, size)) { |
| @@ -1351,7 +1358,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand | |||
| 1351 | "Destination address range overflows the address space (dst_address=0x{:016X}, " | 1358 | "Destination address range overflows the address space (dst_address=0x{:016X}, " |
| 1352 | "size=0x{:016X}).", | 1359 | "size=0x{:016X}).", |
| 1353 | dst_address, size); | 1360 | dst_address, size); |
| 1354 | return ERR_INVALID_ADDRESS_STATE; | 1361 | return ResultInvalidCurrentMemory; |
| 1355 | } | 1362 | } |
| 1356 | 1363 | ||
| 1357 | if (!IsValidAddressRange(src_address, size)) { | 1364 | if (!IsValidAddressRange(src_address, size)) { |
| @@ -1359,7 +1366,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand | |||
| 1359 | "Source address range overflows the address space (src_address=0x{:016X}, " | 1366 | "Source address range overflows the address space (src_address=0x{:016X}, " |
| 1360 | "size=0x{:016X}).", | 1367 | "size=0x{:016X}).", |
| 1361 | src_address, size); | 1368 | src_address, size); |
| 1362 | return ERR_INVALID_ADDRESS_STATE; | 1369 | return ResultInvalidCurrentMemory; |
| 1363 | } | 1370 | } |
| 1364 | 1371 | ||
| 1365 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); | 1372 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); |
| @@ -1367,7 +1374,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand | |||
| 1367 | if (!process) { | 1374 | if (!process) { |
| 1368 | LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", | 1375 | LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", |
| 1369 | process_handle); | 1376 | process_handle); |
| 1370 | return ERR_INVALID_HANDLE; | 1377 | return ResultInvalidHandle; |
| 1371 | } | 1378 | } |
| 1372 | 1379 | ||
| 1373 | auto& page_table = process->PageTable(); | 1380 | auto& page_table = process->PageTable(); |
| @@ -1376,7 +1383,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand | |||
| 1376 | "Source address range is not within the address space (src_address=0x{:016X}, " | 1383 | "Source address range is not within the address space (src_address=0x{:016X}, " |
| 1377 | "size=0x{:016X}).", | 1384 | "size=0x{:016X}).", |
| 1378 | src_address, size); | 1385 | src_address, size); |
| 1379 | return ERR_INVALID_ADDRESS_STATE; | 1386 | return ResultInvalidCurrentMemory; |
| 1380 | } | 1387 | } |
| 1381 | 1388 | ||
| 1382 | if (!page_table.IsInsideASLRRegion(dst_address, size)) { | 1389 | if (!page_table.IsInsideASLRRegion(dst_address, size)) { |
| @@ -1384,7 +1391,7 @@ static ResultCode MapProcessCodeMemory(Core::System& system, Handle process_hand | |||
| 1384 | "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " | 1391 | "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " |
| 1385 | "size=0x{:016X}).", | 1392 | "size=0x{:016X}).", |
| 1386 | dst_address, size); | 1393 | dst_address, size); |
| 1387 | return ERR_INVALID_MEMORY_RANGE; | 1394 | return ResultInvalidMemoryRange; |
| 1388 | } | 1395 | } |
| 1389 | 1396 | ||
| 1390 | return page_table.MapProcessCodeMemory(dst_address, src_address, size); | 1397 | return page_table.MapProcessCodeMemory(dst_address, src_address, size); |
| @@ -1400,18 +1407,18 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha | |||
| 1400 | if (!Common::Is4KBAligned(dst_address)) { | 1407 | if (!Common::Is4KBAligned(dst_address)) { |
| 1401 | LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", | 1408 | LOG_ERROR(Kernel_SVC, "dst_address is not page-aligned (dst_address=0x{:016X}).", |
| 1402 | dst_address); | 1409 | dst_address); |
| 1403 | return ERR_INVALID_ADDRESS; | 1410 | return ResultInvalidAddress; |
| 1404 | } | 1411 | } |
| 1405 | 1412 | ||
| 1406 | if (!Common::Is4KBAligned(src_address)) { | 1413 | if (!Common::Is4KBAligned(src_address)) { |
| 1407 | LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", | 1414 | LOG_ERROR(Kernel_SVC, "src_address is not page-aligned (src_address=0x{:016X}).", |
| 1408 | src_address); | 1415 | src_address); |
| 1409 | return ERR_INVALID_ADDRESS; | 1416 | return ResultInvalidAddress; |
| 1410 | } | 1417 | } |
| 1411 | 1418 | ||
| 1412 | if (size == 0 || !Common::Is4KBAligned(size)) { | 1419 | if (size == 0 || !Common::Is4KBAligned(size)) { |
| 1413 | LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); | 1420 | LOG_ERROR(Kernel_SVC, "Size is zero or not page-aligned (size=0x{:016X}).", size); |
| 1414 | return ERR_INVALID_SIZE; | 1421 | return ResultInvalidSize; |
| 1415 | } | 1422 | } |
| 1416 | 1423 | ||
| 1417 | if (!IsValidAddressRange(dst_address, size)) { | 1424 | if (!IsValidAddressRange(dst_address, size)) { |
| @@ -1419,7 +1426,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha | |||
| 1419 | "Destination address range overflows the address space (dst_address=0x{:016X}, " | 1426 | "Destination address range overflows the address space (dst_address=0x{:016X}, " |
| 1420 | "size=0x{:016X}).", | 1427 | "size=0x{:016X}).", |
| 1421 | dst_address, size); | 1428 | dst_address, size); |
| 1422 | return ERR_INVALID_ADDRESS_STATE; | 1429 | return ResultInvalidCurrentMemory; |
| 1423 | } | 1430 | } |
| 1424 | 1431 | ||
| 1425 | if (!IsValidAddressRange(src_address, size)) { | 1432 | if (!IsValidAddressRange(src_address, size)) { |
| @@ -1427,7 +1434,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha | |||
| 1427 | "Source address range overflows the address space (src_address=0x{:016X}, " | 1434 | "Source address range overflows the address space (src_address=0x{:016X}, " |
| 1428 | "size=0x{:016X}).", | 1435 | "size=0x{:016X}).", |
| 1429 | src_address, size); | 1436 | src_address, size); |
| 1430 | return ERR_INVALID_ADDRESS_STATE; | 1437 | return ResultInvalidCurrentMemory; |
| 1431 | } | 1438 | } |
| 1432 | 1439 | ||
| 1433 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); | 1440 | const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); |
| @@ -1435,7 +1442,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha | |||
| 1435 | if (!process) { | 1442 | if (!process) { |
| 1436 | LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", | 1443 | LOG_ERROR(Kernel_SVC, "Invalid process handle specified (handle=0x{:08X}).", |
| 1437 | process_handle); | 1444 | process_handle); |
| 1438 | return ERR_INVALID_HANDLE; | 1445 | return ResultInvalidHandle; |
| 1439 | } | 1446 | } |
| 1440 | 1447 | ||
| 1441 | auto& page_table = process->PageTable(); | 1448 | auto& page_table = process->PageTable(); |
| @@ -1444,7 +1451,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha | |||
| 1444 | "Source address range is not within the address space (src_address=0x{:016X}, " | 1451 | "Source address range is not within the address space (src_address=0x{:016X}, " |
| 1445 | "size=0x{:016X}).", | 1452 | "size=0x{:016X}).", |
| 1446 | src_address, size); | 1453 | src_address, size); |
| 1447 | return ERR_INVALID_ADDRESS_STATE; | 1454 | return ResultInvalidCurrentMemory; |
| 1448 | } | 1455 | } |
| 1449 | 1456 | ||
| 1450 | if (!page_table.IsInsideASLRRegion(dst_address, size)) { | 1457 | if (!page_table.IsInsideASLRRegion(dst_address, size)) { |
| @@ -1452,7 +1459,7 @@ static ResultCode UnmapProcessCodeMemory(Core::System& system, Handle process_ha | |||
| 1452 | "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " | 1459 | "Destination address range is not within the ASLR region (dst_address=0x{:016X}, " |
| 1453 | "size=0x{:016X}).", | 1460 | "size=0x{:016X}).", |
| 1454 | dst_address, size); | 1461 | dst_address, size); |
| 1455 | return ERR_INVALID_MEMORY_RANGE; | 1462 | return ResultInvalidMemoryRange; |
| 1456 | } | 1463 | } |
| 1457 | 1464 | ||
| 1458 | return page_table.UnmapProcessCodeMemory(dst_address, src_address, size); | 1465 | return page_table.UnmapProcessCodeMemory(dst_address, src_address, size); |
| @@ -1515,8 +1522,13 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e | |||
| 1515 | return ResultInvalidPriority; | 1522 | return ResultInvalidPriority; |
| 1516 | } | 1523 | } |
| 1517 | 1524 | ||
| 1518 | ASSERT(process.GetResourceLimit()->Reserve( | 1525 | KScopedResourceReservation thread_reservation( |
| 1519 | LimitableResource::Threads, 1, system.CoreTiming().GetGlobalTimeNs().count() + 100000000)); | 1526 | kernel.CurrentProcess(), LimitableResource::Threads, 1, |
| 1527 | system.CoreTiming().GetGlobalTimeNs().count() + 100000000); | ||
| 1528 | if (!thread_reservation.Succeeded()) { | ||
| 1529 | LOG_ERROR(Kernel_SVC, "Could not reserve a new thread"); | ||
| 1530 | return ResultResourceLimitedExceeded; | ||
| 1531 | } | ||
| 1520 | 1532 | ||
| 1521 | std::shared_ptr<KThread> thread; | 1533 | std::shared_ptr<KThread> thread; |
| 1522 | { | 1534 | { |
| @@ -1536,6 +1548,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e | |||
| 1536 | // Set the thread name for debugging purposes. | 1548 | // Set the thread name for debugging purposes. |
| 1537 | thread->SetName( | 1549 | thread->SetName( |
| 1538 | fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); | 1550 | fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); |
| 1551 | thread_reservation.Commit(); | ||
| 1539 | 1552 | ||
| 1540 | return RESULT_SUCCESS; | 1553 | return RESULT_SUCCESS; |
| 1541 | } | 1554 | } |
| @@ -1844,7 +1857,7 @@ static ResultCode ResetSignal(Core::System& system, Handle handle) { | |||
| 1844 | 1857 | ||
| 1845 | LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle); | 1858 | LOG_ERROR(Kernel_SVC, "invalid handle (0x{:08X})", handle); |
| 1846 | 1859 | ||
| 1847 | return Svc::ResultInvalidHandle; | 1860 | return ResultInvalidHandle; |
| 1848 | } | 1861 | } |
| 1849 | 1862 | ||
| 1850 | static ResultCode ResetSignal32(Core::System& system, Handle handle) { | 1863 | static ResultCode ResetSignal32(Core::System& system, Handle handle) { |
| @@ -1860,18 +1873,18 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd | |||
| 1860 | 1873 | ||
| 1861 | if (!Common::Is4KBAligned(addr)) { | 1874 | if (!Common::Is4KBAligned(addr)) { |
| 1862 | LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr); | 1875 | LOG_ERROR(Kernel_SVC, "Address ({:016X}) is not page aligned!", addr); |
| 1863 | return ERR_INVALID_ADDRESS; | 1876 | return ResultInvalidAddress; |
| 1864 | } | 1877 | } |
| 1865 | 1878 | ||
| 1866 | if (!Common::Is4KBAligned(size) || size == 0) { | 1879 | if (!Common::Is4KBAligned(size) || size == 0) { |
| 1867 | LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size); | 1880 | LOG_ERROR(Kernel_SVC, "Size ({:016X}) is not page aligned or equal to zero!", size); |
| 1868 | return ERR_INVALID_ADDRESS; | 1881 | return ResultInvalidAddress; |
| 1869 | } | 1882 | } |
| 1870 | 1883 | ||
| 1871 | if (!IsValidAddressRange(addr, size)) { | 1884 | if (!IsValidAddressRange(addr, size)) { |
| 1872 | LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})", | 1885 | LOG_ERROR(Kernel_SVC, "Address and size cause overflow! (address={:016X}, size={:016X})", |
| 1873 | addr, size); | 1886 | addr, size); |
| 1874 | return ERR_INVALID_ADDRESS_STATE; | 1887 | return ResultInvalidCurrentMemory; |
| 1875 | } | 1888 | } |
| 1876 | 1889 | ||
| 1877 | const auto perms{static_cast<Memory::MemoryPermission>(permissions)}; | 1890 | const auto perms{static_cast<Memory::MemoryPermission>(permissions)}; |
| @@ -1879,10 +1892,17 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd | |||
| 1879 | perms == Memory::MemoryPermission::Write) { | 1892 | perms == Memory::MemoryPermission::Write) { |
| 1880 | LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})", | 1893 | LOG_ERROR(Kernel_SVC, "Invalid memory permissions for transfer memory! (perms={:08X})", |
| 1881 | permissions); | 1894 | permissions); |
| 1882 | return ERR_INVALID_MEMORY_PERMISSIONS; | 1895 | return ResultInvalidMemoryPermissions; |
| 1883 | } | 1896 | } |
| 1884 | 1897 | ||
| 1885 | auto& kernel = system.Kernel(); | 1898 | auto& kernel = system.Kernel(); |
| 1899 | // Reserve a new transfer memory from the process resource limit. | ||
| 1900 | KScopedResourceReservation trmem_reservation(kernel.CurrentProcess(), | ||
| 1901 | LimitableResource::TransferMemory); | ||
| 1902 | if (!trmem_reservation.Succeeded()) { | ||
| 1903 | LOG_ERROR(Kernel_SVC, "Could not reserve a new transfer memory"); | ||
| 1904 | return ResultResourceLimitedExceeded; | ||
| 1905 | } | ||
| 1886 | auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms); | 1906 | auto transfer_mem_handle = TransferMemory::Create(kernel, system.Memory(), addr, size, perms); |
| 1887 | 1907 | ||
| 1888 | if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) { | 1908 | if (const auto reserve_result{transfer_mem_handle->Reserve()}; reserve_result.IsError()) { |
| @@ -1894,6 +1914,7 @@ static ResultCode CreateTransferMemory(Core::System& system, Handle* handle, VAd | |||
| 1894 | if (result.Failed()) { | 1914 | if (result.Failed()) { |
| 1895 | return result.Code(); | 1915 | return result.Code(); |
| 1896 | } | 1916 | } |
| 1917 | trmem_reservation.Commit(); | ||
| 1897 | 1918 | ||
| 1898 | *handle = *result; | 1919 | *handle = *result; |
| 1899 | return RESULT_SUCCESS; | 1920 | return RESULT_SUCCESS; |
| @@ -1989,7 +2010,6 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, | |||
| 1989 | LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw); | 2010 | LOG_ERROR(Kernel_SVC, "Unable to successfully set core mask (result={})", set_result.raw); |
| 1990 | return set_result; | 2011 | return set_result; |
| 1991 | } | 2012 | } |
| 1992 | |||
| 1993 | return RESULT_SUCCESS; | 2013 | return RESULT_SUCCESS; |
| 1994 | } | 2014 | } |
| 1995 | 2015 | ||
| @@ -2002,8 +2022,17 @@ static ResultCode SetThreadCoreMask32(Core::System& system, Handle thread_handle | |||
| 2002 | static ResultCode SignalEvent(Core::System& system, Handle event_handle) { | 2022 | static ResultCode SignalEvent(Core::System& system, Handle event_handle) { |
| 2003 | LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle); | 2023 | LOG_DEBUG(Kernel_SVC, "called, event_handle=0x{:08X}", event_handle); |
| 2004 | 2024 | ||
| 2025 | auto& kernel = system.Kernel(); | ||
| 2005 | // Get the current handle table. | 2026 | // Get the current handle table. |
| 2006 | const HandleTable& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); | 2027 | const HandleTable& handle_table = kernel.CurrentProcess()->GetHandleTable(); |
| 2028 | |||
| 2029 | // Reserve a new event from the process resource limit. | ||
| 2030 | KScopedResourceReservation event_reservation(kernel.CurrentProcess(), | ||
| 2031 | LimitableResource::Events); | ||
| 2032 | if (!event_reservation.Succeeded()) { | ||
| 2033 | LOG_ERROR(Kernel, "Could not reserve a new event"); | ||
| 2034 | return ResultResourceLimitedExceeded; | ||
| 2035 | } | ||
| 2007 | 2036 | ||
| 2008 | // Get the writable event. | 2037 | // Get the writable event. |
| 2009 | auto writable_event = handle_table.Get<KWritableEvent>(event_handle); | 2038 | auto writable_event = handle_table.Get<KWritableEvent>(event_handle); |
| @@ -2012,6 +2041,9 @@ static ResultCode SignalEvent(Core::System& system, Handle event_handle) { | |||
| 2012 | return ResultInvalidHandle; | 2041 | return ResultInvalidHandle; |
| 2013 | } | 2042 | } |
| 2014 | 2043 | ||
| 2044 | // Commit the successful reservation. | ||
| 2045 | event_reservation.Commit(); | ||
| 2046 | |||
| 2015 | return writable_event->Signal(); | 2047 | return writable_event->Signal(); |
| 2016 | } | 2048 | } |
| 2017 | 2049 | ||
| @@ -2043,7 +2075,7 @@ static ResultCode ClearEvent(Core::System& system, Handle event_handle) { | |||
| 2043 | 2075 | ||
| 2044 | LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle); | 2076 | LOG_ERROR(Kernel_SVC, "Event handle does not exist, event_handle=0x{:08X}", event_handle); |
| 2045 | 2077 | ||
| 2046 | return Svc::ResultInvalidHandle; | 2078 | return ResultInvalidHandle; |
| 2047 | } | 2079 | } |
| 2048 | 2080 | ||
| 2049 | static ResultCode ClearEvent32(Core::System& system, Handle event_handle) { | 2081 | static ResultCode ClearEvent32(Core::System& system, Handle event_handle) { |
| @@ -2106,13 +2138,13 @@ static ResultCode GetProcessInfo(Core::System& system, u64* out, Handle process_ | |||
| 2106 | if (!process) { | 2138 | if (!process) { |
| 2107 | LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", | 2139 | LOG_ERROR(Kernel_SVC, "Process handle does not exist, process_handle=0x{:08X}", |
| 2108 | process_handle); | 2140 | process_handle); |
| 2109 | return ERR_INVALID_HANDLE; | 2141 | return ResultInvalidHandle; |
| 2110 | } | 2142 | } |
| 2111 | 2143 | ||
| 2112 | const auto info_type = static_cast<InfoType>(type); | 2144 | const auto info_type = static_cast<InfoType>(type); |
| 2113 | if (info_type != InfoType::Status) { | 2145 | if (info_type != InfoType::Status) { |
| 2114 | LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type); | 2146 | LOG_ERROR(Kernel_SVC, "Expected info_type to be Status but got {} instead", type); |
| 2115 | return ERR_INVALID_ENUM_VALUE; | 2147 | return ResultInvalidEnumValue; |
| 2116 | } | 2148 | } |
| 2117 | 2149 | ||
| 2118 | *out = static_cast<u64>(process->GetStatus()); | 2150 | *out = static_cast<u64>(process->GetStatus()); |
| @@ -2174,7 +2206,7 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour | |||
| 2174 | const auto type = static_cast<LimitableResource>(resource_type); | 2206 | const auto type = static_cast<LimitableResource>(resource_type); |
| 2175 | if (!IsValidResourceType(type)) { | 2207 | if (!IsValidResourceType(type)) { |
| 2176 | LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); | 2208 | LOG_ERROR(Kernel_SVC, "Invalid resource limit type: '{}'", resource_type); |
| 2177 | return ERR_INVALID_ENUM_VALUE; | 2209 | return ResultInvalidEnumValue; |
| 2178 | } | 2210 | } |
| 2179 | 2211 | ||
| 2180 | auto* const current_process = system.Kernel().CurrentProcess(); | 2212 | auto* const current_process = system.Kernel().CurrentProcess(); |
| @@ -2185,16 +2217,16 @@ static ResultCode SetResourceLimitLimitValue(Core::System& system, Handle resour | |||
| 2185 | if (!resource_limit_object) { | 2217 | if (!resource_limit_object) { |
| 2186 | LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", | 2218 | LOG_ERROR(Kernel_SVC, "Handle to non-existent resource limit instance used. Handle={:08X}", |
| 2187 | resource_limit); | 2219 | resource_limit); |
| 2188 | return ERR_INVALID_HANDLE; | 2220 | return ResultInvalidHandle; |
| 2189 | } | 2221 | } |
| 2190 | 2222 | ||
| 2191 | const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); | 2223 | const auto set_result = resource_limit_object->SetLimitValue(type, static_cast<s64>(value)); |
| 2192 | if (set_result.IsError()) { | 2224 | if (set_result.IsError()) { |
| 2193 | LOG_ERROR( | 2225 | LOG_ERROR(Kernel_SVC, |
| 2194 | Kernel_SVC, | 2226 | "Attempted to lower resource limit ({}) for category '{}' below its current " |
| 2195 | "Attempted to lower resource limit ({}) for category '{}' below its current value ({})", | 2227 | "value ({})", |
| 2196 | resource_limit_object->GetLimitValue(type), resource_type, | 2228 | resource_limit_object->GetLimitValue(type), resource_type, |
| 2197 | resource_limit_object->GetCurrentValue(type)); | 2229 | resource_limit_object->GetCurrentValue(type)); |
| 2198 | return set_result; | 2230 | return set_result; |
| 2199 | } | 2231 | } |
| 2200 | 2232 | ||
| @@ -2211,7 +2243,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, | |||
| 2211 | LOG_ERROR(Kernel_SVC, | 2243 | LOG_ERROR(Kernel_SVC, |
| 2212 | "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}", | 2244 | "Supplied size outside [0, 0x0FFFFFFF] range. out_process_ids_size={}", |
| 2213 | out_process_ids_size); | 2245 | out_process_ids_size); |
| 2214 | return ERR_OUT_OF_RANGE; | 2246 | return ResultOutOfRange; |
| 2215 | } | 2247 | } |
| 2216 | 2248 | ||
| 2217 | const auto& kernel = system.Kernel(); | 2249 | const auto& kernel = system.Kernel(); |
| @@ -2221,7 +2253,7 @@ static ResultCode GetProcessList(Core::System& system, u32* out_num_processes, | |||
| 2221 | out_process_ids, total_copy_size)) { | 2253 | out_process_ids, total_copy_size)) { |
| 2222 | LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", | 2254 | LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", |
| 2223 | out_process_ids, out_process_ids + total_copy_size); | 2255 | out_process_ids, out_process_ids + total_copy_size); |
| 2224 | return ERR_INVALID_ADDRESS_STATE; | 2256 | return ResultInvalidCurrentMemory; |
| 2225 | } | 2257 | } |
| 2226 | 2258 | ||
| 2227 | auto& memory = system.Memory(); | 2259 | auto& memory = system.Memory(); |
| @@ -2250,7 +2282,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd | |||
| 2250 | if ((out_thread_ids_size & 0xF0000000) != 0) { | 2282 | if ((out_thread_ids_size & 0xF0000000) != 0) { |
| 2251 | LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}", | 2283 | LOG_ERROR(Kernel_SVC, "Supplied size outside [0, 0x0FFFFFFF] range. size={}", |
| 2252 | out_thread_ids_size); | 2284 | out_thread_ids_size); |
| 2253 | return ERR_OUT_OF_RANGE; | 2285 | return ResultOutOfRange; |
| 2254 | } | 2286 | } |
| 2255 | 2287 | ||
| 2256 | const auto* const current_process = system.Kernel().CurrentProcess(); | 2288 | const auto* const current_process = system.Kernel().CurrentProcess(); |
| @@ -2260,7 +2292,7 @@ static ResultCode GetThreadList(Core::System& system, u32* out_num_threads, VAdd | |||
| 2260 | !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) { | 2292 | !current_process->PageTable().IsInsideAddressSpace(out_thread_ids, total_copy_size)) { |
| 2261 | LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", | 2293 | LOG_ERROR(Kernel_SVC, "Address range outside address space. begin=0x{:016X}, end=0x{:016X}", |
| 2262 | out_thread_ids, out_thread_ids + total_copy_size); | 2294 | out_thread_ids, out_thread_ids + total_copy_size); |
| 2263 | return ERR_INVALID_ADDRESS_STATE; | 2295 | return ResultInvalidCurrentMemory; |
| 2264 | } | 2296 | } |
| 2265 | 2297 | ||
| 2266 | auto& memory = system.Memory(); | 2298 | auto& memory = system.Memory(); |
diff --git a/src/core/hle/kernel/svc_results.h b/src/core/hle/kernel/svc_results.h index 204cd989d..a26d9f2c9 100644 --- a/src/core/hle/kernel/svc_results.h +++ b/src/core/hle/kernel/svc_results.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // Copyright 2020 yuzu emulator team | 1 | // Copyright 2018 yuzu emulator team |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| @@ -6,21 +6,36 @@ | |||
| 6 | 6 | ||
| 7 | #include "core/hle/result.h" | 7 | #include "core/hle/result.h" |
| 8 | 8 | ||
| 9 | namespace Kernel::Svc { | 9 | namespace Kernel { |
| 10 | 10 | ||
| 11 | // Confirmed Switch kernel error codes | ||
| 12 | |||
| 13 | constexpr ResultCode ResultMaxConnectionsReached{ErrorModule::Kernel, 7}; | ||
| 14 | constexpr ResultCode ResultInvalidCapabilityDescriptor{ErrorModule::Kernel, 14}; | ||
| 11 | constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57}; | 15 | constexpr ResultCode ResultNoSynchronizationObject{ErrorModule::Kernel, 57}; |
| 12 | constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59}; | 16 | constexpr ResultCode ResultTerminationRequested{ErrorModule::Kernel, 59}; |
| 17 | constexpr ResultCode ResultInvalidSize{ErrorModule::Kernel, 101}; | ||
| 13 | constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102}; | 18 | constexpr ResultCode ResultInvalidAddress{ErrorModule::Kernel, 102}; |
| 14 | constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103}; | 19 | constexpr ResultCode ResultOutOfResource{ErrorModule::Kernel, 103}; |
| 20 | constexpr ResultCode ResultOutOfMemory{ErrorModule::Kernel, 104}; | ||
| 21 | constexpr ResultCode ResultHandleTableFull{ErrorModule::Kernel, 105}; | ||
| 15 | constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; | 22 | constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106}; |
| 23 | constexpr ResultCode ResultInvalidMemoryPermissions{ErrorModule::Kernel, 108}; | ||
| 24 | constexpr ResultCode ResultInvalidMemoryRange{ErrorModule::Kernel, 110}; | ||
| 16 | constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112}; | 25 | constexpr ResultCode ResultInvalidPriority{ErrorModule::Kernel, 112}; |
| 17 | constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113}; | 26 | constexpr ResultCode ResultInvalidCoreId{ErrorModule::Kernel, 113}; |
| 18 | constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114}; | 27 | constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114}; |
| 28 | constexpr ResultCode ResultInvalidPointer{ErrorModule::Kernel, 115}; | ||
| 19 | constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116}; | 29 | constexpr ResultCode ResultInvalidCombination{ErrorModule::Kernel, 116}; |
| 20 | constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117}; | 30 | constexpr ResultCode ResultTimedOut{ErrorModule::Kernel, 117}; |
| 21 | constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118}; | 31 | constexpr ResultCode ResultCancelled{ErrorModule::Kernel, 118}; |
| 32 | constexpr ResultCode ResultOutOfRange{ErrorModule::Kernel, 119}; | ||
| 22 | constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; | 33 | constexpr ResultCode ResultInvalidEnumValue{ErrorModule::Kernel, 120}; |
| 34 | constexpr ResultCode ResultNotFound{ErrorModule::Kernel, 121}; | ||
| 23 | constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122}; | 35 | constexpr ResultCode ResultBusy{ErrorModule::Kernel, 122}; |
| 36 | constexpr ResultCode ResultSessionClosedByRemote{ErrorModule::Kernel, 123}; | ||
| 24 | constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125}; | 37 | constexpr ResultCode ResultInvalidState{ErrorModule::Kernel, 125}; |
| 38 | constexpr ResultCode ResultReservedValue{ErrorModule::Kernel, 126}; | ||
| 39 | constexpr ResultCode ResultResourceLimitedExceeded{ErrorModule::Kernel, 132}; | ||
| 25 | 40 | ||
| 26 | } // namespace Kernel::Svc | 41 | } // namespace Kernel |
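Each constant above pairs ErrorModule::Kernel with a description number from the confirmed Switch error codes. The following simplified, self-contained model shows how such constexpr result codes can be packed into a single value and compared at call sites; it is an assumption-level sketch, not yuzu's actual result.h.

```cpp
#include <cstdint>
#include <iostream>

// Simplified stand-in for ErrorModule (the real enum has many more members).
enum class ErrorModule : std::uint32_t {
    Common = 0,
    Kernel = 1,
};

// Simplified stand-in for ResultCode: module in the low 9 bits, description above it.
struct ResultCode {
    std::uint32_t raw;

    constexpr ResultCode(ErrorModule module, std::uint32_t description)
        : raw{static_cast<std::uint32_t>(module) | (description << 9)} {}

    constexpr bool IsError() const {
        return raw != 0;
    }

    constexpr bool operator==(const ResultCode& rhs) const {
        return raw == rhs.raw;
    }
};

constexpr ResultCode ResultInvalidCurrentMemory{ErrorModule::Kernel, 106};
constexpr ResultCode ResultInvalidHandle{ErrorModule::Kernel, 114};

int main() {
    const ResultCode result = ResultInvalidCurrentMemory;
    // Call sites compare against the named constant, as in the ldr.cpp retry loop below.
    if (result == ResultInvalidCurrentMemory) {
        std::cout << "retry with a different address\n";
    }
    std::cout << std::boolalpha << (result == ResultInvalidHandle) << '\n'; // false
}
```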
diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index 765f408c3..6b0fc1591 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include "core/hle/kernel/k_resource_limit.h" | ||
| 5 | #include "core/hle/kernel/kernel.h" | 6 | #include "core/hle/kernel/kernel.h" |
| 6 | #include "core/hle/kernel/memory/page_table.h" | 7 | #include "core/hle/kernel/memory/page_table.h" |
| 7 | #include "core/hle/kernel/process.h" | 8 | #include "core/hle/kernel/process.h" |
| @@ -17,6 +18,7 @@ TransferMemory::TransferMemory(KernelCore& kernel, Core::Memory::Memory& memory) | |||
| 17 | TransferMemory::~TransferMemory() { | 18 | TransferMemory::~TransferMemory() { |
| 18 | // Release memory region when transfer memory is destroyed | 19 | // Release memory region when transfer memory is destroyed |
| 19 | Reset(); | 20 | Reset(); |
| 21 | owner_process->GetResourceLimit()->Release(LimitableResource::TransferMemory, 1); | ||
| 20 | } | 22 | } |
| 21 | 23 | ||
| 22 | std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, | 24 | std::shared_ptr<TransferMemory> TransferMemory::Create(KernelCore& kernel, |
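The Release call added to ~TransferMemory is the counterpart of the Commit() performed in svcCreateTransferMemory: the TransferMemory slot reserved at creation time is handed back when the object is destroyed. A self-contained toy illustration of that lifecycle follows; ToyLimit and ToyTransferMemory are hypothetical names, not the kernel classes.

```cpp
#include <cstdint>
#include <iostream>

// Toy per-process counter standing in for the resource limit's TransferMemory category.
struct ToyLimit {
    std::int64_t used = 0;
    std::int64_t max = 1;

    bool Reserve() {
        return used < max ? (++used, true) : false;
    }
    void Release() {
        --used;
    }
};

// Toy transfer memory: holds one reserved slot for its whole lifetime.
struct ToyTransferMemory {
    explicit ToyTransferMemory(ToyLimit& limit) : limit_{limit} {}
    ~ToyTransferMemory() {
        limit_.Release(); // mirrors the Release(...) added in the destructor above
    }
    ToyLimit& limit_;
};

int main() {
    ToyLimit limit;
    std::cout << std::boolalpha;
    std::cout << limit.Reserve() << '\n';      // true: reserved before creating the object
    {
        ToyTransferMemory tmem{limit};         // the object owns the committed slot
        std::cout << limit.Reserve() << '\n';  // false: a second one would exceed the limit of 1
    }
    std::cout << limit.Reserve() << '\n';      // true again: the destructor released the slot
}
```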
diff --git a/src/core/hle/service/ldr/ldr.cpp b/src/core/hle/service/ldr/ldr.cpp index 9da786b4e..c724d2554 100644 --- a/src/core/hle/service/ldr/ldr.cpp +++ b/src/core/hle/service/ldr/ldr.cpp | |||
| @@ -11,10 +11,10 @@ | |||
| 11 | #include "common/scope_exit.h" | 11 | #include "common/scope_exit.h" |
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/hle/ipc_helpers.h" | 13 | #include "core/hle/ipc_helpers.h" |
| 14 | #include "core/hle/kernel/errors.h" | ||
| 15 | #include "core/hle/kernel/memory/page_table.h" | 14 | #include "core/hle/kernel/memory/page_table.h" |
| 16 | #include "core/hle/kernel/memory/system_control.h" | 15 | #include "core/hle/kernel/memory/system_control.h" |
| 17 | #include "core/hle/kernel/process.h" | 16 | #include "core/hle/kernel/process.h" |
| 17 | #include "core/hle/kernel/svc_results.h" | ||
| 18 | #include "core/hle/service/ldr/ldr.h" | 18 | #include "core/hle/service/ldr/ldr.h" |
| 19 | #include "core/hle/service/service.h" | 19 | #include "core/hle/service/service.h" |
| 20 | #include "core/loader/nro.h" | 20 | #include "core/loader/nro.h" |
| @@ -330,7 +330,7 @@ public: | |||
| 330 | const VAddr addr{GetRandomMapRegion(page_table, size)}; | 330 | const VAddr addr{GetRandomMapRegion(page_table, size)}; |
| 331 | const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)}; | 331 | const ResultCode result{page_table.MapProcessCodeMemory(addr, baseAddress, size)}; |
| 332 | 332 | ||
| 333 | if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { | 333 | if (result == Kernel::ResultInvalidCurrentMemory) { |
| 334 | continue; | 334 | continue; |
| 335 | } | 335 | } |
| 336 | 336 | ||
| @@ -361,7 +361,7 @@ public: | |||
| 361 | const ResultCode result{ | 361 | const ResultCode result{ |
| 362 | page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)}; | 362 | page_table.MapProcessCodeMemory(addr + nro_size, bss_addr, bss_size)}; |
| 363 | 363 | ||
| 364 | if (result == Kernel::ERR_INVALID_ADDRESS_STATE) { | 364 | if (result == Kernel::ResultInvalidCurrentMemory) { |
| 365 | continue; | 365 | continue; |
| 366 | } | 366 | } |
| 367 | 367 | ||
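The ldr.cpp hunks only rename the constant the retry loop compares against: a mapping attempt that fails with ResultInvalidCurrentMemory picks another candidate address and tries again, while any other result is treated as final. The sketch below is a rough, self-contained illustration of that retry pattern; the MapProcessCodeMemory stand-in and its failure rule are hypothetical.

```cpp
#include <cstdint>
#include <iostream>
#include <random>

enum class MapResult { Success, InvalidCurrentMemory, OtherError };

// Hypothetical stand-in for the page-table call: pretend a region is free only
// when bits 12-13 of the candidate address happen to be clear.
MapResult MapProcessCodeMemory(std::uint64_t addr) {
    return ((addr >> 12) % 4 == 0) ? MapResult::Success : MapResult::InvalidCurrentMemory;
}

int main() {
    std::mt19937_64 rng{42};
    for (int attempt = 0; attempt < 16; ++attempt) {
        const std::uint64_t addr = rng() & ~std::uint64_t{0xFFF}; // page-aligned candidate
        const MapResult result = MapProcessCodeMemory(addr);
        if (result == MapResult::InvalidCurrentMemory) {
            continue; // same idea as `result == Kernel::ResultInvalidCurrentMemory` above
        }
        if (result != MapResult::Success) {
            std::cout << "unrecoverable mapping error\n";
            return 1;
        }
        std::cout << "mapped at 0x" << std::hex << addr << '\n';
        return 0;
    }
    std::cout << "no usable region found\n";
    return 1;
}
```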
diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp index 651633e9e..edced69bb 100644 --- a/src/tests/video_core/buffer_base.cpp +++ b/src/tests/video_core/buffer_base.cpp | |||
| @@ -471,3 +471,79 @@ TEST_CASE("BufferBase: Unaligned page region query") { | |||
| 471 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); | 471 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); |
| 472 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); | 472 | REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); |
| 473 | } | 473 | } |
| 474 | |||
| 475 | TEST_CASE("BufferBase: Cached write") { | ||
| 476 | RasterizerInterface rasterizer; | ||
| 477 | BufferBase buffer(rasterizer, c, WORD); | ||
| 478 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 479 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 480 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 481 | buffer.FlushCachedWrites(); | ||
| 482 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 483 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 484 | REQUIRE(rasterizer.Count() == 0); | ||
| 485 | } | ||
| 486 | |||
| 487 | TEST_CASE("BufferBase: Multiple cached write") { | ||
| 488 | RasterizerInterface rasterizer; | ||
| 489 | BufferBase buffer(rasterizer, c, WORD); | ||
| 490 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 491 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 492 | buffer.CachedCpuWrite(c + PAGE * 3, PAGE); | ||
| 493 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 494 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 495 | buffer.FlushCachedWrites(); | ||
| 496 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 497 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 3, PAGE)); | ||
| 498 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 499 | REQUIRE(rasterizer.Count() == 0); | ||
| 500 | } | ||
| 501 | |||
| 502 | TEST_CASE("BufferBase: Cached write unmarked") { | ||
| 503 | RasterizerInterface rasterizer; | ||
| 504 | BufferBase buffer(rasterizer, c, WORD); | ||
| 505 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 506 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 507 | buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); | ||
| 508 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 509 | buffer.FlushCachedWrites(); | ||
| 510 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 511 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 512 | REQUIRE(rasterizer.Count() == 0); | ||
| 513 | } | ||
| 514 | |||
| 515 | TEST_CASE("BufferBase: Cached write iterated") { | ||
| 516 | RasterizerInterface rasterizer; | ||
| 517 | BufferBase buffer(rasterizer, c, WORD); | ||
| 518 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 519 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 520 | int num = 0; | ||
| 521 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 522 | REQUIRE(num == 0); | ||
| 523 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 524 | buffer.FlushCachedWrites(); | ||
| 525 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 526 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 527 | REQUIRE(rasterizer.Count() == 0); | ||
| 528 | } | ||
| 529 | |||
| 530 | TEST_CASE("BufferBase: Cached write downloads") { | ||
| 531 | RasterizerInterface rasterizer; | ||
| 532 | BufferBase buffer(rasterizer, c, WORD); | ||
| 533 | buffer.UnmarkRegionAsCpuModified(c, WORD); | ||
| 534 | REQUIRE(rasterizer.Count() == 64); | ||
| 535 | buffer.CachedCpuWrite(c + PAGE, PAGE); | ||
| 536 | REQUIRE(rasterizer.Count() == 63); | ||
| 537 | buffer.MarkRegionAsGpuModified(c + PAGE, PAGE); | ||
| 538 | int num = 0; | ||
| 539 | buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 540 | buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); | ||
| 541 | REQUIRE(num == 0); | ||
| 542 | REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 543 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 544 | buffer.FlushCachedWrites(); | ||
| 545 | REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); | ||
| 546 | REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE)); | ||
| 547 | buffer.MarkRegionAsCpuModified(c, WORD); | ||
| 548 | REQUIRE(rasterizer.Count() == 0); | ||
| 549 | } | ||
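The new BufferBase test cases above pin down the semantics of CachedCpuWrite: a cached write must stay invisible to IsRegionCpuModified until FlushCachedWrites promotes it to a regular CPU-modified page. The following is a small self-contained model of just that visibility rule, using plain std::bitset bookkeeping per page rather than the real word-packed implementation.

```cpp
#include <bitset>
#include <cstddef>
#include <iostream>

// Toy page tracker: 64 pages, one bit per page, mirroring the test's WORD-sized buffer.
class CachedWriteModel {
public:
    void CachedCpuWrite(std::size_t page) {
        cached.set(page); // remembered, but not yet visible as a CPU modification
    }

    void FlushCachedWrites() {
        cpu_modified |= cached; // promote every cached write to a real CPU modification
        cached.reset();
    }

    bool IsRegionCpuModified(std::size_t page) const {
        return cpu_modified.test(page);
    }

private:
    std::bitset<64> cpu_modified;
    std::bitset<64> cached;
};

int main() {
    CachedWriteModel buffer;
    buffer.CachedCpuWrite(1);
    std::cout << std::boolalpha;
    std::cout << buffer.IsRegionCpuModified(1) << '\n'; // false, matching the first REQUIRE
    buffer.FlushCachedWrites();
    std::cout << buffer.IsRegionCpuModified(1) << '\n'; // true after the flush
}
```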
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index dd4c29ed3..9b931976a 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -2,10 +2,8 @@ add_subdirectory(host_shaders) | |||
| 2 | 2 | ||
| 3 | add_library(video_core STATIC | 3 | add_library(video_core STATIC |
| 4 | buffer_cache/buffer_base.h | 4 | buffer_cache/buffer_base.h |
| 5 | buffer_cache/buffer_block.h | 5 | buffer_cache/buffer_cache.cpp |
| 6 | buffer_cache/buffer_cache.h | 6 | buffer_cache/buffer_cache.h |
| 7 | buffer_cache/map_interval.cpp | ||
| 8 | buffer_cache/map_interval.h | ||
| 9 | cdma_pusher.cpp | 7 | cdma_pusher.cpp |
| 10 | cdma_pusher.h | 8 | cdma_pusher.h |
| 11 | command_classes/codecs/codec.cpp | 9 | command_classes/codecs/codec.cpp |
| @@ -152,8 +150,6 @@ add_library(video_core STATIC | |||
| 152 | renderer_vulkan/vk_staging_buffer_pool.h | 150 | renderer_vulkan/vk_staging_buffer_pool.h |
| 153 | renderer_vulkan/vk_state_tracker.cpp | 151 | renderer_vulkan/vk_state_tracker.cpp |
| 154 | renderer_vulkan/vk_state_tracker.h | 152 | renderer_vulkan/vk_state_tracker.h |
| 155 | renderer_vulkan/vk_stream_buffer.cpp | ||
| 156 | renderer_vulkan/vk_stream_buffer.h | ||
| 157 | renderer_vulkan/vk_swapchain.cpp | 153 | renderer_vulkan/vk_swapchain.cpp |
| 158 | renderer_vulkan/vk_swapchain.h | 154 | renderer_vulkan/vk_swapchain.h |
| 159 | renderer_vulkan/vk_texture_cache.cpp | 155 | renderer_vulkan/vk_texture_cache.cpp |
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index ee8602ce9..0c00ae280 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h | |||
| @@ -19,6 +19,7 @@ namespace VideoCommon { | |||
| 19 | 19 | ||
| 20 | enum class BufferFlagBits { | 20 | enum class BufferFlagBits { |
| 21 | Picked = 1 << 0, | 21 | Picked = 1 << 0, |
| 22 | CachedWrites = 1 << 1, | ||
| 22 | }; | 23 | }; |
| 23 | DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) | 24 | DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) |
| 24 | 25 | ||
| @@ -40,7 +41,7 @@ class BufferBase { | |||
| 40 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; | 41 | static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; |
| 41 | 42 | ||
| 42 | /// Vector tracking modified pages tightly packed with small vector optimization | 43 | /// Vector tracking modified pages tightly packed with small vector optimization |
| 43 | union WrittenWords { | 44 | union WordsArray { |
| 44 | /// Returns the pointer to the words state | 45 | /// Returns the pointer to the words state |
| 45 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { | 46 | [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { |
| 46 | return is_short ? &stack : heap; | 47 | return is_short ? &stack : heap; |
| @@ -55,49 +56,59 @@ class BufferBase { | |||
| 55 | u64* heap; ///< Not-small buffers pointer to the storage | 56 | u64* heap; ///< Not-small buffers pointer to the storage |
| 56 | }; | 57 | }; |
| 57 | 58 | ||
| 58 | struct GpuCpuWords { | 59 | struct Words { |
| 59 | explicit GpuCpuWords() = default; | 60 | explicit Words() = default; |
| 60 | explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { | 61 | explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} { |
| 61 | if (IsShort()) { | 62 | if (IsShort()) { |
| 62 | cpu.stack = ~u64{0}; | 63 | cpu.stack = ~u64{0}; |
| 63 | gpu.stack = 0; | 64 | gpu.stack = 0; |
| 65 | cached_cpu.stack = 0; | ||
| 66 | untracked.stack = ~u64{0}; | ||
| 64 | } else { | 67 | } else { |
| 65 | // Share allocation between CPU and GPU pages and set their default values | 68 | // Share allocation between CPU and GPU pages and set their default values |
| 66 | const size_t num_words = NumWords(); | 69 | const size_t num_words = NumWords(); |
| 67 | u64* const alloc = new u64[num_words * 2]; | 70 | u64* const alloc = new u64[num_words * 4]; |
| 68 | cpu.heap = alloc; | 71 | cpu.heap = alloc; |
| 69 | gpu.heap = alloc + num_words; | 72 | gpu.heap = alloc + num_words; |
| 73 | cached_cpu.heap = alloc + num_words * 2; | ||
| 74 | untracked.heap = alloc + num_words * 3; | ||
| 70 | std::fill_n(cpu.heap, num_words, ~u64{0}); | 75 | std::fill_n(cpu.heap, num_words, ~u64{0}); |
| 71 | std::fill_n(gpu.heap, num_words, 0); | 76 | std::fill_n(gpu.heap, num_words, 0); |
| 77 | std::fill_n(cached_cpu.heap, num_words, 0); | ||
| 78 | std::fill_n(untracked.heap, num_words, ~u64{0}); | ||
| 72 | } | 79 | } |
| 73 | // Clean up trailing bits | 80 | // Clean up trailing bits |
| 74 | const u64 last_local_page = | 81 | const u64 last_word_size = size_bytes % BYTES_PER_WORD; |
| 75 | Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); | 82 | const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE); |
| 76 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; | 83 | const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; |
| 77 | u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; | 84 | const u64 last_word = (~u64{0} << shift) >> shift; |
| 78 | last_word = (last_word << shift) >> shift; | 85 | cpu.Pointer(IsShort())[NumWords() - 1] = last_word; |
| 86 | untracked.Pointer(IsShort())[NumWords() - 1] = last_word; | ||
| 79 | } | 87 | } |
| 80 | 88 | ||
| 81 | ~GpuCpuWords() { | 89 | ~Words() { |
| 82 | Release(); | 90 | Release(); |
| 83 | } | 91 | } |
| 84 | 92 | ||
| 85 | GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { | 93 | Words& operator=(Words&& rhs) noexcept { |
| 86 | Release(); | 94 | Release(); |
| 87 | size_bytes = rhs.size_bytes; | 95 | size_bytes = rhs.size_bytes; |
| 88 | cpu = rhs.cpu; | 96 | cpu = rhs.cpu; |
| 89 | gpu = rhs.gpu; | 97 | gpu = rhs.gpu; |
| 98 | cached_cpu = rhs.cached_cpu; | ||
| 99 | untracked = rhs.untracked; | ||
| 90 | rhs.cpu.heap = nullptr; | 100 | rhs.cpu.heap = nullptr; |
| 91 | return *this; | 101 | return *this; |
| 92 | } | 102 | } |
| 93 | 103 | ||
| 94 | GpuCpuWords(GpuCpuWords&& rhs) noexcept | 104 | Words(Words&& rhs) noexcept |
| 95 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { | 105 | : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu}, |
| 106 | cached_cpu{rhs.cached_cpu}, untracked{rhs.untracked} { | ||
| 96 | rhs.cpu.heap = nullptr; | 107 | rhs.cpu.heap = nullptr; |
| 97 | } | 108 | } |
| 98 | 109 | ||
| 99 | GpuCpuWords& operator=(const GpuCpuWords&) = delete; | 110 | Words& operator=(const Words&) = delete; |
| 100 | GpuCpuWords(const GpuCpuWords&) = delete; | 111 | Words(const Words&) = delete; |
| 101 | 112 | ||
| 102 | /// Returns true when the buffer fits in the small vector optimization | 113 | /// Returns true when the buffer fits in the small vector optimization |
| 103 | [[nodiscard]] bool IsShort() const noexcept { | 114 | [[nodiscard]] bool IsShort() const noexcept { |
| @@ -118,8 +129,17 @@ class BufferBase { | |||
| 118 | } | 129 | } |
| 119 | 130 | ||
| 120 | u64 size_bytes = 0; | 131 | u64 size_bytes = 0; |
| 121 | WrittenWords cpu; | 132 | WordsArray cpu; |
| 122 | WrittenWords gpu; | 133 | WordsArray gpu; |
| 134 | WordsArray cached_cpu; | ||
| 135 | WordsArray untracked; | ||
| 136 | }; | ||
| 137 | |||
| 138 | enum class Type { | ||
| 139 | CPU, | ||
| 140 | GPU, | ||
| 141 | CachedCPU, | ||
| 142 | Untracked, | ||
| 123 | }; | 143 | }; |
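For orientation, the four types above name the four page bitmaps whose allocation was shown in the constructor: one shared heap block split into four equal slices, one bit per page in each plane. A stand-alone sketch of that layout; the role descriptions are inferred from this diff, not quoted from the header:

    #include <cstddef>
    #include <cstdint>
    #include <memory>

    // One bit per page, four planes sharing a single allocation, as in the
    // constructor above. Roles as inferred from this change:
    //   cpu        - pages written by the guest CPU, pending upload
    //   gpu        - pages written by the host GPU, pending download
    //   cached_cpu - CPU writes held back until FlushCachedWrites()
    //   untracked  - pages currently not write-protected by the rasterizer
    // (Initial fill values omitted; the real constructor seeds cpu/untracked
    //  with ones and the other two planes with zeros.)
    struct StatePlanes {
        explicit StatePlanes(std::size_t num_words)
            : storage{std::make_unique<std::uint64_t[]>(num_words * 4)},
              cpu{storage.get()}, gpu{storage.get() + num_words},
              cached_cpu{storage.get() + num_words * 2},
              untracked{storage.get() + num_words * 3} {}

        std::unique_ptr<std::uint64_t[]> storage;
        std::uint64_t* cpu;
        std::uint64_t* gpu;
        std::uint64_t* cached_cpu;
        std::uint64_t* untracked;
    };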
| 124 | 144 | ||
| 125 | public: | 145 | public: |
| @@ -132,68 +152,93 @@ public: | |||
| 132 | BufferBase& operator=(const BufferBase&) = delete; | 152 | BufferBase& operator=(const BufferBase&) = delete; |
| 133 | BufferBase(const BufferBase&) = delete; | 153 | BufferBase(const BufferBase&) = delete; |
| 134 | 154 | ||
| 155 | BufferBase& operator=(BufferBase&&) = default; | ||
| 156 | BufferBase(BufferBase&&) = default; | ||
| 157 | |||
| 135 | /// Returns the inclusive CPU modified range as a begin-end pair | 158 | /// Returns the inclusive CPU modified range as a begin-end pair |
| 136 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, | 159 | [[nodiscard]] std::pair<u64, u64> ModifiedCpuRegion(VAddr query_cpu_addr, |
| 137 | u64 query_size) const noexcept { | 160 | u64 query_size) const noexcept { |
| 138 | const u64 offset = query_cpu_addr - cpu_addr; | 161 | const u64 offset = query_cpu_addr - cpu_addr; |
| 139 | return ModifiedRegion<false>(offset, query_size); | 162 | return ModifiedRegion<Type::CPU>(offset, query_size); |
| 140 | } | 163 | } |
| 141 | 164 | ||
| 142 | /// Returns the inclusive GPU modified range as a begin-end pair | 165 | /// Returns the inclusive GPU modified range as a begin-end pair |
| 143 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, | 166 | [[nodiscard]] std::pair<u64, u64> ModifiedGpuRegion(VAddr query_cpu_addr, |
| 144 | u64 query_size) const noexcept { | 167 | u64 query_size) const noexcept { |
| 145 | const u64 offset = query_cpu_addr - cpu_addr; | 168 | const u64 offset = query_cpu_addr - cpu_addr; |
| 146 | return ModifiedRegion<true>(offset, query_size); | 169 | return ModifiedRegion<Type::GPU>(offset, query_size); |
| 147 | } | 170 | } |
| 148 | 171 | ||
| 149 | /// Returns true if a region has been modified from the CPU | 172 | /// Returns true if a region has been modified from the CPU |
| 150 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | 173 | [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { |
| 151 | const u64 offset = query_cpu_addr - cpu_addr; | 174 | const u64 offset = query_cpu_addr - cpu_addr; |
| 152 | return IsRegionModified<false>(offset, query_size); | 175 | return IsRegionModified<Type::CPU>(offset, query_size); |
| 153 | } | 176 | } |
| 154 | 177 | ||
| 155 | /// Returns true if a region has been modified from the GPU | 178 | /// Returns true if a region has been modified from the GPU |
| 156 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { | 179 | [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { |
| 157 | const u64 offset = query_cpu_addr - cpu_addr; | 180 | const u64 offset = query_cpu_addr - cpu_addr; |
| 158 | return IsRegionModified<true>(offset, query_size); | 181 | return IsRegionModified<Type::GPU>(offset, query_size); |
| 159 | } | 182 | } |
| 160 | 183 | ||
| 161 | /// Mark region as CPU modified, notifying the rasterizer about this change | 184 | /// Mark region as CPU modified, notifying the rasterizer about this change |
| 162 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | 185 | void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { |
| 163 | ChangeRegionState<true, true>(words.cpu, dirty_cpu_addr, size); | 186 | ChangeRegionState<Type::CPU, true>(dirty_cpu_addr, size); |
| 164 | } | 187 | } |
| 165 | 188 | ||
| 166 | /// Unmark region as CPU modified, notifying the rasterizer about this change | 189 | /// Unmark region as CPU modified, notifying the rasterizer about this change |
| 167 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { | 190 | void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { |
| 168 | ChangeRegionState<false, true>(words.cpu, dirty_cpu_addr, size); | 191 | ChangeRegionState<Type::CPU, false>(dirty_cpu_addr, size); |
| 169 | } | 192 | } |
| 170 | 193 | ||
| 171 | /// Mark region as modified from the host GPU | 194 | /// Mark region as modified from the host GPU |
| 172 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | 195 | void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { |
| 173 | ChangeRegionState<true, false>(words.gpu, dirty_cpu_addr, size); | 196 | ChangeRegionState<Type::GPU, true>(dirty_cpu_addr, size); |
| 174 | } | 197 | } |
| 175 | 198 | ||
| 176 | /// Unmark region as modified from the host GPU | 199 | /// Unmark region as modified from the host GPU |
| 177 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { | 200 | void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { |
| 178 | ChangeRegionState<false, false>(words.gpu, dirty_cpu_addr, size); | 201 | ChangeRegionState<Type::GPU, false>(dirty_cpu_addr, size); |
| 202 | } | ||
| 203 | |||
| 204 | /// Record a CPU write in the region as a cached write, | ||
| 205 | /// deferring the CPU-modified marking until FlushCachedWrites is called. | ||
| 206 | void CachedCpuWrite(VAddr dirty_cpu_addr, u64 size) { | ||
| 207 | flags |= BufferFlagBits::CachedWrites; | ||
| 208 | ChangeRegionState<Type::CachedCPU, true>(dirty_cpu_addr, size); | ||
| 209 | } | ||
| 210 | |||
| 211 | /// Flushes cached CPU writes and notifies the rasterizer about the deltas | ||
| 212 | void FlushCachedWrites() noexcept { | ||
| 213 | flags &= ~BufferFlagBits::CachedWrites; | ||
| 214 | const u64 num_words = NumWords(); | ||
| 215 | const u64* const cached_words = Array<Type::CachedCPU>(); | ||
| 216 | u64* const untracked_words = Array<Type::Untracked>(); | ||
| 217 | u64* const cpu_words = Array<Type::CPU>(); | ||
| 218 | for (u64 word_index = 0; word_index < num_words; ++word_index) { | ||
| 219 | const u64 cached_bits = cached_words[word_index]; | ||
| 220 | NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); | ||
| 221 | untracked_words[word_index] |= cached_bits; | ||
| 222 | cpu_words[word_index] |= cached_bits; | ||
| 223 | } | ||
| 179 | } | 224 | } |
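For reference, a stand-alone sketch of the flush loop above, operating on raw word arrays; the callback stands in for the rasterizer notification and is an assumption of this sketch, not the real interface:

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <vector>

    // Fold pending cached CPU writes into the CPU-modified and untracked planes.
    // 'on_state_change' receives, per word, the bits whose rasterizer tracking
    // state changes (set in the cached plane but not yet in the untracked plane).
    void FlushCachedWritesSketch(std::vector<std::uint64_t>& cpu,
                                 std::vector<std::uint64_t>& untracked,
                                 const std::vector<std::uint64_t>& cached,
                                 const std::function<void(std::size_t, std::uint64_t)>& on_state_change) {
        for (std::size_t word = 0; word < cached.size(); ++word) {
            const std::uint64_t cached_bits = cached[word];
            const std::uint64_t changed = cached_bits & ~untracked[word];
            if (changed != 0) {
                on_state_change(word, changed);
            }
            untracked[word] |= cached_bits;
            cpu[word] |= cached_bits;
        }
    }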
| 180 | 225 | ||
| 181 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified | 226 | /// Call 'func' for each CPU modified range and unmark those pages as CPU modified |
| 182 | template <typename Func> | 227 | template <typename Func> |
| 183 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { | 228 | void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { |
| 184 | ForEachModifiedRange<false, true>(query_cpu_range, size, func); | 229 | ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func); |
| 185 | } | 230 | } |
| 186 | 231 | ||
| 187 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | 232 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified |
| 188 | template <typename Func> | 233 | template <typename Func> |
| 189 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { | 234 | void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { |
| 190 | ForEachModifiedRange<true, false>(query_cpu_range, size, func); | 235 | ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func); |
| 191 | } | 236 | } |
| 192 | 237 | ||
| 193 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified | 238 | /// Call 'func' for each GPU modified range and unmark those pages as GPU modified |
| 194 | template <typename Func> | 239 | template <typename Func> |
| 195 | void ForEachDownloadRange(Func&& func) { | 240 | void ForEachDownloadRange(Func&& func) { |
| 196 | ForEachModifiedRange<true, false>(cpu_addr, SizeBytes(), func); | 241 | ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func); |
| 197 | } | 242 | } |
| 198 | 243 | ||
| 199 | /// Mark buffer as picked | 244 | /// Mark buffer as picked |
| @@ -206,6 +251,16 @@ public: | |||
| 206 | flags &= ~BufferFlagBits::Picked; | 251 | flags &= ~BufferFlagBits::Picked; |
| 207 | } | 252 | } |
| 208 | 253 | ||
| 254 | /// Increases the likelihood of this being a stream buffer | ||
| 255 | void IncreaseStreamScore(int score) noexcept { | ||
| 256 | stream_score += score; | ||
| 257 | } | ||
| 258 | |||
| 259 | /// Returns the likelihood of this being a stream buffer | ||
| 260 | [[nodiscard]] int StreamScore() const noexcept { | ||
| 261 | return stream_score; | ||
| 262 | } | ||
| 263 | |||
| 209 | /// Returns true when vaddr -> vaddr+size is fully contained in the buffer | 264 | /// Returns true when vaddr -> vaddr+size is fully contained in the buffer |
| 210 | [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { | 265 | [[nodiscard]] bool IsInBounds(VAddr addr, u64 size) const noexcept { |
| 211 | return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); | 266 | return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); |
| @@ -216,6 +271,11 @@ public: | |||
| 216 | return True(flags & BufferFlagBits::Picked); | 271 | return True(flags & BufferFlagBits::Picked); |
| 217 | } | 272 | } |
| 218 | 273 | ||
| 274 | /// Returns true when the buffer has pending cached writes | ||
| 275 | [[nodiscard]] bool HasCachedWrites() const noexcept { | ||
| 276 | return True(flags & BufferFlagBits::CachedWrites); | ||
| 277 | } | ||
| 278 | |||
| 219 | /// Returns the base CPU address of the buffer | 279 | /// Returns the base CPU address of the buffer |
| 220 | [[nodiscard]] VAddr CpuAddr() const noexcept { | 280 | [[nodiscard]] VAddr CpuAddr() const noexcept { |
| 221 | return cpu_addr; | 281 | return cpu_addr; |
| @@ -233,26 +293,48 @@ public: | |||
| 233 | } | 293 | } |
| 234 | 294 | ||
| 235 | private: | 295 | private: |
| 296 | template <Type type> | ||
| 297 | u64* Array() noexcept { | ||
| 298 | if constexpr (type == Type::CPU) { | ||
| 299 | return words.cpu.Pointer(IsShort()); | ||
| 300 | } else if constexpr (type == Type::GPU) { | ||
| 301 | return words.gpu.Pointer(IsShort()); | ||
| 302 | } else if constexpr (type == Type::CachedCPU) { | ||
| 303 | return words.cached_cpu.Pointer(IsShort()); | ||
| 304 | } else if constexpr (type == Type::Untracked) { | ||
| 305 | return words.untracked.Pointer(IsShort()); | ||
| 306 | } | ||
| 307 | } | ||
| 308 | |||
| 309 | template <Type type> | ||
| 310 | const u64* Array() const noexcept { | ||
| 311 | if constexpr (type == Type::CPU) { | ||
| 312 | return words.cpu.Pointer(IsShort()); | ||
| 313 | } else if constexpr (type == Type::GPU) { | ||
| 314 | return words.gpu.Pointer(IsShort()); | ||
| 315 | } else if constexpr (type == Type::CachedCPU) { | ||
| 316 | return words.cached_cpu.Pointer(IsShort()); | ||
| 317 | } else if constexpr (type == Type::Untracked) { | ||
| 318 | return words.untracked.Pointer(IsShort()); | ||
| 319 | } | ||
| 320 | } | ||
| 321 | |||
| 236 | /** | 322 | /** |
| 237 | * Change the state of a range of pages | 323 | * Change the state of a range of pages |
| 238 | * | 324 | * |
| 239 | * @param written_words Pages to be marked or unmarked as modified | ||
| 240 | * @param dirty_addr Base address to mark or unmark as modified | 325 | * @param dirty_addr Base address to mark or unmark as modified |
| 241 | * @param size Size in bytes to mark or unmark as modified | 326 | * @param size Size in bytes to mark or unmark as modified |
| 242 | * | ||
| 243 | * @tparam enable True when the bits will be set to one, false for zero | ||
| 244 | * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes | ||
| 245 | */ | 327 | */ |
| 246 | template <bool enable, bool notify_rasterizer> | 328 | template <Type type, bool enable> |
| 247 | void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, | 329 | void ChangeRegionState(u64 dirty_addr, s64 size) noexcept(type == Type::GPU) { |
| 248 | s64 size) noexcept(!notify_rasterizer) { | ||
| 249 | const s64 difference = dirty_addr - cpu_addr; | 330 | const s64 difference = dirty_addr - cpu_addr; |
| 250 | const u64 offset = std::max<s64>(difference, 0); | 331 | const u64 offset = std::max<s64>(difference, 0); |
| 251 | size += std::min<s64>(difference, 0); | 332 | size += std::min<s64>(difference, 0); |
| 252 | if (offset >= SizeBytes() || size < 0) { | 333 | if (offset >= SizeBytes() || size < 0) { |
| 253 | return; | 334 | return; |
| 254 | } | 335 | } |
| 255 | u64* const state_words = written_words.Pointer(IsShort()); | 336 | u64* const untracked_words = Array<Type::Untracked>(); |
| 337 | u64* const state_words = Array<type>(); | ||
| 256 | const u64 offset_end = std::min(offset + size, SizeBytes()); | 338 | const u64 offset_end = std::min(offset + size, SizeBytes()); |
| 257 | const u64 begin_page_index = offset / BYTES_PER_PAGE; | 339 | const u64 begin_page_index = offset / BYTES_PER_PAGE; |
| 258 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; | 340 | const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; |
| @@ -268,13 +350,19 @@ private: | |||
| 268 | u64 bits = ~u64{0}; | 350 | u64 bits = ~u64{0}; |
| 269 | bits = (bits >> right_offset) << right_offset; | 351 | bits = (bits >> right_offset) << right_offset; |
| 270 | bits = (bits << left_offset) >> left_offset; | 352 | bits = (bits << left_offset) >> left_offset; |
| 271 | if constexpr (notify_rasterizer) { | 353 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { |
| 272 | NotifyRasterizer<!enable>(word_index, state_words[word_index], bits); | 354 | NotifyRasterizer<!enable>(word_index, untracked_words[word_index], bits); |
| 273 | } | 355 | } |
| 274 | if constexpr (enable) { | 356 | if constexpr (enable) { |
| 275 | state_words[word_index] |= bits; | 357 | state_words[word_index] |= bits; |
| 358 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 359 | untracked_words[word_index] |= bits; | ||
| 360 | } | ||
| 276 | } else { | 361 | } else { |
| 277 | state_words[word_index] &= ~bits; | 362 | state_words[word_index] &= ~bits; |
| 363 | if constexpr (type == Type::CPU || type == Type::CachedCPU) { | ||
| 364 | untracked_words[word_index] &= ~bits; | ||
| 365 | } | ||
| 278 | } | 366 | } |
| 279 | page_index = 0; | 367 | page_index = 0; |
| 280 | ++word_index; | 368 | ++word_index; |
| @@ -291,7 +379,7 @@ private: | |||
| 291 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages | 379 | * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages |
| 292 | */ | 380 | */ |
| 293 | template <bool add_to_rasterizer> | 381 | template <bool add_to_rasterizer> |
| 294 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { | 382 | void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) const { |
| 295 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; | 383 | u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; |
| 296 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; | 384 | VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; |
| 297 | while (changed_bits != 0) { | 385 | while (changed_bits != 0) { |
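The loop being revised here walks a 64-bit page mask as contiguous runs using the C++20 <bit> helpers. A hedged stand-alone version of that walk; the constant and the callback are assumptions of the sketch:

    #include <bit>
    #include <cstdint>

    // Assumed granularity: 4 KiB per bit, 64 bits per word.
    constexpr std::uint64_t BYTES_PER_PAGE = 4096;

    // Invoke 'func(begin_addr, size_bytes)' once per contiguous run of set bits.
    template <typename Func>
    void ForEachContiguousRun(std::uint64_t changed_bits, std::uint64_t addr, Func&& func) {
        while (changed_bits != 0) {
            const int skip = std::countr_zero(changed_bits); // clear pages before the run
            addr += static_cast<std::uint64_t>(skip) * BYTES_PER_PAGE;
            changed_bits >>= skip;

            const int run = std::countr_one(changed_bits);   // length of the set run
            func(addr, static_cast<std::uint64_t>(run) * BYTES_PER_PAGE);
            addr += static_cast<std::uint64_t>(run) * BYTES_PER_PAGE;
            // Guard the shift: run can be 64 when the whole word is set.
            changed_bits = run < 64 ? (changed_bits >> run) : 0;
        }
    }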
| @@ -315,21 +403,20 @@ private: | |||
| 315 | * @param query_cpu_range Base CPU address to loop over | 403 | * @param query_cpu_range Base CPU address to loop over |
| 316 | * @param size Size in bytes of the CPU range to loop over | 404 | * @param size Size in bytes of the CPU range to loop over |
| 317 | * @param func Function to call for each turned off region | 405 | * @param func Function to call for each turned off region |
| 318 | * | ||
| 319 | * @tparam gpu True for host GPU pages, false for CPU pages | ||
| 320 | * @tparam notify_rasterizer True when the rasterizer should be notified about state changes | ||
| 321 | */ | 406 | */ |
| 322 | template <bool gpu, bool notify_rasterizer, typename Func> | 407 | template <Type type, typename Func> |
| 323 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { | 408 | void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { |
| 409 | static_assert(type != Type::Untracked); | ||
| 410 | |||
| 324 | const s64 difference = query_cpu_range - cpu_addr; | 411 | const s64 difference = query_cpu_range - cpu_addr; |
| 325 | const u64 query_begin = std::max<s64>(difference, 0); | 412 | const u64 query_begin = std::max<s64>(difference, 0); |
| 326 | size += std::min<s64>(difference, 0); | 413 | size += std::min<s64>(difference, 0); |
| 327 | if (query_begin >= SizeBytes() || size < 0) { | 414 | if (query_begin >= SizeBytes() || size < 0) { |
| 328 | return; | 415 | return; |
| 329 | } | 416 | } |
| 330 | const u64* const cpu_words = words.cpu.Pointer(IsShort()); | 417 | u64* const untracked_words = Array<Type::Untracked>(); |
| 418 | u64* const state_words = Array<type>(); | ||
| 331 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); | 419 | const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes()); |
| 332 | u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | ||
| 333 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; | 420 | u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; |
| 334 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); | 421 | u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); |
| 335 | 422 | ||
| @@ -345,7 +432,8 @@ private: | |||
| 345 | const u64 word_index_end = std::distance(state_words, last_modified_word); | 432 | const u64 word_index_end = std::distance(state_words, last_modified_word); |
| 346 | 433 | ||
| 347 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); | 434 | const unsigned local_page_begin = std::countr_zero(*first_modified_word); |
| 348 | const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); | 435 | const unsigned local_page_end = |
| 436 | static_cast<unsigned>(PAGES_PER_WORD) - std::countl_zero(last_modified_word[-1]); | ||
| 349 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; | 437 | const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; |
| 350 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; | 438 | const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; |
| 351 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; | 439 | const u64 query_page_begin = query_begin / BYTES_PER_PAGE; |
| @@ -371,11 +459,13 @@ private: | |||
| 371 | const u64 current_word = state_words[word_index] & bits; | 459 | const u64 current_word = state_words[word_index] & bits; |
| 372 | state_words[word_index] &= ~bits; | 460 | state_words[word_index] &= ~bits; |
| 373 | 461 | ||
| 374 | // Exclude CPU modified pages when visiting GPU pages | 462 | if constexpr (type == Type::CPU) { |
| 375 | const u64 word = current_word & ~(gpu ? cpu_words[word_index] : 0); | 463 | const u64 current_bits = untracked_words[word_index] & bits; |
| 376 | if constexpr (notify_rasterizer) { | 464 | untracked_words[word_index] &= ~bits; |
| 377 | NotifyRasterizer<true>(word_index, word, ~u64{0}); | 465 | NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); |
| 378 | } | 466 | } |
| 467 | // Exclude CPU modified pages when visiting GPU pages | ||
| 468 | const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||
| 379 | u64 page = page_begin; | 469 | u64 page = page_begin; |
| 380 | page_begin = 0; | 470 | page_begin = 0; |
| 381 | 471 | ||
| @@ -416,17 +506,20 @@ private: | |||
| 416 | * @param offset Offset in bytes from the start of the buffer | 506 | * @param offset Offset in bytes from the start of the buffer |
| 417 | * @param size Size in bytes of the region to query for modifications | 507 | * @param size Size in bytes of the region to query for modifications |
| 418 | */ | 508 | */ |
| 419 | template <bool gpu> | 509 | template <Type type> |
| 420 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | 510 | [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { |
| 421 | const u64* const cpu_words = words.cpu.Pointer(IsShort()); | 511 | static_assert(type != Type::Untracked); |
| 422 | const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | 512 | |
| 513 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 514 | const u64* const state_words = Array<type>(); | ||
| 423 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | 515 | const u64 num_query_words = size / BYTES_PER_WORD + 1; |
| 424 | const u64 word_begin = offset / BYTES_PER_WORD; | 516 | const u64 word_begin = offset / BYTES_PER_WORD; |
| 425 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); | 517 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); |
| 426 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | 518 | const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); |
| 427 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | 519 | u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; |
| 428 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | 520 | for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { |
| 429 | const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); | 521 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; |
| 522 | const u64 word = state_words[word_index] & ~off_word; | ||
| 430 | if (word == 0) { | 523 | if (word == 0) { |
| 431 | continue; | 524 | continue; |
| 432 | } | 525 | } |
| @@ -445,13 +538,13 @@ private: | |||
| 445 | * | 538 | * |
| 446 | * @param offset Offset in bytes from the start of the buffer | 539 | * @param offset Offset in bytes from the start of the buffer |
| 447 | * @param size Size in bytes of the region to query for modifications | 540 | * @param size Size in bytes of the region to query for modifications |
| 448 | * | ||
| 449 | * @tparam gpu True to query GPU modified pages, false for CPU pages | ||
| 450 | */ | 541 | */ |
| 451 | template <bool gpu> | 542 | template <Type type> |
| 452 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { | 543 | [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept { |
| 453 | const u64* const cpu_words = words.cpu.Pointer(IsShort()); | 544 | static_assert(type != Type::Untracked); |
| 454 | const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); | 545 | |
| 546 | const u64* const untracked_words = Array<Type::Untracked>(); | ||
| 547 | const u64* const state_words = Array<type>(); | ||
| 455 | const u64 num_query_words = size / BYTES_PER_WORD + 1; | 548 | const u64 num_query_words = size / BYTES_PER_WORD + 1; |
| 456 | const u64 word_begin = offset / BYTES_PER_WORD; | 549 | const u64 word_begin = offset / BYTES_PER_WORD; |
| 457 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); | 550 | const u64 word_end = std::min(word_begin + num_query_words, NumWords()); |
| @@ -460,7 +553,8 @@ private: | |||
| 460 | u64 begin = std::numeric_limits<u64>::max(); | 553 | u64 begin = std::numeric_limits<u64>::max(); |
| 461 | u64 end = 0; | 554 | u64 end = 0; |
| 462 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { | 555 | for (u64 word_index = word_begin; word_index < word_end; ++word_index) { |
| 463 | const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); | 556 | const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; |
| 557 | const u64 word = state_words[word_index] & ~off_word; | ||
| 464 | if (word == 0) { | 558 | if (word == 0) { |
| 465 | continue; | 559 | continue; |
| 466 | } | 560 | } |
| @@ -488,8 +582,9 @@ private: | |||
| 488 | 582 | ||
| 489 | RasterizerInterface* rasterizer = nullptr; | 583 | RasterizerInterface* rasterizer = nullptr; |
| 490 | VAddr cpu_addr = 0; | 584 | VAddr cpu_addr = 0; |
| 491 | GpuCpuWords words; | 585 | Words words; |
| 492 | BufferFlagBits flags{}; | 586 | BufferFlagBits flags{}; |
| 587 | int stream_score = 0; | ||
| 493 | }; | 588 | }; |
| 494 | 589 | ||
| 495 | } // namespace VideoCommon | 590 | } // namespace VideoCommon |
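As a rough sense of scale for the move from two state planes to four: assuming the same 4 KiB pages and 64 pages per word as the earlier sketch, each plane needs one 64-bit word per 256 KiB of buffer, and the heap case now allocates four planes from a single block. A small worked check:

    #include <cstdint>
    #include <cstdio>

    // Assumed constants, matching the sketches above: 4 KiB pages, 64 pages/word.
    constexpr std::uint64_t BYTES_PER_WORD = 4096ULL * 64; // 256 KiB tracked per word

    constexpr std::uint64_t TrackingBytes(std::uint64_t buffer_size) {
        const std::uint64_t num_words = (buffer_size + BYTES_PER_WORD - 1) / BYTES_PER_WORD;
        return num_words * sizeof(std::uint64_t) * 4; // four state planes
    }

    int main() {
        // A 4 MiB buffer needs 16 words per plane -> 512 bytes of tracking state.
        std::printf("%llu\n", static_cast<unsigned long long>(TrackingBytes(4 << 20)));
    }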
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h deleted file mode 100644 index e9306194a..000000000 --- a/src/video_core/buffer_cache/buffer_block.h +++ /dev/null | |||
| @@ -1,62 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "common/common_types.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | class BufferBlock { | ||
| 12 | public: | ||
| 13 | [[nodiscard]] bool Overlaps(VAddr start, VAddr end) const { | ||
| 14 | return (cpu_addr < end) && (cpu_addr_end > start); | ||
| 15 | } | ||
| 16 | |||
| 17 | [[nodiscard]] bool IsInside(VAddr other_start, VAddr other_end) const { | ||
| 18 | return cpu_addr <= other_start && other_end <= cpu_addr_end; | ||
| 19 | } | ||
| 20 | |||
| 21 | [[nodiscard]] std::size_t Offset(VAddr in_addr) const { | ||
| 22 | return static_cast<std::size_t>(in_addr - cpu_addr); | ||
| 23 | } | ||
| 24 | |||
| 25 | [[nodiscard]] VAddr CpuAddr() const { | ||
| 26 | return cpu_addr; | ||
| 27 | } | ||
| 28 | |||
| 29 | [[nodiscard]] VAddr CpuAddrEnd() const { | ||
| 30 | return cpu_addr_end; | ||
| 31 | } | ||
| 32 | |||
| 33 | void SetCpuAddr(VAddr new_addr) { | ||
| 34 | cpu_addr = new_addr; | ||
| 35 | cpu_addr_end = new_addr + size; | ||
| 36 | } | ||
| 37 | |||
| 38 | [[nodiscard]] std::size_t Size() const { | ||
| 39 | return size; | ||
| 40 | } | ||
| 41 | |||
| 42 | [[nodiscard]] u64 Epoch() const { | ||
| 43 | return epoch; | ||
| 44 | } | ||
| 45 | |||
| 46 | void SetEpoch(u64 new_epoch) { | ||
| 47 | epoch = new_epoch; | ||
| 48 | } | ||
| 49 | |||
| 50 | protected: | ||
| 51 | explicit BufferBlock(VAddr cpu_addr_, std::size_t size_) : size{size_} { | ||
| 52 | SetCpuAddr(cpu_addr_); | ||
| 53 | } | ||
| 54 | |||
| 55 | private: | ||
| 56 | VAddr cpu_addr{}; | ||
| 57 | VAddr cpu_addr_end{}; | ||
| 58 | std::size_t size{}; | ||
| 59 | u64 epoch{}; | ||
| 60 | }; | ||
| 61 | |||
| 62 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp new file mode 100644 index 000000000..ab32294c8 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.cpp | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "common/microprofile.h" | ||
| 6 | |||
| 7 | namespace VideoCommon { | ||
| 8 | |||
| 9 | MICROPROFILE_DEFINE(GPU_PrepareBuffers, "GPU", "Prepare buffers", MP_RGB(224, 128, 128)); | ||
| 10 | MICROPROFILE_DEFINE(GPU_BindUploadBuffers, "GPU", "Bind and upload buffers", MP_RGB(224, 128, 128)); | ||
| 11 | MICROPROFILE_DEFINE(GPU_DownloadMemory, "GPU", "Download buffers", MP_RGB(224, 128, 128)); | ||
| 12 | |||
| 13 | } // namespace VideoCommon | ||
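This small translation unit exists so the MicroProfile timers are defined exactly once while the template-heavy header only declares them. A minimal sketch of the define/declare/scope pattern with a hypothetical timer name (the name, group, and color below are made up for illustration):

    #include "common/microprofile.h"

    // example.cpp -- define the timer once.
    MICROPROFILE_DEFINE(GPU_ExampleTimer, "GPU", "Example pass", MP_RGB(128, 224, 128));

    // example.h -- declare it for every includer.
    MICROPROFILE_DECLARE(GPU_ExampleTimer);

    // At a call site, time the enclosing scope.
    void ExamplePass() {
        MICROPROFILE_SCOPE(GPU_ExampleTimer);
        // ... work being measured ...
    }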
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 83b9ee871..2a6844ab1 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -4,591 +4,1289 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <list> | 7 | #include <algorithm> |
| 8 | #include <array> | ||
| 9 | #include <deque> | ||
| 8 | #include <memory> | 10 | #include <memory> |
| 9 | #include <mutex> | 11 | #include <mutex> |
| 12 | #include <span> | ||
| 10 | #include <unordered_map> | 13 | #include <unordered_map> |
| 11 | #include <unordered_set> | ||
| 12 | #include <utility> | ||
| 13 | #include <vector> | 14 | #include <vector> |
| 14 | 15 | ||
| 15 | #include <boost/container/small_vector.hpp> | 16 | #include <boost/container/small_vector.hpp> |
| 16 | #include <boost/icl/interval_set.hpp> | ||
| 17 | #include <boost/intrusive/set.hpp> | ||
| 18 | 17 | ||
| 19 | #include "common/alignment.h" | ||
| 20 | #include "common/assert.h" | ||
| 21 | #include "common/common_types.h" | 18 | #include "common/common_types.h" |
| 22 | #include "common/logging/log.h" | 19 | #include "common/div_ceil.h" |
| 23 | #include "core/core.h" | 20 | #include "common/microprofile.h" |
| 21 | #include "common/scope_exit.h" | ||
| 24 | #include "core/memory.h" | 22 | #include "core/memory.h" |
| 25 | #include "core/settings.h" | 23 | #include "core/settings.h" |
| 26 | #include "video_core/buffer_cache/buffer_block.h" | 24 | #include "video_core/buffer_cache/buffer_base.h" |
| 27 | #include "video_core/buffer_cache/map_interval.h" | 25 | #include "video_core/delayed_destruction_ring.h" |
| 26 | #include "video_core/dirty_flags.h" | ||
| 27 | #include "video_core/engines/kepler_compute.h" | ||
| 28 | #include "video_core/engines/maxwell_3d.h" | ||
| 28 | #include "video_core/memory_manager.h" | 29 | #include "video_core/memory_manager.h" |
| 29 | #include "video_core/rasterizer_interface.h" | 30 | #include "video_core/rasterizer_interface.h" |
| 31 | #include "video_core/texture_cache/slot_vector.h" | ||
| 32 | #include "video_core/texture_cache/types.h" | ||
| 30 | 33 | ||
| 31 | namespace VideoCommon { | 34 | namespace VideoCommon { |
| 32 | 35 | ||
| 33 | template <typename Buffer, typename BufferType, typename StreamBuffer> | 36 | MICROPROFILE_DECLARE(GPU_PrepareBuffers); |
| 37 | MICROPROFILE_DECLARE(GPU_BindUploadBuffers); | ||
| 38 | MICROPROFILE_DECLARE(GPU_DownloadMemory); | ||
| 39 | |||
| 40 | using BufferId = SlotId; | ||
| 41 | |||
| 42 | constexpr u32 NUM_VERTEX_BUFFERS = 32; | ||
| 43 | constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; | ||
| 44 | constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; | ||
| 45 | constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; | ||
| 46 | constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||
| 47 | constexpr u32 NUM_STAGES = 5; | ||
| 48 | |||
| 49 | template <typename P> | ||
| 34 | class BufferCache { | 50 | class BufferCache { |
| 35 | using IntervalSet = boost::icl::interval_set<VAddr>; | 51 | // Page size for caching purposes. |
| 36 | using IntervalType = typename IntervalSet::interval_type; | 52 | // This is unrelated to the CPU page size and can be tuned to whatever value proves optimal. |
| 37 | using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>; | 53 | static constexpr u32 PAGE_BITS = 16; |
| 54 | static constexpr u64 PAGE_SIZE = u64{1} << PAGE_BITS; | ||
| 55 | |||
| 56 | static constexpr bool IS_OPENGL = P::IS_OPENGL; | ||
| 57 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = | ||
| 58 | P::HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS; | ||
| 59 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = | ||
| 60 | P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||
| 61 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||
| 62 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||
| 63 | static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||
| 64 | |||
| 65 | static constexpr BufferId NULL_BUFFER_ID{0}; | ||
| 66 | |||
| 67 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||
| 68 | |||
| 69 | using Runtime = typename P::Runtime; | ||
| 70 | using Buffer = typename P::Buffer; | ||
| 71 | |||
| 72 | struct Empty {}; | ||
| 73 | |||
| 74 | struct OverlapResult { | ||
| 75 | std::vector<BufferId> ids; | ||
| 76 | VAddr begin; | ||
| 77 | VAddr end; | ||
| 78 | bool has_stream_leap = false; | ||
| 79 | }; | ||
| 38 | 80 | ||
| 39 | static constexpr u64 WRITE_PAGE_BIT = 11; | 81 | struct Binding { |
| 40 | static constexpr u64 BLOCK_PAGE_BITS = 21; | 82 | VAddr cpu_addr{}; |
| 41 | static constexpr u64 BLOCK_PAGE_SIZE = 1ULL << BLOCK_PAGE_BITS; | 83 | u32 size{}; |
| 84 | BufferId buffer_id; | ||
| 85 | }; | ||
| 42 | 86 | ||
| 43 | public: | 87 | static constexpr Binding NULL_BINDING{ |
| 44 | struct BufferInfo { | 88 | .cpu_addr = 0, |
| 45 | BufferType handle; | 89 | .size = 0, |
| 46 | u64 offset; | 90 | .buffer_id = NULL_BUFFER_ID, |
| 47 | u64 address; | ||
| 48 | }; | 91 | }; |
| 49 | 92 | ||
| 50 | BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | 93 | public: |
| 51 | bool is_written = false, bool use_fast_cbuf = false) { | 94 | static constexpr u32 SKIP_CACHE_SIZE = 4096; |
| 52 | std::lock_guard lock{mutex}; | ||
| 53 | 95 | ||
| 54 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | 96 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |
| 55 | if (!cpu_addr) { | 97 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 56 | return GetEmptyBuffer(size); | 98 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 57 | } | 99 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, |
| 100 | Runtime& runtime_); | ||
| 58 | 101 | ||
| 59 | // Cache management is a big overhead, so only cache entries with a given size. | 102 | void TickFrame(); |
| 60 | // TODO: Figure out which size is the best for given games. | ||
| 61 | constexpr std::size_t max_stream_size = 0x800; | ||
| 62 | if (use_fast_cbuf || size < max_stream_size) { | ||
| 63 | if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) { | ||
| 64 | const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size); | ||
| 65 | if (use_fast_cbuf) { | ||
| 66 | u8* dest; | ||
| 67 | if (is_granular) { | ||
| 68 | dest = gpu_memory.GetPointer(gpu_addr); | ||
| 69 | } else { | ||
| 70 | staging_buffer.resize(size); | ||
| 71 | dest = staging_buffer.data(); | ||
| 72 | gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); | ||
| 73 | } | ||
| 74 | return ConstBufferUpload(dest, size); | ||
| 75 | } | ||
| 76 | if (is_granular) { | ||
| 77 | u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); | ||
| 78 | return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { | ||
| 79 | std::memcpy(dest, host_ptr, size); | ||
| 80 | }); | ||
| 81 | } else { | ||
| 82 | return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) { | ||
| 83 | gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); | ||
| 84 | }); | ||
| 85 | } | ||
| 86 | } | ||
| 87 | } | ||
| 88 | 103 | ||
| 89 | Buffer* const block = GetBlock(*cpu_addr, size); | 104 | void WriteMemory(VAddr cpu_addr, u64 size); |
| 90 | MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size); | ||
| 91 | if (!map) { | ||
| 92 | return GetEmptyBuffer(size); | ||
| 93 | } | ||
| 94 | if (is_written) { | ||
| 95 | map->MarkAsModified(true, GetModifiedTicks()); | ||
| 96 | if (Settings::IsGPULevelHigh() && | ||
| 97 | Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | ||
| 98 | MarkForAsyncFlush(map); | ||
| 99 | } | ||
| 100 | if (!map->is_written) { | ||
| 101 | map->is_written = true; | ||
| 102 | MarkRegionAsWritten(map->start, map->end - 1); | ||
| 103 | } | ||
| 104 | } | ||
| 105 | 105 | ||
| 106 | return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; | 106 | void CachedWriteMemory(VAddr cpu_addr, u64 size); |
| 107 | } | ||
| 108 | 107 | ||
| 109 | /// Uploads from host memory. Returns the OpenGL buffer where it's located and its offset. | 108 | void DownloadMemory(VAddr cpu_addr, u64 size); |
| 110 | BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, | ||
| 111 | std::size_t alignment = 4) { | ||
| 112 | std::lock_guard lock{mutex}; | ||
| 113 | return StreamBufferUpload(size, alignment, [raw_pointer, size](u8* dest) { | ||
| 114 | std::memcpy(dest, raw_pointer, size); | ||
| 115 | }); | ||
| 116 | } | ||
| 117 | 109 | ||
| 118 | /// Prepares the buffer cache for data uploading | 110 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size); |
| 119 | /// @param max_size Maximum number of bytes that will be uploaded | ||
| 120 | /// @return True when a stream buffer invalidation was required, false otherwise | ||
| 121 | void Map(std::size_t max_size) { | ||
| 122 | std::lock_guard lock{mutex}; | ||
| 123 | 111 | ||
| 124 | std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4); | 112 | void UpdateGraphicsBuffers(bool is_indexed); |
| 125 | buffer_offset = buffer_offset_base; | ||
| 126 | } | ||
| 127 | 113 | ||
| 128 | /// Finishes the upload stream | 114 | void UpdateComputeBuffers(); |
| 129 | void Unmap() { | ||
| 130 | std::lock_guard lock{mutex}; | ||
| 131 | stream_buffer.Unmap(buffer_offset - buffer_offset_base); | ||
| 132 | } | ||
| 133 | 115 | ||
| 134 | /// Function called at the end of each frame, intended for deferred operations | 116 | void BindHostGeometryBuffers(bool is_indexed); |
| 135 | void TickFrame() { | ||
| 136 | ++epoch; | ||
| 137 | 117 | ||
| 138 | while (!pending_destruction.empty()) { | 118 | void BindHostStageBuffers(size_t stage); |
| 139 | // Delay at least 4 frames before destruction. | ||
| 140 | // This is due to triple buffering happening on some drivers. | ||
| 141 | static constexpr u64 epochs_to_destroy = 5; | ||
| 142 | if (pending_destruction.front()->Epoch() + epochs_to_destroy > epoch) { | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | pending_destruction.pop(); | ||
| 146 | } | ||
| 147 | } | ||
| 148 | 119 | ||
| 149 | /// Write any cached resources overlapping the specified region back to memory | 120 | void BindHostComputeBuffers(); |
| 150 | void FlushRegion(VAddr addr, std::size_t size) { | ||
| 151 | std::lock_guard lock{mutex}; | ||
| 152 | 121 | ||
| 153 | VectorMapInterval objects = GetMapsInRange(addr, size); | 122 | void SetEnabledUniformBuffers(size_t stage, u32 enabled); |
| 154 | std::sort(objects.begin(), objects.end(), | ||
| 155 | [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); | ||
| 156 | for (MapInterval* object : objects) { | ||
| 157 | if (object->is_modified && object->is_registered) { | ||
| 158 | mutex.unlock(); | ||
| 159 | FlushMap(object); | ||
| 160 | mutex.lock(); | ||
| 161 | } | ||
| 162 | } | ||
| 163 | } | ||
| 164 | 123 | ||
| 165 | bool MustFlushRegion(VAddr addr, std::size_t size) { | 124 | void SetEnabledComputeUniformBuffers(u32 enabled); |
| 166 | std::lock_guard lock{mutex}; | ||
| 167 | 125 | ||
| 168 | const VectorMapInterval objects = GetMapsInRange(addr, size); | 126 | void UnbindGraphicsStorageBuffers(size_t stage); |
| 169 | return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { | ||
| 170 | return map->is_modified && map->is_registered; | ||
| 171 | }); | ||
| 172 | } | ||
| 173 | 127 | ||
| 174 | /// Mark the specified region as being invalidated | 128 | void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, |
| 175 | void InvalidateRegion(VAddr addr, u64 size) { | 129 | bool is_written); |
| 176 | std::lock_guard lock{mutex}; | ||
| 177 | 130 | ||
| 178 | for (auto& object : GetMapsInRange(addr, size)) { | 131 | void UnbindComputeStorageBuffers(); |
| 179 | if (object->is_registered) { | ||
| 180 | Unregister(object); | ||
| 181 | } | ||
| 182 | } | ||
| 183 | } | ||
| 184 | 132 | ||
| 185 | void OnCPUWrite(VAddr addr, std::size_t size) { | 133 | void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, |
| 186 | std::lock_guard lock{mutex}; | 134 | bool is_written); |
| 187 | 135 | ||
| 188 | for (MapInterval* object : GetMapsInRange(addr, size)) { | 136 | void FlushCachedWrites(); |
| 189 | if (object->is_memory_marked && object->is_registered) { | ||
| 190 | UnmarkMemory(object); | ||
| 191 | object->is_sync_pending = true; | ||
| 192 | marked_for_unregister.emplace_back(object); | ||
| 193 | } | ||
| 194 | } | ||
| 195 | } | ||
| 196 | 137 | ||
| 197 | void SyncGuestHost() { | 138 | /// Return true when there are uncommitted buffers to be downloaded |
| 198 | std::lock_guard lock{mutex}; | 139 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |
| 199 | 140 | ||
| 200 | for (auto& object : marked_for_unregister) { | 141 | /// Return true when the caller should wait for async downloads |
| 201 | if (object->is_registered) { | 142 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; |
| 202 | object->is_sync_pending = false; | 143 | |
| 203 | Unregister(object); | 144 | /// Commit asynchronous downloads |
| 204 | } | 145 | void CommitAsyncFlushes(); |
| 146 | |||
| 147 | /// Pop asynchronous downloads | ||
| 148 | void PopAsyncFlushes(); | ||
| 149 | |||
| 150 | /// Return true when a CPU region is modified from the GPU | ||
| 151 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 152 | |||
| 153 | std::mutex mutex; | ||
| 154 | |||
| 155 | private: | ||
| 156 | template <typename Func> | ||
| 157 | static void ForEachEnabledBit(u32 enabled_mask, Func&& func) { | ||
| 158 | for (u32 index = 0; enabled_mask != 0; ++index, enabled_mask >>= 1) { | ||
| 159 | const int disabled_bits = std::countr_zero(enabled_mask); | ||
| 160 | index += disabled_bits; | ||
| 161 | enabled_mask >>= disabled_bits; | ||
| 162 | func(index); | ||
| 205 | } | 163 | } |
| 206 | marked_for_unregister.clear(); | ||
| 207 | } | 164 | } |
| 208 | 165 | ||
| 209 | void CommitAsyncFlushes() { | 166 | template <typename Func> |
| 210 | if (uncommitted_flushes) { | 167 | void ForEachBufferInRange(VAddr cpu_addr, u64 size, Func&& func) { |
| 211 | auto commit_list = std::make_shared<std::list<MapInterval*>>(); | 168 | const u64 page_end = Common::DivCeil(cpu_addr + size, PAGE_SIZE); |
| 212 | for (MapInterval* map : *uncommitted_flushes) { | 169 | for (u64 page = cpu_addr >> PAGE_BITS; page < page_end;) { |
| 213 | if (map->is_registered && map->is_modified) { | 170 | const BufferId buffer_id = page_table[page]; |
| 214 | // TODO(Blinkhawk): Implement backend asynchronous flushing | 171 | if (!buffer_id) { |
| 215 | // AsyncFlushMap(map) | 172 | ++page; |
| 216 | commit_list->push_back(map); | 173 | continue; |
| 217 | } | ||
| 218 | } | ||
| 219 | if (!commit_list->empty()) { | ||
| 220 | committed_flushes.push_back(commit_list); | ||
| 221 | } else { | ||
| 222 | committed_flushes.emplace_back(); | ||
| 223 | } | 174 | } |
| 224 | } else { | 175 | Buffer& buffer = slot_buffers[buffer_id]; |
| 225 | committed_flushes.emplace_back(); | 176 | func(buffer_id, buffer); |
| 177 | |||
| 178 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 179 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 226 | } | 180 | } |
| 227 | uncommitted_flushes.reset(); | ||
| 228 | } | 181 | } |
| 229 | 182 | ||
| 230 | bool ShouldWaitAsyncFlushes() const { | 183 | static bool IsRangeGranular(VAddr cpu_addr, size_t size) { |
| 231 | return !committed_flushes.empty() && committed_flushes.front() != nullptr; | 184 | return (cpu_addr & ~Core::Memory::PAGE_MASK) == |
| 185 | ((cpu_addr + size) & ~Core::Memory::PAGE_MASK); | ||
| 232 | } | 186 | } |
| 233 | 187 | ||
| 234 | bool HasUncommittedFlushes() const { | 188 | void BindHostIndexBuffer(); |
| 235 | return uncommitted_flushes != nullptr; | ||
| 236 | } | ||
| 237 | 189 | ||
| 238 | void PopAsyncFlushes() { | 190 | void BindHostVertexBuffers(); |
| 239 | if (committed_flushes.empty()) { | ||
| 240 | return; | ||
| 241 | } | ||
| 242 | auto& flush_list = committed_flushes.front(); | ||
| 243 | if (!flush_list) { | ||
| 244 | committed_flushes.pop_front(); | ||
| 245 | return; | ||
| 246 | } | ||
| 247 | for (MapInterval* map : *flush_list) { | ||
| 248 | if (map->is_registered) { | ||
| 249 | // TODO(Blinkhawk): Replace this for reading the asynchronous flush | ||
| 250 | FlushMap(map); | ||
| 251 | } | ||
| 252 | } | ||
| 253 | committed_flushes.pop_front(); | ||
| 254 | } | ||
| 255 | 191 | ||
| 256 | virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; | 192 | void BindHostGraphicsUniformBuffers(size_t stage); |
| 257 | 193 | ||
| 258 | protected: | 194 | void BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, bool needs_bind); |
| 259 | explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 260 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||
| 261 | StreamBuffer& stream_buffer_) | ||
| 262 | : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, | ||
| 263 | stream_buffer{stream_buffer_} {} | ||
| 264 | 195 | ||
| 265 | ~BufferCache() = default; | 196 | void BindHostGraphicsStorageBuffers(size_t stage); |
| 266 | 197 | ||
| 267 | virtual std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) = 0; | 198 | void BindHostTransformFeedbackBuffers(); |
| 268 | 199 | ||
| 269 | virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { | 200 | void BindHostComputeUniformBuffers(); |
| 270 | return {}; | ||
| 271 | } | ||
| 272 | 201 | ||
| 273 | /// Register an object into the cache | 202 | void BindHostComputeStorageBuffers(); |
| 274 | MapInterval* Register(MapInterval new_map, bool inherit_written = false) { | ||
| 275 | const VAddr cpu_addr = new_map.start; | ||
| 276 | if (!cpu_addr) { | ||
| 277 | LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", | ||
| 278 | new_map.gpu_addr); | ||
| 279 | return nullptr; | ||
| 280 | } | ||
| 281 | const std::size_t size = new_map.end - new_map.start; | ||
| 282 | new_map.is_registered = true; | ||
| 283 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 284 | new_map.is_memory_marked = true; | ||
| 285 | if (inherit_written) { | ||
| 286 | MarkRegionAsWritten(new_map.start, new_map.end - 1); | ||
| 287 | new_map.is_written = true; | ||
| 288 | } | ||
| 289 | MapInterval* const storage = mapped_addresses_allocator.Allocate(); | ||
| 290 | *storage = new_map; | ||
| 291 | mapped_addresses.insert(*storage); | ||
| 292 | return storage; | ||
| 293 | } | ||
| 294 | 203 | ||
| 295 | void UnmarkMemory(MapInterval* map) { | 204 | void DoUpdateGraphicsBuffers(bool is_indexed); |
| 296 | if (!map->is_memory_marked) { | 205 | |
| 297 | return; | 206 | void DoUpdateComputeBuffers(); |
| 298 | } | 207 | |
| 299 | const std::size_t size = map->end - map->start; | 208 | void UpdateIndexBuffer(); |
| 300 | rasterizer.UpdatePagesCachedCount(map->start, size, -1); | 209 | |
| 301 | map->is_memory_marked = false; | 210 | void UpdateVertexBuffers(); |
| 302 | } | 211 | |
| 303 | 212 | void UpdateVertexBuffer(u32 index); | |
| 304 | /// Unregisters an object from the cache | 213 | |
| 305 | void Unregister(MapInterval* map) { | 214 | void UpdateUniformBuffers(size_t stage); |
| 306 | UnmarkMemory(map); | 215 | |
| 307 | map->is_registered = false; | 216 | void UpdateStorageBuffers(size_t stage); |
| 308 | if (map->is_sync_pending) { | 217 | |
| 309 | map->is_sync_pending = false; | 218 | void UpdateTransformFeedbackBuffers(); |
| 310 | marked_for_unregister.remove(map); | 219 | |
| 220 | void UpdateTransformFeedbackBuffer(u32 index); | ||
| 221 | |||
| 222 | void UpdateComputeUniformBuffers(); | ||
| 223 | |||
| 224 | void UpdateComputeStorageBuffers(); | ||
| 225 | |||
| 226 | void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); | ||
| 227 | |||
| 228 | [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); | ||
| 229 | |||
| 230 | [[nodiscard]] OverlapResult ResolveOverlaps(VAddr cpu_addr, u32 wanted_size); | ||
| 231 | |||
| 232 | void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score); | ||
| 233 | |||
| 234 | [[nodiscard]] BufferId CreateBuffer(VAddr cpu_addr, u32 wanted_size); | ||
| 235 | |||
| 236 | void Register(BufferId buffer_id); | ||
| 237 | |||
| 238 | void Unregister(BufferId buffer_id); | ||
| 239 | |||
| 240 | template <bool insert> | ||
| 241 | void ChangeRegister(BufferId buffer_id); | ||
| 242 | |||
| 243 | void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 244 | |||
| 245 | void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); | ||
| 246 | |||
| 247 | void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 248 | std::span<BufferCopy> copies); | ||
| 249 | |||
| 250 | void ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 251 | std::span<const BufferCopy> copies); | ||
| 252 | |||
| 253 | void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies); | ||
| 254 | |||
| 255 | void DeleteBuffer(BufferId buffer_id); | ||
| 256 | |||
| 257 | void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); | ||
| 258 | |||
| 259 | void NotifyBufferDeletion(); | ||
| 260 | |||
| 261 | [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; | ||
| 262 | |||
| 263 | [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); | ||
| 264 | |||
| 265 | [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); | ||
| 266 | |||
| 267 | [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; | ||
| 268 | |||
| 269 | VideoCore::RasterizerInterface& rasterizer; | ||
| 270 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 271 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 272 | Tegra::MemoryManager& gpu_memory; | ||
| 273 | Core::Memory::Memory& cpu_memory; | ||
| 274 | Runtime& runtime; | ||
| 275 | |||
| 276 | SlotVector<Buffer> slot_buffers; | ||
| 277 | DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||
| 278 | |||
| 279 | u32 last_index_count = 0; | ||
| 280 | |||
| 281 | Binding index_buffer; | ||
| 282 | std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; | ||
| 283 | std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; | ||
| 284 | std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; | ||
| 285 | std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; | ||
| 286 | |||
| 287 | std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; | ||
| 288 | std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; | ||
| 289 | |||
| 290 | std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; | ||
| 291 | u32 enabled_compute_uniform_buffers = 0; | ||
| 292 | |||
| 293 | std::array<u32, NUM_STAGES> enabled_storage_buffers{}; | ||
| 294 | std::array<u32, NUM_STAGES> written_storage_buffers{}; | ||
| 295 | u32 enabled_compute_storage_buffers = 0; | ||
| 296 | u32 written_compute_storage_buffers = 0; | ||
| 297 | |||
| 298 | std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | ||
| 299 | |||
| 300 | bool has_deleted_buffers = false; | ||
| 301 | |||
| 302 | std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> | ||
| 303 | dirty_uniform_buffers{}; | ||
| 304 | |||
| 305 | std::vector<BufferId> cached_write_buffer_ids; | ||
| 306 | |||
| 307 | // TODO: This data structure is not optimal and it should be reworked | ||
| 308 | std::vector<BufferId> uncommitted_downloads; | ||
| 309 | std::deque<std::vector<BufferId>> committed_downloads; | ||
| 310 | |||
| 311 | size_t immediate_buffer_capacity = 0; | ||
| 312 | std::unique_ptr<u8[]> immediate_buffer_alloc; | ||
| 313 | |||
| 314 | std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | ||
| 315 | }; | ||
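Worth noting about the members above: page_table is a flat array covering the whole 39-bit guest address space at 64 KiB granularity, so it holds (1 << 39) >> 16 = 2^23 entries; assuming the slot id is a 32-bit index, that is on the order of 32 MiB of always-resident lookup state traded for O(1) address-to-buffer resolution. A hedged sketch of the lookup walk performed by ForEachBufferInRange, with plain maps standing in for the real page table and slot vector:

    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    // Assumed cache page granularity, matching PAGE_BITS above.
    constexpr std::uint64_t PAGE_BITS = 16;
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

    struct FakeBuffer {
        std::uint64_t cpu_addr;
        std::uint64_t size_bytes;
    };

    // Visit every buffer overlapping [cpu_addr, cpu_addr + size), skipping ahead
    // past each hit so a large buffer is visited only once. 'page_table' maps a
    // page index to a buffer id; 0 stands in for the null buffer id.
    void ForEachBufferInRangeSketch(const std::unordered_map<std::uint64_t, std::uint32_t>& page_table,
                                    const std::unordered_map<std::uint32_t, FakeBuffer>& buffers,
                                    std::uint64_t cpu_addr, std::uint64_t size,
                                    const std::function<void(std::uint32_t, const FakeBuffer&)>& func) {
        const std::uint64_t page_end = (cpu_addr + size + PAGE_SIZE - 1) / PAGE_SIZE;
        for (std::uint64_t page = cpu_addr >> PAGE_BITS; page < page_end;) {
            const auto it = page_table.find(page);
            if (it == page_table.end() || it->second == 0) {
                ++page;
                continue;
            }
            const FakeBuffer& buffer = buffers.at(it->second);
            func(it->second, buffer);
            const std::uint64_t end_addr = buffer.cpu_addr + buffer.size_bytes;
            page = (end_addr + PAGE_SIZE - 1) / PAGE_SIZE;
        }
    }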
| 316 | |||
| 317 | template <class P> | ||
| 318 | BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||
| 319 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 320 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 321 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||
| 322 | Runtime& runtime_) | ||
| 323 | : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | ||
| 324 | gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | ||
| 325 | // Ensure the first slot is used for the null buffer | ||
| 326 | void(slot_buffers.insert(runtime, NullBufferParams{})); | ||
| 327 | } | ||
| 328 | |||
| 329 | template <class P> | ||
| 330 | void BufferCache<P>::TickFrame() { | ||
| 331 | delayed_destruction_ring.Tick(); | ||
| 332 | } | ||
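TickFrame only advances the delayed-destruction ring: buffers dropped from the cache are not freed immediately but parked for a fixed number of frames, presumably so work still in flight on the GPU never references freed memory. A minimal sketch of that idea; the ring depth of 8 matches the template argument used above, everything else is an assumption of the sketch:

    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    // Keep destroyed objects alive for FRAMES ticks before actually freeing them.
    template <typename T, std::size_t FRAMES = 8>
    class DelayedDestructionRingSketch {
    public:
        void Tick() {
            index = (index + 1) % FRAMES;
            slots[index].clear(); // objects parked FRAMES ticks ago die here
        }

        void Push(T&& object) {
            slots[index].push_back(std::move(object));
        }

    private:
        std::size_t index = 0;
        std::array<std::vector<T>, FRAMES> slots{};
    };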
| 333 | |||
| 334 | template <class P> | ||
| 335 | void BufferCache<P>::WriteMemory(VAddr cpu_addr, u64 size) { | ||
| 336 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | ||
| 337 | buffer.MarkRegionAsCpuModified(cpu_addr, size); | ||
| 338 | }); | ||
| 339 | } | ||
| 340 | |||
| 341 | template <class P> | ||
| 342 | void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | ||
| 343 | ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) { | ||
| 344 | if (!buffer.HasCachedWrites()) { | ||
| 345 | cached_write_buffer_ids.push_back(buffer_id); | ||
| 311 | } | 346 | } |
| 312 | if (map->is_written) { | 347 | buffer.CachedCpuWrite(cpu_addr, size); |
| 313 | UnmarkRegionAsWritten(map->start, map->end - 1); | 348 | }); |
| 349 | } | ||
| 350 | |||
| 351 | template <class P> | ||
| 352 | void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | ||
| 353 | ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { | ||
| 354 | boost::container::small_vector<BufferCopy, 1> copies; | ||
| 355 | u64 total_size_bytes = 0; | ||
| 356 | u64 largest_copy = 0; | ||
| 357 | buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 358 | copies.push_back(BufferCopy{ | ||
| 359 | .src_offset = range_offset, | ||
| 360 | .dst_offset = total_size_bytes, | ||
| 361 | .size = range_size, | ||
| 362 | }); | ||
| 363 | total_size_bytes += range_size; | ||
| 364 | largest_copy = std::max(largest_copy, range_size); | ||
| 365 | }); | ||
| 366 | if (total_size_bytes == 0) { | ||
| 367 | return; | ||
| 314 | } | 368 | } |
| 315 | const auto it = mapped_addresses.find(*map); | 369 | MICROPROFILE_SCOPE(GPU_DownloadMemory); |
| 316 | ASSERT(it != mapped_addresses.end()); | 370 | |
| 317 | mapped_addresses.erase(it); | 371 | if constexpr (USE_MEMORY_MAPS) { |
| 318 | mapped_addresses_allocator.Release(map); | 372 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |
| 319 | } | 373 | const u8* const mapped_memory = download_staging.mapped_span.data(); |
| 320 | 374 | const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size()); | |
| 321 | private: | 375 | for (BufferCopy& copy : copies) { |
| 322 | MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { | 376 | // Modify copies to have the staging offset in mind |
| 323 | const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); | 377 | copy.dst_offset += download_staging.offset; |
| 324 | if (overlaps.empty()) { | ||
| 325 | const VAddr cpu_addr_end = cpu_addr + size; | ||
| 326 | if (gpu_memory.IsGranularRange(gpu_addr, size)) { | ||
| 327 | u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); | ||
| 328 | block->Upload(block->Offset(cpu_addr), size, host_ptr); | ||
| 329 | } else { | ||
| 330 | staging_buffer.resize(size); | ||
| 331 | gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); | ||
| 332 | block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); | ||
| 333 | } | 378 | } |
| 334 | return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); | 379 | runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); |
| 335 | } | 380 | runtime.Finish(); |
| 336 | 381 | for (const BufferCopy& copy : copies) { | |
| 337 | const VAddr cpu_addr_end = cpu_addr + size; | 382 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 338 | if (overlaps.size() == 1) { | 383 | // Undo the modified offset |
| 339 | MapInterval* const current_map = overlaps[0]; | 384 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 340 | if (current_map->IsInside(cpu_addr, cpu_addr_end)) { | 385 | const u8* copy_mapped_memory = mapped_memory + dst_offset; |
| 341 | return current_map; | 386 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); |
| 387 | } | ||
| 388 | } else { | ||
| 389 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 390 | for (const BufferCopy& copy : copies) { | ||
| 391 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 392 | const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 393 | cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); | ||
| 342 | } | 394 | } |
| 343 | } | 395 | } |
| 344 | VAddr new_start = cpu_addr; | 396 | }); |
| 345 | VAddr new_end = cpu_addr_end; | 397 | } |
| 346 | bool write_inheritance = false; | 398 | |
| 347 | bool modified_inheritance = false; | 399 | template <class P> |
| 348 | // Calculate new buffer parameters | 400 | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, |
| 349 | for (MapInterval* overlap : overlaps) { | 401 | u32 size) { |
| 350 | new_start = std::min(overlap->start, new_start); | 402 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 351 | new_end = std::max(overlap->end, new_end); | 403 | if (!cpu_addr) { |
| 352 | write_inheritance |= overlap->is_written; | 404 | uniform_buffers[stage][index] = NULL_BINDING; |
| 353 | modified_inheritance |= overlap->is_modified; | 405 | return; |
| 406 | } | ||
| 407 | const Binding binding{ | ||
| 408 | .cpu_addr = *cpu_addr, | ||
| 409 | .size = size, | ||
| 410 | .buffer_id = BufferId{}, | ||
| 411 | }; | ||
| 412 | uniform_buffers[stage][index] = binding; | ||
| 413 | } | ||
| 414 | |||
| 415 | template <class P> | ||
| 416 | void BufferCache<P>::UpdateGraphicsBuffers(bool is_indexed) { | ||
| 417 | MICROPROFILE_SCOPE(GPU_PrepareBuffers); | ||
| 418 | do { | ||
| 419 | has_deleted_buffers = false; | ||
| 420 | DoUpdateGraphicsBuffers(is_indexed); | ||
| 421 | } while (has_deleted_buffers); | ||
| 422 | } | ||
| 423 | |||
| 424 | template <class P> | ||
| 425 | void BufferCache<P>::UpdateComputeBuffers() { | ||
| 426 | MICROPROFILE_SCOPE(GPU_PrepareBuffers); | ||
| 427 | do { | ||
| 428 | has_deleted_buffers = false; | ||
| 429 | DoUpdateComputeBuffers(); | ||
| 430 | } while (has_deleted_buffers); | ||
| 431 | } | ||
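UpdateGraphicsBuffers and UpdateComputeBuffers above share the same retry pattern: resolving a binding may create a new buffer, which can merge and delete overlapping buffers and thereby invalidate bindings resolved earlier in that same pass, so the pass restarts until it finishes without deletions. A minimal stand-alone sketch of that pattern (FakeCache and its members are illustrative, not the cache's real types):

    #include <cstdio>

    // Illustrative stand-in for the cache state touched by a binding update pass.
    struct FakeCache {
        bool has_deleted_buffers = false;
        int passes = 0;

        void DoUpdateBindings() {
            ++passes;
            // Pretend the first pass merged two buffers and deleted one, invalidating
            // the bindings resolved earlier in that same pass.
            has_deleted_buffers = (passes == 1);
        }

        void UpdateBindings() {
            do {
                has_deleted_buffers = false;
                DoUpdateBindings();
            } while (has_deleted_buffers); // restart until a pass finishes without deletions
        }
    };

    int main() {
        FakeCache cache;
        cache.UpdateBindings();
        std::printf("stabilized after %d passes\n", cache.passes); // stabilized after 2 passes
    }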
| 432 | |||
| 433 | template <class P> | ||
| 434 | void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) { | ||
| 435 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | ||
| 436 | if (is_indexed) { | ||
| 437 | BindHostIndexBuffer(); | ||
| 438 | } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||
| 439 | const auto& regs = maxwell3d.regs; | ||
| 440 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { | ||
| 441 | runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count); | ||
| 354 | } | 442 | } |
| 355 | GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; | 443 | } |
| 356 | for (auto& overlap : overlaps) { | 444 | BindHostVertexBuffers(); |
| 357 | Unregister(overlap); | 445 | BindHostTransformFeedbackBuffers(); |
| 446 | } | ||
| 447 | |||
| 448 | template <class P> | ||
| 449 | void BufferCache<P>::BindHostStageBuffers(size_t stage) { | ||
| 450 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | ||
| 451 | BindHostGraphicsUniformBuffers(stage); | ||
| 452 | BindHostGraphicsStorageBuffers(stage); | ||
| 453 | } | ||
| 454 | |||
| 455 | template <class P> | ||
| 456 | void BufferCache<P>::BindHostComputeBuffers() { | ||
| 457 | MICROPROFILE_SCOPE(GPU_BindUploadBuffers); | ||
| 458 | BindHostComputeUniformBuffers(); | ||
| 459 | BindHostComputeStorageBuffers(); | ||
| 460 | } | ||
| 461 | |||
| 462 | template <class P> | ||
| 463 | void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { | ||
| 464 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 465 | if (enabled_uniform_buffers[stage] != enabled) { | ||
| 466 | dirty_uniform_buffers[stage] = ~u32{0}; | ||
| 358 | } | 467 | } |
| 359 | UpdateBlock(block, new_start, new_end, overlaps); | 468 | } |
| 360 | 469 | enabled_uniform_buffers[stage] = enabled; | |
| 361 | const MapInterval new_map{new_start, new_end, new_gpu_addr}; | 470 | } |
| 362 | MapInterval* const map = Register(new_map, write_inheritance); | 471 | |
| 363 | if (!map) { | 472 | template <class P> |
| 364 | return nullptr; | 473 | void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { |
| 474 | enabled_compute_uniform_buffers = enabled; | ||
| 475 | } | ||
| 476 | |||
| 477 | template <class P> | ||
| 478 | void BufferCache<P>::UnbindGraphicsStorageBuffers(size_t stage) { | ||
| 479 | enabled_storage_buffers[stage] = 0; | ||
| 480 | written_storage_buffers[stage] = 0; | ||
| 481 | } | ||
| 482 | |||
| 483 | template <class P> | ||
| 484 | void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, | ||
| 485 | u32 cbuf_offset, bool is_written) { | ||
| 486 | enabled_storage_buffers[stage] |= 1U << ssbo_index; | ||
| 487 | written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index; | ||
| 488 | |||
| 489 | const auto& cbufs = maxwell3d.state.shader_stages[stage]; | ||
| 490 | const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset; | ||
| 491 | storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr); | ||
| 492 | } | ||
| 493 | |||
| 494 | template <class P> | ||
| 495 | void BufferCache<P>::UnbindComputeStorageBuffers() { | ||
| 496 | enabled_compute_storage_buffers = 0; | ||
| 497 | written_compute_storage_buffers = 0; | ||
| 498 | } | ||
| 499 | |||
| 500 | template <class P> | ||
| 501 | void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, | ||
| 502 | bool is_written) { | ||
| 503 | enabled_compute_storage_buffers |= 1U << ssbo_index; | ||
| 504 | written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index; | ||
| 505 | |||
| 506 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 507 | ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0); | ||
| 508 | |||
| 509 | const auto& cbufs = launch_desc.const_buffer_config; | ||
| 510 | const GPUVAddr ssbo_addr = cbufs[cbuf_index].Address() + cbuf_offset; | ||
| 511 | compute_storage_buffers[ssbo_index] = StorageBufferBinding(ssbo_addr); | ||
| 512 | } | ||
| 513 | |||
| 514 | template <class P> | ||
| 515 | void BufferCache<P>::FlushCachedWrites() { | ||
| 516 | for (const BufferId buffer_id : cached_write_buffer_ids) { | ||
| 517 | slot_buffers[buffer_id].FlushCachedWrites(); | ||
| 518 | } | ||
| 519 | cached_write_buffer_ids.clear(); | ||
| 520 | } | ||
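FlushCachedWrites drains the list that CachedCpuWrite (near the top of this hunk) builds up: a buffer id is appended only on its first pending write, so HasCachedWrites doubles as the deduplication flag. A rough self-contained sketch of that bookkeeping, using simplified stand-ins for Buffer and BufferId:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Simplified stand-ins for Buffer and BufferId.
    struct FakeBuffer {
        bool has_cached_writes = false;
        bool HasCachedWrites() const { return has_cached_writes; }
        void CachedCpuWrite() { has_cached_writes = true; }
        void FlushCachedWrites() { has_cached_writes = false; }
    };

    struct FakeCache {
        std::vector<FakeBuffer> buffers = std::vector<FakeBuffer>(4);
        std::vector<std::size_t> cached_write_ids; // buffers with pending CPU writes, no duplicates

        void CachedCpuWrite(std::size_t id) {
            if (!buffers[id].HasCachedWrites()) {
                cached_write_ids.push_back(id); // record the id only on the first pending write
            }
            buffers[id].CachedCpuWrite();
        }
        void FlushCachedWrites() {
            for (const std::size_t id : cached_write_ids) {
                buffers[id].FlushCachedWrites();
            }
            cached_write_ids.clear();
        }
    };

    int main() {
        FakeCache cache;
        cache.CachedCpuWrite(2);
        cache.CachedCpuWrite(2); // same buffer: the id is not recorded twice
        cache.FlushCachedWrites();
        return static_cast<int>(cache.cached_write_ids.size()); // 0 after the flush
    }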
| 521 | |||
| 522 | template <class P> | ||
| 523 | bool BufferCache<P>::HasUncommittedFlushes() const noexcept { | ||
| 524 | return !uncommitted_downloads.empty(); | ||
| 525 | } | ||
| 526 | |||
| 527 | template <class P> | ||
| 528 | bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept { | ||
| 529 | return !committed_downloads.empty() && !committed_downloads.front().empty(); | ||
| 530 | } | ||
| 531 | |||
| 532 | template <class P> | ||
| 533 | void BufferCache<P>::CommitAsyncFlushes() { | ||
| 534 | // This intentionally passes the value by copy | ||
| 535 | committed_downloads.push_front(uncommitted_downloads); | ||
| 536 | uncommitted_downloads.clear(); | ||
| 537 | } | ||
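CommitAsyncFlushes snapshots the uncommitted download list with push_front, and PopAsyncFlushes below consumes committed_downloads.back() before popping it, so the deque acts as a FIFO of per-commit snapshots: the oldest commit is processed first. A small demonstration of that ordering with plain standard containers standing in for the BufferId lists:

    #include <cassert>
    #include <deque>
    #include <vector>

    int main() {
        std::vector<int> uncommitted{1, 2};
        std::deque<std::vector<int>> committed;

        // First commit: snapshot the current uncommitted list.
        committed.push_front(uncommitted);
        uncommitted.clear();

        // Second commit with a new uncommitted list.
        uncommitted = {3};
        committed.push_front(uncommitted);
        uncommitted.clear();

        // Pop side: the oldest snapshot is consumed first.
        assert((committed.back() == std::vector<int>{1, 2}));
        committed.pop_back();
        assert((committed.back() == std::vector<int>{3}));
        committed.pop_back();
        assert(committed.empty());
    }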
| 538 | |||
| 539 | template <class P> | ||
| 540 | void BufferCache<P>::PopAsyncFlushes() { | ||
| 541 | if (committed_downloads.empty()) { | ||
| 542 | return; | ||
| 543 | } | ||
| 544 | auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); }); | ||
| 545 | const std::span<const BufferId> download_ids = committed_downloads.back(); | ||
| 546 | if (download_ids.empty()) { | ||
| 547 | return; | ||
| 548 | } | ||
| 549 | MICROPROFILE_SCOPE(GPU_DownloadMemory); | ||
| 550 | |||
| 551 | boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | ||
| 552 | u64 total_size_bytes = 0; | ||
| 553 | u64 largest_copy = 0; | ||
| 554 | for (const BufferId buffer_id : download_ids) { | ||
| 555 | slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) { | ||
| 556 | downloads.push_back({ | ||
| 557 | BufferCopy{ | ||
| 558 | .src_offset = range_offset, | ||
| 559 | .dst_offset = total_size_bytes, | ||
| 560 | .size = range_size, | ||
| 561 | }, | ||
| 562 | buffer_id, | ||
| 563 | }); | ||
| 564 | total_size_bytes += range_size; | ||
| 565 | largest_copy = std::max(largest_copy, range_size); | ||
| 566 | }); | ||
| 567 | } | ||
| 568 | if (downloads.empty()) { | ||
| 569 | return; | ||
| 570 | } | ||
| 571 | if constexpr (USE_MEMORY_MAPS) { | ||
| 572 | auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 573 | for (auto& [copy, buffer_id] : downloads) { | ||
| 574 | // Account for the staging buffer offset in the copy | ||
| 575 | copy.dst_offset += download_staging.offset; | ||
| 576 | const std::array copies{copy}; | ||
| 577 | runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies); | ||
| 365 | } | 578 | } |
| 366 | if (modified_inheritance) { | 579 | runtime.Finish(); |
| 367 | map->MarkAsModified(true, GetModifiedTicks()); | 580 | for (const auto [copy, buffer_id] : downloads) { |
| 368 | if (Settings::IsGPULevelHigh() && | 581 | const Buffer& buffer = slot_buffers[buffer_id]; |
| 369 | Settings::values.use_asynchronous_gpu_emulation.GetValue()) { | 582 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; |
| 370 | MarkForAsyncFlush(map); | 583 | // Undo the modified offset |
| 371 | } | 584 | const u64 dst_offset = copy.dst_offset - download_staging.offset; |
| 585 | const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; | ||
| 586 | cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); | ||
| 587 | } | ||
| 588 | } else { | ||
| 589 | const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 590 | for (const auto [copy, buffer_id] : downloads) { | ||
| 591 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 592 | buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); | ||
| 593 | const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; | ||
| 594 | cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 372 | } | 595 | } |
| 373 | return map; | ||
| 374 | } | 596 | } |
| 375 | 597 | } | |
| 376 | void UpdateBlock(Buffer* block, VAddr start, VAddr end, const VectorMapInterval& overlaps) { | 598 | |
| 377 | const IntervalType base_interval{start, end}; | 599 | template <class P> |
| 378 | IntervalSet interval_set{}; | 600 | bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 379 | interval_set.add(base_interval); | 601 | const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE); |
| 380 | for (auto& overlap : overlaps) { | 602 | for (u64 page = addr >> PAGE_BITS; page < page_end;) { |
| 381 | const IntervalType subtract{overlap->start, overlap->end}; | 603 | const BufferId image_id = page_table[page]; |
| 382 | interval_set.subtract(subtract); | 604 | if (!image_id) { |
| 605 | ++page; | ||
| 606 | continue; | ||
| 383 | } | 607 | } |
| 384 | for (auto& interval : interval_set) { | 608 | Buffer& buffer = slot_buffers[image_id]; |
| 385 | const std::size_t size = interval.upper() - interval.lower(); | 609 | if (buffer.IsRegionGpuModified(addr, size)) { |
| 386 | if (size == 0) { | 610 | return true; |
| 387 | continue; | ||
| 388 | } | ||
| 389 | staging_buffer.resize(size); | ||
| 390 | cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); | ||
| 391 | block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); | ||
| 392 | } | 611 | } |
| 612 | const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes(); | ||
| 613 | page = Common::DivCeil(end_addr, PAGE_SIZE); | ||
| 393 | } | 614 | } |
| 394 | 615 | return false; | |
| 395 | VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { | 616 | } |
| 396 | VectorMapInterval result; | 617 | |
| 397 | if (size == 0) { | 618 | template <class P> |
| 398 | return result; | 619 | void BufferCache<P>::BindHostIndexBuffer() { |
| 620 | Buffer& buffer = slot_buffers[index_buffer.buffer_id]; | ||
| 621 | const u32 offset = buffer.Offset(index_buffer.cpu_addr); | ||
| 622 | const u32 size = index_buffer.size; | ||
| 623 | SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); | ||
| 624 | if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||
| 625 | runtime.BindIndexBuffer(buffer, offset, size); | ||
| 626 | } else { | ||
| 627 | runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format, | ||
| 628 | maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count, | ||
| 629 | buffer, offset, size); | ||
| 630 | } | ||
| 631 | } | ||
| 632 | |||
| 633 | template <class P> | ||
| 634 | void BufferCache<P>::BindHostVertexBuffers() { | ||
| 635 | auto& flags = maxwell3d.dirty.flags; | ||
| 636 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | ||
| 637 | const Binding& binding = vertex_buffers[index]; | ||
| 638 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 639 | SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); | ||
| 640 | if (!flags[Dirty::VertexBuffer0 + index]) { | ||
| 641 | continue; | ||
| 399 | } | 642 | } |
| 643 | flags[Dirty::VertexBuffer0 + index] = false; | ||
| 400 | 644 | ||
| 401 | const VAddr addr_end = addr + size; | 645 | const u32 stride = maxwell3d.regs.vertex_array[index].stride; |
| 402 | auto it = mapped_addresses.lower_bound(addr); | 646 | const u32 offset = buffer.Offset(binding.cpu_addr); |
| 403 | if (it != mapped_addresses.begin()) { | 647 | runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride); |
| 404 | --it; | 648 | } |
| 649 | } | ||
| 650 | |||
| 651 | template <class P> | ||
| 652 | void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { | ||
| 653 | u32 dirty = ~0U; | ||
| 654 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 655 | dirty = std::exchange(dirty_uniform_buffers[stage], 0); | ||
| 656 | } | ||
| 657 | u32 binding_index = 0; | ||
| 658 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | ||
| 659 | const bool needs_bind = ((dirty >> index) & 1) != 0; | ||
| 660 | BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); | ||
| 661 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | ||
| 662 | ++binding_index; | ||
| 405 | } | 663 | } |
| 406 | while (it != mapped_addresses.end() && it->start < addr_end) { | 664 | }); |
| 407 | if (it->Overlaps(addr, addr_end)) { | 665 | } |
| 408 | result.push_back(&*it); | 666 | |
| 667 | template <class P> | ||
| 668 | void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 binding_index, | ||
| 669 | bool needs_bind) { | ||
| 670 | const Binding& binding = uniform_buffers[stage][index]; | ||
| 671 | const VAddr cpu_addr = binding.cpu_addr; | ||
| 672 | const u32 size = binding.size; | ||
| 673 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 674 | if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { | ||
| 675 | if constexpr (IS_OPENGL) { | ||
| 676 | if (runtime.HasFastBufferSubData()) { | ||
| 677 | // Fast path for Nvidia | ||
| 678 | if (!HasFastUniformBufferBound(stage, binding_index)) { | ||
| 679 | // We only have to bind when the currently bound buffer is not the fast version | ||
| 680 | runtime.BindFastUniformBuffer(stage, binding_index, size); | ||
| 681 | } | ||
| 682 | const auto span = ImmediateBufferWithData(cpu_addr, size); | ||
| 683 | runtime.PushFastUniformBuffer(stage, binding_index, span); | ||
| 684 | return; | ||
| 409 | } | 685 | } |
| 410 | ++it; | ||
| 411 | } | 686 | } |
| 412 | return result; | 687 | fast_bound_uniform_buffers[stage] |= 1U << binding_index; |
| 413 | } | ||
| 414 | 688 | ||
| 415 | /// Returns a ticks counter used for tracking when cached objects were last modified | 689 | // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan |
| 416 | u64 GetModifiedTicks() { | 690 | const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); |
| 417 | return ++modified_ticks; | 691 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); |
| 692 | return; | ||
| 418 | } | 693 | } |
| 419 | 694 | // Classic cached path | |
| 420 | void FlushMap(MapInterval* map) { | 695 | SynchronizeBuffer(buffer, cpu_addr, size); |
| 421 | const auto it = blocks.find(map->start >> BLOCK_PAGE_BITS); | 696 | if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { |
| 422 | ASSERT_OR_EXECUTE(it != blocks.end(), return;); | 697 | // Skip binding if it's not needed and if the bound buffer is not the fast version |
| 423 | 698 | // This exists to avoid instances where the fast buffer is bound and a GPU write happens | |
| 424 | std::shared_ptr<Buffer> block = it->second; | 699 | return; |
| 425 | |||
| 426 | const std::size_t size = map->end - map->start; | ||
| 427 | staging_buffer.resize(size); | ||
| 428 | block->Download(block->Offset(map->start), size, staging_buffer.data()); | ||
| 429 | cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size); | ||
| 430 | map->MarkAsModified(false, 0); | ||
| 431 | } | 700 | } |
| 701 | fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); | ||
| 432 | 702 | ||
| 433 | template <typename Callable> | 703 | const u32 offset = buffer.Offset(cpu_addr); |
| 434 | BufferInfo StreamBufferUpload(std::size_t size, std::size_t alignment, Callable&& callable) { | 704 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { |
| 435 | AlignBuffer(alignment); | 705 | runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); |
| 436 | const std::size_t uploaded_offset = buffer_offset; | 706 | } else { |
| 437 | callable(buffer_ptr); | 707 | runtime.BindUniformBuffer(buffer, offset, size); |
| 438 | |||
| 439 | buffer_ptr += size; | ||
| 440 | buffer_offset += size; | ||
| 441 | return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()}; | ||
| 442 | } | 708 | } |
| 709 | } | ||
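BindHostGraphicsUniformBuffer chooses between three upload strategies: an immediate push path (OpenGL drivers reporting fast buffer sub-data), a mapped stream-buffer path, and the regular cached path that synchronizes and binds the buffer. The decision order is condensed below as a reading aid; the constant and capability parameters are stand-ins for SKIP_CACHE_SIZE and the runtime queries used above, and the threshold value is an assumption:

    #include <cstdint>
    #include <cstdio>

    enum class UniformPath { FastPush, StreamBuffer, CachedBind };

    // Assumed threshold standing in for SKIP_CACHE_SIZE; the real constant lives elsewhere.
    constexpr std::uint32_t kSkipCacheSize = 4096;

    UniformPath ChooseUniformPath(std::uint32_t size, bool gpu_modified, bool is_opengl,
                                  bool has_fast_buffer_sub_data) {
        if (size <= kSkipCacheSize && !gpu_modified) {
            if (is_opengl && has_fast_buffer_sub_data) {
                return UniformPath::FastPush;  // push the data directly every draw
            }
            return UniformPath::StreamBuffer;  // copy into a mapped stream buffer slot
        }
        return UniformPath::CachedBind;        // synchronize the cached buffer and bind it
    }

    int main() {
        const UniformPath path = ChooseUniformPath(256, false, true, true);
        std::printf("%d\n", static_cast<int>(path)); // 0 == FastPush
    }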
| 710 | |||
| 711 | template <class P> | ||
| 712 | void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | ||
| 713 | u32 binding_index = 0; | ||
| 714 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | ||
| 715 | const Binding& binding = storage_buffers[stage][index]; | ||
| 716 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 717 | const u32 size = binding.size; | ||
| 718 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 719 | |||
| 720 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 721 | const bool is_written = ((written_storage_buffers[stage] >> index) & 1) != 0; | ||
| 722 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | ||
| 723 | runtime.BindStorageBuffer(stage, binding_index, buffer, offset, size, is_written); | ||
| 724 | ++binding_index; | ||
| 725 | } else { | ||
| 726 | runtime.BindStorageBuffer(buffer, offset, size, is_written); | ||
| 727 | } | ||
| 728 | }); | ||
| 729 | } | ||
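The storage and uniform binding loops iterate their enable masks through ForEachEnabledBit, which is declared elsewhere in this header. A plausible implementation of such a helper is sketched below purely as an assumption about its behavior: call the functor for every set bit of the mask, lowest index first.

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    // Assumed behavior of the helper used above: call func(index) for every set bit, lowest first.
    template <typename Func>
    void ForEachEnabledBit(std::uint32_t enabled_mask, Func&& func) {
        for (std::uint32_t mask = enabled_mask; mask != 0; mask &= mask - 1) {
            func(static_cast<std::uint32_t>(std::countr_zero(mask))); // index of the lowest set bit
        }
    }

    int main() {
        ForEachEnabledBit(0b1010'0001u, [](std::uint32_t index) { std::printf("%u ", index); });
        std::printf("\n"); // prints: 0 5 7
    }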
| 443 | 730 | ||
| 444 | void AlignBuffer(std::size_t alignment) { | 731 | template <class P> |
| 445 | // Align the offset, not the mapped pointer | 732 | void BufferCache<P>::BindHostTransformFeedbackBuffers() { |
| 446 | const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); | 733 | if (maxwell3d.regs.tfb_enabled == 0) { |
| 447 | buffer_ptr += offset_aligned - buffer_offset; | 734 | return; |
| 448 | buffer_offset = offset_aligned; | ||
| 449 | } | 735 | } |
| 736 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { | ||
| 737 | const Binding& binding = transform_feedback_buffers[index]; | ||
| 738 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 739 | const u32 size = binding.size; | ||
| 740 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 741 | |||
| 742 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 743 | runtime.BindTransformFeedbackBuffer(index, buffer, offset, size); | ||
| 744 | } | ||
| 745 | } | ||
| 450 | 746 | ||
| 451 | std::shared_ptr<Buffer> EnlargeBlock(std::shared_ptr<Buffer> buffer) { | 747 | template <class P> |
| 452 | const std::size_t old_size = buffer->Size(); | 748 | void BufferCache<P>::BindHostComputeUniformBuffers() { |
| 453 | const std::size_t new_size = old_size + BLOCK_PAGE_SIZE; | 749 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { |
| 454 | const VAddr cpu_addr = buffer->CpuAddr(); | 750 | // Mark all uniform buffers as dirty |
| 455 | std::shared_ptr<Buffer> new_buffer = CreateBlock(cpu_addr, new_size); | 751 | dirty_uniform_buffers.fill(~u32{0}); |
| 456 | new_buffer->CopyFrom(*buffer, 0, 0, old_size); | 752 | } |
| 457 | QueueDestruction(std::move(buffer)); | 753 | u32 binding_index = 0; |
| 458 | 754 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | |
| 459 | const VAddr cpu_addr_end = cpu_addr + new_size - 1; | 755 | const Binding& binding = compute_uniform_buffers[index]; |
| 460 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; | 756 | Buffer& buffer = slot_buffers[binding.buffer_id]; |
| 461 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { | 757 | const u32 size = binding.size; |
| 462 | blocks.insert_or_assign(page_start, new_buffer); | 758 | SynchronizeBuffer(buffer, binding.cpu_addr, size); |
| 759 | |||
| 760 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 761 | if constexpr (NEEDS_BIND_UNIFORM_INDEX) { | ||
| 762 | runtime.BindComputeUniformBuffer(binding_index, buffer, offset, size); | ||
| 763 | ++binding_index; | ||
| 764 | } else { | ||
| 765 | runtime.BindUniformBuffer(buffer, offset, size); | ||
| 463 | } | 766 | } |
| 767 | }); | ||
| 768 | } | ||
| 769 | |||
| 770 | template <class P> | ||
| 771 | void BufferCache<P>::BindHostComputeStorageBuffers() { | ||
| 772 | u32 binding_index = 0; | ||
| 773 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | ||
| 774 | const Binding& binding = compute_storage_buffers[index]; | ||
| 775 | Buffer& buffer = slot_buffers[binding.buffer_id]; | ||
| 776 | const u32 size = binding.size; | ||
| 777 | SynchronizeBuffer(buffer, binding.cpu_addr, size); | ||
| 778 | |||
| 779 | const u32 offset = buffer.Offset(binding.cpu_addr); | ||
| 780 | const bool is_written = ((written_compute_storage_buffers >> index) & 1) != 0; | ||
| 781 | if constexpr (NEEDS_BIND_STORAGE_INDEX) { | ||
| 782 | runtime.BindComputeStorageBuffer(binding_index, buffer, offset, size, is_written); | ||
| 783 | ++binding_index; | ||
| 784 | } else { | ||
| 785 | runtime.BindStorageBuffer(buffer, offset, size, is_written); | ||
| 786 | } | ||
| 787 | }); | ||
| 788 | } | ||
| 464 | 789 | ||
| 465 | return new_buffer; | 790 | template <class P> |
| 791 | void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { | ||
| 792 | if (is_indexed) { | ||
| 793 | UpdateIndexBuffer(); | ||
| 466 | } | 794 | } |
| 795 | UpdateVertexBuffers(); | ||
| 796 | UpdateTransformFeedbackBuffers(); | ||
| 797 | for (size_t stage = 0; stage < NUM_STAGES; ++stage) { | ||
| 798 | UpdateUniformBuffers(stage); | ||
| 799 | UpdateStorageBuffers(stage); | ||
| 800 | } | ||
| 801 | } | ||
| 802 | |||
| 803 | template <class P> | ||
| 804 | void BufferCache<P>::DoUpdateComputeBuffers() { | ||
| 805 | UpdateComputeUniformBuffers(); | ||
| 806 | UpdateComputeStorageBuffers(); | ||
| 807 | } | ||
| 808 | |||
| 809 | template <class P> | ||
| 810 | void BufferCache<P>::UpdateIndexBuffer() { | ||
| 811 | // We have to check for the dirty flags and index count | ||
| 812 | // The index count is currently changed without updating the dirty flags | ||
| 813 | const auto& index_array = maxwell3d.regs.index_array; | ||
| 814 | auto& flags = maxwell3d.dirty.flags; | ||
| 815 | if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) { | ||
| 816 | return; | ||
| 817 | } | ||
| 818 | flags[Dirty::IndexBuffer] = false; | ||
| 819 | last_index_count = index_array.count; | ||
| 820 | |||
| 821 | const GPUVAddr gpu_addr_begin = index_array.StartAddress(); | ||
| 822 | const GPUVAddr gpu_addr_end = index_array.EndAddress(); | ||
| 823 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | ||
| 824 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | ||
| 825 | const u32 draw_size = index_array.count * index_array.FormatSizeInBytes(); | ||
| 826 | const u32 size = std::min(address_size, draw_size); | ||
| 827 | if (size == 0 || !cpu_addr) { | ||
| 828 | index_buffer = NULL_BINDING; | ||
| 829 | return; | ||
| 830 | } | ||
| 831 | index_buffer = Binding{ | ||
| 832 | .cpu_addr = *cpu_addr, | ||
| 833 | .size = size, | ||
| 834 | .buffer_id = FindBuffer(*cpu_addr, size), | ||
| 835 | }; | ||
| 836 | } | ||
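UpdateIndexBuffer clamps the bound size to the smaller of the mapped address range and the size implied by the draw (index count times the index format size), so a draw that touches only a prefix of a large mapping does not pull the whole range into the cache. A toy computation with made-up values:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        // Made-up values: 64 KiB mapped for the index array, but the draw only uses 3000 u16 indices.
        const std::uint32_t address_size = 0x10000;
        const std::uint32_t index_count = 3000;
        const std::uint32_t format_size = 2; // bytes per u16 index
        const std::uint32_t draw_size = index_count * format_size;
        const std::uint32_t size = std::min(address_size, draw_size);
        std::printf("bound index buffer size = %u bytes\n", size); // 6000, not 65536
    }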
| 467 | 837 | ||
| 468 | std::shared_ptr<Buffer> MergeBlocks(std::shared_ptr<Buffer> first, | 838 | template <class P> |
| 469 | std::shared_ptr<Buffer> second) { | 839 | void BufferCache<P>::UpdateVertexBuffers() { |
| 470 | const std::size_t size_1 = first->Size(); | 840 | auto& flags = maxwell3d.dirty.flags; |
| 471 | const std::size_t size_2 = second->Size(); | 841 | if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) { |
| 472 | const VAddr first_addr = first->CpuAddr(); | 842 | return; |
| 473 | const VAddr second_addr = second->CpuAddr(); | 843 | } |
| 474 | const VAddr new_addr = std::min(first_addr, second_addr); | 844 | flags[Dirty::VertexBuffers] = false; |
| 475 | const std::size_t new_size = size_1 + size_2; | ||
| 476 | |||
| 477 | std::shared_ptr<Buffer> new_buffer = CreateBlock(new_addr, new_size); | ||
| 478 | new_buffer->CopyFrom(*first, 0, new_buffer->Offset(first_addr), size_1); | ||
| 479 | new_buffer->CopyFrom(*second, 0, new_buffer->Offset(second_addr), size_2); | ||
| 480 | QueueDestruction(std::move(first)); | ||
| 481 | QueueDestruction(std::move(second)); | ||
| 482 | 845 | ||
| 483 | const VAddr cpu_addr_end = new_addr + new_size - 1; | 846 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { |
| 484 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; | 847 | UpdateVertexBuffer(index); |
| 485 | for (u64 page_start = new_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { | ||
| 486 | blocks.insert_or_assign(page_start, new_buffer); | ||
| 487 | } | ||
| 488 | return new_buffer; | ||
| 489 | } | 848 | } |
| 849 | } | ||
| 490 | 850 | ||
| 491 | Buffer* GetBlock(VAddr cpu_addr, std::size_t size) { | 851 | template <class P> |
| 492 | std::shared_ptr<Buffer> found; | 852 | void BufferCache<P>::UpdateVertexBuffer(u32 index) { |
| 853 | if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) { | ||
| 854 | return; | ||
| 855 | } | ||
| 856 | const auto& array = maxwell3d.regs.vertex_array[index]; | ||
| 857 | const auto& limit = maxwell3d.regs.vertex_array_limit[index]; | ||
| 858 | const GPUVAddr gpu_addr_begin = array.StartAddress(); | ||
| 859 | const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1; | ||
| 860 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | ||
| 861 | const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | ||
| 862 | const u32 size = address_size; // TODO: Analyze stride and number of vertices | ||
| 863 | if (array.enable == 0 || size == 0 || !cpu_addr) { | ||
| 864 | vertex_buffers[index] = NULL_BINDING; | ||
| 865 | return; | ||
| 866 | } | ||
| 867 | vertex_buffers[index] = Binding{ | ||
| 868 | .cpu_addr = *cpu_addr, | ||
| 869 | .size = size, | ||
| 870 | .buffer_id = FindBuffer(*cpu_addr, size), | ||
| 871 | }; | ||
| 872 | } | ||
| 873 | |||
| 874 | template <class P> | ||
| 875 | void BufferCache<P>::UpdateUniformBuffers(size_t stage) { | ||
| 876 | ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { | ||
| 877 | Binding& binding = uniform_buffers[stage][index]; | ||
| 878 | if (binding.buffer_id) { | ||
| 879 | // Already updated | ||
| 880 | return; | ||
| 881 | } | ||
| 882 | // Mark as dirty | ||
| 883 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 884 | dirty_uniform_buffers[stage] |= 1U << index; | ||
| 885 | } | ||
| 886 | // Resolve buffer | ||
| 887 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 888 | }); | ||
| 889 | } | ||
| 890 | |||
| 891 | template <class P> | ||
| 892 | void BufferCache<P>::UpdateStorageBuffers(size_t stage) { | ||
| 893 | const u32 written_mask = written_storage_buffers[stage]; | ||
| 894 | ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { | ||
| 895 | // Resolve buffer | ||
| 896 | Binding& binding = storage_buffers[stage][index]; | ||
| 897 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 898 | binding.buffer_id = buffer_id; | ||
| 899 | // Mark buffer as written if needed | ||
| 900 | if (((written_mask >> index) & 1) != 0) { | ||
| 901 | MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | ||
| 902 | } | ||
| 903 | }); | ||
| 904 | } | ||
| 493 | 905 | ||
| 494 | const VAddr cpu_addr_end = cpu_addr + size - 1; | 906 | template <class P> |
| 495 | const u64 page_end = cpu_addr_end >> BLOCK_PAGE_BITS; | 907 | void BufferCache<P>::UpdateTransformFeedbackBuffers() { |
| 496 | for (u64 page_start = cpu_addr >> BLOCK_PAGE_BITS; page_start <= page_end; ++page_start) { | 908 | if (maxwell3d.regs.tfb_enabled == 0) { |
| 497 | auto it = blocks.find(page_start); | 909 | return; |
| 498 | if (it == blocks.end()) { | 910 | } |
| 499 | if (found) { | 911 | for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { |
| 500 | found = EnlargeBlock(found); | 912 | UpdateTransformFeedbackBuffer(index); |
| 501 | continue; | 913 | } |
| 502 | } | 914 | } |
| 503 | const VAddr start_addr = page_start << BLOCK_PAGE_BITS; | 915 | |
| 504 | found = CreateBlock(start_addr, BLOCK_PAGE_SIZE); | 916 | template <class P> |
| 505 | blocks.insert_or_assign(page_start, found); | 917 | void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { |
| 506 | continue; | 918 | const auto& binding = maxwell3d.regs.tfb_bindings[index]; |
| 507 | } | 919 | const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset; |
| 508 | if (!found) { | 920 | const u32 size = binding.buffer_size; |
| 509 | found = it->second; | 921 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); |
| 510 | continue; | 922 | if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) { |
| 511 | } | 923 | transform_feedback_buffers[index] = NULL_BINDING; |
| 512 | if (found != it->second) { | 924 | return; |
| 513 | found = MergeBlocks(std::move(found), it->second); | 925 | } |
| 926 | const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||
| 927 | transform_feedback_buffers[index] = Binding{ | ||
| 928 | .cpu_addr = *cpu_addr, | ||
| 929 | .size = size, | ||
| 930 | .buffer_id = buffer_id, | ||
| 931 | }; | ||
| 932 | MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||
| 933 | } | ||
| 934 | |||
| 935 | template <class P> | ||
| 936 | void BufferCache<P>::UpdateComputeUniformBuffers() { | ||
| 937 | ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { | ||
| 938 | Binding& binding = compute_uniform_buffers[index]; | ||
| 939 | binding = NULL_BINDING; | ||
| 940 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 941 | if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) { | ||
| 942 | const auto& cbuf = launch_desc.const_buffer_config[index]; | ||
| 943 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address()); | ||
| 944 | if (cpu_addr) { | ||
| 945 | binding.cpu_addr = *cpu_addr; | ||
| 946 | binding.size = cbuf.size; | ||
| 514 | } | 947 | } |
| 515 | } | 948 | } |
| 516 | return found.get(); | 949 | binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); |
| 950 | }); | ||
| 951 | } | ||
| 952 | |||
| 953 | template <class P> | ||
| 954 | void BufferCache<P>::UpdateComputeStorageBuffers() { | ||
| 955 | ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | ||
| 956 | // Resolve buffer | ||
| 957 | Binding& binding = compute_storage_buffers[index]; | ||
| 958 | const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||
| 959 | binding.buffer_id = buffer_id; | ||
| 960 | // Mark as written if needed | ||
| 961 | if (((written_compute_storage_buffers >> index) & 1) != 0) { | ||
| 962 | MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | ||
| 963 | } | ||
| 964 | }); | ||
| 965 | } | ||
| 966 | |||
| 967 | template <class P> | ||
| 968 | void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size) { | ||
| 969 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 970 | buffer.MarkRegionAsGpuModified(cpu_addr, size); | ||
| 971 | |||
| 972 | const bool is_accuracy_high = Settings::IsGPULevelHigh(); | ||
| 973 | const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); | ||
| 974 | if (!is_accuracy_high || !is_async) { | ||
| 975 | return; | ||
| 976 | } | ||
| 977 | if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) { | ||
| 978 | // Already inserted | ||
| 979 | return; | ||
| 517 | } | 980 | } |
| 981 | uncommitted_downloads.push_back(buffer_id); | ||
| 982 | } | ||
| 518 | 983 | ||
| 519 | void MarkRegionAsWritten(VAddr start, VAddr end) { | 984 | template <class P> |
| 520 | const u64 page_end = end >> WRITE_PAGE_BIT; | 985 | BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { |
| 521 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { | 986 | if (cpu_addr == 0) { |
| 522 | if (const auto [it, inserted] = written_pages.emplace(page_start, 1); !inserted) { | 987 | return NULL_BUFFER_ID; |
| 523 | ++it->second; | 988 | } |
| 524 | } | 989 | const u64 page = cpu_addr >> PAGE_BITS; |
| 990 | const BufferId buffer_id = page_table[page]; | ||
| 991 | if (!buffer_id) { | ||
| 992 | return CreateBuffer(cpu_addr, size); | ||
| 993 | } | ||
| 994 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 995 | if (buffer.IsInBounds(cpu_addr, size)) { | ||
| 996 | return buffer_id; | ||
| 997 | } | ||
| 998 | return CreateBuffer(cpu_addr, size); | ||
| 999 | } | ||
| 1000 | |||
| 1001 | template <class P> | ||
| 1002 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | ||
| 1003 | u32 wanted_size) { | ||
| 1004 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | ||
| 1005 | std::vector<BufferId> overlap_ids; | ||
| 1006 | VAddr begin = cpu_addr; | ||
| 1007 | VAddr end = cpu_addr + wanted_size; | ||
| 1008 | int stream_score = 0; | ||
| 1009 | bool has_stream_leap = false; | ||
| 1010 | for (; cpu_addr >> PAGE_BITS < Common::DivCeil(end, PAGE_SIZE); cpu_addr += PAGE_SIZE) { | ||
| 1011 | const BufferId overlap_id = page_table[cpu_addr >> PAGE_BITS]; | ||
| 1012 | if (!overlap_id) { | ||
| 1013 | continue; | ||
| 1014 | } | ||
| 1015 | Buffer& overlap = slot_buffers[overlap_id]; | ||
| 1016 | if (overlap.IsPicked()) { | ||
| 1017 | continue; | ||
| 1018 | } | ||
| 1019 | overlap_ids.push_back(overlap_id); | ||
| 1020 | overlap.Pick(); | ||
| 1021 | const VAddr overlap_cpu_addr = overlap.CpuAddr(); | ||
| 1022 | if (overlap_cpu_addr < begin) { | ||
| 1023 | cpu_addr = begin = overlap_cpu_addr; | ||
| 1024 | } | ||
| 1025 | end = std::max(end, overlap_cpu_addr + overlap.SizeBytes()); | ||
| 1026 | |||
| 1027 | stream_score += overlap.StreamScore(); | ||
| 1028 | if (stream_score > STREAM_LEAP_THRESHOLD && !has_stream_leap) { | ||
| 1029 | // When this memory region has been joined a bunch of times, we assume it's being used | ||
| 1030 | // as a stream buffer. Increase the size to skip constantly recreating buffers. | ||
| 1031 | has_stream_leap = true; | ||
| 1032 | end += PAGE_SIZE * 256; | ||
| 525 | } | 1033 | } |
| 526 | } | 1034 | } |
| 527 | 1035 | return OverlapResult{ | |
| 528 | void UnmarkRegionAsWritten(VAddr start, VAddr end) { | 1036 | .ids = std::move(overlap_ids), |
| 529 | const u64 page_end = end >> WRITE_PAGE_BIT; | 1037 | .begin = begin, |
| 530 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { | 1038 | .end = end, |
| 531 | auto it = written_pages.find(page_start); | 1039 | .has_stream_leap = has_stream_leap, |
| 532 | if (it != written_pages.end()) { | 1040 | }; |
| 533 | if (it->second > 1) { | 1041 | } |
| 534 | --it->second; | 1042 | |
| 535 | } else { | 1043 | template <class P> |
| 536 | written_pages.erase(it); | 1044 | void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, |
| 537 | } | 1045 | bool accumulate_stream_score) { |
| 538 | } | 1046 | Buffer& new_buffer = slot_buffers[new_buffer_id]; |
| 1047 | Buffer& overlap = slot_buffers[overlap_id]; | ||
| 1048 | if (accumulate_stream_score) { | ||
| 1049 | new_buffer.IncreaseStreamScore(overlap.StreamScore() + 1); | ||
| 1050 | } | ||
| 1051 | std::vector<BufferCopy> copies; | ||
| 1052 | const size_t dst_base_offset = overlap.CpuAddr() - new_buffer.CpuAddr(); | ||
| 1053 | overlap.ForEachDownloadRange([&](u64 begin, u64 range_size) { | ||
| 1054 | copies.push_back(BufferCopy{ | ||
| 1055 | .src_offset = begin, | ||
| 1056 | .dst_offset = dst_base_offset + begin, | ||
| 1057 | .size = range_size, | ||
| 1058 | }); | ||
| 1059 | new_buffer.UnmarkRegionAsCpuModified(begin, range_size); | ||
| 1060 | new_buffer.MarkRegionAsGpuModified(begin, range_size); | ||
| 1061 | }); | ||
| 1062 | if (!copies.empty()) { | ||
| 1063 | runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies); | ||
| 1064 | } | ||
| 1065 | ReplaceBufferDownloads(overlap_id, new_buffer_id); | ||
| 1066 | DeleteBuffer(overlap_id); | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | template <class P> | ||
| 1070 | BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { | ||
| 1071 | const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); | ||
| 1072 | const u32 size = static_cast<u32>(overlap.end - overlap.begin); | ||
| 1073 | const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); | ||
| 1074 | for (const BufferId overlap_id : overlap.ids) { | ||
| 1075 | JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); | ||
| 1076 | } | ||
| 1077 | Register(new_buffer_id); | ||
| 1078 | return new_buffer_id; | ||
| 1079 | } | ||
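CreateBuffer ties the two functions above together: ResolveOverlaps grows the wanted range until it covers every already-registered buffer it touches (padding the end by 256 pages once the accumulated stream score passes the threshold), and JoinOverlap then copies each old buffer's GPU-modified ranges into the new allocation before deleting it. The range-growing idea in isolation, with a plain list standing in for the page table and the stream heuristic omitted:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct FakeBuffer {
        std::uint64_t addr;
        std::uint64_t size;
    };

    // Grow [begin, end) until it covers every buffer it overlaps. Simplified: a linear scan
    // instead of the page table, and no stream-score padding.
    void GrowToCoverOverlaps(std::uint64_t& begin, std::uint64_t& end,
                             const std::vector<FakeBuffer>& buffers) {
        bool changed = true;
        while (changed) {
            changed = false;
            for (const FakeBuffer& buffer : buffers) {
                const std::uint64_t buffer_end = buffer.addr + buffer.size;
                if (buffer.addr < end && begin < buffer_end) { // overlaps the wanted range
                    const std::uint64_t new_begin = std::min(begin, buffer.addr);
                    const std::uint64_t new_end = std::max(end, buffer_end);
                    changed = changed || new_begin != begin || new_end != end;
                    begin = new_begin;
                    end = new_end;
                }
            }
        }
    }

    int main() {
        std::uint64_t begin = 0x1000;
        std::uint64_t end = 0x2000;
        GrowToCoverOverlaps(begin, end, {{0x0800, 0x1000}, {0x1f00, 0x400}});
        std::printf("merged range: [0x%llx, 0x%llx)\n", static_cast<unsigned long long>(begin),
                    static_cast<unsigned long long>(end)); // [0x800, 0x2300)
    }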
| 1080 | |||
| 1081 | template <class P> | ||
| 1082 | void BufferCache<P>::Register(BufferId buffer_id) { | ||
| 1083 | ChangeRegister<true>(buffer_id); | ||
| 1084 | } | ||
| 1085 | |||
| 1086 | template <class P> | ||
| 1087 | void BufferCache<P>::Unregister(BufferId buffer_id) { | ||
| 1088 | ChangeRegister<false>(buffer_id); | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | template <class P> | ||
| 1092 | template <bool insert> | ||
| 1093 | void BufferCache<P>::ChangeRegister(BufferId buffer_id) { | ||
| 1094 | const Buffer& buffer = slot_buffers[buffer_id]; | ||
| 1095 | const VAddr cpu_addr_begin = buffer.CpuAddr(); | ||
| 1096 | const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); | ||
| 1097 | const u64 page_begin = cpu_addr_begin / PAGE_SIZE; | ||
| 1098 | const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); | ||
| 1099 | for (u64 page = page_begin; page != page_end; ++page) { | ||
| 1100 | if constexpr (insert) { | ||
| 1101 | page_table[page] = buffer_id; | ||
| 1102 | } else { | ||
| 1103 | page_table[page] = BufferId{}; | ||
| 539 | } | 1104 | } |
| 540 | } | 1105 | } |
| 1106 | } | ||
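Register and Unregister mark every page a buffer spans in the page table; the final page index comes from DivCeil, so a buffer ending mid-page still owns that last page. A small check of the arithmetic, assuming 64 KiB cache pages (the actual PAGE_BITS value is defined elsewhere in this header):

    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t PAGE_BITS = 16; // assumption: 64 KiB cache pages
    constexpr std::uint64_t PAGE_SIZE = std::uint64_t{1} << PAGE_BITS;

    constexpr std::uint64_t DivCeil(std::uint64_t value, std::uint64_t divisor) {
        return (value + divisor - 1) / divisor;
    }

    int main() {
        const std::uint64_t cpu_addr_begin = 0x12345; // unaligned start inside page 1
        const std::uint64_t cpu_addr_end = cpu_addr_begin + 0x30000;
        const std::uint64_t page_begin = cpu_addr_begin / PAGE_SIZE;     // 1
        const std::uint64_t page_end = DivCeil(cpu_addr_end, PAGE_SIZE); // 5, so pages 1..4 are marked
        std::printf("pages [%llu, %llu)\n", static_cast<unsigned long long>(page_begin),
                    static_cast<unsigned long long>(page_end));
    }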
| 541 | 1107 | ||
| 542 | bool IsRegionWritten(VAddr start, VAddr end) const { | 1108 | template <class P> |
| 543 | const u64 page_end = end >> WRITE_PAGE_BIT; | 1109 | void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { |
| 544 | for (u64 page_start = start >> WRITE_PAGE_BIT; page_start <= page_end; ++page_start) { | 1110 | if (buffer.CpuAddr() == 0) { |
| 545 | if (written_pages.contains(page_start)) { | 1111 | return; |
| 546 | return true; | 1112 | } |
| 1113 | SynchronizeBufferImpl(buffer, cpu_addr, size); | ||
| 1114 | } | ||
| 1115 | |||
| 1116 | template <class P> | ||
| 1117 | void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) { | ||
| 1118 | boost::container::small_vector<BufferCopy, 4> copies; | ||
| 1119 | u64 total_size_bytes = 0; | ||
| 1120 | u64 largest_copy = 0; | ||
| 1121 | buffer.ForEachUploadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { | ||
| 1122 | copies.push_back(BufferCopy{ | ||
| 1123 | .src_offset = total_size_bytes, | ||
| 1124 | .dst_offset = range_offset, | ||
| 1125 | .size = range_size, | ||
| 1126 | }); | ||
| 1127 | total_size_bytes += range_size; | ||
| 1128 | largest_copy = std::max(largest_copy, range_size); | ||
| 1129 | }); | ||
| 1130 | if (total_size_bytes == 0) { | ||
| 1131 | return; | ||
| 1132 | } | ||
| 1133 | const std::span<BufferCopy> copies_span(copies.data(), copies.size()); | ||
| 1134 | UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); | ||
| 1135 | } | ||
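SynchronizeBufferImpl packs the pending upload ranges back to back: each copy's src_offset is the running byte total, so the data lands contiguously in a single staging allocation, while dst_offset keeps the range's position inside the destination buffer. A toy version of that packing with the range source reduced to a plain vector:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Copy {
        std::uint64_t src_offset; // offset into the packed staging data
        std::uint64_t dst_offset; // offset inside the destination buffer
        std::uint64_t size;
    };

    int main() {
        // Hypothetical CPU-modified ranges inside one buffer: (offset, size) pairs.
        const std::vector<std::pair<std::uint64_t, std::uint64_t>> ranges{{0x100, 0x40}, {0x800, 0x20}};

        std::vector<Copy> copies;
        std::uint64_t total_size_bytes = 0;
        std::uint64_t largest_copy = 0;
        for (const auto& [offset, size] : ranges) {
            copies.push_back({total_size_bytes, offset, size}); // pack ranges back to back
            total_size_bytes += size;
            largest_copy = std::max(largest_copy, size);
        }
        for (const Copy& copy : copies) {
            std::printf("staging +0x%llx -> buffer +0x%llx (0x%llx bytes)\n",
                        static_cast<unsigned long long>(copy.src_offset),
                        static_cast<unsigned long long>(copy.dst_offset),
                        static_cast<unsigned long long>(copy.size));
        }
        std::printf("staging allocation: 0x%llx bytes, largest copy 0x%llx bytes\n",
                    static_cast<unsigned long long>(total_size_bytes),
                    static_cast<unsigned long long>(largest_copy));
    }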
| 1136 | |||
| 1137 | template <class P> | ||
| 1138 | void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, | ||
| 1139 | std::span<BufferCopy> copies) { | ||
| 1140 | if constexpr (USE_MEMORY_MAPS) { | ||
| 1141 | MappedUploadMemory(buffer, total_size_bytes, copies); | ||
| 1142 | } else { | ||
| 1143 | ImmediateUploadMemory(buffer, largest_copy, copies); | ||
| 1144 | } | ||
| 1145 | } | ||
| 1146 | |||
| 1147 | template <class P> | ||
| 1148 | void BufferCache<P>::ImmediateUploadMemory(Buffer& buffer, u64 largest_copy, | ||
| 1149 | std::span<const BufferCopy> copies) { | ||
| 1150 | std::span<u8> immediate_buffer; | ||
| 1151 | for (const BufferCopy& copy : copies) { | ||
| 1152 | std::span<const u8> upload_span; | ||
| 1153 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1154 | if (IsRangeGranular(cpu_addr, copy.size)) { | ||
| 1155 | upload_span = std::span(cpu_memory.GetPointer(cpu_addr), copy.size); | ||
| 1156 | } else { | ||
| 1157 | if (immediate_buffer.empty()) { | ||
| 1158 | immediate_buffer = ImmediateBuffer(largest_copy); | ||
| 547 | } | 1159 | } |
| 1160 | cpu_memory.ReadBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); | ||
| 1161 | upload_span = immediate_buffer.subspan(0, copy.size); | ||
| 548 | } | 1162 | } |
| 549 | return false; | 1163 | buffer.ImmediateUpload(copy.dst_offset, upload_span); |
| 550 | } | 1164 | } |
| 551 | 1165 | } | |
| 552 | void QueueDestruction(std::shared_ptr<Buffer> buffer) { | 1166 | |
| 553 | buffer->SetEpoch(epoch); | 1167 | template <class P> |
| 554 | pending_destruction.push(std::move(buffer)); | 1168 | void BufferCache<P>::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, |
| 1169 | std::span<BufferCopy> copies) { | ||
| 1170 | auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); | ||
| 1171 | const std::span<u8> staging_pointer = upload_staging.mapped_span; | ||
| 1172 | for (BufferCopy& copy : copies) { | ||
| 1173 | u8* const src_pointer = staging_pointer.data() + copy.src_offset; | ||
| 1174 | const VAddr cpu_addr = buffer.CpuAddr() + copy.dst_offset; | ||
| 1175 | cpu_memory.ReadBlockUnsafe(cpu_addr, src_pointer, copy.size); | ||
| 1176 | |||
| 1177 | // Apply the staging offset | ||
| 1178 | copy.src_offset += upload_staging.offset; | ||
| 555 | } | 1179 | } |
| 556 | 1180 | runtime.CopyBuffer(buffer, upload_staging.buffer, copies); | |
| 557 | void MarkForAsyncFlush(MapInterval* map) { | 1181 | } |
| 558 | if (!uncommitted_flushes) { | 1182 | |
| 559 | uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>(); | 1183 | template <class P> |
| 1184 | void BufferCache<P>::DeleteBuffer(BufferId buffer_id) { | ||
| 1185 | const auto scalar_replace = [buffer_id](Binding& binding) { | ||
| 1186 | if (binding.buffer_id == buffer_id) { | ||
| 1187 | binding.buffer_id = BufferId{}; | ||
| 1188 | } | ||
| 1189 | }; | ||
| 1190 | const auto replace = [scalar_replace](std::span<Binding> bindings) { | ||
| 1191 | std::ranges::for_each(bindings, scalar_replace); | ||
| 1192 | }; | ||
| 1193 | scalar_replace(index_buffer); | ||
| 1194 | replace(vertex_buffers); | ||
| 1195 | std::ranges::for_each(uniform_buffers, replace); | ||
| 1196 | std::ranges::for_each(storage_buffers, replace); | ||
| 1197 | replace(transform_feedback_buffers); | ||
| 1198 | replace(compute_uniform_buffers); | ||
| 1199 | replace(compute_storage_buffers); | ||
| 1200 | std::erase(cached_write_buffer_ids, buffer_id); | ||
| 1201 | |||
| 1202 | // Mark the whole buffer as CPU written to stop tracking CPU writes | ||
| 1203 | Buffer& buffer = slot_buffers[buffer_id]; | ||
| 1204 | buffer.MarkRegionAsCpuModified(buffer.CpuAddr(), buffer.SizeBytes()); | ||
| 1205 | |||
| 1206 | Unregister(buffer_id); | ||
| 1207 | delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); | ||
| 1208 | |||
| 1209 | NotifyBufferDeletion(); | ||
| 1210 | } | ||
| 1211 | |||
| 1212 | template <class P> | ||
| 1213 | void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) { | ||
| 1214 | const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) { | ||
| 1215 | std::ranges::replace(buffers, old_buffer_id, new_buffer_id); | ||
| 1216 | if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) { | ||
| 1217 | buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end()); | ||
| 560 | } | 1218 | } |
| 561 | uncommitted_flushes->insert(map); | 1219 | }; |
| 1220 | replace(uncommitted_downloads); | ||
| 1221 | std::ranges::for_each(committed_downloads, replace); | ||
| 1222 | } | ||
| 1223 | |||
| 1224 | template <class P> | ||
| 1225 | void BufferCache<P>::NotifyBufferDeletion() { | ||
| 1226 | if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { | ||
| 1227 | dirty_uniform_buffers.fill(~u32{0}); | ||
| 562 | } | 1228 | } |
| 1229 | auto& flags = maxwell3d.dirty.flags; | ||
| 1230 | flags[Dirty::IndexBuffer] = true; | ||
| 1231 | flags[Dirty::VertexBuffers] = true; | ||
| 1232 | for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { | ||
| 1233 | flags[Dirty::VertexBuffer0 + index] = true; | ||
| 1234 | } | ||
| 1235 | has_deleted_buffers = true; | ||
| 1236 | } | ||
| 1237 | |||
| 1238 | template <class P> | ||
| 1239 | typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const { | ||
| 1240 | const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr); | ||
| 1241 | const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8); | ||
| 1242 | const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1243 | if (!cpu_addr || size == 0) { | ||
| 1244 | return NULL_BINDING; | ||
| 1245 | } | ||
| 1246 | // HACK(Rodrigo): This is the number of bytes bound in host beyond the guest API's range. | ||
| 1247 | // It exists because some games, like Astral Chain, operate out of bounds. | ||
| 1248 | // Binding the whole map range would be technically correct, but games have large maps that make | ||
| 1249 | // this approach unaffordable for now. | ||
| 1250 | static constexpr u32 arbitrary_extra_bytes = 0xc000; | ||
| 1251 | const u32 bytes_to_map_end = static_cast<u32>(gpu_memory.BytesToMapEnd(gpu_addr)); | ||
| 1252 | const Binding binding{ | ||
| 1253 | .cpu_addr = *cpu_addr, | ||
| 1254 | .size = std::min(size + arbitrary_extra_bytes, bytes_to_map_end), | ||
| 1255 | .buffer_id = BufferId{}, | ||
| 1256 | }; | ||
| 1257 | return binding; | ||
| 1258 | } | ||
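StorageBufferBinding reads the SSBO descriptor that the guest wrote into its constant buffer: a 64-bit GPU address followed by a 32-bit size 8 bytes later. A sketch of parsing that layout from raw bytes, following the framing implied by the two Read calls above:

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    struct SsboDescriptor {
        std::uint64_t gpu_addr;
        std::uint32_t size;
    };

    // Parse the descriptor exactly as it is read above: a u64 address at +0 and a u32 size at +8.
    SsboDescriptor ParseSsboDescriptor(const std::uint8_t* const_buffer_bytes, std::size_t offset) {
        SsboDescriptor desc{};
        std::memcpy(&desc.gpu_addr, const_buffer_bytes + offset, sizeof(desc.gpu_addr));
        std::memcpy(&desc.size, const_buffer_bytes + offset + 8, sizeof(desc.size));
        return desc;
    }

    int main() {
        std::uint8_t raw[16]{};
        const std::uint64_t addr = 0x0001'2340'0000;
        const std::uint32_t size = 0x1000;
        std::memcpy(raw + 0, &addr, sizeof(addr));
        std::memcpy(raw + 8, &size, sizeof(size));
        const SsboDescriptor desc = ParseSsboDescriptor(raw, 0);
        std::printf("addr=0x%llx size=0x%x\n", static_cast<unsigned long long>(desc.gpu_addr),
                    desc.size);
    }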
| 1259 | |||
| 1260 | template <class P> | ||
| 1261 | std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { | ||
| 1262 | u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); | ||
| 1263 | if (IsRangeGranular(cpu_addr, size) || | ||
| 1264 | base_pointer + size == cpu_memory.GetPointer(cpu_addr + size)) { | ||
| 1265 | return std::span(base_pointer, size); | ||
| 1266 | } else { | ||
| 1267 | const std::span<u8> span = ImmediateBuffer(size); | ||
| 1268 | cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); | ||
| 1269 | return span; | ||
| 1270 | } | ||
| 1271 | } | ||
| 563 | 1272 | ||
| 564 | VideoCore::RasterizerInterface& rasterizer; | 1273 | template <class P> |
| 565 | Tegra::MemoryManager& gpu_memory; | 1274 | std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) { |
| 566 | Core::Memory::Memory& cpu_memory; | 1275 | if (wanted_capacity > immediate_buffer_capacity) { |
| 567 | StreamBuffer& stream_buffer; | 1276 | immediate_buffer_capacity = wanted_capacity; |
| 568 | 1277 | immediate_buffer_alloc = std::make_unique<u8[]>(wanted_capacity); | |
| 569 | u8* buffer_ptr = nullptr; | 1278 | } |
| 570 | u64 buffer_offset = 0; | 1279 | return std::span<u8>(immediate_buffer_alloc.get(), wanted_capacity); |
| 571 | u64 buffer_offset_base = 0; | 1280 | } |
| 572 | 1281 | ||
| 573 | MapIntervalAllocator mapped_addresses_allocator; | 1282 | template <class P> |
| 574 | boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> | 1283 | bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept { |
| 575 | mapped_addresses; | 1284 | if constexpr (IS_OPENGL) { |
| 576 | 1285 | return ((fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0; | |
| 577 | std::unordered_map<u64, u32> written_pages; | 1286 | } else { |
| 578 | std::unordered_map<u64, std::shared_ptr<Buffer>> blocks; | 1287 | // Only OpenGL has fast uniform buffers |
| 579 | 1288 | return false; | |
| 580 | std::queue<std::shared_ptr<Buffer>> pending_destruction; | 1289 | } |
| 581 | u64 epoch = 0; | 1290 | } |
| 582 | u64 modified_ticks = 0; | ||
| 583 | |||
| 584 | std::vector<u8> staging_buffer; | ||
| 585 | |||
| 586 | std::list<MapInterval*> marked_for_unregister; | ||
| 587 | |||
| 588 | std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes; | ||
| 589 | std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes; | ||
| 590 | |||
| 591 | std::recursive_mutex mutex; | ||
| 592 | }; | ||
| 593 | 1291 | ||
| 594 | } // namespace VideoCommon | 1292 | } // namespace VideoCommon |
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp deleted file mode 100644 index 62587e18a..000000000 --- a/src/video_core/buffer_cache/map_interval.cpp +++ /dev/null | |||
| @@ -1,33 +0,0 @@ | |||
| 1 | // Copyright 2020 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include <algorithm> | ||
| 6 | #include <array> | ||
| 7 | #include <cstddef> | ||
| 8 | #include <memory> | ||
| 9 | |||
| 10 | #include "video_core/buffer_cache/map_interval.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | MapIntervalAllocator::MapIntervalAllocator() { | ||
| 15 | FillFreeList(first_chunk); | ||
| 16 | } | ||
| 17 | |||
| 18 | MapIntervalAllocator::~MapIntervalAllocator() = default; | ||
| 19 | |||
| 20 | void MapIntervalAllocator::AllocateNewChunk() { | ||
| 21 | *new_chunk = std::make_unique<Chunk>(); | ||
| 22 | FillFreeList(**new_chunk); | ||
| 23 | new_chunk = &(*new_chunk)->next; | ||
| 24 | } | ||
| 25 | |||
| 26 | void MapIntervalAllocator::FillFreeList(Chunk& chunk) { | ||
| 27 | const std::size_t old_size = free_list.size(); | ||
| 28 | free_list.resize(old_size + chunk.data.size()); | ||
| 29 | std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, | ||
| 30 | [](MapInterval& interval) { return &interval; }); | ||
| 31 | } | ||
| 32 | |||
| 33 | } // namespace VideoCommon | ||
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h deleted file mode 100644 index ef974b08a..000000000 --- a/src/video_core/buffer_cache/map_interval.h +++ /dev/null | |||
| @@ -1,93 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <array> | ||
| 8 | #include <cstddef> | ||
| 9 | #include <memory> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <boost/intrusive/set_hook.hpp> | ||
| 13 | |||
| 14 | #include "common/common_types.h" | ||
| 15 | #include "video_core/gpu.h" | ||
| 16 | |||
| 17 | namespace VideoCommon { | ||
| 18 | |||
| 19 | struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> { | ||
| 20 | MapInterval() = default; | ||
| 21 | |||
| 22 | /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} | ||
| 23 | |||
| 24 | explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept | ||
| 25 | : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} | ||
| 26 | |||
| 27 | bool IsInside(VAddr other_start, VAddr other_end) const noexcept { | ||
| 28 | return start <= other_start && other_end <= end; | ||
| 29 | } | ||
| 30 | |||
| 31 | bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { | ||
| 32 | return start < other_end && other_start < end; | ||
| 33 | } | ||
| 34 | |||
| 35 | void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { | ||
| 36 | is_modified = is_modified_; | ||
| 37 | ticks = ticks_; | ||
| 38 | } | ||
| 39 | |||
| 40 | boost::intrusive::set_member_hook<> member_hook_; | ||
| 41 | VAddr start = 0; | ||
| 42 | VAddr end = 0; | ||
| 43 | GPUVAddr gpu_addr = 0; | ||
| 44 | u64 ticks = 0; | ||
| 45 | bool is_written = false; | ||
| 46 | bool is_modified = false; | ||
| 47 | bool is_registered = false; | ||
| 48 | bool is_memory_marked = false; | ||
| 49 | bool is_sync_pending = false; | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct MapIntervalCompare { | ||
| 53 | constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { | ||
| 54 | return lhs.start < rhs.start; | ||
| 55 | } | ||
| 56 | }; | ||
| 57 | |||
| 58 | class MapIntervalAllocator { | ||
| 59 | public: | ||
| 60 | MapIntervalAllocator(); | ||
| 61 | ~MapIntervalAllocator(); | ||
| 62 | |||
| 63 | MapInterval* Allocate() { | ||
| 64 | if (free_list.empty()) { | ||
| 65 | AllocateNewChunk(); | ||
| 66 | } | ||
| 67 | MapInterval* const interval = free_list.back(); | ||
| 68 | free_list.pop_back(); | ||
| 69 | return interval; | ||
| 70 | } | ||
| 71 | |||
| 72 | void Release(MapInterval* interval) { | ||
| 73 | free_list.push_back(interval); | ||
| 74 | } | ||
| 75 | |||
| 76 | private: | ||
| 77 | struct Chunk { | ||
| 78 | std::unique_ptr<Chunk> next; | ||
| 79 | std::array<MapInterval, 0x8000> data; | ||
| 80 | }; | ||
| 81 | |||
| 82 | void AllocateNewChunk(); | ||
| 83 | |||
| 84 | void FillFreeList(Chunk& chunk); | ||
| 85 | |||
| 86 | std::vector<MapInterval*> free_list; | ||
| 87 | |||
| 88 | Chunk first_chunk; | ||
| 89 | |||
| 90 | std::unique_ptr<Chunk>* new_chunk = &first_chunk.next; | ||
| 91 | }; | ||
| 92 | |||
| 93 | } // namespace VideoCommon | ||
diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/command_classes/vic.cpp index 55e632346..2b7569335 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/command_classes/vic.cpp | |||
| @@ -110,12 +110,10 @@ void Vic::Execute() { | |||
| 110 | converted_frame_buffer.get(), block_height, 0, 0); | 110 | converted_frame_buffer.get(), block_height, 0, 0); |
| 111 | 111 | ||
| 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); | 112 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size); |
| 113 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 114 | } else { | 113 | } else { |
| 115 | // send pitch linear frame | 114 | // send pitch linear frame |
| 116 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, | 115 | gpu.MemoryManager().WriteBlock(output_surface_luma_address, converted_frame_buf_addr, |
| 117 | linear_size); | 116 | linear_size); |
| 118 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 119 | } | 117 | } |
| 120 | break; | 118 | break; |
| 121 | } | 119 | } |
| @@ -163,7 +161,6 @@ void Vic::Execute() { | |||
| 163 | } | 161 | } |
| 164 | gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), | 162 | gpu.MemoryManager().WriteBlock(output_surface_chroma_u_address, chroma_buffer.data(), |
| 165 | chroma_buffer.size()); | 163 | chroma_buffer.size()); |
| 166 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 167 | break; | 164 | break; |
| 168 | } | 165 | } |
| 169 | default: | 166 | default: |
diff --git a/src/video_core/dirty_flags.cpp b/src/video_core/dirty_flags.cpp index b1eaac00c..7149af290 100644 --- a/src/video_core/dirty_flags.cpp +++ b/src/video_core/dirty_flags.cpp | |||
| @@ -12,13 +12,30 @@ | |||
| 12 | #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) | 12 | #define NUM(field_name) (sizeof(::Tegra::Engines::Maxwell3D::Regs::field_name) / (sizeof(u32))) |
| 13 | 13 | ||
| 14 | namespace VideoCommon::Dirty { | 14 | namespace VideoCommon::Dirty { |
| 15 | 15 | namespace { | |
| 16 | using Tegra::Engines::Maxwell3D; | 16 | using Tegra::Engines::Maxwell3D; |
| 17 | 17 | ||
| 18 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) { | 18 | void SetupDirtyVertexBuffers(Maxwell3D::DirtyState::Tables& tables) { |
| 19 | static constexpr std::size_t num_array = 3; | ||
| 20 | for (std::size_t i = 0; i < Maxwell3D::Regs::NumVertexArrays; ++i) { | ||
| 21 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | ||
| 22 | const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||
| 23 | |||
| 24 | FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||
| 25 | FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); | ||
| 26 | } | ||
| 27 | } | ||
| 28 | |||
| 29 | void SetupIndexBuffer(Maxwell3D::DirtyState::Tables& tables) { | ||
| 30 | FillBlock(tables[0], OFF(index_array), NUM(index_array), IndexBuffer); | ||
| 31 | } | ||
| 32 | |||
| 33 | void SetupDirtyDescriptors(Maxwell3D::DirtyState::Tables& tables) { | ||
| 19 | FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); | 34 | FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors); |
| 20 | FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); | 35 | FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors); |
| 36 | } | ||
| 21 | 37 | ||
| 38 | void SetupDirtyRenderTargets(Maxwell3D::DirtyState::Tables& tables) { | ||
| 22 | static constexpr std::size_t num_per_rt = NUM(rt[0]); | 39 | static constexpr std::size_t num_per_rt = NUM(rt[0]); |
| 23 | static constexpr std::size_t begin = OFF(rt); | 40 | static constexpr std::size_t begin = OFF(rt); |
| 24 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; | 41 | static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets; |
| @@ -41,5 +58,13 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl | |||
| 41 | FillBlock(table, OFF(zeta), NUM(zeta), flag); | 58 | FillBlock(table, OFF(zeta), NUM(zeta), flag); |
| 42 | } | 59 | } |
| 43 | } | 60 | } |
| 61 | } // Anonymous namespace | ||
| 62 | |||
| 63 | void SetupDirtyFlags(Maxwell3D::DirtyState::Tables& tables) { | ||
| 64 | SetupDirtyVertexBuffers(tables); | ||
| 65 | SetupIndexBuffer(tables); | ||
| 66 | SetupDirtyDescriptors(tables); | ||
| 67 | SetupDirtyRenderTargets(tables); | ||
| 68 | } | ||
| 44 | 69 | ||
| 45 | } // namespace VideoCommon::Dirty | 70 | } // namespace VideoCommon::Dirty |
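SetupDirtyFlags now aggregates the vertex-buffer, index-buffer, descriptor and render-target setup into a single entry point, and each Setup* helper fills a register-indexed table with a dirty-flag id. A rough sketch of the table mechanism follows, assuming a tiny register file and u8 flag ids; the real tables and FillBlock belong to Maxwell3D's dirty state, which is not shown in this hunk.

```cpp
#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <iostream>

constexpr std::size_t NUM_REGS = 64; // illustrative; the real register file is much larger
constexpr std::uint8_t VertexBuffers = 1;
constexpr std::uint8_t IndexBuffer = 2;

using Table = std::array<std::uint8_t, NUM_REGS>;

// Mark every register in [begin, begin + count) as owned by `flag`.
void FillBlock(Table& table, std::size_t begin, std::size_t count, std::uint8_t flag) {
    for (std::size_t i = 0; i < count; ++i) {
        table[begin + i] = flag;
    }
}

int main() {
    Table table{};
    std::bitset<256> flags;

    FillBlock(table, 8, 4, VertexBuffers); // e.g. vertex_array registers
    FillBlock(table, 20, 2, IndexBuffer);  // e.g. index_array registers

    // On a register write, the engine sets whatever flag the table points at.
    const std::size_t written_reg = 9;
    flags[table[written_reg]] = true;

    std::cout << "VertexBuffers dirty: " << flags[VertexBuffers] << '\n'; // 1
    std::cout << "IndexBuffer dirty:   " << flags[IndexBuffer] << '\n';   // 0
}
```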
diff --git a/src/video_core/dirty_flags.h b/src/video_core/dirty_flags.h index 875527ddd..702688ace 100644 --- a/src/video_core/dirty_flags.h +++ b/src/video_core/dirty_flags.h | |||
| @@ -30,6 +30,12 @@ enum : u8 { | |||
| 30 | ColorBuffer7, | 30 | ColorBuffer7, |
| 31 | ZetaBuffer, | 31 | ZetaBuffer, |
| 32 | 32 | ||
| 33 | VertexBuffers, | ||
| 34 | VertexBuffer0, | ||
| 35 | VertexBuffer31 = VertexBuffer0 + 31, | ||
| 36 | |||
| 37 | IndexBuffer, | ||
| 38 | |||
| 33 | LastCommonEntry, | 39 | LastCommonEntry, |
| 34 | }; | 40 | }; |
| 35 | 41 | ||
| @@ -47,6 +53,6 @@ void FillBlock(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables, std::size_ | |||
| 47 | FillBlock(tables[1], begin, num, index_b); | 53 | FillBlock(tables[1], begin, num, index_b); |
| 48 | } | 54 | } |
| 49 | 55 | ||
| 50 | void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); | 56 | void SetupDirtyFlags(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables); |
| 51 | 57 | ||
| 52 | } // namespace VideoCommon::Dirty | 58 | } // namespace VideoCommon::Dirty |
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 2c8b20024..8b33c04ab 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp | |||
| @@ -23,8 +23,6 @@ void DmaPusher::DispatchCalls() { | |||
| 23 | MICROPROFILE_SCOPE(DispatchCalls); | 23 | MICROPROFILE_SCOPE(DispatchCalls); |
| 24 | 24 | ||
| 25 | gpu.SyncGuestHost(); | 25 | gpu.SyncGuestHost(); |
| 26 | // On entering GPU code, assume all memory may be touched by the ARM core. | ||
| 27 | gpu.Maxwell3D().OnMemoryWrite(); | ||
| 28 | 26 | ||
| 29 | dma_pushbuffer_subindex = 0; | 27 | dma_pushbuffer_subindex = 0; |
| 30 | 28 | ||
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index a01d334ad..0f640fdae 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp | |||
| @@ -18,8 +18,8 @@ Fermi2D::Fermi2D() { | |||
| 18 | 18 | ||
| 19 | Fermi2D::~Fermi2D() = default; | 19 | Fermi2D::~Fermi2D() = default; |
| 20 | 20 | ||
| 21 | void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | 21 | void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { |
| 22 | rasterizer = &rasterizer_; | 22 | rasterizer = rasterizer_; |
| 23 | } | 23 | } |
| 24 | 24 | ||
| 25 | void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | 25 | void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 0de3280a2..c808a577d 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h | |||
| @@ -38,7 +38,7 @@ public: | |||
| 38 | ~Fermi2D(); | 38 | ~Fermi2D(); |
| 39 | 39 | ||
| 40 | /// Binds a rasterizer to this engine. | 40 | /// Binds a rasterizer to this engine. |
| 41 | void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | 41 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 42 | 42 | ||
| 43 | /// Write the value to the register identified by method. | 43 | /// Write the value to the register identified by method. |
| 44 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; | 44 | void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; |
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index ba387506e..a9b75091e 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp | |||
| @@ -21,8 +21,8 @@ KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manage | |||
| 21 | 21 | ||
| 22 | KeplerCompute::~KeplerCompute() = default; | 22 | KeplerCompute::~KeplerCompute() = default; |
| 23 | 23 | ||
| 24 | void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | 24 | void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { |
| 25 | rasterizer = &rasterizer_; | 25 | rasterizer = rasterizer_; |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | 28 | void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { |
| @@ -39,7 +39,6 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal | |||
| 39 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { | 39 | case KEPLER_COMPUTE_REG_INDEX(data_upload): { |
| 40 | upload_state.ProcessData(method_argument, is_last_call); | 40 | upload_state.ProcessData(method_argument, is_last_call); |
| 41 | if (is_last_call) { | 41 | if (is_last_call) { |
| 42 | system.GPU().Maxwell3D().OnMemoryWrite(); | ||
| 43 | } | 42 | } |
| 44 | break; | 43 | break; |
| 45 | } | 44 | } |
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 9f0a7b76d..7c40cba38 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h | |||
| @@ -46,7 +46,7 @@ public: | |||
| 46 | ~KeplerCompute(); | 46 | ~KeplerCompute(); |
| 47 | 47 | ||
| 48 | /// Binds a rasterizer to this engine. | 48 | /// Binds a rasterizer to this engine. |
| 49 | void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | 49 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 50 | 50 | ||
| 51 | static constexpr std::size_t NumConstBuffers = 8; | 51 | static constexpr std::size_t NumConstBuffers = 8; |
| 52 | 52 | ||
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 9911140e9..560551157 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp | |||
| @@ -33,7 +33,6 @@ void KeplerMemory::CallMethod(u32 method, u32 method_argument, bool is_last_call | |||
| 33 | case KEPLERMEMORY_REG_INDEX(data): { | 33 | case KEPLERMEMORY_REG_INDEX(data): { |
| 34 | upload_state.ProcessData(method_argument, is_last_call); | 34 | upload_state.ProcessData(method_argument, is_last_call); |
| 35 | if (is_last_call) { | 35 | if (is_last_call) { |
| 36 | system.GPU().Maxwell3D().OnMemoryWrite(); | ||
| 37 | } | 36 | } |
| 38 | break; | 37 | break; |
| 39 | } | 38 | } |
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 116ad1722..75517a4f7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -30,8 +30,8 @@ Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) | |||
| 30 | 30 | ||
| 31 | Maxwell3D::~Maxwell3D() = default; | 31 | Maxwell3D::~Maxwell3D() = default; |
| 32 | 32 | ||
| 33 | void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | 33 | void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { |
| 34 | rasterizer = &rasterizer_; | 34 | rasterizer = rasterizer_; |
| 35 | } | 35 | } |
| 36 | 36 | ||
| 37 | void Maxwell3D::InitializeRegisterDefaults() { | 37 | void Maxwell3D::InitializeRegisterDefaults() { |
| @@ -223,7 +223,6 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
| 223 | case MAXWELL3D_REG_INDEX(data_upload): | 223 | case MAXWELL3D_REG_INDEX(data_upload): |
| 224 | upload_state.ProcessData(argument, is_last_call); | 224 | upload_state.ProcessData(argument, is_last_call); |
| 225 | if (is_last_call) { | 225 | if (is_last_call) { |
| 226 | OnMemoryWrite(); | ||
| 227 | } | 226 | } |
| 228 | return; | 227 | return; |
| 229 | case MAXWELL3D_REG_INDEX(fragment_barrier): | 228 | case MAXWELL3D_REG_INDEX(fragment_barrier): |
| @@ -570,17 +569,18 @@ std::optional<u64> Maxwell3D::GetQueryResult() { | |||
| 570 | } | 569 | } |
| 571 | } | 570 | } |
| 572 | 571 | ||
| 573 | void Maxwell3D::ProcessCBBind(std::size_t stage_index) { | 572 | void Maxwell3D::ProcessCBBind(size_t stage_index) { |
| 574 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. | 573 | // Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage. |
| 575 | auto& shader = state.shader_stages[stage_index]; | 574 | const auto& bind_data = regs.cb_bind[stage_index]; |
| 576 | auto& bind_data = regs.cb_bind[stage_index]; | 575 | auto& buffer = state.shader_stages[stage_index].const_buffers[bind_data.index]; |
| 577 | |||
| 578 | ASSERT(bind_data.index < Regs::MaxConstBuffers); | ||
| 579 | auto& buffer = shader.const_buffers[bind_data.index]; | ||
| 580 | |||
| 581 | buffer.enabled = bind_data.valid.Value() != 0; | 576 | buffer.enabled = bind_data.valid.Value() != 0; |
| 582 | buffer.address = regs.const_buffer.BufferAddress(); | 577 | buffer.address = regs.const_buffer.BufferAddress(); |
| 583 | buffer.size = regs.const_buffer.cb_size; | 578 | buffer.size = regs.const_buffer.cb_size; |
| 579 | |||
| 580 | const bool is_enabled = bind_data.valid.Value() != 0; | ||
| 581 | const GPUVAddr gpu_addr = is_enabled ? regs.const_buffer.BufferAddress() : 0; | ||
| 582 | const u32 size = is_enabled ? regs.const_buffer.cb_size : 0; | ||
| 583 | rasterizer->BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size); | ||
| 584 | } | 584 | } |
| 585 | 585 | ||
| 586 | void Maxwell3D::ProcessCBData(u32 value) { | 586 | void Maxwell3D::ProcessCBData(u32 value) { |
| @@ -635,7 +635,6 @@ void Maxwell3D::FinishCBData() { | |||
| 635 | 635 | ||
| 636 | const u32 id = cb_data_state.id; | 636 | const u32 id = cb_data_state.id; |
| 637 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); | 637 | memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size); |
| 638 | OnMemoryWrite(); | ||
| 639 | 638 | ||
| 640 | cb_data_state.id = null_cb_data; | 639 | cb_data_state.id = null_cb_data; |
| 641 | cb_data_state.current = null_cb_data; | 640 | cb_data_state.current = null_cb_data; |
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 002d1b3f9..ffed42a29 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h | |||
| @@ -55,7 +55,7 @@ public: | |||
| 55 | ~Maxwell3D(); | 55 | ~Maxwell3D(); |
| 56 | 56 | ||
| 57 | /// Binds a rasterizer to this engine. | 57 | /// Binds a rasterizer to this engine. |
| 58 | void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | 58 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 59 | 59 | ||
| 60 | /// Register structure of the Maxwell3D engine. | 60 | /// Register structure of the Maxwell3D engine. |
| 61 | /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. | 61 | /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. |
| @@ -1314,8 +1314,7 @@ public: | |||
| 1314 | 1314 | ||
| 1315 | GPUVAddr LimitAddress() const { | 1315 | GPUVAddr LimitAddress() const { |
| 1316 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | | 1316 | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(limit_high) << 32) | |
| 1317 | limit_low) + | 1317 | limit_low); |
| 1318 | 1; | ||
| 1319 | } | 1318 | } |
| 1320 | } vertex_array_limit[NumVertexArrays]; | 1319 | } vertex_array_limit[NumVertexArrays]; |
| 1321 | 1320 | ||
| @@ -1403,6 +1402,7 @@ public: | |||
| 1403 | }; | 1402 | }; |
| 1404 | 1403 | ||
| 1405 | std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; | 1404 | std::array<ShaderStageInfo, Regs::MaxShaderStage> shader_stages; |
| 1405 | |||
| 1406 | u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. | 1406 | u32 current_instance = 0; ///< Current instance to be used to simulate instanced rendering. |
| 1407 | }; | 1407 | }; |
| 1408 | 1408 | ||
| @@ -1452,11 +1452,6 @@ public: | |||
| 1452 | return *rasterizer; | 1452 | return *rasterizer; |
| 1453 | } | 1453 | } |
| 1454 | 1454 | ||
| 1455 | /// Notify a memory write has happened. | ||
| 1456 | void OnMemoryWrite() { | ||
| 1457 | dirty.flags |= dirty.on_write_stores; | ||
| 1458 | } | ||
| 1459 | |||
| 1460 | enum class MMEDrawMode : u32 { | 1455 | enum class MMEDrawMode : u32 { |
| 1461 | Undefined, | 1456 | Undefined, |
| 1462 | Array, | 1457 | Array, |
| @@ -1478,7 +1473,6 @@ public: | |||
| 1478 | using Tables = std::array<Table, 2>; | 1473 | using Tables = std::array<Table, 2>; |
| 1479 | 1474 | ||
| 1480 | Flags flags; | 1475 | Flags flags; |
| 1481 | Flags on_write_stores; | ||
| 1482 | Tables tables{}; | 1476 | Tables tables{}; |
| 1483 | } dirty; | 1477 | } dirty; |
| 1484 | 1478 | ||
| @@ -1541,7 +1535,7 @@ private: | |||
| 1541 | void FinishCBData(); | 1535 | void FinishCBData(); |
| 1542 | 1536 | ||
| 1543 | /// Handles a write to the CB_BIND register. | 1537 | /// Handles a write to the CB_BIND register. |
| 1544 | void ProcessCBBind(std::size_t stage_index); | 1538 | void ProcessCBBind(size_t stage_index); |
| 1545 | 1539 | ||
| 1546 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. | 1540 | /// Handles a write to the VERTEX_END_GL register, triggering a draw. |
| 1547 | void DrawArrays(); | 1541 | void DrawArrays(); |
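With this change ProcessCBBind no longer only records the constant buffer in shadow state; it also notifies the rasterizer of the binding, passing a null address and size when the bind is disabled so the backend can unbind the slot. A condensed sketch of that flow, with the register view and rasterizer interface trimmed down to stand-ins (the member names mirror the diff, the surrounding types do not):

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>

using GPUVAddr = std::uint64_t;

// Minimal stand-in for VideoCore::RasterizerInterface.
struct RasterizerStub {
    void BindGraphicsUniformBuffer(std::size_t stage, std::uint32_t index, GPUVAddr addr,
                                   std::uint32_t size) {
        std::cout << "stage " << stage << " cbuf " << index << " -> 0x" << std::hex << addr
                  << std::dec << " (" << size << " bytes)\n";
    }
};

struct BindData {
    std::uint32_t index = 0;
    bool valid = false;
};

struct ConstBufferRegs {
    GPUVAddr address = 0;
    std::uint32_t cb_size = 0;
};

void ProcessCBBind(RasterizerStub& rasterizer, std::size_t stage_index, const BindData& bind_data,
                   const ConstBufferRegs& const_buffer) {
    // Disabled binds are forwarded as an empty range so the backend can release the slot.
    const bool is_enabled = bind_data.valid;
    const GPUVAddr gpu_addr = is_enabled ? const_buffer.address : 0;
    const std::uint32_t size = is_enabled ? const_buffer.cb_size : 0;
    rasterizer.BindGraphicsUniformBuffer(stage_index, bind_data.index, gpu_addr, size);
}

int main() {
    RasterizerStub rasterizer;
    ProcessCBBind(rasterizer, 4, {.index = 1, .valid = true},
                  {.address = 0x8000'0000, .cb_size = 0x100});
    ProcessCBBind(rasterizer, 4, {.index = 1, .valid = false},
                  {.address = 0x8000'0000, .cb_size = 0x100});
}
```

Forwarding the bind eagerly is what allows the new buffer cache to track uniform buffers per stage instead of rediscovering them at draw time.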
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ba750748c..a2f19559f 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -60,9 +60,6 @@ void MaxwellDMA::Launch() { | |||
| 60 | return; | 60 | return; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | // All copies here update the main memory, so mark all rasterizer states as invalid. | ||
| 64 | system.GPU().Maxwell3D().OnMemoryWrite(); | ||
| 65 | |||
| 66 | if (is_src_pitch && is_dst_pitch) { | 63 | if (is_src_pitch && is_dst_pitch) { |
| 67 | CopyPitchToPitch(); | 64 | CopyPitchToPitch(); |
| 68 | } else { | 65 | } else { |
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 3512283ff..f055b61e9 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h | |||
| @@ -143,22 +143,26 @@ private: | |||
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | bool ShouldWait() const { | 145 | bool ShouldWait() const { |
| 146 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 146 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || | 147 | return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() || |
| 147 | query_cache.ShouldWaitAsyncFlushes(); | 148 | query_cache.ShouldWaitAsyncFlushes(); |
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | bool ShouldFlush() const { | 151 | bool ShouldFlush() const { |
| 152 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 151 | return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || | 153 | return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() || |
| 152 | query_cache.HasUncommittedFlushes(); | 154 | query_cache.HasUncommittedFlushes(); |
| 153 | } | 155 | } |
| 154 | 156 | ||
| 155 | void PopAsyncFlushes() { | 157 | void PopAsyncFlushes() { |
| 158 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 156 | texture_cache.PopAsyncFlushes(); | 159 | texture_cache.PopAsyncFlushes(); |
| 157 | buffer_cache.PopAsyncFlushes(); | 160 | buffer_cache.PopAsyncFlushes(); |
| 158 | query_cache.PopAsyncFlushes(); | 161 | query_cache.PopAsyncFlushes(); |
| 159 | } | 162 | } |
| 160 | 163 | ||
| 161 | void CommitAsyncFlushes() { | 164 | void CommitAsyncFlushes() { |
| 165 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 162 | texture_cache.CommitAsyncFlushes(); | 166 | texture_cache.CommitAsyncFlushes(); |
| 163 | buffer_cache.CommitAsyncFlushes(); | 167 | buffer_cache.CommitAsyncFlushes(); |
| 164 | query_cache.CommitAsyncFlushes(); | 168 | query_cache.CommitAsyncFlushes(); |
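The fence manager now takes both cache mutexes in one std::scoped_lock before querying or popping async flushes. scoped_lock locks multiple mutexes with a deadlock-avoidance algorithm, so the textual acquisition order does not matter. A minimal illustration, assuming two unrelated worker threads that lock the same pair in opposite order:

```cpp
#include <mutex>
#include <thread>

std::mutex buffer_mutex;
std::mutex texture_mutex;
int buffer_work = 0;
int texture_work = 0;

void WorkerA() {
    for (int i = 0; i < 10'000; ++i) {
        // Locks both mutexes atomically; cannot deadlock against WorkerB.
        std::scoped_lock lock{buffer_mutex, texture_mutex};
        ++buffer_work;
        ++texture_work;
    }
}

void WorkerB() {
    for (int i = 0; i < 10'000; ++i) {
        // Opposite order is fine: scoped_lock uses std::lock underneath.
        std::scoped_lock lock{texture_mutex, buffer_mutex};
        ++buffer_work;
        ++texture_work;
    }
}

int main() {
    std::thread a{WorkerA};
    std::thread b{WorkerB};
    a.join();
    b.join();
}
```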
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 6ab06775f..2a9bd4121 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -44,8 +44,8 @@ GPU::~GPU() = default; | |||
| 44 | 44 | ||
| 45 | void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | 45 | void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { |
| 46 | renderer = std::move(renderer_); | 46 | renderer = std::move(renderer_); |
| 47 | rasterizer = renderer->ReadRasterizer(); | ||
| 47 | 48 | ||
| 48 | VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); | ||
| 49 | memory_manager->BindRasterizer(rasterizer); | 49 | memory_manager->BindRasterizer(rasterizer); |
| 50 | maxwell_3d->BindRasterizer(rasterizer); | 50 | maxwell_3d->BindRasterizer(rasterizer); |
| 51 | fermi_2d->BindRasterizer(rasterizer); | 51 | fermi_2d->BindRasterizer(rasterizer); |
| @@ -171,7 +171,7 @@ void GPU::TickWork() { | |||
| 171 | const std::size_t size = request.size; | 171 | const std::size_t size = request.size; |
| 172 | flush_requests.pop_front(); | 172 | flush_requests.pop_front(); |
| 173 | flush_request_mutex.unlock(); | 173 | flush_request_mutex.unlock(); |
| 174 | renderer->Rasterizer().FlushRegion(addr, size); | 174 | rasterizer->FlushRegion(addr, size); |
| 175 | current_flush_fence.store(fence); | 175 | current_flush_fence.store(fence); |
| 176 | flush_request_mutex.lock(); | 176 | flush_request_mutex.lock(); |
| 177 | } | 177 | } |
| @@ -193,11 +193,11 @@ u64 GPU::GetTicks() const { | |||
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | void GPU::FlushCommands() { | 195 | void GPU::FlushCommands() { |
| 196 | renderer->Rasterizer().FlushCommands(); | 196 | rasterizer->FlushCommands(); |
| 197 | } | 197 | } |
| 198 | 198 | ||
| 199 | void GPU::SyncGuestHost() { | 199 | void GPU::SyncGuestHost() { |
| 200 | renderer->Rasterizer().SyncGuestHost(); | 200 | rasterizer->SyncGuestHost(); |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | enum class GpuSemaphoreOperation { | 203 | enum class GpuSemaphoreOperation { |
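GPU now resolves the rasterizer once in BindRenderer through RendererBase::ReadRasterizer and keeps the raw pointer, so hot paths such as FlushRegion and SyncGuestHost no longer go through renderer->Rasterizer() on every call. A small sketch of the pattern with a simplified renderer hierarchy; the class names here are illustrative, not the yuzu classes.

```cpp
#include <cstdint>
#include <iostream>
#include <memory>

struct RasterizerInterface {
    virtual ~RasterizerInterface() = default;
    virtual void FlushRegion(std::uint64_t addr, std::uint64_t size) = 0;
};

struct NullRasterizer final : RasterizerInterface {
    void FlushRegion(std::uint64_t addr, std::uint64_t size) override {
        std::cout << "flush 0x" << std::hex << addr << " size 0x" << size << std::dec << '\n';
    }
};

struct RendererBase {
    virtual ~RendererBase() = default;
    // Each backend owns its rasterizer and exposes it through this accessor.
    virtual RasterizerInterface* ReadRasterizer() = 0;
};

struct NullRenderer final : RendererBase {
    RasterizerInterface* ReadRasterizer() override { return &rasterizer; }
    NullRasterizer rasterizer;
};

class Gpu {
public:
    void BindRenderer(std::unique_ptr<RendererBase> renderer_) {
        renderer = std::move(renderer_);
        rasterizer = renderer->ReadRasterizer(); // cache once, reuse everywhere
    }

    void FlushRegion(std::uint64_t addr, std::uint64_t size) {
        rasterizer->FlushRegion(addr, size);
    }

private:
    std::unique_ptr<RendererBase> renderer;
    RasterizerInterface* rasterizer = nullptr;
};

int main() {
    Gpu gpu;
    gpu.BindRenderer(std::make_unique<NullRenderer>());
    gpu.FlushRegion(0x1000, 0x200);
}
```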
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b4ce6b154..b2ee45496 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h | |||
| @@ -366,6 +366,7 @@ protected: | |||
| 366 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; | 366 | std::unique_ptr<Tegra::DmaPusher> dma_pusher; |
| 367 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; | 367 | std::unique_ptr<Tegra::CDmaPusher> cdma_pusher; |
| 368 | std::unique_ptr<VideoCore::RendererBase> renderer; | 368 | std::unique_ptr<VideoCore::RendererBase> renderer; |
| 369 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 369 | const bool use_nvdec; | 370 | const bool use_nvdec; |
| 370 | 371 | ||
| 371 | private: | 372 | private: |
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 7e490bcc3..50319f1d5 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp | |||
| @@ -38,6 +38,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 38 | } | 38 | } |
| 39 | 39 | ||
| 40 | auto current_context = context.Acquire(); | 40 | auto current_context = context.Acquire(); |
| 41 | VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); | ||
| 41 | 42 | ||
| 42 | CommandDataContainer next; | 43 | CommandDataContainer next; |
| 43 | while (state.is_running) { | 44 | while (state.is_running) { |
| @@ -52,13 +53,13 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||
| 52 | } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { | 53 | } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { |
| 53 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | 54 | renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |
| 54 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { | 55 | } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) { |
| 55 | renderer.Rasterizer().ReleaseFences(); | 56 | rasterizer->ReleaseFences(); |
| 56 | } else if (std::holds_alternative<GPUTickCommand>(next.data)) { | 57 | } else if (std::holds_alternative<GPUTickCommand>(next.data)) { |
| 57 | system.GPU().TickWork(); | 58 | system.GPU().TickWork(); |
| 58 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { | 59 | } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { |
| 59 | renderer.Rasterizer().FlushRegion(flush->addr, flush->size); | 60 | rasterizer->FlushRegion(flush->addr, flush->size); |
| 60 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | 61 | } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |
| 61 | renderer.Rasterizer().OnCPUWrite(invalidate->addr, invalidate->size); | 62 | rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); |
| 62 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | 63 | } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |
| 63 | return; | 64 | return; |
| 64 | } else { | 65 | } else { |
| @@ -84,6 +85,7 @@ ThreadManager::~ThreadManager() { | |||
| 84 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | 85 | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, |
| 85 | Core::Frontend::GraphicsContext& context, | 86 | Core::Frontend::GraphicsContext& context, |
| 86 | Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { | 87 | Tegra::DmaPusher& dma_pusher, Tegra::CDmaPusher& cdma_pusher) { |
| 88 | rasterizer = renderer.ReadRasterizer(); | ||
| 87 | thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), | 89 | thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), |
| 88 | std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); | 90 | std::ref(dma_pusher), std::ref(state), std::ref(cdma_pusher)); |
| 89 | } | 91 | } |
| @@ -129,12 +131,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| 129 | } | 131 | } |
| 130 | 132 | ||
| 131 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | 133 | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { |
| 132 | system.Renderer().Rasterizer().OnCPUWrite(addr, size); | 134 | rasterizer->OnCPUWrite(addr, size); |
| 133 | } | 135 | } |
| 134 | 136 | ||
| 135 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | 137 | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 136 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important | 138 | // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important |
| 137 | system.Renderer().Rasterizer().OnCPUWrite(addr, size); | 139 | rasterizer->OnCPUWrite(addr, size); |
| 138 | } | 140 | } |
| 139 | 141 | ||
| 140 | void ThreadManager::WaitIdle() const { | 142 | void ThreadManager::WaitIdle() const { |
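The GPU thread loop dispatches CommandDataContainer entries by inspecting a std::variant with std::get_if and std::holds_alternative; this diff only changes which rasterizer pointer each branch uses. For reference, a stripped-down sketch of that dispatch style with made-up command types:

```cpp
#include <cstdint>
#include <iostream>
#include <queue>
#include <variant>

struct FlushRegionCommand { std::uint64_t addr; std::uint64_t size; };
struct SwapBuffersCommand {};
struct EndProcessingCommand {};

using CommandData = std::variant<FlushRegionCommand, SwapBuffersCommand, EndProcessingCommand>;

void RunThread(std::queue<CommandData>& queue) {
    while (!queue.empty()) {
        const CommandData next = queue.front();
        queue.pop();
        if (const auto* flush = std::get_if<FlushRegionCommand>(&next)) {
            std::cout << "flush 0x" << std::hex << flush->addr << std::dec << '\n';
        } else if (std::holds_alternative<SwapBuffersCommand>(next)) {
            std::cout << "swap buffers\n";
        } else if (std::holds_alternative<EndProcessingCommand>(next)) {
            return; // terminate the worker loop
        }
    }
}

int main() {
    std::queue<CommandData> queue;
    queue.push(FlushRegionCommand{0x1000, 0x40});
    queue.push(SwapBuffersCommand{});
    queue.push(EndProcessingCommand{});
    RunThread(queue);
}
```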
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2775629e7..4cd951169 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h | |||
| @@ -27,6 +27,7 @@ class System; | |||
| 27 | } // namespace Core | 27 | } // namespace Core |
| 28 | 28 | ||
| 29 | namespace VideoCore { | 29 | namespace VideoCore { |
| 30 | class RasterizerInterface; | ||
| 30 | class RendererBase; | 31 | class RendererBase; |
| 31 | } // namespace VideoCore | 32 | } // namespace VideoCore |
| 32 | 33 | ||
| @@ -151,11 +152,12 @@ private: | |||
| 151 | /// Pushes a command to be executed by the GPU thread | 152 | /// Pushes a command to be executed by the GPU thread |
| 152 | u64 PushCommand(CommandData&& command_data); | 153 | u64 PushCommand(CommandData&& command_data); |
| 153 | 154 | ||
| 154 | SynchState state; | ||
| 155 | Core::System& system; | 155 | Core::System& system; |
| 156 | std::thread thread; | ||
| 157 | std::thread::id thread_id; | ||
| 158 | const bool is_async; | 156 | const bool is_async; |
| 157 | VideoCore::RasterizerInterface* rasterizer = nullptr; | ||
| 158 | |||
| 159 | SynchState state; | ||
| 160 | std::thread thread; | ||
| 159 | }; | 161 | }; |
| 160 | 162 | ||
| 161 | } // namespace VideoCommon::GPUThread | 163 | } // namespace VideoCommon::GPUThread |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 28f2b8614..970120acc 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -12,7 +12,6 @@ set(SHADER_FILES | |||
| 12 | vulkan_blit_depth_stencil.frag | 12 | vulkan_blit_depth_stencil.frag |
| 13 | vulkan_present.frag | 13 | vulkan_present.frag |
| 14 | vulkan_present.vert | 14 | vulkan_present.vert |
| 15 | vulkan_quad_array.comp | ||
| 16 | vulkan_quad_indexed.comp | 15 | vulkan_quad_indexed.comp |
| 17 | vulkan_uint8.comp | 16 | vulkan_uint8.comp |
| 18 | ) | 17 | ) |
diff --git a/src/video_core/host_shaders/vulkan_quad_array.comp b/src/video_core/host_shaders/vulkan_quad_array.comp deleted file mode 100644 index 212f4e998..000000000 --- a/src/video_core/host_shaders/vulkan_quad_array.comp +++ /dev/null | |||
| @@ -1,28 +0,0 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #version 460 core | ||
| 6 | |||
| 7 | layout (local_size_x = 1024) in; | ||
| 8 | |||
| 9 | layout (std430, set = 0, binding = 0) buffer OutputBuffer { | ||
| 10 | uint output_indexes[]; | ||
| 11 | }; | ||
| 12 | |||
| 13 | layout (push_constant) uniform PushConstants { | ||
| 14 | uint first; | ||
| 15 | }; | ||
| 16 | |||
| 17 | void main() { | ||
| 18 | uint primitive = gl_GlobalInvocationID.x; | ||
| 19 | if (primitive * 6 >= output_indexes.length()) { | ||
| 20 | return; | ||
| 21 | } | ||
| 22 | |||
| 23 | const uint quad_map[6] = uint[](0, 1, 2, 0, 2, 3); | ||
| 24 | for (uint vertex = 0; vertex < 6; ++vertex) { | ||
| 25 | uint index = first + primitive * 4 + quad_map[vertex]; | ||
| 26 | output_indexes[primitive * 6 + vertex] = index; | ||
| 27 | } | ||
| 28 | } | ||
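The deleted vulkan_quad_array.comp expanded quad primitives into triangle indices on the GPU using the mapping (0, 1, 2, 0, 2, 3). How the renderer generates these indices after this change is outside the hunks shown here, so the following host-side C++ sketch is purely an illustration of that mapping, not the replacement path:

```cpp
#include <array>
#include <cstdint>
#include <vector>

// Expand `quad_count` quads starting at vertex `first` into a triangle index list.
std::vector<std::uint32_t> QuadsToTriangles(std::uint32_t first, std::uint32_t quad_count) {
    static constexpr std::array<std::uint32_t, 6> quad_map{0, 1, 2, 0, 2, 3};
    std::vector<std::uint32_t> indices;
    indices.reserve(quad_count * 6);
    for (std::uint32_t primitive = 0; primitive < quad_count; ++primitive) {
        for (const std::uint32_t vertex : quad_map) {
            indices.push_back(first + primitive * 4 + vertex);
        }
    }
    return indices;
}

int main() {
    // One quad starting at vertex 8 becomes the triangles (8,9,10) and (8,10,11).
    const auto indices = QuadsToTriangles(8, 1);
    return indices.size() == 6 ? 0 : 1;
}
```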
diff --git a/src/video_core/host_shaders/vulkan_uint8.comp b/src/video_core/host_shaders/vulkan_uint8.comp index ad74d7af9..872291670 100644 --- a/src/video_core/host_shaders/vulkan_uint8.comp +++ b/src/video_core/host_shaders/vulkan_uint8.comp | |||
| @@ -16,9 +16,16 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | |||
| 16 | uint16_t output_indexes[]; | 16 | uint16_t output_indexes[]; |
| 17 | }; | 17 | }; |
| 18 | 18 | ||
| 19 | uint AssembleIndex(uint id) { | ||
| 20 | // Most primitive restart indices are 0xFF | ||
| 21 | // Hardcode this to 0xFF for now | ||
| 22 | uint index = uint(input_indexes[id]); | ||
| 23 | return index == 0xFF ? 0xFFFF : index; | ||
| 24 | } | ||
| 25 | |||
| 19 | void main() { | 26 | void main() { |
| 20 | uint id = gl_GlobalInvocationID.x; | 27 | uint id = gl_GlobalInvocationID.x; |
| 21 | if (id < input_indexes.length()) { | 28 | if (id < input_indexes.length()) { |
| 22 | output_indexes[id] = uint16_t(input_indexes[id]); | 29 | output_indexes[id] = uint16_t(AssembleIndex(id)); |
| 23 | } | 30 | } |
| 24 | } | 31 | } |
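vulkan_uint8.comp now widens 8-bit indices to 16 bits through AssembleIndex, remapping the hardcoded 0xFF primitive-restart value to 0xFFFF so restart still triggers after promotion. The same conversion expressed as a C++ sketch; the shader runs one invocation per index, here it is a plain loop:

```cpp
#include <cstdint>
#include <vector>

// Promote 8-bit indices to 16-bit, translating the primitive-restart sentinel.
// The shader hardcodes 0xFF as the restart index for now.
std::vector<std::uint16_t> PromoteIndices(const std::vector<std::uint8_t>& input) {
    std::vector<std::uint16_t> output;
    output.reserve(input.size());
    for (const std::uint8_t index : input) {
        output.push_back(index == 0xFF ? std::uint16_t{0xFFFF} : std::uint16_t{index});
    }
    return output;
}

int main() {
    const std::vector<std::uint8_t> input{0, 1, 2, 0xFF, 3, 4, 5};
    const auto output = PromoteIndices(input);
    return output[3] == 0xFFFF ? 0 : 1; // restart marker survives the promotion
}
```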
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index c841f3cd7..44240a9c4 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -21,8 +21,8 @@ MemoryManager::MemoryManager(Core::System& system_) | |||
| 21 | 21 | ||
| 22 | MemoryManager::~MemoryManager() = default; | 22 | MemoryManager::~MemoryManager() = default; |
| 23 | 23 | ||
| 24 | void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { | 24 | void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { |
| 25 | rasterizer = &rasterizer_; | 25 | rasterizer = rasterizer_; |
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { | 28 | GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index b468a67de..b3538d503 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -72,7 +72,7 @@ public: | |||
| 72 | ~MemoryManager(); | 72 | ~MemoryManager(); |
| 73 | 73 | ||
| 74 | /// Binds a renderer to the memory manager. | 74 | /// Binds a renderer to the memory manager. |
| 75 | void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); | 75 | void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); |
| 76 | 76 | ||
| 77 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; | 77 | [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; |
| 78 | 78 | ||
| @@ -157,6 +157,8 @@ private: | |||
| 157 | 157 | ||
| 158 | using MapRange = std::pair<GPUVAddr, size_t>; | 158 | using MapRange = std::pair<GPUVAddr, size_t>; |
| 159 | std::vector<MapRange> map_ranges; | 159 | std::vector<MapRange> map_ranges; |
| 160 | |||
| 161 | std::vector<std::pair<VAddr, std::size_t>> cache_invalidate_queue; | ||
| 160 | }; | 162 | }; |
| 161 | 163 | ||
| 162 | } // namespace Tegra | 164 | } // namespace Tegra |
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0cb0f387d..50491b758 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <atomic> | 7 | #include <atomic> |
| 8 | #include <functional> | 8 | #include <functional> |
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include <span> | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 11 | #include "video_core/engines/fermi_2d.h" | 12 | #include "video_core/engines/fermi_2d.h" |
| 12 | #include "video_core/gpu.h" | 13 | #include "video_core/gpu.h" |
| @@ -49,6 +50,10 @@ public: | |||
| 49 | /// Records a GPU query and caches it | 50 | /// Records a GPU query and caches it |
| 50 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | 51 | virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |
| 51 | 52 | ||
| 53 | /// Signal an uniform buffer binding | ||
| 54 | virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 55 | u32 size) = 0; | ||
| 56 | |||
| 52 | /// Signal a GPU based semaphore as a fence | 57 | /// Signal a GPU based semaphore as a fence |
| 53 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | 58 | virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; |
| 54 | 59 | ||
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 51dde8eb5..320ee8d30 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h | |||
| @@ -37,15 +37,11 @@ public: | |||
| 37 | std::unique_ptr<Core::Frontend::GraphicsContext> context); | 37 | std::unique_ptr<Core::Frontend::GraphicsContext> context); |
| 38 | virtual ~RendererBase(); | 38 | virtual ~RendererBase(); |
| 39 | 39 | ||
| 40 | /// Initialize the renderer | ||
| 41 | [[nodiscard]] virtual bool Init() = 0; | ||
| 42 | |||
| 43 | /// Shutdown the renderer | ||
| 44 | virtual void ShutDown() = 0; | ||
| 45 | |||
| 46 | /// Finalize rendering the guest frame and draw into the presentation texture | 40 | /// Finalize rendering the guest frame and draw into the presentation texture |
| 47 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; | 41 | virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; |
| 48 | 42 | ||
| 43 | [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; | ||
| 44 | |||
| 49 | // Getter/setter functions: | 45 | // Getter/setter functions: |
| 50 | // ------------------------ | 46 | // ------------------------ |
| 51 | 47 | ||
| @@ -57,14 +53,6 @@ public: | |||
| 57 | return m_current_frame; | 53 | return m_current_frame; |
| 58 | } | 54 | } |
| 59 | 55 | ||
| 60 | [[nodiscard]] RasterizerInterface& Rasterizer() { | ||
| 61 | return *rasterizer; | ||
| 62 | } | ||
| 63 | |||
| 64 | [[nodiscard]] const RasterizerInterface& Rasterizer() const { | ||
| 65 | return *rasterizer; | ||
| 66 | } | ||
| 67 | |||
| 68 | [[nodiscard]] Core::Frontend::GraphicsContext& Context() { | 56 | [[nodiscard]] Core::Frontend::GraphicsContext& Context() { |
| 69 | return *context; | 57 | return *context; |
| 70 | } | 58 | } |
| @@ -98,7 +86,6 @@ public: | |||
| 98 | 86 | ||
| 99 | protected: | 87 | protected: |
| 100 | Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. | 88 | Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. |
| 101 | std::unique_ptr<RasterizerInterface> rasterizer; | ||
| 102 | std::unique_ptr<Core::Frontend::GraphicsContext> context; | 89 | std::unique_ptr<Core::Frontend::GraphicsContext> context; |
| 103 | f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer | 90 | f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer |
| 104 | int m_current_frame = 0; ///< Current frame, should be set by the renderer | 91 | int m_current_frame = 0; ///< Current frame, should be set by the renderer |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 5772cad87..6da3906a4 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -2,98 +2,208 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <memory> | 5 | #include <span> |
| 6 | 6 | ||
| 7 | #include <glad/glad.h> | ||
| 8 | |||
| 9 | #include "common/assert.h" | ||
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | 7 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | ||
| 13 | #include "video_core/rasterizer_interface.h" | ||
| 14 | #include "video_core/renderer_opengl/gl_buffer_cache.h" | 8 | #include "video_core/renderer_opengl/gl_buffer_cache.h" |
| 15 | #include "video_core/renderer_opengl/gl_device.h" | 9 | #include "video_core/renderer_opengl/gl_device.h" |
| 16 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 17 | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||
| 18 | 10 | ||
| 19 | namespace OpenGL { | 11 | namespace OpenGL { |
| 12 | namespace { | ||
| 13 | struct BindlessSSBO { | ||
| 14 | GLuint64EXT address; | ||
| 15 | GLsizei length; | ||
| 16 | GLsizei padding; | ||
| 17 | }; | ||
| 18 | static_assert(sizeof(BindlessSSBO) == sizeof(GLuint) * 4); | ||
| 19 | |||
| 20 | constexpr std::array PROGRAM_LUT{ | ||
| 21 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||
| 22 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||
| 23 | }; | ||
| 24 | } // Anonymous namespace | ||
| 25 | |||
| 26 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | ||
| 27 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} | ||
| 28 | |||
| 29 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | ||
| 30 | VAddr cpu_addr_, u64 size_bytes_) | ||
| 31 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 32 | buffer.Create(); | ||
| 33 | const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | ||
| 34 | glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | ||
| 35 | glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||
| 36 | |||
| 37 | if (runtime.has_unified_vertex_buffers) { | ||
| 38 | glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||
| 39 | } | ||
| 40 | } | ||
| 20 | 41 | ||
| 21 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 42 | void Buffer::ImmediateUpload(size_t offset, std::span<const u8> data) noexcept { |
| 43 | glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 44 | static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||
| 45 | } | ||
| 22 | 46 | ||
| 23 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 47 | void Buffer::ImmediateDownload(size_t offset, std::span<u8> data) noexcept { |
| 48 | glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset), | ||
| 49 | static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||
| 50 | } | ||
| 24 | 51 | ||
| 25 | Buffer::Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_) | 52 | void Buffer::MakeResident(GLenum access) noexcept { |
| 26 | : BufferBlock{cpu_addr_, size_} { | 53 | // Abuse GLenum's order to exit early |
| 27 | gl_buffer.Create(); | 54 | // GL_NONE (default) < GL_READ_ONLY < GL_READ_WRITE |
| 28 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size_), nullptr, GL_DYNAMIC_DRAW); | 55 | if (access <= current_residency_access || buffer.handle == 0) { |
| 29 | if (device_.UseAssemblyShaders() || device_.HasVertexBufferUnifiedMemory()) { | 56 | return; |
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_WRITE); | 57 | } |
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | 58 | if (std::exchange(current_residency_access, access) != GL_NONE) { |
| 59 | // If the buffer is already resident, remove its residency before promoting it | ||
| 60 | glMakeNamedBufferNonResidentNV(buffer.handle); | ||
| 32 | } | 61 | } |
| 62 | glMakeNamedBufferResidentNV(buffer.handle, access); | ||
| 33 | } | 63 | } |
| 34 | 64 | ||
| 35 | Buffer::~Buffer() = default; | 65 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_) |
| 36 | 66 | : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | |
| 37 | void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | 67 | use_assembly_shaders{device.UseAssemblyShaders()}, |
| 38 | glNamedBufferSubData(Handle(), static_cast<GLintptr>(offset), | 68 | has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, |
| 39 | static_cast<GLsizeiptr>(data_size), data); | 69 | stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { |
| 70 | GLint gl_max_attributes; | ||
| 71 | glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | ||
| 72 | max_attributes = static_cast<u32>(gl_max_attributes); | ||
| 73 | for (auto& stage_uniforms : fast_uniforms) { | ||
| 74 | for (OGLBuffer& buffer : stage_uniforms) { | ||
| 75 | buffer.Create(); | ||
| 76 | glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); | ||
| 77 | } | ||
| 78 | } | ||
| 79 | for (auto& stage_uniforms : copy_uniforms) { | ||
| 80 | for (OGLBuffer& buffer : stage_uniforms) { | ||
| 81 | buffer.Create(); | ||
| 82 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 83 | } | ||
| 84 | } | ||
| 85 | for (OGLBuffer& buffer : copy_compute_uniforms) { | ||
| 86 | buffer.Create(); | ||
| 87 | glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||
| 88 | } | ||
| 40 | } | 89 | } |
| 41 | 90 | ||
| 42 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 91 | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| 43 | MICROPROFILE_SCOPE(OpenGL_Buffer_Download); | 92 | std::span<const VideoCommon::BufferCopy> copies) { |
| 44 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(data_size); | 93 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 45 | const GLintptr gl_offset = static_cast<GLintptr>(offset); | 94 | glCopyNamedBufferSubData( |
| 46 | if (read_buffer.handle == 0) { | 95 | src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset), |
| 47 | read_buffer.Create(); | 96 | static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size)); |
| 48 | glNamedBufferData(read_buffer.handle, static_cast<GLsizeiptr>(Size()), nullptr, | ||
| 49 | GL_STREAM_READ); | ||
| 50 | } | 97 | } |
| 51 | glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||
| 52 | glCopyNamedBufferSubData(gl_buffer.handle, read_buffer.handle, gl_offset, gl_offset, gl_size); | ||
| 53 | glGetNamedBufferSubData(read_buffer.handle, gl_offset, gl_size, data); | ||
| 54 | } | 98 | } |
| 55 | 99 | ||
| 56 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 100 | void BufferCacheRuntime::BindIndexBuffer(Buffer& buffer, u32 offset, u32 size) { |
| 57 | std::size_t copy_size) { | 101 | if (has_unified_vertex_buffers) { |
| 58 | glCopyNamedBufferSubData(src.Handle(), Handle(), static_cast<GLintptr>(src_offset), | 102 | buffer.MakeResident(GL_READ_ONLY); |
| 59 | static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(copy_size)); | 103 | glBufferAddressRangeNV(GL_ELEMENT_ARRAY_ADDRESS_NV, 0, buffer.HostGpuAddr() + offset, |
| 104 | static_cast<GLsizeiptr>(size)); | ||
| 105 | } else { | ||
| 106 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer.Handle()); | ||
| 107 | index_buffer_offset = offset; | ||
| 108 | } | ||
| 60 | } | 109 | } |
| 61 | 110 | ||
| 62 | OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 111 | void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, |
| 63 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 112 | u32 stride) { |
| 64 | const Device& device_, OGLStreamBuffer& stream_buffer_, | 113 | if (index >= max_attributes) { |
| 65 | StateTracker& state_tracker) | ||
| 66 | : GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} { | ||
| 67 | if (!device.HasFastBufferSubData()) { | ||
| 68 | return; | 114 | return; |
| 69 | } | 115 | } |
| 70 | 116 | if (has_unified_vertex_buffers) { | |
| 71 | static constexpr GLsizeiptr size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize); | 117 | buffer.MakeResident(GL_READ_ONLY); |
| 72 | glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 118 | glBindVertexBuffer(index, 0, 0, static_cast<GLsizei>(stride)); |
| 73 | for (const GLuint cbuf : cbufs) { | 119 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, index, |
| 74 | glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW); | 120 | buffer.HostGpuAddr() + offset, static_cast<GLsizeiptr>(size)); |
| 121 | } else { | ||
| 122 | glBindVertexBuffer(index, buffer.Handle(), static_cast<GLintptr>(offset), | ||
| 123 | static_cast<GLsizei>(stride)); | ||
| 75 | } | 124 | } |
| 76 | } | 125 | } |
| 77 | 126 | ||
| 78 | OGLBufferCache::~OGLBufferCache() { | 127 | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 79 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 128 | u32 offset, u32 size) { |
| 129 | if (use_assembly_shaders) { | ||
| 130 | GLuint handle; | ||
| 131 | if (offset != 0) { | ||
| 132 | handle = copy_uniforms[stage][binding_index].handle; | ||
| 133 | glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||
| 134 | } else { | ||
| 135 | handle = buffer.Handle(); | ||
| 136 | } | ||
| 137 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | ||
| 138 | static_cast<GLsizeiptr>(size)); | ||
| 139 | } else { | ||
| 140 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 141 | const GLuint binding = base_binding + binding_index; | ||
| 142 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), | ||
| 143 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 144 | } | ||
| 80 | } | 145 | } |
| 81 | 146 | ||
| 82 | std::shared_ptr<Buffer> OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 147 | void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 83 | return std::make_shared<Buffer>(device, cpu_addr, size); | 148 | u32 size) { |
| 149 | if (use_assembly_shaders) { | ||
| 150 | GLuint handle; | ||
| 151 | if (offset != 0) { | ||
| 152 | handle = copy_compute_uniforms[binding_index].handle; | ||
| 153 | glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||
| 154 | } else { | ||
| 155 | handle = buffer.Handle(); | ||
| 156 | } | ||
| 157 | glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, | ||
| 158 | static_cast<GLsizeiptr>(size)); | ||
| 159 | } else { | ||
| 160 | glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), | ||
| 161 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 162 | } | ||
| 84 | } | 163 | } |
| 85 | 164 | ||
| 86 | OGLBufferCache::BufferInfo OGLBufferCache::GetEmptyBuffer(std::size_t) { | 165 | void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, |
| 87 | return {0, 0, 0}; | 166 | u32 offset, u32 size, bool is_written) { |
| 167 | if (use_assembly_shaders) { | ||
| 168 | const BindlessSSBO ssbo{ | ||
| 169 | .address = buffer.HostGpuAddr() + offset, | ||
| 170 | .length = static_cast<GLsizei>(size), | ||
| 171 | .padding = 0, | ||
| 172 | }; | ||
| 173 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | ||
| 174 | glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | ||
| 175 | reinterpret_cast<const GLuint*>(&ssbo)); | ||
| 176 | } else { | ||
| 177 | const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||
| 178 | const GLuint binding = base_binding + binding_index; | ||
| 179 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||
| 180 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 181 | } | ||
| 88 | } | 182 | } |
| 89 | 183 | ||
| 90 | OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, | 184 | void BufferCacheRuntime::BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, |
| 91 | std::size_t size) { | 185 | u32 size, bool is_written) { |
| 92 | DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); | 186 | if (use_assembly_shaders) { |
| 93 | const GLuint cbuf = cbufs[cbuf_cursor++]; | 187 | const BindlessSSBO ssbo{ |
| 188 | .address = buffer.HostGpuAddr() + offset, | ||
| 189 | .length = static_cast<GLsizei>(size), | ||
| 190 | .padding = 0, | ||
| 191 | }; | ||
| 192 | buffer.MakeResident(is_written ? GL_READ_WRITE : GL_READ_ONLY); | ||
| 193 | glProgramLocalParametersI4uivNV(GL_COMPUTE_PROGRAM_NV, binding_index, 1, | ||
| 194 | reinterpret_cast<const GLuint*>(&ssbo)); | ||
| 195 | } else if (size == 0) { | ||
| 196 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, 0, 0, 0); | ||
| 197 | } else { | ||
| 198 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding_index, buffer.Handle(), | ||
| 199 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 200 | } | ||
| 201 | } | ||
| 94 | 202 | ||
| 95 | glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); | 203 | void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, |
| 96 | return {cbuf, 0, 0}; | 204 | u32 size) { |
| 205 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, index, buffer.Handle(), | ||
| 206 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 97 | } | 207 | } |
| 98 | 208 | ||
| 99 | } // namespace OpenGL | 209 | } // namespace OpenGL |
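Buffer::MakeResident above relies on the numeric ordering GL_NONE (0) < GL_READ_ONLY (0x88B8) < GL_READ_WRITE (0x88BA) to skip redundant residency calls and to promote read-only residency to read-write when a stronger access is requested. A GL-free sketch of the same state machine, with the enum values spelled out so it runs without a context and the Make*Resident calls stubbed with prints:

```cpp
#include <cstdint>
#include <iostream>
#include <utility>

// Values copied from the OpenGL headers; the early-exit trick depends only on their ordering.
enum : std::uint32_t {
    ACCESS_NONE = 0,            // GL_NONE
    ACCESS_READ_ONLY = 0x88B8,  // GL_READ_ONLY
    ACCESS_READ_WRITE = 0x88BA, // GL_READ_WRITE
};

struct Buffer {
    void MakeResident(std::uint32_t access) {
        // A lower or equal access level is already satisfied: exit early.
        if (access <= current_residency_access) {
            return;
        }
        if (std::exchange(current_residency_access, access) != ACCESS_NONE) {
            // Already resident with weaker access: drop residency before promoting it.
            std::cout << "glMakeNamedBufferNonResidentNV\n";
        }
        std::cout << "glMakeNamedBufferResidentNV(access=0x" << std::hex << access << std::dec
                  << ")\n";
    }

    std::uint32_t current_residency_access = ACCESS_NONE;
};

int main() {
    Buffer buffer;
    buffer.MakeResident(ACCESS_READ_ONLY);  // becomes resident read-only
    buffer.MakeResident(ACCESS_READ_ONLY);  // no-op
    buffer.MakeResident(ACCESS_READ_WRITE); // demote, then promote to read-write
}
```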
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 17ee90316..d8b20a9af 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -5,79 +5,157 @@ | |||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | 7 | #include <array> |
| 8 | #include <memory> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "common/alignment.h" | ||
| 10 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| 12 | #include "common/dynamic_library.h" | ||
| 11 | #include "video_core/buffer_cache/buffer_cache.h" | 13 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/engines/maxwell_3d.h" | 14 | #include "video_core/rasterizer_interface.h" |
| 15 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 16 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 17 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 15 | 18 | ||
| 16 | namespace Core { | ||
| 17 | class System; | ||
| 18 | } | ||
| 19 | |||
| 20 | namespace OpenGL { | 19 | namespace OpenGL { |
| 21 | 20 | ||
| 22 | class Device; | 21 | class BufferCacheRuntime; |
| 23 | class OGLStreamBuffer; | ||
| 24 | class RasterizerOpenGL; | ||
| 25 | class StateTracker; | ||
| 26 | 22 | ||
| 27 | class Buffer : public VideoCommon::BufferBlock { | 23 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { |
| 28 | public: | 24 | public: |
| 29 | explicit Buffer(const Device& device_, VAddr cpu_addr_, std::size_t size_); | 25 | explicit Buffer(BufferCacheRuntime&, VideoCore::RasterizerInterface& rasterizer, VAddr cpu_addr, |
| 30 | ~Buffer(); | 26 | u64 size_bytes); |
| 27 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams); | ||
| 31 | 28 | ||
| 32 | void Upload(std::size_t offset, std::size_t data_size, const u8* data); | 29 | void ImmediateUpload(size_t offset, std::span<const u8> data) noexcept; |
| 33 | 30 | ||
| 34 | void Download(std::size_t offset, std::size_t data_size, u8* data); | 31 | void ImmediateDownload(size_t offset, std::span<u8> data) noexcept; |
| 35 | 32 | ||
| 36 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 33 | void MakeResident(GLenum access) noexcept; |
| 37 | std::size_t copy_size); | ||
| 38 | 34 | ||
| 39 | GLuint Handle() const noexcept { | 35 | [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { |
| 40 | return gl_buffer.handle; | 36 | return address; |
| 41 | } | 37 | } |
| 42 | 38 | ||
| 43 | u64 Address() const noexcept { | 39 | [[nodiscard]] GLuint Handle() const noexcept { |
| 44 | return gpu_address; | 40 | return buffer.handle; |
| 45 | } | 41 | } |
| 46 | 42 | ||
| 47 | private: | 43 | private: |
| 48 | OGLBuffer gl_buffer; | 44 | GLuint64EXT address = 0; |
| 49 | OGLBuffer read_buffer; | 45 | OGLBuffer buffer; |
| 50 | u64 gpu_address = 0; | 46 | GLenum current_residency_access = GL_NONE; |
| 51 | }; | 47 | }; |
| 52 | 48 | ||
| 53 | using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; | 49 | class BufferCacheRuntime { |
| 54 | class OGLBufferCache final : public GenericBufferCache { | 50 | friend Buffer; |
| 51 | |||
| 55 | public: | 52 | public: |
| 56 | explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, | 53 | static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); |
| 57 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | 54 | |
| 58 | const Device& device, OGLStreamBuffer& stream_buffer, | 55 | explicit BufferCacheRuntime(const Device& device_); |
| 59 | StateTracker& state_tracker); | 56 | |
| 60 | ~OGLBufferCache(); | 57 | void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, |
| 58 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 59 | |||
| 60 | void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); | ||
| 61 | |||
| 62 | void BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); | ||
| 63 | |||
| 64 | void BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size); | ||
| 65 | |||
| 66 | void BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size); | ||
| 67 | |||
| 68 | void BindStorageBuffer(size_t stage, u32 binding_index, Buffer& buffer, u32 offset, u32 size, | ||
| 69 | bool is_written); | ||
| 70 | |||
| 71 | void BindComputeStorageBuffer(u32 binding_index, Buffer& buffer, u32 offset, u32 size, | ||
| 72 | bool is_written); | ||
| 73 | |||
| 74 | void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); | ||
| 75 | |||
| 76 | void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { | ||
| 77 | if (use_assembly_shaders) { | ||
| 78 | const GLuint handle = fast_uniforms[stage][binding_index].handle; | ||
| 79 | const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||
| 80 | glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); | ||
| 81 | } else { | ||
| 82 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 83 | const GLuint binding = base_binding + binding_index; | ||
| 84 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||
| 85 | fast_uniforms[stage][binding_index].handle, 0, | ||
| 86 | static_cast<GLsizeiptr>(size)); | ||
| 87 | } | ||
| 88 | } | ||
| 61 | 89 | ||
| 62 | BufferInfo GetEmptyBuffer(std::size_t) override; | 90 | void PushFastUniformBuffer(size_t stage, u32 binding_index, std::span<const u8> data) { |
| 91 | if (use_assembly_shaders) { | ||
| 92 | glProgramBufferParametersIuivNV( | ||
| 93 | PABO_LUT[stage], binding_index, 0, | ||
| 94 | static_cast<GLsizei>(data.size_bytes() / sizeof(GLuint)), | ||
| 95 | reinterpret_cast<const GLuint*>(data.data())); | ||
| 96 | } else { | ||
| 97 | glNamedBufferSubData(fast_uniforms[stage][binding_index].handle, 0, | ||
| 98 | static_cast<GLsizeiptr>(data.size_bytes()), data.data()); | ||
| 99 | } | ||
| 100 | } | ||
| 63 | 101 | ||
| 64 | void Acquire() noexcept { | 102 | std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { |
| 65 | cbuf_cursor = 0; | 103 | const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); |
| 104 | const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||
| 105 | const GLuint binding = base_binding + binding_index; | ||
| 106 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), | ||
| 107 | static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||
| 108 | return mapped_span; | ||
| 66 | } | 109 | } |
| 67 | 110 | ||
| 68 | protected: | 111 | [[nodiscard]] const GLvoid* IndexOffset() const noexcept { |
| 69 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 112 | return reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(index_buffer_offset)); |
| 113 | } | ||
| 70 | 114 | ||
| 71 | BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override; | 115 | [[nodiscard]] bool HasFastBufferSubData() const noexcept { |
| 116 | return has_fast_buffer_sub_data; | ||
| 117 | } | ||
| 72 | 118 | ||
| 73 | private: | 119 | private: |
| 74 | static constexpr std::size_t NUM_CBUFS = Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | 120 | static constexpr std::array PABO_LUT{ |
| 75 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | 121 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, |
| 122 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 123 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 124 | }; | ||
| 76 | 125 | ||
| 77 | const Device& device; | 126 | const Device& device; |
| 78 | 127 | ||
| 79 | std::size_t cbuf_cursor = 0; | 128 | bool has_fast_buffer_sub_data = false; |
| 80 | std::array<GLuint, NUM_CBUFS> cbufs{}; | 129 | bool use_assembly_shaders = false; |
| 130 | bool has_unified_vertex_buffers = false; | ||
| 131 | |||
| 132 | u32 max_attributes = 0; | ||
| 133 | |||
| 134 | std::optional<StreamBuffer> stream_buffer; | ||
| 135 | |||
| 136 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||
| 137 | VideoCommon::NUM_STAGES> | ||
| 138 | fast_uniforms; | ||
| 139 | std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||
| 140 | VideoCommon::NUM_STAGES> | ||
| 141 | copy_uniforms; | ||
| 142 | std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; | ||
| 143 | |||
| 144 | u32 index_buffer_offset = 0; | ||
| 145 | }; | ||
| 146 | |||
| 147 | struct BufferCacheParams { | ||
| 148 | using Runtime = OpenGL::BufferCacheRuntime; | ||
| 149 | using Buffer = OpenGL::Buffer; | ||
| 150 | |||
| 151 | static constexpr bool IS_OPENGL = true; | ||
| 152 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = true; | ||
| 153 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; | ||
| 154 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | ||
| 155 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | ||
| 156 | static constexpr bool USE_MEMORY_MAPS = false; | ||
| 81 | }; | 157 | }; |
| 82 | 158 | ||
| 159 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||
| 160 | |||
| 83 | } // namespace OpenGL | 161 | } // namespace OpenGL |
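A minimal sketch of the two uniform-binding paths BindFastUniformBuffer selects between above: the NV assembly-shader parameter-buffer target from PABO_LUT versus a core GL_UNIFORM_BUFFER binding offset by the stage's base binding. The per-stage base-binding table and the buffer handle are illustrative stand-ins, not yuzu's actual values; only the GL calls mirror the header.

    // Sketch only: mirrors the dual path in BindFastUniformBuffer; the base
    // binding table and handles are assumptions for illustration.
    #include <array>
    #include <cstddef>
    #include <glad/glad.h>

    namespace {
    // Same NV parameter-buffer targets as PABO_LUT, indexed by pipeline stage.
    constexpr std::array PABO_TARGETS{
        GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,
        GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
        GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV,
        GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
        GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV,
    };
    // Hypothetical per-stage base uniform bindings for the GLSL path.
    constexpr std::array<GLuint, 5> STAGE_BASE_BINDING{0, 18, 36, 54, 72};
    } // namespace

    void BindStageUniform(bool use_assembly_shaders, std::size_t stage, GLuint binding_index,
                          GLuint buffer, GLsizeiptr size) {
        if (use_assembly_shaders) {
            // Assembly shaders address constant banks through NV parameter buffers.
            glBindBufferRangeNV(PABO_TARGETS[stage], binding_index, buffer, 0, size);
        } else {
            // GLSL shaders see a flat uniform binding space partitioned per stage.
            glBindBufferRange(GL_UNIFORM_BUFFER, STAGE_BASE_BINDING[stage] + binding_index,
                              buffer, 0, size);
        }
    }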
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 04c267ee4..48d5c4a5e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -21,9 +21,7 @@ | |||
| 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 21 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 22 | 22 | ||
| 23 | namespace OpenGL { | 23 | namespace OpenGL { |
| 24 | |||
| 25 | namespace { | 24 | namespace { |
| 26 | |||
| 27 | // One uniform block is reserved for emulation purposes | 25 | // One uniform block is reserved for emulation purposes |
| 28 | constexpr u32 ReservedUniformBlocks = 1; | 26 | constexpr u32 ReservedUniformBlocks = 1; |
| 29 | 27 | ||
| @@ -197,11 +195,13 @@ bool IsASTCSupported() { | |||
| 197 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | 195 | const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |
| 198 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); | 196 | return nsight || HasExtension(extensions, "GL_EXT_debug_tool"); |
| 199 | } | 197 | } |
| 200 | |||
| 201 | } // Anonymous namespace | 198 | } // Anonymous namespace |
| 202 | 199 | ||
| 203 | Device::Device() | 200 | Device::Device() { |
| 204 | : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { | 201 | if (!GLAD_GL_VERSION_4_6) { |
| 202 | LOG_ERROR(Render_OpenGL, "OpenGL 4.6 is not available"); | ||
| 203 | throw std::runtime_error{"Insufficient version"}; | ||
| 204 | } | ||
| 205 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); | 205 | const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); |
| 206 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); | 206 | const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); |
| 207 | const std::vector extensions = GetExtensions(); | 207 | const std::vector extensions = GetExtensions(); |
| @@ -217,6 +217,9 @@ Device::Device() | |||
| 217 | "Beta driver 443.24 is known to have issues. There might be performance issues."); | 217 | "Beta driver 443.24 is known to have issues. There might be performance issues."); |
| 218 | disable_fast_buffer_sub_data = true; | 218 | disable_fast_buffer_sub_data = true; |
| 219 | } | 219 | } |
| 220 | |||
| 221 | max_uniform_buffers = BuildMaxUniformBuffers(); | ||
| 222 | base_bindings = BuildBaseBindings(); | ||
| 220 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | 223 | uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); |
| 221 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | 224 | shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); |
| 222 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | 225 | max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); |
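The constructor now throws std::runtime_error when OpenGL 4.6 is unavailable instead of limping along, so whoever constructs the Device must be prepared for the exception. A hedged sketch of one way a caller could handle it; the function name and fallback behavior are assumptions, not the actual frontend code.

    // Illustration only: reacting to Device{} throwing on an old GL version.
    #include <memory>
    #include <stdexcept>
    #include "video_core/renderer_opengl/gl_device.h"

    std::unique_ptr<OpenGL::Device> TryCreateDevice() {
        try {
            return std::make_unique<OpenGL::Device>();
        } catch (const std::runtime_error&) {
            // Device() already logged "OpenGL 4.6 is not available"; return null
            // so the caller can fall back to another backend or abort cleanly.
            return nullptr;
        }
    }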
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 9141de635..ee053776d 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -10,11 +10,9 @@ | |||
| 10 | 10 | ||
| 11 | namespace OpenGL { | 11 | namespace OpenGL { |
| 12 | 12 | ||
| 13 | static constexpr u32 EmulationUniformBlockBinding = 0; | 13 | class Device { |
| 14 | |||
| 15 | class Device final { | ||
| 16 | public: | 14 | public: |
| 17 | struct BaseBindings final { | 15 | struct BaseBindings { |
| 18 | u32 uniform_buffer{}; | 16 | u32 uniform_buffer{}; |
| 19 | u32 shader_storage_buffer{}; | 17 | u32 shader_storage_buffer{}; |
| 20 | u32 sampler{}; | 18 | u32 sampler{}; |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 3e9c922f5..151290101 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp | |||
| @@ -47,7 +47,7 @@ void GLInnerFence::Wait() { | |||
| 47 | 47 | ||
| 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, | 48 | FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, |
| 49 | Tegra::GPU& gpu_, TextureCache& texture_cache_, | 49 | Tegra::GPU& gpu_, TextureCache& texture_cache_, |
| 50 | OGLBufferCache& buffer_cache_, QueryCache& query_cache_) | 50 | BufferCache& buffer_cache_, QueryCache& query_cache_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} |
| 52 | 52 | ||
| 53 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | 53 | Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { |
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index 30dbee613..e714aa115 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h | |||
| @@ -32,14 +32,13 @@ private: | |||
| 32 | }; | 32 | }; |
| 33 | 33 | ||
| 34 | using Fence = std::shared_ptr<GLInnerFence>; | 34 | using Fence = std::shared_ptr<GLInnerFence>; |
| 35 | using GenericFenceManager = | 35 | using GenericFenceManager = VideoCommon::FenceManager<Fence, TextureCache, BufferCache, QueryCache>; |
| 36 | VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>; | ||
| 37 | 36 | ||
| 38 | class FenceManagerOpenGL final : public GenericFenceManager { | 37 | class FenceManagerOpenGL final : public GenericFenceManager { |
| 39 | public: | 38 | public: |
| 40 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 39 | explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, |
| 41 | TextureCache& texture_cache_, OGLBufferCache& buffer_cache_, | 40 | TextureCache& texture_cache, BufferCache& buffer_cache, |
| 42 | QueryCache& query_cache_); | 41 | QueryCache& query_cache); |
| 43 | 42 | ||
| 44 | protected: | 43 | protected: |
| 45 | Fence CreateFence(u32 value, bool is_stubbed) override; | 44 | Fence CreateFence(u32 value, bool is_stubbed) override; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ea4ca9a82..418644108 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -38,34 +38,21 @@ | |||
| 38 | namespace OpenGL { | 38 | namespace OpenGL { |
| 39 | 39 | ||
| 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 40 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 41 | using GLvec4 = std::array<GLfloat, 4>; | ||
| 41 | 42 | ||
| 42 | using Tegra::Engines::ShaderType; | 43 | using Tegra::Engines::ShaderType; |
| 43 | using VideoCore::Surface::PixelFormat; | 44 | using VideoCore::Surface::PixelFormat; |
| 44 | using VideoCore::Surface::SurfaceTarget; | 45 | using VideoCore::Surface::SurfaceTarget; |
| 45 | using VideoCore::Surface::SurfaceType; | 46 | using VideoCore::Surface::SurfaceType; |
| 46 | 47 | ||
| 47 | MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Format Setup", MP_RGB(128, 128, 192)); | ||
| 48 | MICROPROFILE_DEFINE(OpenGL_VB, "OpenGL", "Vertex Buffer Setup", MP_RGB(128, 128, 192)); | ||
| 49 | MICROPROFILE_DEFINE(OpenGL_Shader, "OpenGL", "Shader Setup", MP_RGB(128, 128, 192)); | ||
| 50 | MICROPROFILE_DEFINE(OpenGL_UBO, "OpenGL", "Const Buffer Setup", MP_RGB(128, 128, 192)); | ||
| 51 | MICROPROFILE_DEFINE(OpenGL_Index, "OpenGL", "Index Buffer Setup", MP_RGB(128, 128, 192)); | ||
| 52 | MICROPROFILE_DEFINE(OpenGL_Texture, "OpenGL", "Texture Setup", MP_RGB(128, 128, 192)); | ||
| 53 | MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(128, 128, 192)); | ||
| 54 | MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | 48 | MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); |
| 49 | MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192)); | ||
| 55 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); | 50 | MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); |
| 56 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | 51 | MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100)); |
| 57 | MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); | ||
| 58 | 52 | ||
| 59 | namespace { | 53 | namespace { |
| 60 | 54 | ||
| 61 | constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18; | ||
| 62 | constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE = | ||
| 63 | NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize; | ||
| 64 | constexpr size_t TOTAL_CONST_BUFFER_BYTES = | ||
| 65 | NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage; | ||
| 66 | |||
| 67 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | 55 | constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; |
| 68 | constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16; | ||
| 69 | 56 | ||
| 70 | struct TextureHandle { | 57 | struct TextureHandle { |
| 71 | constexpr TextureHandle(u32 data, bool via_header_index) { | 58 | constexpr TextureHandle(u32 data, bool via_header_index) { |
| @@ -101,20 +88,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
| 101 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | 88 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 102 | } | 89 | } |
| 103 | 90 | ||
| 104 | std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 105 | const ConstBufferEntry& entry) { | ||
| 106 | if (!entry.IsIndirect()) { | ||
| 107 | return entry.GetSize(); | ||
| 108 | } | ||
| 109 | if (buffer.size > Maxwell::MaxConstBufferSize) { | ||
| 110 | LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, | ||
| 111 | Maxwell::MaxConstBufferSize); | ||
| 112 | return Maxwell::MaxConstBufferSize; | ||
| 113 | } | ||
| 114 | |||
| 115 | return buffer.size; | ||
| 116 | } | ||
| 117 | |||
| 118 | /// Translates hardware transform feedback indices | 91 | /// Translates hardware transform feedback indices |
| 119 | /// @param location Hardware location | 92 | /// @param location Hardware location |
| 120 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments | 93 | /// @return Pair of ARB_transform_feedback3 token stream first and third arguments |
| @@ -147,14 +120,6 @@ void oglEnable(GLenum cap, bool state) { | |||
| 147 | (state ? glEnable : glDisable)(cap); | 120 | (state ? glEnable : glDisable)(cap); |
| 148 | } | 121 | } |
| 149 | 122 | ||
| 150 | void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ssbos) { | ||
| 151 | if (num_ssbos == 0) { | ||
| 152 | return; | ||
| 153 | } | ||
| 154 | glProgramLocalParametersI4uivNV(target, 0, static_cast<GLsizei>(num_ssbos), | ||
| 155 | reinterpret_cast<const GLuint*>(ssbos)); | ||
| 156 | } | ||
| 157 | |||
| 158 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | 123 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |
| 159 | if (entry.is_buffer) { | 124 | if (entry.is_buffer) { |
| 160 | return ImageViewType::Buffer; | 125 | return ImageViewType::Buffer; |
| @@ -201,44 +166,28 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 201 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), | 166 | : RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()), |
| 202 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), | 167 | kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), |
| 203 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), | 168 | screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), |
| 204 | stream_buffer(device, state_tracker), | ||
| 205 | texture_cache_runtime(device, program_manager, state_tracker), | 169 | texture_cache_runtime(device, program_manager, state_tracker), |
| 206 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 170 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 171 | buffer_cache_runtime(device), | ||
| 172 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||
| 207 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | 173 | shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |
| 208 | query_cache(*this, maxwell3d, gpu_memory), | 174 | query_cache(*this, maxwell3d, gpu_memory), |
| 209 | buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker), | ||
| 210 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | 175 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |
| 211 | async_shaders(emu_window_) { | 176 | async_shaders(emu_window_) { |
| 212 | unified_uniform_buffer.Create(); | ||
| 213 | glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0); | ||
| 214 | |||
| 215 | if (device.UseAssemblyShaders()) { | ||
| 216 | glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 217 | for (const GLuint cbuf : staging_cbufs) { | ||
| 218 | glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize), | ||
| 219 | nullptr, 0); | ||
| 220 | } | ||
| 221 | } | ||
| 222 | if (device.UseAsynchronousShaders()) { | 177 | if (device.UseAsynchronousShaders()) { |
| 223 | async_shaders.AllocateWorkers(); | 178 | async_shaders.AllocateWorkers(); |
| 224 | } | 179 | } |
| 225 | } | 180 | } |
| 226 | 181 | ||
| 227 | RasterizerOpenGL::~RasterizerOpenGL() { | 182 | RasterizerOpenGL::~RasterizerOpenGL() = default; |
| 228 | if (device.UseAssemblyShaders()) { | ||
| 229 | glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data()); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | 183 | ||
| 233 | void RasterizerOpenGL::SetupVertexFormat() { | 184 | void RasterizerOpenGL::SyncVertexFormats() { |
| 234 | auto& flags = maxwell3d.dirty.flags; | 185 | auto& flags = maxwell3d.dirty.flags; |
| 235 | if (!flags[Dirty::VertexFormats]) { | 186 | if (!flags[Dirty::VertexFormats]) { |
| 236 | return; | 187 | return; |
| 237 | } | 188 | } |
| 238 | flags[Dirty::VertexFormats] = false; | 189 | flags[Dirty::VertexFormats] = false; |
| 239 | 190 | ||
| 240 | MICROPROFILE_SCOPE(OpenGL_VAO); | ||
| 241 | |||
| 242 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables | 191 | // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables |
| 243 | // the first 16 vertex attributes always, as we don't know which ones are actually used until | 192 | // the first 16 vertex attributes always, as we don't know which ones are actually used until |
| 244 | // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to | 193 | // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to |
| @@ -274,55 +223,7 @@ void RasterizerOpenGL::SetupVertexFormat() { | |||
| 274 | } | 223 | } |
| 275 | } | 224 | } |
| 276 | 225 | ||
| 277 | void RasterizerOpenGL::SetupVertexBuffer() { | 226 | void RasterizerOpenGL::SyncVertexInstances() { |
| 278 | auto& flags = maxwell3d.dirty.flags; | ||
| 279 | if (!flags[Dirty::VertexBuffers]) { | ||
| 280 | return; | ||
| 281 | } | ||
| 282 | flags[Dirty::VertexBuffers] = false; | ||
| 283 | |||
| 284 | MICROPROFILE_SCOPE(OpenGL_VB); | ||
| 285 | |||
| 286 | const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); | ||
| 287 | |||
| 288 | // Upload all guest vertex arrays sequentially to our buffer | ||
| 289 | const auto& regs = maxwell3d.regs; | ||
| 290 | for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { | ||
| 291 | if (!flags[Dirty::VertexBuffer0 + index]) { | ||
| 292 | continue; | ||
| 293 | } | ||
| 294 | flags[Dirty::VertexBuffer0 + index] = false; | ||
| 295 | |||
| 296 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 297 | if (!vertex_array.IsEnabled()) { | ||
| 298 | continue; | ||
| 299 | } | ||
| 300 | |||
| 301 | const GPUVAddr start = vertex_array.StartAddress(); | ||
| 302 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||
| 303 | ASSERT(end >= start); | ||
| 304 | |||
| 305 | const GLuint gl_index = static_cast<GLuint>(index); | ||
| 306 | const u64 size = end - start; | ||
| 307 | if (size == 0) { | ||
| 308 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); | ||
| 309 | if (use_unified_memory) { | ||
| 310 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, 0, 0); | ||
| 311 | } | ||
| 312 | continue; | ||
| 313 | } | ||
| 314 | const auto info = buffer_cache.UploadMemory(start, size); | ||
| 315 | if (use_unified_memory) { | ||
| 316 | glBindVertexBuffer(gl_index, 0, 0, vertex_array.stride); | ||
| 317 | glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, gl_index, | ||
| 318 | info.address + info.offset, size); | ||
| 319 | } else { | ||
| 320 | glBindVertexBuffer(gl_index, info.handle, info.offset, vertex_array.stride); | ||
| 321 | } | ||
| 322 | } | ||
| 323 | } | ||
| 324 | |||
| 325 | void RasterizerOpenGL::SetupVertexInstances() { | ||
| 326 | auto& flags = maxwell3d.dirty.flags; | 227 | auto& flags = maxwell3d.dirty.flags; |
| 327 | if (!flags[Dirty::VertexInstances]) { | 228 | if (!flags[Dirty::VertexInstances]) { |
| 328 | return; | 229 | return; |
| @@ -343,17 +244,7 @@ void RasterizerOpenGL::SetupVertexInstances() { | |||
| 343 | } | 244 | } |
| 344 | } | 245 | } |
| 345 | 246 | ||
| 346 | GLintptr RasterizerOpenGL::SetupIndexBuffer() { | 247 | void RasterizerOpenGL::SetupShaders(bool is_indexed) { |
| 347 | MICROPROFILE_SCOPE(OpenGL_Index); | ||
| 348 | const auto& regs = maxwell3d.regs; | ||
| 349 | const std::size_t size = CalculateIndexBufferSize(); | ||
| 350 | const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); | ||
| 351 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); | ||
| 352 | return info.offset; | ||
| 353 | } | ||
| 354 | |||
| 355 | void RasterizerOpenGL::SetupShaders() { | ||
| 356 | MICROPROFILE_SCOPE(OpenGL_Shader); | ||
| 357 | u32 clip_distances = 0; | 248 | u32 clip_distances = 0; |
| 358 | 249 | ||
| 359 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; | 250 | std::array<Shader*, Maxwell::MaxShaderStage> shaders{}; |
| @@ -410,11 +301,19 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 410 | const size_t stage = index == 0 ? 0 : index - 1; | 301 | const size_t stage = index == 0 ? 0 : index - 1; |
| 411 | shaders[stage] = shader; | 302 | shaders[stage] = shader; |
| 412 | 303 | ||
| 413 | SetupDrawConstBuffers(stage, shader); | ||
| 414 | SetupDrawGlobalMemory(stage, shader); | ||
| 415 | SetupDrawTextures(shader, stage); | 304 | SetupDrawTextures(shader, stage); |
| 416 | SetupDrawImages(shader, stage); | 305 | SetupDrawImages(shader, stage); |
| 417 | 306 | ||
| 307 | buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers); | ||
| 308 | |||
| 309 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 310 | u32 ssbo_index = 0; | ||
| 311 | for (const auto& buffer : shader->GetEntries().global_memory_entries) { | ||
| 312 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 313 | buffer.cbuf_offset, buffer.is_written); | ||
| 314 | ++ssbo_index; | ||
| 315 | } | ||
| 316 | |||
| 418 | // Workaround for Intel drivers. | 317 | // Workaround for Intel drivers. |
| 419 | // When a clip distance is enabled but not set in the shader it crops parts of the screen | 318 | // When a clip distance is enabled but not set in the shader it crops parts of the screen |
| 420 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the | 319 | // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the |
| @@ -430,43 +329,26 @@ void RasterizerOpenGL::SetupShaders() { | |||
| 430 | SyncClipEnabled(clip_distances); | 329 | SyncClipEnabled(clip_distances); |
| 431 | maxwell3d.dirty.flags[Dirty::Shaders] = false; | 330 | maxwell3d.dirty.flags[Dirty::Shaders] = false; |
| 432 | 331 | ||
| 332 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 333 | |||
| 433 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 334 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 434 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | 335 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 435 | 336 | ||
| 337 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 338 | |||
| 436 | size_t image_view_index = 0; | 339 | size_t image_view_index = 0; |
| 437 | size_t texture_index = 0; | 340 | size_t texture_index = 0; |
| 438 | size_t image_index = 0; | 341 | size_t image_index = 0; |
| 439 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 342 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| 440 | const Shader* const shader = shaders[stage]; | 343 | const Shader* const shader = shaders[stage]; |
| 441 | if (shader) { | 344 | if (!shader) { |
| 442 | const auto base = device.GetBaseBindings(stage); | ||
| 443 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | ||
| 444 | texture_index, image_index); | ||
| 445 | } | ||
| 446 | } | ||
| 447 | } | ||
| 448 | |||
| 449 | std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||
| 450 | const auto& regs = maxwell3d.regs; | ||
| 451 | |||
| 452 | std::size_t size = 0; | ||
| 453 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 454 | if (!regs.vertex_array[index].IsEnabled()) | ||
| 455 | continue; | 345 | continue; |
| 456 | 346 | } | |
| 457 | const GPUVAddr start = regs.vertex_array[index].StartAddress(); | 347 | buffer_cache.BindHostStageBuffers(stage); |
| 458 | const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | 348 | const auto& base = device.GetBaseBindings(stage); |
| 459 | 349 | BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index, | |
| 460 | size += end - start; | 350 | texture_index, image_index); |
| 461 | ASSERT(end >= start); | ||
| 462 | } | 351 | } |
| 463 | |||
| 464 | return size; | ||
| 465 | } | ||
| 466 | |||
| 467 | std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { | ||
| 468 | return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * | ||
| 469 | static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||
| 470 | } | 352 | } |
| 471 | 353 | ||
| 472 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, | 354 | void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, |
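BindGraphicsStorageBuffer only records where the SSBO's descriptor lives (a constant-buffer index and offset); the generic buffer cache later resolves it the same way the removed SetupDrawGlobalMemory further down did, by reading a GPU address/size pair out of that constant buffer. A sketch of that descriptor read, with a MemoryReader template standing in for the real Tegra::MemoryManager interface:

    // Sketch of the SSBO descriptor resolved from (cbuf_index, cbuf_offset);
    // mirrors the reads in the removed SetupDrawGlobalMemory below.
    #include <cstdint>

    struct SsboDescriptor {
        uint64_t gpu_addr; // guest GPU virtual address of the storage buffer
        uint32_t size;     // buffer size in bytes
    };

    template <typename MemoryReader>
    SsboDescriptor ReadSsboDescriptor(MemoryReader& gpu_memory, uint64_t cbuf_addr,
                                      uint32_t cbuf_offset) {
        const uint64_t descriptor_addr = cbuf_addr + cbuf_offset;
        return SsboDescriptor{
            .gpu_addr = gpu_memory.template Read<uint64_t>(descriptor_addr),
            .size = gpu_memory.template Read<uint32_t>(descriptor_addr + 8),
        };
    }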
| @@ -475,6 +357,7 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s | |||
| 475 | } | 357 | } |
| 476 | 358 | ||
| 477 | void RasterizerOpenGL::Clear() { | 359 | void RasterizerOpenGL::Clear() { |
| 360 | MICROPROFILE_SCOPE(OpenGL_Clears); | ||
| 478 | if (!maxwell3d.ShouldExecute()) { | 361 | if (!maxwell3d.ShouldExecute()) { |
| 479 | return; | 362 | return; |
| 480 | } | 363 | } |
| @@ -525,11 +408,9 @@ void RasterizerOpenGL::Clear() { | |||
| 525 | } | 408 | } |
| 526 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); | 409 | UNIMPLEMENTED_IF(regs.clear_flags.viewport); |
| 527 | 410 | ||
| 528 | { | 411 | std::scoped_lock lock{texture_cache.mutex}; |
| 529 | auto lock = texture_cache.AcquireLock(); | 412 | texture_cache.UpdateRenderTargets(true); |
| 530 | texture_cache.UpdateRenderTargets(true); | 413 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| 531 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||
| 532 | } | ||
| 533 | 414 | ||
| 534 | if (use_color) { | 415 | if (use_color) { |
| 535 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); | 416 | glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color); |
| @@ -541,7 +422,6 @@ void RasterizerOpenGL::Clear() { | |||
| 541 | } else if (use_stencil) { | 422 | } else if (use_stencil) { |
| 542 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); | 423 | glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil); |
| 543 | } | 424 | } |
| 544 | |||
| 545 | ++num_queued_commands; | 425 | ++num_queued_commands; |
| 546 | } | 426 | } |
| 547 | 427 | ||
| @@ -550,75 +430,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 550 | 430 | ||
| 551 | query_cache.UpdateCounters(); | 431 | query_cache.UpdateCounters(); |
| 552 | 432 | ||
| 553 | SyncViewport(); | 433 | SyncState(); |
| 554 | SyncRasterizeEnable(); | ||
| 555 | SyncPolygonModes(); | ||
| 556 | SyncColorMask(); | ||
| 557 | SyncFragmentColorClampState(); | ||
| 558 | SyncMultiSampleState(); | ||
| 559 | SyncDepthTestState(); | ||
| 560 | SyncDepthClamp(); | ||
| 561 | SyncStencilTestState(); | ||
| 562 | SyncBlendState(); | ||
| 563 | SyncLogicOpState(); | ||
| 564 | SyncCullMode(); | ||
| 565 | SyncPrimitiveRestart(); | ||
| 566 | SyncScissorTest(); | ||
| 567 | SyncPointState(); | ||
| 568 | SyncLineState(); | ||
| 569 | SyncPolygonOffset(); | ||
| 570 | SyncAlphaTest(); | ||
| 571 | SyncFramebufferSRGB(); | ||
| 572 | |||
| 573 | buffer_cache.Acquire(); | ||
| 574 | current_cbuf = 0; | ||
| 575 | |||
| 576 | std::size_t buffer_size = CalculateVertexArraysSize(); | ||
| 577 | |||
| 578 | // Add space for index buffer | ||
| 579 | if (is_indexed) { | ||
| 580 | buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); | ||
| 581 | } | ||
| 582 | |||
| 583 | // Uniform space for the 5 shader stages | ||
| 584 | buffer_size = | ||
| 585 | Common::AlignUp<std::size_t>(buffer_size, 4) + | ||
| 586 | (sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage; | ||
| 587 | |||
| 588 | // Add space for at least 18 constant buffers | ||
| 589 | buffer_size += Maxwell::MaxConstBuffers * | ||
| 590 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 591 | |||
| 592 | // Prepare the vertex array. | ||
| 593 | buffer_cache.Map(buffer_size); | ||
| 594 | |||
| 595 | // Prepare vertex array format. | ||
| 596 | SetupVertexFormat(); | ||
| 597 | |||
| 598 | // Upload vertex and index data. | ||
| 599 | SetupVertexBuffer(); | ||
| 600 | SetupVertexInstances(); | ||
| 601 | GLintptr index_buffer_offset = 0; | ||
| 602 | if (is_indexed) { | ||
| 603 | index_buffer_offset = SetupIndexBuffer(); | ||
| 604 | } | ||
| 605 | |||
| 606 | // Setup emulation uniform buffer. | ||
| 607 | if (!device.UseAssemblyShaders()) { | ||
| 608 | MaxwellUniformData ubo; | ||
| 609 | ubo.SetFromRegs(maxwell3d); | ||
| 610 | const auto info = | ||
| 611 | buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); | ||
| 612 | glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, | ||
| 613 | static_cast<GLsizeiptr>(sizeof(ubo))); | ||
| 614 | } | ||
| 615 | 434 | ||
| 616 | // Setup shaders and their used resources. | 435 | // Setup shaders and their used resources. |
| 617 | auto lock = texture_cache.AcquireLock(); | 436 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 618 | SetupShaders(); | 437 | SetupShaders(is_indexed); |
| 619 | 438 | ||
| 620 | // Signal the buffer cache that we are not going to upload more things. | ||
| 621 | buffer_cache.Unmap(); | ||
| 622 | texture_cache.UpdateRenderTargets(false); | 439 | texture_cache.UpdateRenderTargets(false); |
| 623 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | 440 | state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); |
| 624 | program_manager.BindGraphicsPipeline(); | 441 | program_manager.BindGraphicsPipeline(); |
| @@ -632,7 +449,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 632 | if (is_indexed) { | 449 | if (is_indexed) { |
| 633 | const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); | 450 | const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); |
| 634 | const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); | 451 | const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); |
| 635 | const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); | 452 | const GLvoid* const offset = buffer_cache_runtime.IndexOffset(); |
| 636 | const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); | 453 | const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); |
| 637 | if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { | 454 | if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { |
| 638 | glDrawElements(primitive_mode, num_vertices, format, offset); | 455 | glDrawElements(primitive_mode, num_vertices, format, offset); |
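IndexOffset() encodes a byte offset into the bound GL_ELEMENT_ARRAY_BUFFER as a pointer, which is how core-profile glDrawElements addresses buffer storage instead of client memory. A minimal standalone sketch of the same idiom; the buffer handle, index type, and primitive mode are assumed for illustration:

    // Sketch: indexed draw sourcing indices from a bound element buffer at a
    // byte offset; 'index_buffer' and GL_UNSIGNED_SHORT are illustrative.
    #include <cstdint>
    #include <glad/glad.h>

    void DrawIndexedAtOffset(GLuint index_buffer, GLsizei num_indices, std::uint32_t first_index) {
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer);
        // With an element buffer bound, the final pointer argument is read as a
        // byte offset into that buffer rather than a client-memory address.
        const std::uintptr_t byte_offset = std::uintptr_t{first_index} * sizeof(std::uint16_t);
        glDrawElements(GL_TRIANGLES, num_indices, GL_UNSIGNED_SHORT,
                       reinterpret_cast<const GLvoid*>(byte_offset));
    }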
| @@ -672,22 +489,22 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 672 | } | 489 | } |
| 673 | 490 | ||
| 674 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | 491 | void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { |
| 675 | buffer_cache.Acquire(); | ||
| 676 | current_cbuf = 0; | ||
| 677 | |||
| 678 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); | 492 | Shader* const kernel = shader_cache.GetComputeKernel(code_addr); |
| 679 | 493 | ||
| 680 | auto lock = texture_cache.AcquireLock(); | 494 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 681 | BindComputeTextures(kernel); | 495 | BindComputeTextures(kernel); |
| 682 | 496 | ||
| 683 | const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * | 497 | const auto& entries = kernel->GetEntries(); |
| 684 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | 498 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); |
| 685 | buffer_cache.Map(buffer_size); | 499 | buffer_cache.UnbindComputeStorageBuffers(); |
| 686 | 500 | u32 ssbo_index = 0; | |
| 687 | SetupComputeConstBuffers(kernel); | 501 | for (const auto& buffer : entries.global_memory_entries) { |
| 688 | SetupComputeGlobalMemory(kernel); | 502 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, |
| 689 | 503 | buffer.is_written); | |
| 690 | buffer_cache.Unmap(); | 504 | ++ssbo_index; |
| 505 | } | ||
| 506 | buffer_cache.UpdateComputeBuffers(); | ||
| 507 | buffer_cache.BindHostComputeBuffers(); | ||
| 691 | 508 | ||
| 692 | const auto& launch_desc = kepler_compute.launch_description; | 509 | const auto& launch_desc = kepler_compute.launch_description; |
| 693 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); | 510 | glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); |
| @@ -703,6 +520,12 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 703 | query_cache.Query(gpu_addr, type, timestamp); | 520 | query_cache.Query(gpu_addr, type, timestamp); |
| 704 | } | 521 | } |
| 705 | 522 | ||
| 523 | void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 524 | u32 size) { | ||
| 525 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 526 | buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||
| 527 | } | ||
| 528 | |||
| 706 | void RasterizerOpenGL::FlushAll() {} | 529 | void RasterizerOpenGL::FlushAll() {} |
| 707 | 530 | ||
| 708 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | 531 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| @@ -711,19 +534,23 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | |||
| 711 | return; | 534 | return; |
| 712 | } | 535 | } |
| 713 | { | 536 | { |
| 714 | auto lock = texture_cache.AcquireLock(); | 537 | std::scoped_lock lock{texture_cache.mutex}; |
| 715 | texture_cache.DownloadMemory(addr, size); | 538 | texture_cache.DownloadMemory(addr, size); |
| 716 | } | 539 | } |
| 717 | buffer_cache.FlushRegion(addr, size); | 540 | { |
| 541 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 542 | buffer_cache.DownloadMemory(addr, size); | ||
| 543 | } | ||
| 718 | query_cache.FlushRegion(addr, size); | 544 | query_cache.FlushRegion(addr, size); |
| 719 | } | 545 | } |
| 720 | 546 | ||
| 721 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { | 547 | bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) { |
| 548 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||
| 722 | if (!Settings::IsGPULevelHigh()) { | 549 | if (!Settings::IsGPULevelHigh()) { |
| 723 | return buffer_cache.MustFlushRegion(addr, size); | 550 | return buffer_cache.IsRegionGpuModified(addr, size); |
| 724 | } | 551 | } |
| 725 | return texture_cache.IsRegionGpuModified(addr, size) || | 552 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 726 | buffer_cache.MustFlushRegion(addr, size); | 553 | buffer_cache.IsRegionGpuModified(addr, size); |
| 727 | } | 554 | } |
| 728 | 555 | ||
| 729 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | 556 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -732,11 +559,14 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | |||
| 732 | return; | 559 | return; |
| 733 | } | 560 | } |
| 734 | { | 561 | { |
| 735 | auto lock = texture_cache.AcquireLock(); | 562 | std::scoped_lock lock{texture_cache.mutex}; |
| 736 | texture_cache.WriteMemory(addr, size); | 563 | texture_cache.WriteMemory(addr, size); |
| 737 | } | 564 | } |
| 565 | { | ||
| 566 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 567 | buffer_cache.WriteMemory(addr, size); | ||
| 568 | } | ||
| 738 | shader_cache.InvalidateRegion(addr, size); | 569 | shader_cache.InvalidateRegion(addr, size); |
| 739 | buffer_cache.InvalidateRegion(addr, size); | ||
| 740 | query_cache.InvalidateRegion(addr, size); | 570 | query_cache.InvalidateRegion(addr, size); |
| 741 | } | 571 | } |
| 742 | 572 | ||
| @@ -745,26 +575,35 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
| 745 | if (addr == 0 || size == 0) { | 575 | if (addr == 0 || size == 0) { |
| 746 | return; | 576 | return; |
| 747 | } | 577 | } |
| 578 | shader_cache.OnCPUWrite(addr, size); | ||
| 748 | { | 579 | { |
| 749 | auto lock = texture_cache.AcquireLock(); | 580 | std::scoped_lock lock{texture_cache.mutex}; |
| 750 | texture_cache.WriteMemory(addr, size); | 581 | texture_cache.WriteMemory(addr, size); |
| 751 | } | 582 | } |
| 752 | shader_cache.OnCPUWrite(addr, size); | 583 | { |
| 753 | buffer_cache.OnCPUWrite(addr, size); | 584 | std::scoped_lock lock{buffer_cache.mutex}; |
| 585 | buffer_cache.CachedWriteMemory(addr, size); | ||
| 586 | } | ||
| 754 | } | 587 | } |
| 755 | 588 | ||
| 756 | void RasterizerOpenGL::SyncGuestHost() { | 589 | void RasterizerOpenGL::SyncGuestHost() { |
| 757 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 590 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 758 | buffer_cache.SyncGuestHost(); | ||
| 759 | shader_cache.SyncGuestHost(); | 591 | shader_cache.SyncGuestHost(); |
| 592 | { | ||
| 593 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 594 | buffer_cache.FlushCachedWrites(); | ||
| 595 | } | ||
| 760 | } | 596 | } |
| 761 | 597 | ||
| 762 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | 598 | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { |
| 763 | { | 599 | { |
| 764 | auto lock = texture_cache.AcquireLock(); | 600 | std::scoped_lock lock{texture_cache.mutex}; |
| 765 | texture_cache.UnmapMemory(addr, size); | 601 | texture_cache.UnmapMemory(addr, size); |
| 766 | } | 602 | } |
| 767 | buffer_cache.OnCPUWrite(addr, size); | 603 | { |
| 604 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 605 | buffer_cache.WriteMemory(addr, size); | ||
| 606 | } | ||
| 768 | shader_cache.OnCPUWrite(addr, size); | 607 | shader_cache.OnCPUWrite(addr, size); |
| 769 | } | 608 | } |
| 770 | 609 | ||
| @@ -799,14 +638,7 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||
| 799 | } | 638 | } |
| 800 | 639 | ||
| 801 | void RasterizerOpenGL::WaitForIdle() { | 640 | void RasterizerOpenGL::WaitForIdle() { |
| 802 | // Place a barrier on everything that is not framebuffer related. | 641 | glMemoryBarrier(GL_ALL_BARRIER_BITS); |
| 803 | // This is related to another flag that is not currently implemented. | ||
| 804 | glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT | GL_ELEMENT_ARRAY_BARRIER_BIT | | ||
| 805 | GL_UNIFORM_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT | | ||
| 806 | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_COMMAND_BARRIER_BIT | | ||
| 807 | GL_PIXEL_BUFFER_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT | | ||
| 808 | GL_BUFFER_UPDATE_BARRIER_BIT | GL_TRANSFORM_FEEDBACK_BARRIER_BIT | | ||
| 809 | GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT); | ||
| 810 | } | 642 | } |
| 811 | 643 | ||
| 812 | void RasterizerOpenGL::FragmentBarrier() { | 644 | void RasterizerOpenGL::FragmentBarrier() { |
| @@ -831,18 +663,21 @@ void RasterizerOpenGL::TickFrame() { | |||
| 831 | num_queued_commands = 0; | 663 | num_queued_commands = 0; |
| 832 | 664 | ||
| 833 | fence_manager.TickFrame(); | 665 | fence_manager.TickFrame(); |
| 834 | buffer_cache.TickFrame(); | ||
| 835 | { | 666 | { |
| 836 | auto lock = texture_cache.AcquireLock(); | 667 | std::scoped_lock lock{texture_cache.mutex}; |
| 837 | texture_cache.TickFrame(); | 668 | texture_cache.TickFrame(); |
| 838 | } | 669 | } |
| 670 | { | ||
| 671 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 672 | buffer_cache.TickFrame(); | ||
| 673 | } | ||
| 839 | } | 674 | } |
| 840 | 675 | ||
| 841 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 676 | bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 842 | const Tegra::Engines::Fermi2D::Surface& dst, | 677 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 843 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 678 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 844 | MICROPROFILE_SCOPE(OpenGL_Blits); | 679 | MICROPROFILE_SCOPE(OpenGL_Blits); |
| 845 | auto lock = texture_cache.AcquireLock(); | 680 | std::scoped_lock lock{texture_cache.mutex}; |
| 846 | texture_cache.BlitImage(dst, src, copy_config); | 681 | texture_cache.BlitImage(dst, src, copy_config); |
| 847 | return true; | 682 | return true; |
| 848 | } | 683 | } |
| @@ -854,7 +689,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 854 | } | 689 | } |
| 855 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 690 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 856 | 691 | ||
| 857 | auto lock = texture_cache.AcquireLock(); | 692 | std::scoped_lock lock{texture_cache.mutex}; |
| 858 | ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; | 693 | ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)}; |
| 859 | if (!image_view) { | 694 | if (!image_view) { |
| 860 | return false; | 695 | return false; |
| @@ -921,166 +756,6 @@ void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_te | |||
| 921 | } | 756 | } |
| 922 | } | 757 | } |
| 923 | 758 | ||
| 924 | void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) { | ||
| 925 | static constexpr std::array PARAMETER_LUT{ | ||
| 926 | GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 927 | GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 928 | GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV, | ||
| 929 | }; | ||
| 930 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 931 | const auto& stages = maxwell3d.state.shader_stages; | ||
| 932 | const auto& shader_stage = stages[stage_index]; | ||
| 933 | const auto& entries = shader->GetEntries(); | ||
| 934 | const bool use_unified = entries.use_unified_uniforms; | ||
| 935 | const std::size_t base_unified_offset = stage_index * NUM_CONST_BUFFERS_BYTES_PER_STAGE; | ||
| 936 | |||
| 937 | const auto base_bindings = device.GetBaseBindings(stage_index); | ||
| 938 | u32 binding = device.UseAssemblyShaders() ? 0 : base_bindings.uniform_buffer; | ||
| 939 | for (const auto& entry : entries.const_buffers) { | ||
| 940 | const u32 index = entry.GetIndex(); | ||
| 941 | const auto& buffer = shader_stage.const_buffers[index]; | ||
| 942 | SetupConstBuffer(PARAMETER_LUT[stage_index], binding, buffer, entry, use_unified, | ||
| 943 | base_unified_offset + index * Maxwell::MaxConstBufferSize); | ||
| 944 | ++binding; | ||
| 945 | } | ||
| 946 | if (use_unified) { | ||
| 947 | const u32 index = static_cast<u32>(base_bindings.shader_storage_buffer + | ||
| 948 | entries.global_memory_entries.size()); | ||
| 949 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, | ||
| 950 | base_unified_offset, NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 951 | } | ||
| 952 | } | ||
| 953 | |||
| 954 | void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { | ||
| 955 | MICROPROFILE_SCOPE(OpenGL_UBO); | ||
| 956 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 957 | const auto& entries = kernel->GetEntries(); | ||
| 958 | const bool use_unified = entries.use_unified_uniforms; | ||
| 959 | |||
| 960 | u32 binding = 0; | ||
| 961 | for (const auto& entry : entries.const_buffers) { | ||
| 962 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 963 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 964 | Tegra::Engines::ConstBufferInfo buffer; | ||
| 965 | buffer.address = config.Address(); | ||
| 966 | buffer.size = config.size; | ||
| 967 | buffer.enabled = mask[entry.GetIndex()]; | ||
| 968 | SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding, buffer, entry, | ||
| 969 | use_unified, entry.GetIndex() * Maxwell::MaxConstBufferSize); | ||
| 970 | ++binding; | ||
| 971 | } | ||
| 972 | if (use_unified) { | ||
| 973 | const GLuint index = static_cast<GLuint>(entries.global_memory_entries.size()); | ||
| 974 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, index, unified_uniform_buffer.handle, 0, | ||
| 975 | NUM_CONST_BUFFERS_BYTES_PER_STAGE); | ||
| 976 | } | ||
| 977 | } | ||
| 978 | |||
| 979 | void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding, | ||
| 980 | const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 981 | const ConstBufferEntry& entry, bool use_unified, | ||
| 982 | std::size_t unified_offset) { | ||
| 983 | if (!buffer.enabled) { | ||
| 984 | // Set values to zero to unbind buffers | ||
| 985 | if (device.UseAssemblyShaders()) { | ||
| 986 | glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0); | ||
| 987 | } else { | ||
| 988 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, 0, 0, sizeof(float)); | ||
| 989 | } | ||
| 990 | return; | ||
| 991 | } | ||
| 992 | |||
| 993 | // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 | ||
| 994 | // UBO alignment requirements. | ||
| 995 | const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); | ||
| 996 | |||
| 997 | const bool fast_upload = !use_unified && device.HasFastBufferSubData(); | ||
| 998 | |||
| 999 | const std::size_t alignment = use_unified ? 4 : device.GetUniformBufferAlignment(); | ||
| 1000 | const GPUVAddr gpu_addr = buffer.address; | ||
| 1001 | auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, false, fast_upload); | ||
| 1002 | |||
| 1003 | if (device.UseAssemblyShaders()) { | ||
| 1004 | UNIMPLEMENTED_IF(use_unified); | ||
| 1005 | if (info.offset != 0) { | ||
| 1006 | const GLuint staging_cbuf = staging_cbufs[current_cbuf++]; | ||
| 1007 | glCopyNamedBufferSubData(info.handle, staging_cbuf, info.offset, 0, size); | ||
| 1008 | info.handle = staging_cbuf; | ||
| 1009 | info.offset = 0; | ||
| 1010 | } | ||
| 1011 | glBindBufferRangeNV(stage, binding, info.handle, info.offset, size); | ||
| 1012 | return; | ||
| 1013 | } | ||
| 1014 | |||
| 1015 | if (use_unified) { | ||
| 1016 | glCopyNamedBufferSubData(info.handle, unified_uniform_buffer.handle, info.offset, | ||
| 1017 | unified_offset, size); | ||
| 1018 | } else { | ||
| 1019 | glBindBufferRange(GL_UNIFORM_BUFFER, binding, info.handle, info.offset, size); | ||
| 1020 | } | ||
| 1021 | } | ||
| 1022 | |||
| 1023 | void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) { | ||
| 1024 | static constexpr std::array TARGET_LUT = { | ||
| 1025 | GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV, | ||
| 1026 | GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, | ||
| 1027 | }; | ||
| 1028 | const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; | ||
| 1029 | const auto& entries{shader->GetEntries().global_memory_entries}; | ||
| 1030 | |||
| 1031 | std::array<BindlessSSBO, 32> ssbos; | ||
| 1032 | ASSERT(entries.size() < ssbos.size()); | ||
| 1033 | |||
| 1034 | const bool assembly_shaders = device.UseAssemblyShaders(); | ||
| 1035 | u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; | ||
| 1036 | for (const auto& entry : entries) { | ||
| 1037 | const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; | ||
| 1038 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||
| 1039 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||
| 1040 | SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||
| 1041 | ++binding; | ||
| 1042 | } | ||
| 1043 | if (assembly_shaders) { | ||
| 1044 | UpdateBindlessSSBOs(TARGET_LUT[stage_index], ssbos.data(), entries.size()); | ||
| 1045 | } | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { | ||
| 1049 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||
| 1050 | const auto& entries{kernel->GetEntries().global_memory_entries}; | ||
| 1051 | |||
| 1052 | std::array<BindlessSSBO, 32> ssbos; | ||
| 1053 | ASSERT(entries.size() < ssbos.size()); | ||
| 1054 | |||
| 1055 | u32 binding = 0; | ||
| 1056 | for (const auto& entry : entries) { | ||
| 1057 | const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; | ||
| 1058 | const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; | ||
| 1059 | const u32 size{gpu_memory.Read<u32>(addr + 8)}; | ||
| 1060 | SetupGlobalMemory(binding, entry, gpu_addr, size, &ssbos[binding]); | ||
| 1061 | ++binding; | ||
| 1062 | } | ||
| 1063 | if (device.UseAssemblyShaders()) { | ||
| 1064 | UpdateBindlessSSBOs(GL_COMPUTE_PROGRAM_NV, ssbos.data(), ssbos.size()); | ||
| 1065 | } | ||
| 1066 | } | ||
| 1067 | |||
| 1068 | void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, | ||
| 1069 | GPUVAddr gpu_addr, size_t size, BindlessSSBO* ssbo) { | ||
| 1070 | const size_t alignment{device.GetShaderStorageBufferAlignment()}; | ||
| 1071 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.is_written); | ||
| 1072 | if (device.UseAssemblyShaders()) { | ||
| 1073 | *ssbo = BindlessSSBO{ | ||
| 1074 | .address = static_cast<GLuint64EXT>(info.address + info.offset), | ||
| 1075 | .length = static_cast<GLsizei>(size), | ||
| 1076 | .padding = 0, | ||
| 1077 | }; | ||
| 1078 | } else { | ||
| 1079 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, info.handle, info.offset, | ||
| 1080 | static_cast<GLsizeiptr>(size)); | ||
| 1081 | } | ||
| 1082 | } | ||
| 1083 | |||
| 1084 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { | 759 | void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) { |
| 1085 | const bool via_header_index = | 760 | const bool via_header_index = |
| 1086 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 761 | maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| @@ -1128,6 +803,30 @@ void RasterizerOpenGL::SetupComputeImages(const Shader* shader) { | |||
| 1128 | } | 803 | } |
| 1129 | } | 804 | } |
| 1130 | 805 | ||
| 806 | void RasterizerOpenGL::SyncState() { | ||
| 807 | SyncViewport(); | ||
| 808 | SyncRasterizeEnable(); | ||
| 809 | SyncPolygonModes(); | ||
| 810 | SyncColorMask(); | ||
| 811 | SyncFragmentColorClampState(); | ||
| 812 | SyncMultiSampleState(); | ||
| 813 | SyncDepthTestState(); | ||
| 814 | SyncDepthClamp(); | ||
| 815 | SyncStencilTestState(); | ||
| 816 | SyncBlendState(); | ||
| 817 | SyncLogicOpState(); | ||
| 818 | SyncCullMode(); | ||
| 819 | SyncPrimitiveRestart(); | ||
| 820 | SyncScissorTest(); | ||
| 821 | SyncPointState(); | ||
| 822 | SyncLineState(); | ||
| 823 | SyncPolygonOffset(); | ||
| 824 | SyncAlphaTest(); | ||
| 825 | SyncFramebufferSRGB(); | ||
| 826 | SyncVertexFormats(); | ||
| 827 | SyncVertexInstances(); | ||
| 828 | } | ||
| 829 | |||
| 1131 | void RasterizerOpenGL::SyncViewport() { | 830 | void RasterizerOpenGL::SyncViewport() { |
| 1132 | auto& flags = maxwell3d.dirty.flags; | 831 | auto& flags = maxwell3d.dirty.flags; |
| 1133 | const auto& regs = maxwell3d.regs; | 832 | const auto& regs = maxwell3d.regs; |
| @@ -1163,9 +862,11 @@ void RasterizerOpenGL::SyncViewport() { | |||
| 1163 | if (regs.screen_y_control.y_negate != 0) { | 862 | if (regs.screen_y_control.y_negate != 0) { |
| 1164 | flip_y = !flip_y; | 863 | flip_y = !flip_y; |
| 1165 | } | 864 | } |
| 1166 | glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, | 865 | const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne; |
| 1167 | regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE | 866 | const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; |
| 1168 | : GL_NEGATIVE_ONE_TO_ONE); | 867 | const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE; |
| 868 | state_tracker.ClipControl(origin, depth); | ||
| 869 | state_tracker.SetYNegate(regs.screen_y_control.y_negate != 0); | ||
| 1169 | } | 870 | } |
| 1170 | 871 | ||
| 1171 | if (dirty_viewport) { | 872 | if (dirty_viewport) { |
| @@ -1649,36 +1350,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { | |||
| 1649 | if (regs.tfb_enabled == 0) { | 1350 | if (regs.tfb_enabled == 0) { |
| 1650 | return; | 1351 | return; |
| 1651 | } | 1352 | } |
| 1652 | |||
| 1653 | if (device.UseAssemblyShaders()) { | 1353 | if (device.UseAssemblyShaders()) { |
| 1654 | SyncTransformFeedback(); | 1354 | SyncTransformFeedback(); |
| 1655 | } | 1355 | } |
| 1656 | |||
| 1657 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 1356 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 1658 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 1357 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 1659 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 1358 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| 1660 | 1359 | UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); | |
| 1661 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1662 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1663 | if (!binding.buffer_enable) { | ||
| 1664 | if (enabled_transform_feedback_buffers[index]) { | ||
| 1665 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, | ||
| 1666 | 0); | ||
| 1667 | } | ||
| 1668 | enabled_transform_feedback_buffers[index] = false; | ||
| 1669 | continue; | ||
| 1670 | } | ||
| 1671 | enabled_transform_feedback_buffers[index] = true; | ||
| 1672 | |||
| 1673 | auto& tfb_buffer = transform_feedback_buffers[index]; | ||
| 1674 | tfb_buffer.Create(); | ||
| 1675 | |||
| 1676 | const GLuint handle = tfb_buffer.handle; | ||
| 1677 | const std::size_t size = binding.buffer_size; | ||
| 1678 | glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); | ||
| 1679 | glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, | ||
| 1680 | static_cast<GLsizeiptr>(size)); | ||
| 1681 | } | ||
| 1682 | 1360 | ||
| 1683 | // We may have to call BeginTransformFeedbackNV here since they seem to call different | 1361 | // We may have to call BeginTransformFeedbackNV here since they seem to call different |
| 1684 | // implementations on Nvidia's driver (the pointer is different) but we are using | 1362 | // implementations on Nvidia's driver (the pointer is different) but we are using |
| @@ -1692,23 +1370,7 @@ void RasterizerOpenGL::EndTransformFeedback() { | |||
| 1692 | if (regs.tfb_enabled == 0) { | 1370 | if (regs.tfb_enabled == 0) { |
| 1693 | return; | 1371 | return; |
| 1694 | } | 1372 | } |
| 1695 | |||
| 1696 | glEndTransformFeedback(); | 1373 | glEndTransformFeedback(); |
| 1697 | |||
| 1698 | for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { | ||
| 1699 | const auto& binding = regs.tfb_bindings[index]; | ||
| 1700 | if (!binding.buffer_enable) { | ||
| 1701 | continue; | ||
| 1702 | } | ||
| 1703 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 1704 | |||
| 1705 | const GLuint handle = transform_feedback_buffers[index].handle; | ||
| 1706 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 1707 | const std::size_t size = binding.buffer_size; | ||
| 1708 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 1709 | glCopyNamedBufferSubData(handle, info.handle, 0, info.offset, | ||
| 1710 | static_cast<GLsizeiptr>(size)); | ||
| 1711 | } | ||
| 1712 | } | 1374 | } |
| 1713 | 1375 | ||
| 1714 | } // namespace OpenGL | 1376 | } // namespace OpenGL |
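Note on the rasterizer changes above: draw-time state is now re-applied through SyncState(), whose per-feature helpers each early-out on the Maxwell3D dirty flags, as the SyncViewport hunk shows. Below is a schematic sketch of that shape; the dirty-flags access and the Dirty::VertexInstances name come from this diff, but the body is an illustrative stand-in, not the code from this change:

    void RasterizerOpenGL::SyncVertexInstances() {
        auto& flags = maxwell3d.dirty.flags;
        if (!flags[Dirty::VertexInstances]) {
            return;                             // nothing relevant was written since the last draw
        }
        flags[Dirty::VertexInstances] = false;  // consume the coarse dirty bit
        const auto& regs = maxwell3d.regs;
        // ...walk the per-binding VertexInstance0..31 bits and issue the matching gl* calls
        //    (e.g. updating the vertex binding divisor) from the corresponding regs fields...
    }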
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 82e03e677..3745cf637 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | 30 | #include "video_core/renderer_opengl/gl_shader_decompiler.h" |
| 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 31 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 32 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| 33 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | ||
| 34 | #include "video_core/renderer_opengl/gl_texture_cache.h" | 33 | #include "video_core/renderer_opengl/gl_texture_cache.h" |
| 35 | #include "video_core/shader/async_shaders.h" | 34 | #include "video_core/shader/async_shaders.h" |
| 36 | #include "video_core/textures/texture.h" | 35 | #include "video_core/textures/texture.h" |
| @@ -72,6 +71,7 @@ public: | |||
| 72 | void DispatchCompute(GPUVAddr code_addr) override; | 71 | void DispatchCompute(GPUVAddr code_addr) override; |
| 73 | void ResetCounter(VideoCore::QueryType type) override; | 72 | void ResetCounter(VideoCore::QueryType type) override; |
| 74 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 73 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 74 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||
| 75 | void FlushAll() override; | 75 | void FlushAll() override; |
| 76 | void FlushRegion(VAddr addr, u64 size) override; | 76 | void FlushRegion(VAddr addr, u64 size) override; |
| 77 | bool MustFlushRegion(VAddr addr, u64 size) override; | 77 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| @@ -119,27 +119,6 @@ private: | |||
| 119 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, | 119 | void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image, |
| 120 | size_t& image_view_index, size_t& texture_index, size_t& image_index); | 120 | size_t& image_view_index, size_t& texture_index, size_t& image_index); |
| 121 | 121 | ||
| 122 | /// Configures the current constbuffers to use for the draw command. | ||
| 123 | void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader); | ||
| 124 | |||
| 125 | /// Configures the current constbuffers to use for the kernel invocation. | ||
| 126 | void SetupComputeConstBuffers(Shader* kernel); | ||
| 127 | |||
| 128 | /// Configures a constant buffer. | ||
| 129 | void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, | ||
| 130 | const ConstBufferEntry& entry, bool use_unified, | ||
| 131 | std::size_t unified_offset); | ||
| 132 | |||
| 133 | /// Configures the current global memory entries to use for the draw command. | ||
| 134 | void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader); | ||
| 135 | |||
| 136 | /// Configures the current global memory entries to use for the kernel invocation. | ||
| 137 | void SetupComputeGlobalMemory(Shader* kernel); | ||
| 138 | |||
| 139 | /// Configures a global memory buffer. | ||
| 140 | void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||
| 141 | size_t size, BindlessSSBO* ssbo); | ||
| 142 | |||
| 143 | /// Configures the current textures to use for the draw command. | 122 | /// Configures the current textures to use for the draw command. |
| 144 | void SetupDrawTextures(const Shader* shader, size_t stage_index); | 123 | void SetupDrawTextures(const Shader* shader, size_t stage_index); |
| 145 | 124 | ||
| @@ -152,6 +131,9 @@ private: | |||
| 152 | /// Configures images in a compute shader. | 131 | /// Configures images in a compute shader. |
| 153 | void SetupComputeImages(const Shader* shader); | 132 | void SetupComputeImages(const Shader* shader); |
| 154 | 133 | ||
| 134 | /// Syncs state to match guest's | ||
| 135 | void SyncState(); | ||
| 136 | |||
| 155 | /// Syncs the viewport and depth range to match the guest state | 137 | /// Syncs the viewport and depth range to match the guest state |
| 156 | void SyncViewport(); | 138 | void SyncViewport(); |
| 157 | 139 | ||
| @@ -215,6 +197,12 @@ private: | |||
| 215 | /// Syncs the framebuffer sRGB state to match the guest state | 197 | /// Syncs the framebuffer sRGB state to match the guest state |
| 216 | void SyncFramebufferSRGB(); | 198 | void SyncFramebufferSRGB(); |
| 217 | 199 | ||
| 200 | /// Syncs vertex formats to match the guest state | ||
| 201 | void SyncVertexFormats(); | ||
| 202 | |||
| 203 | /// Syncs vertex instances to match the guest state | ||
| 204 | void SyncVertexInstances(); | ||
| 205 | |||
| 218 | /// Syncs transform feedback state to match guest state | 206 | /// Syncs transform feedback state to match guest state |
| 219 | /// @note Only valid on assembly shaders | 207 | /// @note Only valid on assembly shaders |
| 220 | void SyncTransformFeedback(); | 208 | void SyncTransformFeedback(); |
| @@ -225,19 +213,7 @@ private: | |||
| 225 | /// End a transform feedback | 213 | /// End a transform feedback |
| 226 | void EndTransformFeedback(); | 214 | void EndTransformFeedback(); |
| 227 | 215 | ||
| 228 | std::size_t CalculateVertexArraysSize() const; | 216 | void SetupShaders(bool is_indexed); |
| 229 | |||
| 230 | std::size_t CalculateIndexBufferSize() const; | ||
| 231 | |||
| 232 | /// Updates the current vertex format | ||
| 233 | void SetupVertexFormat(); | ||
| 234 | |||
| 235 | void SetupVertexBuffer(); | ||
| 236 | void SetupVertexInstances(); | ||
| 237 | |||
| 238 | GLintptr SetupIndexBuffer(); | ||
| 239 | |||
| 240 | void SetupShaders(); | ||
| 241 | 217 | ||
| 242 | Tegra::GPU& gpu; | 218 | Tegra::GPU& gpu; |
| 243 | Tegra::Engines::Maxwell3D& maxwell3d; | 219 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -249,12 +225,12 @@ private: | |||
| 249 | ProgramManager& program_manager; | 225 | ProgramManager& program_manager; |
| 250 | StateTracker& state_tracker; | 226 | StateTracker& state_tracker; |
| 251 | 227 | ||
| 252 | OGLStreamBuffer stream_buffer; | ||
| 253 | TextureCacheRuntime texture_cache_runtime; | 228 | TextureCacheRuntime texture_cache_runtime; |
| 254 | TextureCache texture_cache; | 229 | TextureCache texture_cache; |
| 230 | BufferCacheRuntime buffer_cache_runtime; | ||
| 231 | BufferCache buffer_cache; | ||
| 255 | ShaderCacheOpenGL shader_cache; | 232 | ShaderCacheOpenGL shader_cache; |
| 256 | QueryCache query_cache; | 233 | QueryCache query_cache; |
| 257 | OGLBufferCache buffer_cache; | ||
| 258 | FenceManagerOpenGL fence_manager; | 234 | FenceManagerOpenGL fence_manager; |
| 259 | 235 | ||
| 260 | VideoCommon::Shader::AsyncShaders async_shaders; | 236 | VideoCommon::Shader::AsyncShaders async_shaders; |
| @@ -262,20 +238,8 @@ private: | |||
| 262 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; | 238 | boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices; |
| 263 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; | 239 | std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids; |
| 264 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; | 240 | boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles; |
| 265 | std::array<GLuint, MAX_TEXTURES> texture_handles; | 241 | std::array<GLuint, MAX_TEXTURES> texture_handles{}; |
| 266 | std::array<GLuint, MAX_IMAGES> image_handles; | 242 | std::array<GLuint, MAX_IMAGES> image_handles{}; |
| 267 | |||
| 268 | std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 269 | transform_feedback_buffers; | ||
| 270 | std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> | ||
| 271 | enabled_transform_feedback_buffers; | ||
| 272 | |||
| 273 | static constexpr std::size_t NUM_CONSTANT_BUFFERS = | ||
| 274 | Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers * | ||
| 275 | Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; | ||
| 276 | std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{}; | ||
| 277 | std::size_t current_cbuf = 0; | ||
| 278 | OGLBuffer unified_uniform_buffer; | ||
| 279 | 243 | ||
| 280 | /// Number of commands queued to the OpenGL driver. Resetted on flush. | 244 | /// Number of commands queued to the OpenGL driver. Resetted on flush. |
| 281 | std::size_t num_queued_commands = 0; | 245 | std::size_t num_queued_commands = 0; |
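The header now declares a BindGraphicsUniformBuffer() override and replaces OGLBufferCache with the shared BufferCacheRuntime/BufferCache pair, so the buffer bindings previously handled by the removed Setup* helpers move into the common buffer cache. One plausible body for the new override, purely illustrative and assuming the shared cache exposes a matching entry point (not shown in this diff):

    void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                                     u32 size) {
        buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
    }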
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 0e34a0f20..3428e5e21 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp | |||
| @@ -171,12 +171,6 @@ void OGLBuffer::Release() { | |||
| 171 | handle = 0; | 171 | handle = 0; |
| 172 | } | 172 | } |
| 173 | 173 | ||
| 174 | void OGLBuffer::MakeStreamCopy(std::size_t buffer_size) { | ||
| 175 | ASSERT_OR_EXECUTE((handle != 0 && buffer_size != 0), { return; }); | ||
| 176 | |||
| 177 | glNamedBufferData(handle, buffer_size, nullptr, GL_STREAM_COPY); | ||
| 178 | } | ||
| 179 | |||
| 180 | void OGLSync::Create() { | 174 | void OGLSync::Create() { |
| 181 | if (handle != 0) | 175 | if (handle != 0) |
| 182 | return; | 176 | return; |
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index f48398669..552d79db4 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h | |||
| @@ -234,9 +234,6 @@ public: | |||
| 234 | /// Deletes the internal OpenGL resource | 234 | /// Deletes the internal OpenGL resource |
| 235 | void Release(); | 235 | void Release(); |
| 236 | 236 | ||
| 237 | // Converts the buffer into a stream copy buffer with a fixed size | ||
| 238 | void MakeStreamCopy(std::size_t buffer_size); | ||
| 239 | |||
| 240 | GLuint handle = 0; | 237 | GLuint handle = 0; |
| 241 | }; | 238 | }; |
| 242 | 239 | ||
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c35b71b6b..ac78d344c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp | |||
| @@ -64,7 +64,7 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> | |||
| 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); | 64 | constexpr u32 MAX_CONSTBUFFER_SCALARS = static_cast<u32>(Maxwell::MaxConstBufferSize) / sizeof(u32); |
| 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); | 65 | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_SCALARS / sizeof(u32); |
| 66 | 66 | ||
| 67 | constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt | 67 | constexpr std::string_view COMMON_DECLARATIONS = R"(#define ftoi floatBitsToInt |
| 68 | #define ftou floatBitsToUint | 68 | #define ftou floatBitsToUint |
| 69 | #define itof intBitsToFloat | 69 | #define itof intBitsToFloat |
| 70 | #define utof uintBitsToFloat | 70 | #define utof uintBitsToFloat |
| @@ -77,10 +77,6 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ | |||
| 77 | 77 | ||
| 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); | 78 | const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); |
| 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); | 79 | const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); |
| 80 | |||
| 81 | layout (std140, binding = {}) uniform vs_config {{ | ||
| 82 | float y_direction; | ||
| 83 | }}; | ||
| 84 | )"; | 80 | )"; |
| 85 | 81 | ||
| 86 | class ShaderWriter final { | 82 | class ShaderWriter final { |
| @@ -402,13 +398,6 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
| 402 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | 398 | return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); |
| 403 | } | 399 | } |
| 404 | 400 | ||
| 405 | bool UseUnifiedUniforms(const Device& device, const ShaderIR& ir, ShaderType stage) { | ||
| 406 | const u32 num_ubos = static_cast<u32>(ir.GetConstantBuffers().size()); | ||
| 407 | // We waste one UBO for emulation | ||
| 408 | const u32 num_available_ubos = device.GetMaxUniformBuffers(stage) - 1; | ||
| 409 | return num_ubos > num_available_ubos; | ||
| 410 | } | ||
| 411 | |||
| 412 | struct GenericVaryingDescription { | 401 | struct GenericVaryingDescription { |
| 413 | std::string name; | 402 | std::string name; |
| 414 | u8 first_element = 0; | 403 | u8 first_element = 0; |
| @@ -420,9 +409,8 @@ public: | |||
| 420 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, | 409 | explicit GLSLDecompiler(const Device& device_, const ShaderIR& ir_, const Registry& registry_, |
| 421 | ShaderType stage_, std::string_view identifier_, | 410 | ShaderType stage_, std::string_view identifier_, |
| 422 | std::string_view suffix_) | 411 | std::string_view suffix_) |
| 423 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, identifier{identifier_}, | 412 | : device{device_}, ir{ir_}, registry{registry_}, stage{stage_}, |
| 424 | suffix{suffix_}, header{ir.GetHeader()}, use_unified_uniforms{ | 413 | identifier{identifier_}, suffix{suffix_}, header{ir.GetHeader()} { |
| 425 | UseUnifiedUniforms(device_, ir_, stage_)} { | ||
| 426 | if (stage != ShaderType::Compute) { | 414 | if (stage != ShaderType::Compute) { |
| 427 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); | 415 | transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); |
| 428 | } | 416 | } |
| @@ -516,7 +504,8 @@ private: | |||
| 516 | if (!identifier.empty()) { | 504 | if (!identifier.empty()) { |
| 517 | code.AddLine("// {}", identifier); | 505 | code.AddLine("// {}", identifier); |
| 518 | } | 506 | } |
| 519 | code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); | 507 | const bool use_compatibility = ir.UsesLegacyVaryings() || ir.UsesYNegate(); |
| 508 | code.AddLine("#version 440 {}", use_compatibility ? "compatibility" : "core"); | ||
| 520 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); | 509 | code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); |
| 521 | if (device.HasShaderBallot()) { | 510 | if (device.HasShaderBallot()) { |
| 522 | code.AddLine("#extension GL_ARB_shader_ballot : require"); | 511 | code.AddLine("#extension GL_ARB_shader_ballot : require"); |
| @@ -542,7 +531,7 @@ private: | |||
| 542 | 531 | ||
| 543 | code.AddNewLine(); | 532 | code.AddNewLine(); |
| 544 | 533 | ||
| 545 | code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); | 534 | code.AddLine(COMMON_DECLARATIONS); |
| 546 | } | 535 | } |
| 547 | 536 | ||
| 548 | void DeclareVertex() { | 537 | void DeclareVertex() { |
| @@ -865,17 +854,6 @@ private: | |||
| 865 | } | 854 | } |
| 866 | 855 | ||
| 867 | void DeclareConstantBuffers() { | 856 | void DeclareConstantBuffers() { |
| 868 | if (use_unified_uniforms) { | ||
| 869 | const u32 binding = device.GetBaseBindings(stage).shader_storage_buffer + | ||
| 870 | static_cast<u32>(ir.GetGlobalMemory().size()); | ||
| 871 | code.AddLine("layout (std430, binding = {}) readonly buffer UnifiedUniforms {{", | ||
| 872 | binding); | ||
| 873 | code.AddLine(" uint cbufs[];"); | ||
| 874 | code.AddLine("}};"); | ||
| 875 | code.AddNewLine(); | ||
| 876 | return; | ||
| 877 | } | ||
| 878 | |||
| 879 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; | 857 | u32 binding = device.GetBaseBindings(stage).uniform_buffer; |
| 880 | for (const auto& [index, info] : ir.GetConstantBuffers()) { | 858 | for (const auto& [index, info] : ir.GetConstantBuffers()) { |
| 881 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); | 859 | const u32 num_elements = Common::DivCeil(info.GetSize(), 4 * sizeof(u32)); |
| @@ -1081,29 +1059,17 @@ private: | |||
| 1081 | 1059 | ||
| 1082 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { | 1060 | if (const auto cbuf = std::get_if<CbufNode>(&*node)) { |
| 1083 | const Node offset = cbuf->GetOffset(); | 1061 | const Node offset = cbuf->GetOffset(); |
| 1084 | const u32 base_unified_offset = cbuf->GetIndex() * MAX_CONSTBUFFER_SCALARS; | ||
| 1085 | 1062 | ||
| 1086 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { | 1063 | if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { |
| 1087 | // Direct access | 1064 | // Direct access |
| 1088 | const u32 offset_imm = immediate->GetValue(); | 1065 | const u32 offset_imm = immediate->GetValue(); |
| 1089 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); | 1066 | ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); |
| 1090 | if (use_unified_uniforms) { | 1067 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), |
| 1091 | return {fmt::format("cbufs[{}]", base_unified_offset + offset_imm / 4), | 1068 | offset_imm / (4 * 4), (offset_imm / 4) % 4), |
| 1092 | Type::Uint}; | ||
| 1093 | } else { | ||
| 1094 | return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()), | ||
| 1095 | offset_imm / (4 * 4), (offset_imm / 4) % 4), | ||
| 1096 | Type::Uint}; | ||
| 1097 | } | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | // Indirect access | ||
| 1101 | if (use_unified_uniforms) { | ||
| 1102 | return {fmt::format("cbufs[{} + ({} >> 2)]", base_unified_offset, | ||
| 1103 | Visit(offset).AsUint()), | ||
| 1104 | Type::Uint}; | 1069 | Type::Uint}; |
| 1105 | } | 1070 | } |
| 1106 | 1071 | ||
| 1072 | // Indirect access | ||
| 1107 | const std::string final_offset = code.GenerateTemporary(); | 1073 | const std::string final_offset = code.GenerateTemporary(); |
| 1108 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); | 1074 | code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint()); |
| 1109 | 1075 | ||
| @@ -2293,7 +2259,6 @@ private: | |||
| 2293 | } | 2259 | } |
| 2294 | } | 2260 | } |
| 2295 | } | 2261 | } |
| 2296 | |||
| 2297 | if (header.ps.omap.depth) { | 2262 | if (header.ps.omap.depth) { |
| 2298 | // The depth output is always 2 registers after the last color output, and current_reg | 2263 | // The depth output is always 2 registers after the last color output, and current_reg |
| 2299 | // already contains one past the last color register. | 2264 | // already contains one past the last color register. |
| @@ -2337,7 +2302,8 @@ private: | |||
| 2337 | } | 2302 | } |
| 2338 | 2303 | ||
| 2339 | Expression YNegate(Operation operation) { | 2304 | Expression YNegate(Operation operation) { |
| 2340 | return {"y_direction", Type::Float}; | 2305 | // Y_NEGATE is mapped to this uniform value |
| 2306 | return {"gl_FrontMaterial.ambient.a", Type::Float}; | ||
| 2341 | } | 2307 | } |
| 2342 | 2308 | ||
| 2343 | template <u32 element> | 2309 | template <u32 element> |
| @@ -2787,7 +2753,6 @@ private: | |||
| 2787 | const std::string_view identifier; | 2753 | const std::string_view identifier; |
| 2788 | const std::string_view suffix; | 2754 | const std::string_view suffix; |
| 2789 | const Header header; | 2755 | const Header header; |
| 2790 | const bool use_unified_uniforms; | ||
| 2791 | std::unordered_map<u8, VaryingTFB> transform_feedback; | 2756 | std::unordered_map<u8, VaryingTFB> transform_feedback; |
| 2792 | 2757 | ||
| 2793 | ShaderWriter code; | 2758 | ShaderWriter code; |
| @@ -3003,8 +2968,10 @@ ShaderEntries MakeEntries(const Device& device, const ShaderIR& ir, ShaderType s | |||
| 3003 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { | 2968 | for (std::size_t i = 0; i < std::size(clip_distances); ++i) { |
| 3004 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; | 2969 | entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; |
| 3005 | } | 2970 | } |
| 2971 | for (const auto& buffer : entries.const_buffers) { | ||
| 2972 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 2973 | } | ||
| 3006 | entries.shader_length = ir.GetLength(); | 2974 | entries.shader_length = ir.GetLength(); |
| 3007 | entries.use_unified_uniforms = UseUnifiedUniforms(device, ir, stage); | ||
| 3008 | return entries; | 2975 | return entries; |
| 3009 | } | 2976 | } |
| 3010 | 2977 | ||
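For reference, the direct constant-buffer access retained above turns a byte offset into vec4-style indexing: since each declared cbuf element holds four 32-bit scalars, a 4-byte-aligned byte offset maps to cbufN[offset / 16][(offset / 4) % 4]. A worked example with a made-up offset, as compile-time checks only:

    constexpr u32 offset_imm = 84;  // example value only
    static_assert(offset_imm % 4 == 0, "direct cbuf access must be 4-byte aligned");
    static_assert(offset_imm / (4 * 4) == 5, "vec4-sized element index");
    static_assert((offset_imm / 4) % 4 == 1, "component within that element, i.e. cbufN[5][1]");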
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index be68994bb..0397a000c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h | |||
| @@ -55,7 +55,7 @@ struct ShaderEntries { | |||
| 55 | std::vector<ImageEntry> images; | 55 | std::vector<ImageEntry> images; |
| 56 | std::size_t shader_length{}; | 56 | std::size_t shader_length{}; |
| 57 | u32 clip_distances{}; | 57 | u32 clip_distances{}; |
| 58 | bool use_unified_uniforms{}; | 58 | u32 enabled_uniform_buffers{}; |
| 59 | }; | 59 | }; |
| 60 | 60 | ||
| 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | 61 | ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir, |
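The use_unified_uniforms flag is replaced by enabled_uniform_buffers, a per-stage bitmask with bit N set when the shader declares const buffer N. An illustrative consumer of such a mask (not code from this change) could walk the set bits like this:

    #include <bit>  // std::countr_zero, C++20

    void ForEachEnabledUniformBuffer(unsigned mask) {
        while (mask != 0) {
            const unsigned index = static_cast<unsigned>(std::countr_zero(mask));
            mask &= mask - 1;  // clear the lowest set bit
            // ...bind or upload const buffer `index` for this stage...
        }
    }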
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index 60e6fa39f..dbdf5230f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp | |||
| @@ -36,16 +36,10 @@ void SetupDirtyColorMasks(Tables& tables) { | |||
| 36 | FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); | 36 | FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); |
| 37 | } | 37 | } |
| 38 | 38 | ||
| 39 | void SetupDirtyVertexArrays(Tables& tables) { | 39 | void SetupDirtyVertexInstances(Tables& tables) { |
| 40 | static constexpr std::size_t num_array = 3; | ||
| 41 | static constexpr std::size_t instance_base_offset = 3; | 40 | static constexpr std::size_t instance_base_offset = 3; |
| 42 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | 41 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { |
| 43 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); | 42 | const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); |
| 44 | const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); | ||
| 45 | |||
| 46 | FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); | ||
| 47 | FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); | ||
| 48 | |||
| 49 | const std::size_t instance_array_offset = array_offset + instance_base_offset; | 43 | const std::size_t instance_array_offset = array_offset + instance_base_offset; |
| 50 | tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); | 44 | tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); |
| 51 | tables[1][instance_array_offset] = VertexInstances; | 45 | tables[1][instance_array_offset] = VertexInstances; |
| @@ -217,11 +211,11 @@ void SetupDirtyMisc(Tables& tables) { | |||
| 217 | StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { | 211 | StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { |
| 218 | auto& dirty = gpu.Maxwell3D().dirty; | 212 | auto& dirty = gpu.Maxwell3D().dirty; |
| 219 | auto& tables = dirty.tables; | 213 | auto& tables = dirty.tables; |
| 220 | SetupDirtyRenderTargets(tables); | 214 | SetupDirtyFlags(tables); |
| 221 | SetupDirtyColorMasks(tables); | 215 | SetupDirtyColorMasks(tables); |
| 222 | SetupDirtyViewports(tables); | 216 | SetupDirtyViewports(tables); |
| 223 | SetupDirtyScissors(tables); | 217 | SetupDirtyScissors(tables); |
| 224 | SetupDirtyVertexArrays(tables); | 218 | SetupDirtyVertexInstances(tables); |
| 225 | SetupDirtyVertexFormat(tables); | 219 | SetupDirtyVertexFormat(tables); |
| 226 | SetupDirtyShaders(tables); | 220 | SetupDirtyShaders(tables); |
| 227 | SetupDirtyPolygonModes(tables); | 221 | SetupDirtyPolygonModes(tables); |
| @@ -241,19 +235,6 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} | |||
| 241 | SetupDirtyClipControl(tables); | 235 | SetupDirtyClipControl(tables); |
| 242 | SetupDirtyDepthClampEnabled(tables); | 236 | SetupDirtyDepthClampEnabled(tables); |
| 243 | SetupDirtyMisc(tables); | 237 | SetupDirtyMisc(tables); |
| 244 | |||
| 245 | auto& store = dirty.on_write_stores; | ||
| 246 | store[VertexBuffers] = true; | ||
| 247 | for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { | ||
| 248 | store[VertexBuffer0 + i] = true; | ||
| 249 | } | ||
| 250 | } | ||
| 251 | |||
| 252 | void StateTracker::InvalidateStreamBuffer() { | ||
| 253 | flags[Dirty::VertexBuffers] = true; | ||
| 254 | for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { | ||
| 255 | flags[index] = true; | ||
| 256 | } | ||
| 257 | } | 238 | } |
| 258 | 239 | ||
| 259 | } // namespace OpenGL | 240 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 574615d3c..94c905116 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h | |||
| @@ -28,10 +28,6 @@ enum : u8 { | |||
| 28 | VertexFormat0, | 28 | VertexFormat0, |
| 29 | VertexFormat31 = VertexFormat0 + 31, | 29 | VertexFormat31 = VertexFormat0 + 31, |
| 30 | 30 | ||
| 31 | VertexBuffers, | ||
| 32 | VertexBuffer0, | ||
| 33 | VertexBuffer31 = VertexBuffer0 + 31, | ||
| 34 | |||
| 35 | VertexInstances, | 31 | VertexInstances, |
| 36 | VertexInstance0, | 32 | VertexInstance0, |
| 37 | VertexInstance31 = VertexInstance0 + 31, | 33 | VertexInstance31 = VertexInstance0 + 31, |
| @@ -92,8 +88,6 @@ class StateTracker { | |||
| 92 | public: | 88 | public: |
| 93 | explicit StateTracker(Tegra::GPU& gpu); | 89 | explicit StateTracker(Tegra::GPU& gpu); |
| 94 | 90 | ||
| 95 | void InvalidateStreamBuffer(); | ||
| 96 | |||
| 97 | void BindIndexBuffer(GLuint new_index_buffer) { | 91 | void BindIndexBuffer(GLuint new_index_buffer) { |
| 98 | if (index_buffer == new_index_buffer) { | 92 | if (index_buffer == new_index_buffer) { |
| 99 | return; | 93 | return; |
| @@ -110,13 +104,32 @@ public: | |||
| 110 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); | 104 | glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer); |
| 111 | } | 105 | } |
| 112 | 106 | ||
| 107 | void ClipControl(GLenum new_origin, GLenum new_depth) { | ||
| 108 | if (new_origin == origin && new_depth == depth) { | ||
| 109 | return; | ||
| 110 | } | ||
| 111 | origin = new_origin; | ||
| 112 | depth = new_depth; | ||
| 113 | glClipControl(origin, depth); | ||
| 114 | } | ||
| 115 | |||
| 116 | void SetYNegate(bool new_y_negate) { | ||
| 117 | if (new_y_negate == y_negate) { | ||
| 118 | return; | ||
| 119 | } | ||
| 120 | // Y_NEGATE is mapped to gl_FrontMaterial.ambient.a | ||
| 121 | y_negate = new_y_negate; | ||
| 122 | const std::array ambient{0.0f, 0.0f, 0.0f, y_negate ? -1.0f : 1.0f}; | ||
| 123 | glMaterialfv(GL_FRONT, GL_AMBIENT, ambient.data()); | ||
| 124 | } | ||
| 125 | |||
| 113 | void NotifyScreenDrawVertexArray() { | 126 | void NotifyScreenDrawVertexArray() { |
| 114 | flags[OpenGL::Dirty::VertexFormats] = true; | 127 | flags[OpenGL::Dirty::VertexFormats] = true; |
| 115 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; | 128 | flags[OpenGL::Dirty::VertexFormat0 + 0] = true; |
| 116 | flags[OpenGL::Dirty::VertexFormat0 + 1] = true; | 129 | flags[OpenGL::Dirty::VertexFormat0 + 1] = true; |
| 117 | 130 | ||
| 118 | flags[OpenGL::Dirty::VertexBuffers] = true; | 131 | flags[VideoCommon::Dirty::VertexBuffers] = true; |
| 119 | flags[OpenGL::Dirty::VertexBuffer0] = true; | 132 | flags[VideoCommon::Dirty::VertexBuffer0] = true; |
| 120 | 133 | ||
| 121 | flags[OpenGL::Dirty::VertexInstances] = true; | 134 | flags[OpenGL::Dirty::VertexInstances] = true; |
| 122 | flags[OpenGL::Dirty::VertexInstance0 + 0] = true; | 135 | flags[OpenGL::Dirty::VertexInstance0 + 0] = true; |
| @@ -202,6 +215,9 @@ private: | |||
| 202 | 215 | ||
| 203 | GLuint framebuffer = 0; | 216 | GLuint framebuffer = 0; |
| 204 | GLuint index_buffer = 0; | 217 | GLuint index_buffer = 0; |
| 218 | GLenum origin = GL_LOWER_LEFT; | ||
| 219 | GLenum depth = GL_NEGATIVE_ONE_TO_ONE; | ||
| 220 | bool y_negate = false; | ||
| 205 | }; | 221 | }; |
| 206 | 222 | ||
| 207 | } // namespace OpenGL | 223 | } // namespace OpenGL |
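ClipControl() and SetYNegate() cache the last value they saw, so callers such as SyncViewport above and DrawScreen below can invoke them unconditionally and only genuine transitions reach the driver. SetYNegate also carries the flip factor to generated shaders through the fixed-function front material, which the decompiler's YNegate expression reads back as gl_FrontMaterial.ambient.a. An illustrative call sequence:

    state_tracker.ClipControl(GL_UPPER_LEFT, GL_ZERO_TO_ONE);  // issues glClipControl
    state_tracker.ClipControl(GL_UPPER_LEFT, GL_ZERO_TO_ONE);  // filtered out, nothing changed
    state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);  // origin changed, issues glClipControl

    state_tracker.SetYNegate(true);   // glMaterialfv(GL_FRONT, GL_AMBIENT, {0, 0, 0, -1.0f})
    state_tracker.SetYNegate(true);   // filtered out, value unchanged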
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index e0819cdf2..bfb992a79 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp | |||
| @@ -1,70 +1,64 @@ | |||
| 1 | // Copyright 2018 Citra Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <tuple> | 5 | #include <array> |
| 6 | #include <vector> | 6 | #include <memory> |
| 7 | #include <span> | ||
| 8 | |||
| 9 | #include <glad/glad.h> | ||
| 7 | 10 | ||
| 8 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 9 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 10 | #include "common/microprofile.h" | ||
| 11 | #include "video_core/renderer_opengl/gl_device.h" | ||
| 12 | #include "video_core/renderer_opengl/gl_state_tracker.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" | 13 | #include "video_core/renderer_opengl/gl_stream_buffer.h" |
| 14 | 14 | ||
| 15 | MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", | ||
| 16 | MP_RGB(128, 128, 192)); | ||
| 17 | |||
| 18 | namespace OpenGL { | 15 | namespace OpenGL { |
| 19 | 16 | ||
| 20 | OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_) | 17 | StreamBuffer::StreamBuffer() { |
| 21 | : state_tracker{state_tracker_} { | 18 | static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; |
| 22 | gl_buffer.Create(); | 19 | buffer.Create(); |
| 23 | 20 | glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer"); | |
| 24 | static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT; | 21 | glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags); |
| 25 | glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags); | 22 | mapped_pointer = |
| 26 | mapped_ptr = static_cast<u8*>( | 23 | static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags)); |
| 27 | glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT)); | 24 | for (OGLSync& sync : fences) { |
| 28 | 25 | sync.Create(); | |
| 29 | if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) { | ||
| 30 | glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY); | ||
| 31 | glGetNamedBufferParameterui64vNV(gl_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &gpu_address); | ||
| 32 | } | 26 | } |
| 33 | } | 27 | } |
| 34 | 28 | ||
| 35 | OGLStreamBuffer::~OGLStreamBuffer() { | 29 | std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept { |
| 36 | glUnmapNamedBuffer(gl_buffer.handle); | 30 | ASSERT(size < REGION_SIZE); |
| 37 | gl_buffer.Release(); | 31 | for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end; |
| 38 | } | 32 | ++region) { |
| 39 | 33 | fences[region].Create(); | |
| 40 | std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { | ||
| 41 | ASSERT(size <= BUFFER_SIZE); | ||
| 42 | ASSERT(alignment <= BUFFER_SIZE); | ||
| 43 | mapped_size = size; | ||
| 44 | |||
| 45 | if (alignment > 0) { | ||
| 46 | buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment); | ||
| 47 | } | 34 | } |
| 35 | used_iterator = iterator; | ||
| 48 | 36 | ||
| 49 | if (buffer_pos + size > BUFFER_SIZE) { | 37 | for (size_t region = Region(free_iterator) + 1, |
| 50 | MICROPROFILE_SCOPE(OpenGL_StreamBuffer); | 38 | region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS); |
| 51 | glInvalidateBufferData(gl_buffer.handle); | 39 | region < region_end; ++region) { |
| 52 | state_tracker.InvalidateStreamBuffer(); | 40 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); |
| 53 | 41 | fences[region].Release(); | |
| 54 | buffer_pos = 0; | ||
| 55 | } | 42 | } |
| 56 | 43 | if (iterator + size > free_iterator) { | |
| 57 | return std::make_pair(mapped_ptr + buffer_pos, buffer_pos); | 44 | free_iterator = iterator + size; |
| 58 | } | ||
| 59 | |||
| 60 | void OGLStreamBuffer::Unmap(GLsizeiptr size) { | ||
| 61 | ASSERT(size <= mapped_size); | ||
| 62 | |||
| 63 | if (size > 0) { | ||
| 64 | glFlushMappedNamedBufferRange(gl_buffer.handle, buffer_pos, size); | ||
| 65 | } | 45 | } |
| 66 | 46 | if (iterator + size > STREAM_BUFFER_SIZE) { | |
| 67 | buffer_pos += size; | 47 | for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) { |
| 48 | fences[region].Create(); | ||
| 49 | } | ||
| 50 | used_iterator = 0; | ||
| 51 | iterator = 0; | ||
| 52 | free_iterator = size; | ||
| 53 | |||
| 54 | for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) { | ||
| 55 | glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED); | ||
| 56 | fences[region].Release(); | ||
| 57 | } | ||
| 58 | } | ||
| 59 | const size_t offset = iterator; | ||
| 60 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 61 | return {std::span(mapped_pointer + offset, size), offset}; | ||
| 68 | } | 62 | } |
| 69 | 63 | ||
| 70 | } // namespace OpenGL | 64 | } // namespace OpenGL |
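In the rewritten stream buffer, Request() advances a write cursor through a persistently mapped, coherent buffer split into NUM_SYNCS regions: fences are inserted behind the cursor as regions fill, each region's fence is waited on before that region is handed out again, and the cursor wraps back to offset 0 when a request would run past the end. A quick sanity check of the region arithmetic, repeating the constants that the header below declares:

    constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024;
    constexpr size_t NUM_SYNCS = 16;
    constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS;
    static_assert(REGION_SIZE == 4 * 1024 * 1024);        // each fence guards a 4 MiB region
    static_assert((9 * 1024 * 1024) / REGION_SIZE == 2);  // Region(9 MiB) == 2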
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index dd9cf67eb..6dbb6bfba 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h | |||
| @@ -1,9 +1,12 @@ | |||
| 1 | // Copyright 2018 Citra Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <memory> | ||
| 9 | #include <span> | ||
| 7 | #include <utility> | 10 | #include <utility> |
| 8 | 11 | ||
| 9 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| @@ -13,48 +16,35 @@ | |||
| 13 | 16 | ||
| 14 | namespace OpenGL { | 17 | namespace OpenGL { |
| 15 | 18 | ||
| 16 | class Device; | 19 | class StreamBuffer { |
| 17 | class StateTracker; | 20 | static constexpr size_t STREAM_BUFFER_SIZE = 64 * 1024 * 1024; |
| 21 | static constexpr size_t NUM_SYNCS = 16; | ||
| 22 | static constexpr size_t REGION_SIZE = STREAM_BUFFER_SIZE / NUM_SYNCS; | ||
| 23 | static constexpr size_t MAX_ALIGNMENT = 256; | ||
| 24 | static_assert(STREAM_BUFFER_SIZE % MAX_ALIGNMENT == 0); | ||
| 25 | static_assert(STREAM_BUFFER_SIZE % NUM_SYNCS == 0); | ||
| 26 | static_assert(REGION_SIZE % MAX_ALIGNMENT == 0); | ||
| 18 | 27 | ||
| 19 | class OGLStreamBuffer : private NonCopyable { | ||
| 20 | public: | 28 | public: |
| 21 | explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_); | 29 | explicit StreamBuffer(); |
| 22 | ~OGLStreamBuffer(); | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes | ||
| 26 | * and the optional alignment requirement. | ||
| 27 | * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. | ||
| 28 | * The return values are the pointer to the new chunk, and the offset within the buffer. | ||
| 29 | * The actual used size must be specified on unmapping the chunk. | ||
| 30 | */ | ||
| 31 | std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0); | ||
| 32 | |||
| 33 | void Unmap(GLsizeiptr size); | ||
| 34 | |||
| 35 | GLuint Handle() const { | ||
| 36 | return gl_buffer.handle; | ||
| 37 | } | ||
| 38 | 30 | ||
| 39 | u64 Address() const { | 31 | [[nodiscard]] std::pair<std::span<u8>, size_t> Request(size_t size) noexcept; |
| 40 | return gpu_address; | ||
| 41 | } | ||
| 42 | 32 | ||
| 43 | GLsizeiptr Size() const noexcept { | 33 | [[nodiscard]] GLuint Handle() const noexcept { |
| 44 | return BUFFER_SIZE; | 34 | return buffer.handle; |
| 45 | } | 35 | } |
| 46 | 36 | ||
| 47 | private: | 37 | private: |
| 48 | static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024; | 38 | [[nodiscard]] static size_t Region(size_t offset) noexcept { |
| 49 | 39 | return offset / REGION_SIZE; | |
| 50 | StateTracker& state_tracker; | 40 | } |
| 51 | |||
| 52 | OGLBuffer gl_buffer; | ||
| 53 | 41 | ||
| 54 | GLuint64EXT gpu_address = 0; | 42 | size_t iterator = 0; |
| 55 | GLintptr buffer_pos = 0; | 43 | size_t used_iterator = 0; |
| 56 | GLsizeiptr mapped_size = 0; | 44 | size_t free_iterator = 0; |
| 57 | u8* mapped_ptr = nullptr; | 45 | u8* mapped_pointer = nullptr; |
| 46 | OGLBuffer buffer; | ||
| 47 | std::array<OGLSync, NUM_SYNCS> fences; | ||
| 58 | }; | 48 | }; |
| 59 | 49 | ||
| 60 | } // namespace OpenGL | 50 | } // namespace OpenGL |
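The class now exposes only Request() and Handle(): a caller copies into the returned span and binds the shared buffer at the returned offset. A minimal usage sketch, with the helper name and the uniform-buffer binding made up for the example:

    #include <cstring>
    #include <glad/glad.h>
    #include "video_core/renderer_opengl/gl_stream_buffer.h"

    // Hypothetical helper, not part of this change: stage `size` bytes and bind them as a UBO.
    void UploadUniform(OpenGL::StreamBuffer& stream_buffer, GLuint binding, const void* data,
                       size_t size) {
        const auto [span, offset] = stream_buffer.Request(size);
        std::memcpy(span.data(), data, size);  // coherent mapping, no explicit flush needed
        glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer.Handle(),
                          static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
    }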
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 546cb6d00..31eb54123 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -398,9 +398,6 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | |||
| 398 | 398 | ||
| 399 | } // Anonymous namespace | 399 | } // Anonymous namespace |
| 400 | 400 | ||
| 401 | ImageBufferMap::ImageBufferMap(GLuint handle_, u8* map, size_t size, OGLSync* sync_) | ||
| 402 | : span(map, size), sync{sync_}, handle{handle_} {} | ||
| 403 | |||
| 404 | ImageBufferMap::~ImageBufferMap() { | 401 | ImageBufferMap::~ImageBufferMap() { |
| 405 | if (sync) { | 402 | if (sync) { |
| 406 | sync->Create(); | 403 | sync->Create(); |
| @@ -487,11 +484,11 @@ void TextureCacheRuntime::Finish() { | |||
| 487 | glFinish(); | 484 | glFinish(); |
| 488 | } | 485 | } |
| 489 | 486 | ||
| 490 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | 487 | ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { |
| 491 | return upload_buffers.RequestMap(size, true); | 488 | return upload_buffers.RequestMap(size, true); |
| 492 | } | 489 | } |
| 493 | 490 | ||
| 494 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | 491 | ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 495 | return download_buffers.RequestMap(size, false); | 492 | return download_buffers.RequestMap(size, false); |
| 496 | } | 493 | } |
| 497 | 494 | ||
| @@ -553,15 +550,14 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src, | |||
| 553 | } | 550 | } |
| 554 | 551 | ||
| 555 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, | 552 | void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, |
| 556 | size_t buffer_offset, | ||
| 557 | std::span<const SwizzleParameters> swizzles) { | 553 | std::span<const SwizzleParameters> swizzles) { |
| 558 | switch (image.info.type) { | 554 | switch (image.info.type) { |
| 559 | case ImageType::e2D: | 555 | case ImageType::e2D: |
| 560 | return util_shaders.BlockLinearUpload2D(image, map, buffer_offset, swizzles); | 556 | return util_shaders.BlockLinearUpload2D(image, map, swizzles); |
| 561 | case ImageType::e3D: | 557 | case ImageType::e3D: |
| 562 | return util_shaders.BlockLinearUpload3D(image, map, buffer_offset, swizzles); | 558 | return util_shaders.BlockLinearUpload3D(image, map, swizzles); |
| 563 | case ImageType::Linear: | 559 | case ImageType::Linear: |
| 564 | return util_shaders.PitchUpload(image, map, buffer_offset, swizzles); | 560 | return util_shaders.PitchUpload(image, map, swizzles); |
| 565 | default: | 561 | default: |
| 566 | UNREACHABLE(); | 562 | UNREACHABLE(); |
| 567 | break; | 563 | break; |
| @@ -596,7 +592,11 @@ ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_ | |||
| 596 | bool insert_fence) { | 592 | bool insert_fence) { |
| 597 | const size_t index = RequestBuffer(requested_size); | 593 | const size_t index = RequestBuffer(requested_size); |
| 598 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; | 594 | OGLSync* const sync = insert_fence ? &syncs[index] : nullptr; |
| 599 | return ImageBufferMap(buffers[index].handle, maps[index], requested_size, sync); | 595 | return ImageBufferMap{ |
| 596 | .mapped_span = std::span(maps[index], requested_size), | ||
| 597 | .sync = sync, | ||
| 598 | .buffer = buffers[index].handle, | ||
| 599 | }; | ||
| 600 | } | 600 | } |
| 601 | 601 | ||
| 602 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { | 602 | size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) { |
| @@ -709,10 +709,10 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
| 709 | } | 709 | } |
| 710 | } | 710 | } |
| 711 | 711 | ||
| 712 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 712 | void Image::UploadMemory(const ImageBufferMap& map, |
| 713 | std::span<const VideoCommon::BufferImageCopy> copies) { | 713 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 714 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.Handle()); | 714 | glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); |
| 715 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, buffer_offset, unswizzled_size_bytes); | 715 | glFlushMappedBufferRange(GL_PIXEL_UNPACK_BUFFER, map.offset, unswizzled_size_bytes); |
| 716 | 716 | ||
| 717 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); | 717 | glPixelStorei(GL_UNPACK_ALIGNMENT, 1); |
| 718 | 718 | ||
| @@ -728,23 +728,23 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 728 | current_image_height = copy.buffer_image_height; | 728 | current_image_height = copy.buffer_image_height; |
| 729 | glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); | 729 | glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, current_image_height); |
| 730 | } | 730 | } |
| 731 | CopyBufferToImage(copy, buffer_offset); | 731 | CopyBufferToImage(copy, map.offset); |
| 732 | } | 732 | } |
| 733 | } | 733 | } |
| 734 | 734 | ||
| 735 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 735 | void Image::UploadMemory(const ImageBufferMap& map, |
| 736 | std::span<const VideoCommon::BufferCopy> copies) { | 736 | std::span<const VideoCommon::BufferCopy> copies) { |
| 737 | for (const VideoCommon::BufferCopy& copy : copies) { | 737 | for (const VideoCommon::BufferCopy& copy : copies) { |
| 738 | glCopyNamedBufferSubData(map.Handle(), buffer.handle, copy.src_offset + buffer_offset, | 738 | glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, |
| 739 | copy.dst_offset, copy.size); | 739 | copy.dst_offset, copy.size); |
| 740 | } | 740 | } |
| 741 | } | 741 | } |
| 742 | 742 | ||
| 743 | void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | 743 | void Image::DownloadMemory(ImageBufferMap& map, |
| 744 | std::span<const VideoCommon::BufferImageCopy> copies) { | 744 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 745 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 745 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 746 | 746 | ||
| 747 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.Handle()); | 747 | glBindBuffer(GL_PIXEL_PACK_BUFFER, map.buffer); |
| 748 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | 748 | glPixelStorei(GL_PACK_ALIGNMENT, 1); |
| 749 | 749 | ||
| 750 | u32 current_row_length = std::numeric_limits<u32>::max(); | 750 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| @@ -759,7 +759,7 @@ void Image::DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | |||
| 759 | current_image_height = copy.buffer_image_height; | 759 | current_image_height = copy.buffer_image_height; |
| 760 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); | 760 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); |
| 761 | } | 761 | } |
| 762 | CopyImageToBuffer(copy, buffer_offset); | 762 | CopyImageToBuffer(copy, map.offset); |
| 763 | } | 763 | } |
| 764 | } | 764 | } |
| 765 | 765 | ||
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 15b7c3676..874cf54f4 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -31,23 +31,13 @@ using VideoCommon::NUM_RT; | |||
| 31 | using VideoCommon::Offset2D; | 31 | using VideoCommon::Offset2D; |
| 32 | using VideoCommon::RenderTargets; | 32 | using VideoCommon::RenderTargets; |
| 33 | 33 | ||
| 34 | class ImageBufferMap { | 34 | struct ImageBufferMap { |
| 35 | public: | ||
| 36 | explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync); | ||
| 37 | ~ImageBufferMap(); | 35 | ~ImageBufferMap(); |
| 38 | 36 | ||
| 39 | GLuint Handle() const noexcept { | 37 | std::span<u8> mapped_span; |
| 40 | return handle; | 38 | size_t offset = 0; |
| 41 | } | ||
| 42 | |||
| 43 | std::span<u8> Span() const noexcept { | ||
| 44 | return span; | ||
| 45 | } | ||
| 46 | |||
| 47 | private: | ||
| 48 | std::span<u8> span; | ||
| 49 | OGLSync* sync; | 39 | OGLSync* sync; |
| 50 | GLuint handle; | 40 | GLuint buffer; |
| 51 | }; | 41 | }; |
| 52 | 42 | ||
| 53 | struct FormatProperties { | 43 | struct FormatProperties { |
| @@ -69,9 +59,9 @@ public: | |||
| 69 | 59 | ||
| 70 | void Finish(); | 60 | void Finish(); |
| 71 | 61 | ||
| 72 | ImageBufferMap MapUploadBuffer(size_t size); | 62 | ImageBufferMap UploadStagingBuffer(size_t size); |
| 73 | 63 | ||
| 74 | ImageBufferMap MapDownloadBuffer(size_t size); | 64 | ImageBufferMap DownloadStagingBuffer(size_t size); |
| 75 | 65 | ||
| 76 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | 66 | void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); |
| 77 | 67 | ||
| @@ -89,7 +79,7 @@ public: | |||
| 89 | Tegra::Engines::Fermi2D::Filter filter, | 79 | Tegra::Engines::Fermi2D::Filter filter, |
| 90 | Tegra::Engines::Fermi2D::Operation operation); | 80 | Tegra::Engines::Fermi2D::Operation operation); |
| 91 | 81 | ||
| 92 | void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 82 | void AccelerateImageUpload(Image& image, const ImageBufferMap& map, |
| 93 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 83 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 94 | 84 | ||
| 95 | void InsertUploadMemoryBarrier(); | 85 | void InsertUploadMemoryBarrier(); |
| @@ -148,14 +138,12 @@ public: | |||
| 148 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 138 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 149 | VAddr cpu_addr); | 139 | VAddr cpu_addr); |
| 150 | 140 | ||
| 151 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 141 | void UploadMemory(const ImageBufferMap& map, |
| 152 | std::span<const VideoCommon::BufferImageCopy> copies); | 142 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 153 | 143 | ||
| 154 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 144 | void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); |
| 155 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 156 | 145 | ||
| 157 | void DownloadMemory(ImageBufferMap& map, size_t buffer_offset, | 146 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); |
| 158 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 159 | 147 | ||
| 160 | GLuint Handle() const noexcept { | 148 | GLuint Handle() const noexcept { |
| 161 | return texture.handle; | 149 | return texture.handle; |
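ImageBufferMap is now a plain aggregate (mapped_span, offset, sync, buffer) and the staging entry points are renamed to UploadStagingBuffer/DownloadStagingBuffer, with the buffer offset carried inside the map instead of being passed alongside it. A sketch of the upload flow this implies; decoded_pixels, staging_size and copies stand in for data the texture cache computes elsewhere:

    ImageBufferMap map = runtime.UploadStagingBuffer(staging_size);
    std::memcpy(map.mapped_span.data(), decoded_pixels, staging_size);
    image.UploadMemory(map, copies);  // binds map.buffer and applies the copies at map.offset
    // When `map` goes out of scope, ~ImageBufferMap() creates a fence on map.sync so the
    // staging buffer is not recycled while the GPU may still be reading from it.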
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index cbccfdeb4..f7ad8f370 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h | |||
| @@ -4,23 +4,10 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <array> | ||
| 8 | #include <glad/glad.h> | 7 | #include <glad/glad.h> |
| 9 | #include "common/common_types.h" | ||
| 10 | #include "common/logging/log.h" | ||
| 11 | #include "video_core/engines/maxwell_3d.h" | 8 | #include "video_core/engines/maxwell_3d.h" |
| 12 | 9 | ||
| 13 | namespace OpenGL { | 10 | namespace OpenGL::MaxwellToGL { |
| 14 | |||
| 15 | using GLvec2 = std::array<GLfloat, 2>; | ||
| 16 | using GLvec3 = std::array<GLfloat, 3>; | ||
| 17 | using GLvec4 = std::array<GLfloat, 4>; | ||
| 18 | |||
| 19 | using GLuvec2 = std::array<GLuint, 2>; | ||
| 20 | using GLuvec3 = std::array<GLuint, 3>; | ||
| 21 | using GLuvec4 = std::array<GLuint, 4>; | ||
| 22 | |||
| 23 | namespace MaxwellToGL { | ||
| 24 | 11 | ||
| 25 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | 12 | using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
| 26 | 13 | ||
| @@ -317,26 +304,6 @@ inline GLenum BlendFunc(Maxwell::Blend::Factor factor) { | |||
| 317 | return GL_ZERO; | 304 | return GL_ZERO; |
| 318 | } | 305 | } |
| 319 | 306 | ||
| 320 | inline GLenum SwizzleSource(Tegra::Texture::SwizzleSource source) { | ||
| 321 | switch (source) { | ||
| 322 | case Tegra::Texture::SwizzleSource::Zero: | ||
| 323 | return GL_ZERO; | ||
| 324 | case Tegra::Texture::SwizzleSource::R: | ||
| 325 | return GL_RED; | ||
| 326 | case Tegra::Texture::SwizzleSource::G: | ||
| 327 | return GL_GREEN; | ||
| 328 | case Tegra::Texture::SwizzleSource::B: | ||
| 329 | return GL_BLUE; | ||
| 330 | case Tegra::Texture::SwizzleSource::A: | ||
| 331 | return GL_ALPHA; | ||
| 332 | case Tegra::Texture::SwizzleSource::OneInt: | ||
| 333 | case Tegra::Texture::SwizzleSource::OneFloat: | ||
| 334 | return GL_ONE; | ||
| 335 | } | ||
| 336 | UNIMPLEMENTED_MSG("Unimplemented swizzle source={}", source); | ||
| 337 | return GL_ZERO; | ||
| 338 | } | ||
| 339 | |||
| 340 | inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { | 307 | inline GLenum ComparisonOp(Maxwell::ComparisonOp comparison) { |
| 341 | switch (comparison) { | 308 | switch (comparison) { |
| 342 | case Maxwell::ComparisonOp::Never: | 309 | case Maxwell::ComparisonOp::Never: |
| @@ -493,5 +460,4 @@ inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) { | |||
| 493 | return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); | 460 | return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle); |
| 494 | } | 461 | } |
| 495 | 462 | ||
| 496 | } // namespace MaxwellToGL | 463 | } // namespace OpenGL::MaxwellToGL |
| 497 | } // namespace OpenGL | ||
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 21159e498..9d2acd4d9 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -29,9 +29,7 @@ | |||
| 29 | #include "video_core/textures/decoders.h" | 29 | #include "video_core/textures/decoders.h" |
| 30 | 30 | ||
| 31 | namespace OpenGL { | 31 | namespace OpenGL { |
| 32 | |||
| 33 | namespace { | 32 | namespace { |
| 34 | |||
| 35 | constexpr GLint PositionLocation = 0; | 33 | constexpr GLint PositionLocation = 0; |
| 36 | constexpr GLint TexCoordLocation = 1; | 34 | constexpr GLint TexCoordLocation = 1; |
| 37 | constexpr GLint ModelViewMatrixLocation = 0; | 35 | constexpr GLint ModelViewMatrixLocation = 0; |
| @@ -124,7 +122,6 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit | |||
| 124 | break; | 122 | break; |
| 125 | } | 123 | } |
| 126 | } | 124 | } |
| 127 | |||
| 128 | } // Anonymous namespace | 125 | } // Anonymous namespace |
| 129 | 126 | ||
| 130 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | 127 | RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, |
| @@ -132,7 +129,17 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
| 132 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 129 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |
| 133 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 130 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) |
| 134 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | 131 | : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, |
| 135 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} | 132 | emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, |
| 133 | program_manager{device}, | ||
| 134 | rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | ||
| 135 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||
| 136 | glEnable(GL_DEBUG_OUTPUT); | ||
| 137 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||
| 138 | glDebugMessageCallback(DebugHandler, nullptr); | ||
| 139 | } | ||
| 140 | AddTelemetryFields(); | ||
| 141 | InitOpenGLObjects(); | ||
| 142 | } | ||
| 136 | 143 | ||
| 137 | RendererOpenGL::~RendererOpenGL() = default; | 144 | RendererOpenGL::~RendererOpenGL() = default; |
| 138 | 145 | ||
| @@ -148,7 +155,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 148 | 155 | ||
| 149 | ++m_current_frame; | 156 | ++m_current_frame; |
| 150 | 157 | ||
| 151 | rasterizer->TickFrame(); | 158 | rasterizer.TickFrame(); |
| 152 | 159 | ||
| 153 | context->SwapBuffers(); | 160 | context->SwapBuffers(); |
| 154 | render_window.OnFrameDisplayed(); | 161 | render_window.OnFrameDisplayed(); |
| @@ -179,7 +186,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 179 | framebuffer_crop_rect = framebuffer.crop_rect; | 186 | framebuffer_crop_rect = framebuffer.crop_rect; |
| 180 | 187 | ||
| 181 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; | 188 | const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; |
| 182 | if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { | 189 | if (rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) { |
| 183 | return; | 190 | return; |
| 184 | } | 191 | } |
| 185 | 192 | ||
| @@ -267,6 +274,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
| 267 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it | 274 | // Enable unified vertex attributes and query vertex buffer address when the driver supports it |
| 268 | if (device.HasVertexBufferUnifiedMemory()) { | 275 | if (device.HasVertexBufferUnifiedMemory()) { |
| 269 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); | 276 | glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); |
| 277 | glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); | ||
| 270 | 278 | ||
| 271 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); | 279 | glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); |
| 272 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, | 280 | glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, |
| @@ -289,14 +297,6 @@ void RendererOpenGL::AddTelemetryFields() { | |||
| 289 | telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); | 297 | telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); |
| 290 | } | 298 | } |
| 291 | 299 | ||
| 292 | void RendererOpenGL::CreateRasterizer() { | ||
| 293 | if (rasterizer) { | ||
| 294 | return; | ||
| 295 | } | ||
| 296 | rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device, | ||
| 297 | screen_info, program_manager, state_tracker); | ||
| 298 | } | ||
| 299 | |||
| 300 | void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, | 300 | void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, |
| 301 | const Tegra::FramebufferConfig& framebuffer) { | 301 | const Tegra::FramebufferConfig& framebuffer) { |
| 302 | texture.width = framebuffer.width; | 302 | texture.width = framebuffer.width; |
| @@ -407,6 +407,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 407 | 407 | ||
| 408 | program_manager.BindHostPipeline(pipeline.handle); | 408 | program_manager.BindHostPipeline(pipeline.handle); |
| 409 | 409 | ||
| 410 | state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 410 | glEnable(GL_CULL_FACE); | 411 | glEnable(GL_CULL_FACE); |
| 411 | if (screen_info.display_srgb) { | 412 | if (screen_info.display_srgb) { |
| 412 | glEnable(GL_FRAMEBUFFER_SRGB); | 413 | glEnable(GL_FRAMEBUFFER_SRGB); |
| @@ -425,7 +426,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
| 425 | glCullFace(GL_BACK); | 426 | glCullFace(GL_BACK); |
| 426 | glFrontFace(GL_CW); | 427 | glFrontFace(GL_CW); |
| 427 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); | 428 | glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); |
| 428 | glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||
| 429 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), | 429 | glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), |
| 430 | static_cast<GLfloat>(layout.height)); | 430 | static_cast<GLfloat>(layout.height)); |
| 431 | glDepthRangeIndexed(0, 0.0, 0.0); | 431 | glDepthRangeIndexed(0, 0.0, 0.0); |
| @@ -497,25 +497,4 @@ void RendererOpenGL::RenderScreenshot() { | |||
| 497 | renderer_settings.screenshot_requested = false; | 497 | renderer_settings.screenshot_requested = false; |
| 498 | } | 498 | } |
| 499 | 499 | ||
| 500 | bool RendererOpenGL::Init() { | ||
| 501 | if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||
| 502 | glEnable(GL_DEBUG_OUTPUT); | ||
| 503 | glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); | ||
| 504 | glDebugMessageCallback(DebugHandler, nullptr); | ||
| 505 | } | ||
| 506 | |||
| 507 | AddTelemetryFields(); | ||
| 508 | |||
| 509 | if (!GLAD_GL_VERSION_4_6) { | ||
| 510 | return false; | ||
| 511 | } | ||
| 512 | |||
| 513 | InitOpenGLObjects(); | ||
| 514 | CreateRasterizer(); | ||
| 515 | |||
| 516 | return true; | ||
| 517 | } | ||
| 518 | |||
| 519 | void RendererOpenGL::ShutDown() {} | ||
| 520 | |||
| 521 | } // namespace OpenGL | 500 | } // namespace OpenGL |
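Editor's note: the renderer_opengl.cpp hunks above fold the old fallible Init()/ShutDown() pair into the constructor and destructor. Debug-output setup, AddTelemetryFields() and InitOpenGLObjects() now run while the object is being built, and the rasterizer becomes a value member, which is why call sites change from `rasterizer->` to `rasterizer.`. A minimal sketch of the pattern, using illustrative names rather than yuzu's actual class:

#include <stdexcept>

class Rasterizer {};

class Renderer {
public:
    explicit Renderer(bool debug) : rasterizer{} {
        // Everything the old Init() did happens here; a failure is reported by
        // throwing instead of returning false to the caller.
        if (debug) {
            EnableDebugOutput();
        }
    }
    // No ShutDown(): members are destroyed automatically, in reverse declaration order.

private:
    void EnableDebugOutput() {
        // Would enable GL_DEBUG_OUTPUT and install the debug callback here.
    }

    Rasterizer rasterizer; // value member, so there is no null state to check
};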
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 44e109794..cc19a110f 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "common/math_util.h" | 10 | #include "common/math_util.h" |
| 11 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| 12 | #include "video_core/renderer_opengl/gl_device.h" | 12 | #include "video_core/renderer_opengl/gl_device.h" |
| 13 | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||
| 13 | #include "video_core/renderer_opengl/gl_resource_manager.h" | 14 | #include "video_core/renderer_opengl/gl_resource_manager.h" |
| 14 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 15 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 15 | #include "video_core/renderer_opengl/gl_state_tracker.h" | 16 | #include "video_core/renderer_opengl/gl_state_tracker.h" |
| @@ -63,18 +64,18 @@ public: | |||
| 63 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 64 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 64 | ~RendererOpenGL() override; | 65 | ~RendererOpenGL() override; |
| 65 | 66 | ||
| 66 | bool Init() override; | ||
| 67 | void ShutDown() override; | ||
| 68 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | 67 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 69 | 68 | ||
| 69 | VideoCore::RasterizerInterface* ReadRasterizer() override { | ||
| 70 | return &rasterizer; | ||
| 71 | } | ||
| 72 | |||
| 70 | private: | 73 | private: |
| 71 | /// Initializes the OpenGL state and creates persistent objects. | 74 | /// Initializes the OpenGL state and creates persistent objects. |
| 72 | void InitOpenGLObjects(); | 75 | void InitOpenGLObjects(); |
| 73 | 76 | ||
| 74 | void AddTelemetryFields(); | 77 | void AddTelemetryFields(); |
| 75 | 78 | ||
| 76 | void CreateRasterizer(); | ||
| 77 | |||
| 78 | void ConfigureFramebufferTexture(TextureInfo& texture, | 79 | void ConfigureFramebufferTexture(TextureInfo& texture, |
| 79 | const Tegra::FramebufferConfig& framebuffer); | 80 | const Tegra::FramebufferConfig& framebuffer); |
| 80 | 81 | ||
| @@ -98,8 +99,10 @@ private: | |||
| 98 | Core::Memory::Memory& cpu_memory; | 99 | Core::Memory::Memory& cpu_memory; |
| 99 | Tegra::GPU& gpu; | 100 | Tegra::GPU& gpu; |
| 100 | 101 | ||
| 101 | const Device device; | 102 | Device device; |
| 102 | StateTracker state_tracker{gpu}; | 103 | StateTracker state_tracker; |
| 104 | ProgramManager program_manager; | ||
| 105 | RasterizerOpenGL rasterizer; | ||
| 103 | 106 | ||
| 104 | // OpenGL object IDs | 107 | // OpenGL object IDs |
| 105 | OGLSampler present_sampler; | 108 | OGLSampler present_sampler; |
| @@ -115,9 +118,6 @@ private: | |||
| 115 | /// Display information for Switch screen | 118 | /// Display information for Switch screen |
| 116 | ScreenInfo screen_info; | 119 | ScreenInfo screen_info; |
| 117 | 120 | ||
| 118 | /// Global dummy shader pipeline | ||
| 119 | ProgramManager program_manager; | ||
| 120 | |||
| 121 | /// OpenGL framebuffer data | 121 | /// OpenGL framebuffer data |
| 122 | std::vector<u8> gl_framebuffer_data; | 122 | std::vector<u8> gl_framebuffer_data; |
| 123 | 123 | ||
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index eb849cbf2..1b58e8617 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp | |||
| @@ -63,7 +63,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | |||
| 63 | 63 | ||
| 64 | UtilShaders::~UtilShaders() = default; | 64 | UtilShaders::~UtilShaders() = default; |
| 65 | 65 | ||
| 66 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 66 | void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, |
| 67 | std::span<const SwizzleParameters> swizzles) { | 67 | std::span<const SwizzleParameters> swizzles) { |
| 68 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | 68 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; |
| 69 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; | 69 | static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; |
| @@ -71,13 +71,13 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
| 71 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 71 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 72 | 72 | ||
| 73 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | 73 | program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); |
| 74 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | 74 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 75 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 75 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 76 | 76 | ||
| 77 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | 77 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); |
| 78 | for (const SwizzleParameters& swizzle : swizzles) { | 78 | for (const SwizzleParameters& swizzle : swizzles) { |
| 79 | const Extent3D num_tiles = swizzle.num_tiles; | 79 | const Extent3D num_tiles = swizzle.num_tiles; |
| 80 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | 80 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 81 | 81 | ||
| 82 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | 82 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); |
| 83 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | 83 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); |
| @@ -91,8 +91,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
| 91 | glUniform1ui(5, params.x_shift); | 91 | glUniform1ui(5, params.x_shift); |
| 92 | glUniform1ui(6, params.block_height); | 92 | glUniform1ui(6, params.block_height); |
| 93 | glUniform1ui(7, params.block_height_mask); | 93 | glUniform1ui(7, params.block_height_mask); |
| 94 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | 94 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 95 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | 95 | image.guest_size_bytes - swizzle.buffer_offset); |
| 96 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | 96 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, |
| 97 | GL_WRITE_ONLY, store_format); | 97 | GL_WRITE_ONLY, store_format); |
| 98 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); | 98 | glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); |
| @@ -100,7 +100,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, s | |||
| 100 | program_manager.RestoreGuestCompute(); | 100 | program_manager.RestoreGuestCompute(); |
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 103 | void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, |
| 104 | std::span<const SwizzleParameters> swizzles) { | 104 | std::span<const SwizzleParameters> swizzles) { |
| 105 | static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; | 105 | static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; |
| 106 | 106 | ||
| @@ -108,14 +108,14 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
| 108 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; | 108 | static constexpr GLuint BINDING_INPUT_BUFFER = 1; |
| 109 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | 109 | static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; |
| 110 | 110 | ||
| 111 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | 111 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 112 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | 112 | program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); |
| 113 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | 113 | glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); |
| 114 | 114 | ||
| 115 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | 115 | const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); |
| 116 | for (const SwizzleParameters& swizzle : swizzles) { | 116 | for (const SwizzleParameters& swizzle : swizzles) { |
| 117 | const Extent3D num_tiles = swizzle.num_tiles; | 117 | const Extent3D num_tiles = swizzle.num_tiles; |
| 118 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | 118 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 119 | 119 | ||
| 120 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | 120 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); |
| 121 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | 121 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); |
| @@ -132,8 +132,8 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
| 132 | glUniform1ui(7, params.block_height_mask); | 132 | glUniform1ui(7, params.block_height_mask); |
| 133 | glUniform1ui(8, params.block_depth); | 133 | glUniform1ui(8, params.block_depth); |
| 134 | glUniform1ui(9, params.block_depth_mask); | 134 | glUniform1ui(9, params.block_depth_mask); |
| 135 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | 135 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 136 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | 136 | image.guest_size_bytes - swizzle.buffer_offset); |
| 137 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, | 137 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0, |
| 138 | GL_WRITE_ONLY, store_format); | 138 | GL_WRITE_ONLY, store_format); |
| 139 | glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); | 139 | glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z); |
| @@ -141,7 +141,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, s | |||
| 141 | program_manager.RestoreGuestCompute(); | 141 | program_manager.RestoreGuestCompute(); |
| 142 | } | 142 | } |
| 143 | 143 | ||
| 144 | void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 144 | void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, |
| 145 | std::span<const SwizzleParameters> swizzles) { | 145 | std::span<const SwizzleParameters> swizzles) { |
| 146 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; | 146 | static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; |
| 147 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; | 147 | static constexpr GLuint BINDING_INPUT_BUFFER = 0; |
| @@ -159,7 +159,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | |||
| 159 | "Non-power of two images are not implemented"); | 159 | "Non-power of two images are not implemented"); |
| 160 | 160 | ||
| 161 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); | 161 | program_manager.BindHostCompute(pitch_unswizzle_program.handle); |
| 162 | glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes); | 162 | glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); |
| 163 | glUniform2ui(LOC_ORIGIN, 0, 0); | 163 | glUniform2ui(LOC_ORIGIN, 0, 0); |
| 164 | glUniform2i(LOC_DESTINATION, 0, 0); | 164 | glUniform2i(LOC_DESTINATION, 0, 0); |
| 165 | glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); | 165 | glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block); |
| @@ -167,13 +167,13 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t bu | |||
| 167 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); | 167 | glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format); |
| 168 | for (const SwizzleParameters& swizzle : swizzles) { | 168 | for (const SwizzleParameters& swizzle : swizzles) { |
| 169 | const Extent3D num_tiles = swizzle.num_tiles; | 169 | const Extent3D num_tiles = swizzle.num_tiles; |
| 170 | const size_t input_offset = swizzle.buffer_offset + buffer_offset; | 170 | const size_t input_offset = swizzle.buffer_offset + map.offset; |
| 171 | 171 | ||
| 172 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); | 172 | const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width); |
| 173 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); | 173 | const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height); |
| 174 | 174 | ||
| 175 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(), | 175 | glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, |
| 176 | input_offset, image.guest_size_bytes - swizzle.buffer_offset); | 176 | image.guest_size_bytes - swizzle.buffer_offset); |
| 177 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); | 177 | glDispatchCompute(num_dispatches_x, num_dispatches_y, 1); |
| 178 | } | 178 | } |
| 179 | program_manager.RestoreGuestCompute(); | 179 | program_manager.RestoreGuestCompute(); |
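Editor's note: with this change the upload helpers stop taking a separate `buffer_offset`; the staging handle and its base offset travel together in `ImageBufferMap` (`map.buffer`, `map.offset`), and each mip level still adds its own `swizzle.buffer_offset` on top. A small sketch of the arithmetic, with the struct fields assumed from the calls above:

#include <cstddef>

// Assumed shape of the map, inferred from the map.buffer / map.offset uses in the diff.
struct ImageBufferMap {
    unsigned buffer;    // GL buffer handle backing the staging memory
    std::size_t offset; // where this mapping starts inside that buffer
};

// Byte offset handed to glBindBufferRange for one mip level's swizzled data.
std::size_t InputOffset(const ImageBufferMap& map, std::size_t swizzle_buffer_offset) {
    return swizzle_buffer_offset + map.offset;
}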
diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 359997255..7b1d16b09 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h | |||
| @@ -15,21 +15,22 @@ | |||
| 15 | namespace OpenGL { | 15 | namespace OpenGL { |
| 16 | 16 | ||
| 17 | class Image; | 17 | class Image; |
| 18 | class ImageBufferMap; | ||
| 19 | class ProgramManager; | 18 | class ProgramManager; |
| 20 | 19 | ||
| 20 | struct ImageBufferMap; | ||
| 21 | |||
| 21 | class UtilShaders { | 22 | class UtilShaders { |
| 22 | public: | 23 | public: |
| 23 | explicit UtilShaders(ProgramManager& program_manager); | 24 | explicit UtilShaders(ProgramManager& program_manager); |
| 24 | ~UtilShaders(); | 25 | ~UtilShaders(); |
| 25 | 26 | ||
| 26 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 27 | void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, |
| 27 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 28 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 28 | 29 | ||
| 29 | void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 30 | void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, |
| 30 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 31 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 31 | 32 | ||
| 32 | void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset, | 33 | void PitchUpload(Image& image, const ImageBufferMap& map, |
| 33 | std::span<const VideoCommon::SwizzleParameters> swizzles); | 34 | std::span<const VideoCommon::SwizzleParameters> swizzles); |
| 34 | 35 | ||
| 35 | void CopyBC4(Image& dst_image, Image& src_image, | 36 | void CopyBC4(Image& dst_image, Image& src_image, |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 85121d9fd..19aaf034f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -531,13 +531,9 @@ VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison) { | |||
| 531 | return {}; | 531 | return {}; |
| 532 | } | 532 | } |
| 533 | 533 | ||
| 534 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format) { | 534 | VkIndexType IndexFormat(Maxwell::IndexFormat index_format) { |
| 535 | switch (index_format) { | 535 | switch (index_format) { |
| 536 | case Maxwell::IndexFormat::UnsignedByte: | 536 | case Maxwell::IndexFormat::UnsignedByte: |
| 537 | if (!device.IsExtIndexTypeUint8Supported()) { | ||
| 538 | UNIMPLEMENTED_MSG("Native uint8 indices are not supported on this device"); | ||
| 539 | return VK_INDEX_TYPE_UINT16; | ||
| 540 | } | ||
| 541 | return VK_INDEX_TYPE_UINT8_EXT; | 537 | return VK_INDEX_TYPE_UINT8_EXT; |
| 542 | case Maxwell::IndexFormat::UnsignedShort: | 538 | case Maxwell::IndexFormat::UnsignedShort: |
| 543 | return VK_INDEX_TYPE_UINT16; | 539 | return VK_INDEX_TYPE_UINT16; |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 7c34b47dc..e3e06ba38 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h | |||
| @@ -53,7 +53,7 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib | |||
| 53 | 53 | ||
| 54 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); | 54 | VkCompareOp ComparisonOp(Maxwell::ComparisonOp comparison); |
| 55 | 55 | ||
| 56 | VkIndexType IndexFormat(const Device& device, Maxwell::IndexFormat index_format); | 56 | VkIndexType IndexFormat(Maxwell::IndexFormat index_format); |
| 57 | 57 | ||
| 58 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); | 58 | VkStencilOp StencilOp(Maxwell::StencilOp stencil_op); |
| 59 | 59 | ||
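Editor's note: IndexFormat() is now a pure enum translation; the device capability check for native uint8 indices moves to bind time in the Vulkan buffer cache, where an unsupported VK_INDEX_TYPE_UINT8_EXT is widened to uint16 by a compute pass (see the vk_buffer_cache.cpp hunks below). A hedged sketch of just the selection logic, with the conversion pass itself elided:

#include <vulkan/vulkan.h>

// Returns the index type that will actually be bound. Callers are expected to run a
// uint8 -> uint16 conversion pass whenever the fallback is taken (not shown here).
VkIndexType SelectBoundIndexType(VkIndexType requested, bool native_uint8_supported) {
    if (requested == VK_INDEX_TYPE_UINT8_EXT && !native_uint8_supported) {
        return VK_INDEX_TYPE_UINT16;
    }
    return requested;
}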
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 61796e33a..1cc720ddd 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -80,17 +80,50 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext | |||
| 80 | return separated_extensions; | 80 | return separated_extensions; |
| 81 | } | 81 | } |
| 82 | 82 | ||
| 83 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | ||
| 84 | VkSurfaceKHR surface) { | ||
| 85 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | ||
| 86 | const s32 device_index = Settings::values.vulkan_device.GetValue(); | ||
| 87 | if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | ||
| 88 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | ||
| 89 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 90 | } | ||
| 91 | const vk::PhysicalDevice physical_device(devices[device_index], dld); | ||
| 92 | return Device(*instance, physical_device, surface, dld); | ||
| 93 | } | ||
| 83 | } // Anonymous namespace | 94 | } // Anonymous namespace |
| 84 | 95 | ||
| 85 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | 96 | RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, |
| 86 | Core::Frontend::EmuWindow& emu_window, | 97 | Core::Frontend::EmuWindow& emu_window, |
| 87 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, | 98 | Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, |
| 88 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) | 99 | std::unique_ptr<Core::Frontend::GraphicsContext> context_) try |
| 89 | : RendererBase{emu_window, std::move(context_)}, telemetry_session{telemetry_session_}, | 100 | : RendererBase(emu_window, std::move(context_)), |
| 90 | cpu_memory{cpu_memory_}, gpu{gpu_} {} | 101 | telemetry_session(telemetry_session_), |
| 102 | cpu_memory(cpu_memory_), | ||
| 103 | gpu(gpu_), | ||
| 104 | library(OpenLibrary()), | ||
| 105 | instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||
| 106 | true, Settings::values.renderer_debug)), | ||
| 107 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | ||
| 108 | surface(CreateSurface(instance, render_window)), | ||
| 109 | device(CreateDevice(instance, dld, *surface)), | ||
| 110 | memory_allocator(device, false), | ||
| 111 | state_tracker(gpu), | ||
| 112 | scheduler(device, state_tracker), | ||
| 113 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | ||
| 114 | render_window.GetFramebufferLayout().height, false), | ||
| 115 | blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler, | ||
| 116 | screen_info), | ||
| 117 | rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device, | ||
| 118 | memory_allocator, state_tracker, scheduler) { | ||
| 119 | Report(); | ||
| 120 | } catch (const vk::Exception& exception) { | ||
| 121 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||
| 122 | throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())}; | ||
| 123 | } | ||
| 91 | 124 | ||
| 92 | RendererVulkan::~RendererVulkan() { | 125 | RendererVulkan::~RendererVulkan() { |
| 93 | ShutDown(); | 126 | void(device.GetLogical().WaitIdle()); |
| 94 | } | 127 | } |
| 95 | 128 | ||
| 96 | void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | 129 | void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { |
| @@ -101,101 +134,38 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { | |||
| 101 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { | 134 | if (layout.width > 0 && layout.height > 0 && render_window.IsShown()) { |
| 102 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; | 135 | const VAddr framebuffer_addr = framebuffer->address + framebuffer->offset; |
| 103 | const bool use_accelerated = | 136 | const bool use_accelerated = |
| 104 | rasterizer->AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); | 137 | rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); |
| 105 | const bool is_srgb = use_accelerated && screen_info.is_srgb; | 138 | const bool is_srgb = use_accelerated && screen_info.is_srgb; |
| 106 | if (swapchain->HasFramebufferChanged(layout) || swapchain->GetSrgbState() != is_srgb) { | 139 | if (swapchain.HasFramebufferChanged(layout) || swapchain.GetSrgbState() != is_srgb) { |
| 107 | swapchain->Create(layout.width, layout.height, is_srgb); | 140 | swapchain.Create(layout.width, layout.height, is_srgb); |
| 108 | blit_screen->Recreate(); | 141 | blit_screen.Recreate(); |
| 109 | } | 142 | } |
| 110 | 143 | ||
| 111 | scheduler->WaitWorker(); | 144 | scheduler.WaitWorker(); |
| 112 | 145 | ||
| 113 | swapchain->AcquireNextImage(); | 146 | swapchain.AcquireNextImage(); |
| 114 | const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); | 147 | const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); |
| 115 | 148 | ||
| 116 | scheduler->Flush(render_semaphore); | 149 | scheduler.Flush(render_semaphore); |
| 117 | 150 | ||
| 118 | if (swapchain->Present(render_semaphore)) { | 151 | if (swapchain.Present(render_semaphore)) { |
| 119 | blit_screen->Recreate(); | 152 | blit_screen.Recreate(); |
| 120 | } | 153 | } |
| 121 | 154 | rasterizer.TickFrame(); | |
| 122 | rasterizer->TickFrame(); | ||
| 123 | } | 155 | } |
| 124 | 156 | ||
| 125 | render_window.OnFrameDisplayed(); | 157 | render_window.OnFrameDisplayed(); |
| 126 | } | 158 | } |
| 127 | 159 | ||
| 128 | bool RendererVulkan::Init() try { | ||
| 129 | library = OpenLibrary(); | ||
| 130 | instance = CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | ||
| 131 | true, Settings::values.renderer_debug); | ||
| 132 | if (Settings::values.renderer_debug) { | ||
| 133 | debug_callback = CreateDebugCallback(instance); | ||
| 134 | } | ||
| 135 | surface = CreateSurface(instance, render_window); | ||
| 136 | |||
| 137 | InitializeDevice(); | ||
| 138 | Report(); | ||
| 139 | |||
| 140 | memory_allocator = std::make_unique<MemoryAllocator>(*device); | ||
| 141 | |||
| 142 | state_tracker = std::make_unique<StateTracker>(gpu); | ||
| 143 | |||
| 144 | scheduler = std::make_unique<VKScheduler>(*device, *state_tracker); | ||
| 145 | |||
| 146 | const auto& framebuffer = render_window.GetFramebufferLayout(); | ||
| 147 | swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler); | ||
| 148 | swapchain->Create(framebuffer.width, framebuffer.height, false); | ||
| 149 | |||
| 150 | rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), | ||
| 151 | cpu_memory, screen_info, *device, | ||
| 152 | *memory_allocator, *state_tracker, *scheduler); | ||
| 153 | |||
| 154 | blit_screen = | ||
| 155 | std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, | ||
| 156 | *memory_allocator, *swapchain, *scheduler, screen_info); | ||
| 157 | return true; | ||
| 158 | |||
| 159 | } catch (const vk::Exception& exception) { | ||
| 160 | LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); | ||
| 161 | return false; | ||
| 162 | } | ||
| 163 | |||
| 164 | void RendererVulkan::ShutDown() { | ||
| 165 | if (!device) { | ||
| 166 | return; | ||
| 167 | } | ||
| 168 | if (const auto& dev = device->GetLogical()) { | ||
| 169 | dev.WaitIdle(); | ||
| 170 | } | ||
| 171 | rasterizer.reset(); | ||
| 172 | blit_screen.reset(); | ||
| 173 | scheduler.reset(); | ||
| 174 | swapchain.reset(); | ||
| 175 | memory_allocator.reset(); | ||
| 176 | device.reset(); | ||
| 177 | } | ||
| 178 | |||
| 179 | void RendererVulkan::InitializeDevice() { | ||
| 180 | const std::vector<VkPhysicalDevice> devices = instance.EnumeratePhysicalDevices(); | ||
| 181 | const s32 device_index = Settings::values.vulkan_device.GetValue(); | ||
| 182 | if (device_index < 0 || device_index >= static_cast<s32>(devices.size())) { | ||
| 183 | LOG_ERROR(Render_Vulkan, "Invalid device index {}!", device_index); | ||
| 184 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | ||
| 185 | } | ||
| 186 | const vk::PhysicalDevice physical_device(devices[static_cast<size_t>(device_index)], dld); | ||
| 187 | device = std::make_unique<Device>(*instance, physical_device, *surface, dld); | ||
| 188 | } | ||
| 189 | |||
| 190 | void RendererVulkan::Report() const { | 160 | void RendererVulkan::Report() const { |
| 191 | const std::string vendor_name{device->GetVendorName()}; | 161 | const std::string vendor_name{device.GetVendorName()}; |
| 192 | const std::string model_name{device->GetModelName()}; | 162 | const std::string model_name{device.GetModelName()}; |
| 193 | const std::string driver_version = GetDriverVersion(*device); | 163 | const std::string driver_version = GetDriverVersion(device); |
| 194 | const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); | 164 | const std::string driver_name = fmt::format("{} {}", vendor_name, driver_version); |
| 195 | 165 | ||
| 196 | const std::string api_version = GetReadableVersion(device->ApiVersion()); | 166 | const std::string api_version = GetReadableVersion(device.ApiVersion()); |
| 197 | 167 | ||
| 198 | const std::string extensions = BuildCommaSeparatedExtensions(device->GetAvailableExtensions()); | 168 | const std::string extensions = BuildCommaSeparatedExtensions(device.GetAvailableExtensions()); |
| 199 | 169 | ||
| 200 | LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); | 170 | LOG_INFO(Render_Vulkan, "Driver: {}", driver_name); |
| 201 | LOG_INFO(Render_Vulkan, "Device: {}", model_name); | 171 | LOG_INFO(Render_Vulkan, "Device: {}", model_name); |
| @@ -209,21 +179,4 @@ void RendererVulkan::Report() const { | |||
| 209 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); | 179 | telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); |
| 210 | } | 180 | } |
| 211 | 181 | ||
| 212 | std::vector<std::string> RendererVulkan::EnumerateDevices() try { | ||
| 213 | vk::InstanceDispatch dld; | ||
| 214 | const Common::DynamicLibrary library = OpenLibrary(); | ||
| 215 | const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); | ||
| 216 | const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); | ||
| 217 | std::vector<std::string> names; | ||
| 218 | names.reserve(physical_devices.size()); | ||
| 219 | for (const VkPhysicalDevice device : physical_devices) { | ||
| 220 | names.push_back(vk::PhysicalDevice(device, dld).GetProperties().deviceName); | ||
| 221 | } | ||
| 222 | return names; | ||
| 223 | |||
| 224 | } catch (const vk::Exception& exception) { | ||
| 225 | LOG_ERROR(Render_Vulkan, "Failed to enumerate devices with error: {}", exception.what()); | ||
| 226 | return {}; | ||
| 227 | } | ||
| 228 | |||
| 229 | } // namespace Vulkan | 182 | } // namespace Vulkan |
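Editor's note: the new RendererVulkan constructor above is a function-try-block. Every component (instance, surface, device, scheduler, swapchain, blit_screen, rasterizer) is built in the member initializer list, and a vk::Exception thrown by any of them lands in the attached catch, which rethrows as std::runtime_error now that the boolean Init() path is gone. A minimal, self-contained sketch of the language feature, with illustrative types:

#include <stdexcept>
#include <string>

struct Device {
    explicit Device(int index) {
        if (index < 0) {
            throw std::invalid_argument{"invalid device index"};
        }
    }
};

class Renderer {
public:
    // The 'try' covers the member initializers as well as the constructor body.
    explicit Renderer(int device_index) try : device(device_index) {
        // Body runs only if every member constructed successfully.
    } catch (const std::exception& e) {
        // Translate the error; a constructor's catch block always rethrows on exit,
        // so the exception cannot be silently swallowed here.
        throw std::runtime_error{std::string{"Renderer initialization failed: "} + e.what()};
    }

private:
    Device device;
};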
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index daf55b9b4..72071316c 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -9,8 +9,14 @@ | |||
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include "common/dynamic_library.h" | 11 | #include "common/dynamic_library.h" |
| 12 | |||
| 13 | #include "video_core/renderer_base.h" | 12 | #include "video_core/renderer_base.h" |
| 13 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | ||
| 14 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 15 | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||
| 16 | #include "video_core/renderer_vulkan/vk_state_tracker.h" | ||
| 17 | #include "video_core/renderer_vulkan/vk_swapchain.h" | ||
| 18 | #include "video_core/vulkan_common/vulkan_device.h" | ||
| 19 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 20 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 21 | ||
| 16 | namespace Core { | 22 | namespace Core { |
| @@ -27,20 +33,6 @@ class GPU; | |||
| 27 | 33 | ||
| 28 | namespace Vulkan { | 34 | namespace Vulkan { |
| 29 | 35 | ||
| 30 | class Device; | ||
| 31 | class StateTracker; | ||
| 32 | class MemoryAllocator; | ||
| 33 | class VKBlitScreen; | ||
| 34 | class VKSwapchain; | ||
| 35 | class VKScheduler; | ||
| 36 | |||
| 37 | struct VKScreenInfo { | ||
| 38 | VkImageView image_view{}; | ||
| 39 | u32 width{}; | ||
| 40 | u32 height{}; | ||
| 41 | bool is_srgb{}; | ||
| 42 | }; | ||
| 43 | |||
| 44 | class RendererVulkan final : public VideoCore::RendererBase { | 36 | class RendererVulkan final : public VideoCore::RendererBase { |
| 45 | public: | 37 | public: |
| 46 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, | 38 | explicit RendererVulkan(Core::TelemetrySession& telemtry_session, |
| @@ -49,15 +41,13 @@ public: | |||
| 49 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); | 41 | std::unique_ptr<Core::Frontend::GraphicsContext> context_); |
| 50 | ~RendererVulkan() override; | 42 | ~RendererVulkan() override; |
| 51 | 43 | ||
| 52 | bool Init() override; | ||
| 53 | void ShutDown() override; | ||
| 54 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; | 44 | void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; |
| 55 | 45 | ||
| 56 | static std::vector<std::string> EnumerateDevices(); | 46 | VideoCore::RasterizerInterface* ReadRasterizer() override { |
| 47 | return &rasterizer; | ||
| 48 | } | ||
| 57 | 49 | ||
| 58 | private: | 50 | private: |
| 59 | void InitializeDevice(); | ||
| 60 | |||
| 61 | void Report() const; | 51 | void Report() const; |
| 62 | 52 | ||
| 63 | Core::TelemetrySession& telemetry_session; | 53 | Core::TelemetrySession& telemetry_session; |
| @@ -68,18 +58,18 @@ private: | |||
| 68 | vk::InstanceDispatch dld; | 58 | vk::InstanceDispatch dld; |
| 69 | 59 | ||
| 70 | vk::Instance instance; | 60 | vk::Instance instance; |
| 71 | 61 | vk::DebugUtilsMessenger debug_callback; | |
| 72 | vk::SurfaceKHR surface; | 62 | vk::SurfaceKHR surface; |
| 73 | 63 | ||
| 74 | VKScreenInfo screen_info; | 64 | VKScreenInfo screen_info; |
| 75 | 65 | ||
| 76 | vk::DebugUtilsMessenger debug_callback; | 66 | Device device; |
| 77 | std::unique_ptr<Device> device; | 67 | MemoryAllocator memory_allocator; |
| 78 | std::unique_ptr<MemoryAllocator> memory_allocator; | 68 | StateTracker state_tracker; |
| 79 | std::unique_ptr<StateTracker> state_tracker; | 69 | VKScheduler scheduler; |
| 80 | std::unique_ptr<VKScheduler> scheduler; | 70 | VKSwapchain swapchain; |
| 81 | std::unique_ptr<VKSwapchain> swapchain; | 71 | VKBlitScreen blit_screen; |
| 82 | std::unique_ptr<VKBlitScreen> blit_screen; | 72 | RasterizerVulkan rasterizer; |
| 83 | }; | 73 | }; |
| 84 | 74 | ||
| 85 | } // namespace Vulkan | 75 | } // namespace Vulkan |
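Editor's note: with the unique_ptr indirection removed, the header above leans on a C++ guarantee: non-static data members are constructed in declaration order, so device is declared before memory_allocator, scheduler before swapchain, and rasterizer last, matching the dependencies in the constructor's initializer list. A small illustrative sketch (names are not yuzu's):

struct Device {};

struct Scheduler {
    explicit Scheduler(Device& device_) : device{device_} {}
    Device& device;
};

class Renderer {
public:
    Renderer() : device{}, scheduler{device} {}

private:
    // Declaration order is construction order; swapping these two lines would let
    // Scheduler observe an unconstructed Device, regardless of initializer list order.
    Device device;
    Scheduler scheduler;
};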
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 3e3b895e0..a1a32aabe 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #include "video_core/gpu.h" | 18 | #include "video_core/gpu.h" |
| 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" | 19 | #include "video_core/host_shaders/vulkan_present_frag_spv.h" |
| 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" | 20 | #include "video_core/host_shaders/vulkan_present_vert_spv.h" |
| 21 | #include "video_core/rasterizer_interface.h" | ||
| 22 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 21 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 23 | #include "video_core/renderer_vulkan/vk_blit_screen.h" | 22 | #include "video_core/renderer_vulkan/vk_blit_screen.h" |
| 24 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | 23 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" |
| @@ -113,13 +112,12 @@ struct VKBlitScreen::BufferData { | |||
| 113 | }; | 112 | }; |
| 114 | 113 | ||
| 115 | VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, | 114 | VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, |
| 116 | Core::Frontend::EmuWindow& render_window_, | 115 | Core::Frontend::EmuWindow& render_window_, const Device& device_, |
| 117 | VideoCore::RasterizerInterface& rasterizer_, const Device& device_, | ||
| 118 | MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, | 116 | MemoryAllocator& memory_allocator_, VKSwapchain& swapchain_, |
| 119 | VKScheduler& scheduler_, const VKScreenInfo& screen_info_) | 117 | VKScheduler& scheduler_, const VKScreenInfo& screen_info_) |
| 120 | : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, | 118 | : cpu_memory{cpu_memory_}, render_window{render_window_}, device{device_}, |
| 121 | device{device_}, memory_allocator{memory_allocator_}, swapchain{swapchain_}, | 119 | memory_allocator{memory_allocator_}, swapchain{swapchain_}, scheduler{scheduler_}, |
| 122 | scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { | 120 | image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { |
| 123 | resource_ticks.resize(image_count); | 121 | resource_ticks.resize(image_count); |
| 124 | 122 | ||
| 125 | CreateStaticResources(); | 123 | CreateStaticResources(); |
| @@ -150,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 150 | SetUniformData(data, framebuffer); | 148 | SetUniformData(data, framebuffer); |
| 151 | SetVertexData(data, framebuffer); | 149 | SetVertexData(data, framebuffer); |
| 152 | 150 | ||
| 153 | const std::span<u8> map = buffer_commit.Map(); | 151 | const std::span<u8> mapped_span = buffer_commit.Map(); |
| 154 | std::memcpy(map.data(), &data, sizeof(data)); | 152 | std::memcpy(mapped_span.data(), &data, sizeof(data)); |
| 155 | 153 | ||
| 156 | if (!use_accelerated) { | 154 | if (!use_accelerated) { |
| 157 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); | 155 | const u64 image_offset = GetRawImageOffset(framebuffer, image_index); |
| @@ -159,14 +157,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 159 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; | 157 | const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; |
| 160 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); | 158 | const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); |
| 161 | const size_t size_bytes = GetSizeInBytes(framebuffer); | 159 | const size_t size_bytes = GetSizeInBytes(framebuffer); |
| 162 | rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes); | ||
| 163 | 160 | ||
| 164 | // TODO(Rodrigo): Read this from HLE | 161 | // TODO(Rodrigo): Read this from HLE |
| 165 | constexpr u32 block_height_log2 = 4; | 162 | constexpr u32 block_height_log2 = 4; |
| 166 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); | 163 | const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); |
| 167 | Tegra::Texture::UnswizzleTexture( | 164 | Tegra::Texture::UnswizzleTexture( |
| 168 | map.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, | 165 | mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), |
| 169 | framebuffer.width, framebuffer.height, 1, block_height_log2, 0); | 166 | bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); |
| 170 | 167 | ||
| 171 | const VkBufferImageCopy copy{ | 168 | const VkBufferImageCopy copy{ |
| 172 | .bufferOffset = image_offset, | 169 | .bufferOffset = image_offset, |
| @@ -266,7 +263,6 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool | |||
| 266 | cmdbuf.Draw(4, 1, 0, 0); | 263 | cmdbuf.Draw(4, 1, 0, 0); |
| 267 | cmdbuf.EndRenderPass(); | 264 | cmdbuf.EndRenderPass(); |
| 268 | }); | 265 | }); |
| 269 | |||
| 270 | return *semaphores[image_index]; | 266 | return *semaphores[image_index]; |
| 271 | } | 267 | } |
| 272 | 268 | ||
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index b52576957..5e3177685 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -38,12 +38,18 @@ class RasterizerVulkan; | |||
| 38 | class VKScheduler; | 38 | class VKScheduler; |
| 39 | class VKSwapchain; | 39 | class VKSwapchain; |
| 40 | 40 | ||
| 41 | class VKBlitScreen final { | 41 | struct VKScreenInfo { |
| 42 | VkImageView image_view{}; | ||
| 43 | u32 width{}; | ||
| 44 | u32 height{}; | ||
| 45 | bool is_srgb{}; | ||
| 46 | }; | ||
| 47 | |||
| 48 | class VKBlitScreen { | ||
| 42 | public: | 49 | public: |
| 43 | explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, | 50 | explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, |
| 44 | Core::Frontend::EmuWindow& render_window, | 51 | Core::Frontend::EmuWindow& render_window, const Device& device, |
| 45 | VideoCore::RasterizerInterface& rasterizer, const Device& device, | 52 | MemoryAllocator& memory_manager, VKSwapchain& swapchain, |
| 46 | MemoryAllocator& memory_allocator, VKSwapchain& swapchain, | ||
| 47 | VKScheduler& scheduler, const VKScreenInfo& screen_info); | 53 | VKScheduler& scheduler, const VKScreenInfo& screen_info); |
| 48 | ~VKBlitScreen(); | 54 | ~VKBlitScreen(); |
| 49 | 55 | ||
| @@ -84,7 +90,6 @@ private: | |||
| 84 | 90 | ||
| 85 | Core::Memory::Memory& cpu_memory; | 91 | Core::Memory::Memory& cpu_memory; |
| 86 | Core::Frontend::EmuWindow& render_window; | 92 | Core::Frontend::EmuWindow& render_window; |
| 87 | VideoCore::RasterizerInterface& rasterizer; | ||
| 88 | const Device& device; | 93 | const Device& device; |
| 89 | MemoryAllocator& memory_allocator; | 94 | MemoryAllocator& memory_allocator; |
| 90 | VKSwapchain& swapchain; | 95 | VKSwapchain& swapchain; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index d8ad40a0f..848eedd66 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -3,188 +3,308 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <array> | ||
| 6 | #include <cstring> | 7 | #include <cstring> |
| 7 | #include <memory> | 8 | #include <span> |
| 9 | #include <vector> | ||
| 8 | 10 | ||
| 9 | #include "core/core.h" | ||
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 11 | #include "video_core/buffer_cache/buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 13 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 12 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 14 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 13 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | 15 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 16 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||
| 14 | #include "video_core/vulkan_common/vulkan_device.h" | 17 | #include "video_core/vulkan_common/vulkan_device.h" |
| 18 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | ||
| 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 19 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 16 | 20 | ||
| 17 | namespace Vulkan { | 21 | namespace Vulkan { |
| 18 | |||
| 19 | namespace { | 22 | namespace { |
| 23 | VkBufferCopy MakeBufferCopy(const VideoCommon::BufferCopy& copy) { | ||
| 24 | return VkBufferCopy{ | ||
| 25 | .srcOffset = copy.src_offset, | ||
| 26 | .dstOffset = copy.dst_offset, | ||
| 27 | .size = copy.size, | ||
| 28 | }; | ||
| 29 | } | ||
| 20 | 30 | ||
| 21 | constexpr VkBufferUsageFlags BUFFER_USAGE = | 31 | VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) { |
| 22 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | 32 | if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) { |
| 23 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; | 33 | return VK_INDEX_TYPE_UINT8_EXT; |
| 24 | 34 | } | |
| 25 | constexpr VkPipelineStageFlags UPLOAD_PIPELINE_STAGE = | 35 | if (num_elements <= 0xffff) { |
| 26 | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | | 36 | return VK_INDEX_TYPE_UINT16; |
| 27 | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | 37 | } |
| 28 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; | 38 | return VK_INDEX_TYPE_UINT32; |
| 29 | 39 | } | |
| 30 | constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS = | ||
| 31 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT | | ||
| 32 | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT; | ||
| 33 | 40 | ||
| 34 | constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS = | 41 | size_t BytesPerIndex(VkIndexType index_type) { |
| 35 | VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT; | 42 | switch (index_type) { |
| 43 | case VK_INDEX_TYPE_UINT8_EXT: | ||
| 44 | return 1; | ||
| 45 | case VK_INDEX_TYPE_UINT16: | ||
| 46 | return 2; | ||
| 47 | case VK_INDEX_TYPE_UINT32: | ||
| 48 | return 4; | ||
| 49 | default: | ||
| 50 | UNREACHABLE_MSG("Invalid index type={}", index_type); | ||
| 51 | return 1; | ||
| 52 | } | ||
| 53 | } | ||
| 36 | 54 | ||
| 55 | template <typename T> | ||
| 56 | std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | ||
| 57 | std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; | ||
| 58 | std::ranges::transform(indices, indices.begin(), | ||
| 59 | [quad, first](u32 index) { return first + index + quad * 4; }); | ||
| 60 | return indices; | ||
| 61 | } | ||
| 37 | } // Anonymous namespace | 62 | } // Anonymous namespace |
| 38 | 63 | ||
| 39 | Buffer::Buffer(const Device& device_, MemoryAllocator& memory_allocator, VKScheduler& scheduler_, | 64 | Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) |
| 40 | StagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_) | 65 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params) {} |
| 41 | : BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{ | 66 | |
| 42 | staging_pool_} { | 67 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 43 | buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 68 | VAddr cpu_addr_, u64 size_bytes_) |
| 69 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_) { | ||
| 70 | buffer = runtime.device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 44 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 71 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 45 | .pNext = nullptr, | 72 | .pNext = nullptr, |
| 46 | .flags = 0, | 73 | .flags = 0, |
| 47 | .size = static_cast<VkDeviceSize>(size_), | 74 | .size = SizeBytes(), |
| 48 | .usage = BUFFER_USAGE | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 75 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 76 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | | ||
| 77 | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 78 | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | | ||
| 79 | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, | ||
| 49 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 80 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 50 | .queueFamilyIndexCount = 0, | 81 | .queueFamilyIndexCount = 0, |
| 51 | .pQueueFamilyIndices = nullptr, | 82 | .pQueueFamilyIndices = nullptr, |
| 52 | }); | 83 | }); |
| 53 | commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | 84 | if (runtime.device.HasDebuggingToolAttached()) { |
| 85 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | ||
| 86 | } | ||
| 87 | commit = runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 54 | } | 88 | } |
| 55 | 89 | ||
| 56 | Buffer::~Buffer() = default; | 90 | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, |
| 91 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, | ||
| 92 | VKUpdateDescriptorQueue& update_descriptor_queue_, | ||
| 93 | VKDescriptorPool& descriptor_pool) | ||
| 94 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||
| 95 | staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | ||
| 96 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 97 | quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} | ||
| 57 | 98 | ||
| 58 | void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) { | 99 | StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { |
| 59 | const auto& staging = staging_pool.Request(data_size, MemoryUsage::Upload); | 100 | return staging_pool.Request(size, MemoryUsage::Upload); |
| 60 | std::memcpy(staging.mapped_span.data(), data, data_size); | 101 | } |
| 61 | 102 | ||
| 62 | scheduler.RequestOutsideRenderPassOperationContext(); | 103 | StagingBufferRef BufferCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 104 | return staging_pool.Request(size, MemoryUsage::Download); | ||
| 105 | } | ||
| 63 | 106 | ||
| 64 | const VkBuffer handle = Handle(); | 107 | void BufferCacheRuntime::Finish() { |
| 65 | scheduler.Record([staging = staging.buffer, handle, offset, data_size, | 108 | scheduler.Finish(); |
| 66 | &device = device](vk::CommandBuffer cmdbuf) { | 109 | } |
| 67 | const VkBufferMemoryBarrier read_barrier{ | 110 | |
| 68 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 111 | void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, |
| 69 | .pNext = nullptr, | 112 | std::span<const VideoCommon::BufferCopy> copies) { |
| 70 | .srcAccessMask = | 113 | static constexpr VkMemoryBarrier READ_BARRIER{ |
| 71 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT | | 114 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 72 | VK_ACCESS_HOST_WRITE_BIT | | 115 | .pNext = nullptr, |
| 73 | (device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0), | 116 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, |
| 74 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | 117 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 75 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 118 | }; |
| 76 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 119 | static constexpr VkMemoryBarrier WRITE_BARRIER{ |
| 77 | .buffer = handle, | 120 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, |
| 78 | .offset = offset, | 121 | .pNext = nullptr, |
| 79 | .size = data_size, | 122 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 80 | }; | 123 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, |
| 81 | const VkBufferMemoryBarrier write_barrier{ | 124 | }; |
| 82 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 125 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up |
| 83 | .pNext = nullptr, | 126 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); |
| 84 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 127 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 85 | .dstAccessMask = UPLOAD_ACCESS_BARRIERS, | 128 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 86 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 129 | scheduler.Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| 87 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 88 | .buffer = handle, | ||
| 89 | .offset = offset, | ||
| 90 | .size = data_size, | ||
| 91 | }; | ||
| 92 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | 130 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 93 | 0, read_barrier); | 131 | 0, READ_BARRIER); |
| 94 | cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size}); | 132 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, vk_copies); |
| 95 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, | 133 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 96 | write_barrier); | 134 | 0, WRITE_BARRIER); |
| 97 | }); | 135 | }); |
| 98 | } | 136 | } |
| 99 | 137 | ||
| 100 | void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) { | 138 | void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, |
| 101 | auto staging = staging_pool.Request(data_size, MemoryUsage::Download); | 139 | u32 base_vertex, u32 num_indices, VkBuffer buffer, |
| 102 | scheduler.RequestOutsideRenderPassOperationContext(); | 140 | u32 offset, [[maybe_unused]] u32 size) { |
| 141 | VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); | ||
| 142 | VkDeviceSize vk_offset = offset; | ||
| 143 | VkBuffer vk_buffer = buffer; | ||
| 144 | if (topology == PrimitiveTopology::Quads) { | ||
| 145 | vk_index_type = VK_INDEX_TYPE_UINT32; | ||
| 146 | std::tie(vk_buffer, vk_offset) = | ||
| 147 | quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); | ||
| 148 | } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | ||
| 149 | vk_index_type = VK_INDEX_TYPE_UINT16; | ||
| 150 | std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); | ||
| 151 | } | ||
| 152 | if (vk_buffer == VK_NULL_HANDLE) { | ||
| 153 | // Vulkan doesn't support null index buffers. Replace it with our own null buffer. | ||
| 154 | ReserveNullIndexBuffer(); | ||
| 155 | vk_buffer = *null_index_buffer; | ||
| 156 | } | ||
| 157 | scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) { | ||
| 158 | cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type); | ||
| 159 | }); | ||
| 160 | } | ||
| 103 | 161 | ||
| 104 | const VkBuffer handle = Handle(); | 162 | void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { |
| 105 | scheduler.Record( | 163 | ReserveQuadArrayLUT(first + count, true); |
| 106 | [staging = staging.buffer, handle, offset, data_size](vk::CommandBuffer cmdbuf) { | ||
| 107 | const VkBufferMemoryBarrier barrier{ | ||
| 108 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 109 | .pNext = nullptr, | ||
| 110 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 111 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 112 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 113 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 114 | .buffer = handle, | ||
| 115 | .offset = offset, | ||
| 116 | .size = data_size, | ||
| 117 | }; | ||
| 118 | |||
| 119 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | | ||
| 120 | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | | ||
| 121 | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 122 | VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, barrier, {}); | ||
| 123 | cmdbuf.CopyBuffer(handle, staging, VkBufferCopy{offset, 0, data_size}); | ||
| 124 | }); | ||
| 125 | scheduler.Finish(); | ||
| 126 | 164 | ||
| 127 | std::memcpy(data, staging.mapped_span.data(), data_size); | 165 | // The LUT stores one copy of the quad index pattern for each 'first' offset 0 through 3, |
| 166 | // so the copy to use is selected with first % 4 and then advanced by whole quads. |
| 167 | const VkIndexType index_type = quad_array_lut_index_type; | ||
| 168 | const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); | ||
| 169 | const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); | ||
| 170 | scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { | ||
| 171 | cmdbuf.BindIndexBuffer(buffer, offset, index_type); | ||
| 172 | }); | ||
| 128 | } | 173 | } |
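To make the byte-offset arithmetic in BindQuadArrayIndexBuffer concrete, here is a worked example; every value below is assumed for illustration and does not come from this hunk.

    // Assume the LUT was last built for current_num_indices = 1024 (256 quads)
    // with quad_array_lut_index_type = VK_INDEX_TYPE_UINT16 (2 bytes per index).
    constexpr unsigned current_num_indices = 1024;
    constexpr unsigned bytes_per_index = 2;
    constexpr unsigned first = 9;  // copy = 9 % 4 = 1, starting quad = 9 / 4 = 2
    constexpr unsigned sub_first_offset = (first % 4) * (current_num_indices / 4);
    constexpr unsigned offset = (sub_first_offset + first / 4) * 6 * bytes_per_index;
    static_assert(sub_first_offset == 256);  // skip one whole copy, measured in quads
    static_assert(offset == 3096);           // (256 + 2) quads * 6 indices * 2 bytes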
| 129 | 174 | ||
| 130 | void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 175 | void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, |
| 131 | std::size_t copy_size) { | 176 | u32 stride) { |
| 132 | scheduler.RequestOutsideRenderPassOperationContext(); | 177 | if (device.IsExtExtendedDynamicStateSupported()) { |
| 178 | scheduler.Record([index, buffer, offset, size, stride](vk::CommandBuffer cmdbuf) { | ||
| 179 | const VkDeviceSize vk_offset = offset; | ||
| 180 | const VkDeviceSize vk_size = buffer != VK_NULL_HANDLE ? size : VK_WHOLE_SIZE; | ||
| 181 | const VkDeviceSize vk_stride = stride; | ||
| 182 | cmdbuf.BindVertexBuffers2EXT(index, 1, &buffer, &vk_offset, &vk_size, &vk_stride); | ||
| 183 | }); | ||
| 184 | } else { | ||
| 185 | scheduler.Record([index, buffer, offset](vk::CommandBuffer cmdbuf) { | ||
| 186 | cmdbuf.BindVertexBuffer(index, buffer, offset); | ||
| 187 | }); | ||
| 188 | } | ||
| 189 | } | ||
| 133 | 190 | ||
| 134 | const VkBuffer dst_buffer = Handle(); | 191 | void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, |
| 135 | scheduler.Record([src_buffer = src.Handle(), dst_buffer, src_offset, dst_offset, | 192 | u32 size) { |
| 136 | copy_size](vk::CommandBuffer cmdbuf) { | 193 | if (!device.IsExtTransformFeedbackSupported()) { |
| 137 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, copy_size}); | 194 | // Already logged in the rasterizer |
| 138 | 195 | return; | |
| 139 | std::array<VkBufferMemoryBarrier, 2> barriers; | 196 | } |
| 140 | barriers[0].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | 197 | scheduler.Record([index, buffer, offset, size](vk::CommandBuffer cmdbuf) { |
| 141 | barriers[0].pNext = nullptr; | 198 | const VkDeviceSize vk_offset = offset; |
| 142 | barriers[0].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; | 199 | const VkDeviceSize vk_size = size; |
| 143 | barriers[0].dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | 200 | cmdbuf.BindTransformFeedbackBuffersEXT(index, 1, &buffer, &vk_offset, &vk_size); |
| 144 | barriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 145 | barriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 146 | barriers[0].buffer = src_buffer; | ||
| 147 | barriers[0].offset = src_offset; | ||
| 148 | barriers[0].size = copy_size; | ||
| 149 | barriers[1].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 150 | barriers[1].pNext = nullptr; | ||
| 151 | barriers[1].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; | ||
| 152 | barriers[1].dstAccessMask = UPLOAD_ACCESS_BARRIERS; | ||
| 153 | barriers[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 154 | barriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 155 | barriers[1].buffer = dst_buffer; | ||
| 156 | barriers[1].offset = dst_offset; | ||
| 157 | barriers[1].size = copy_size; | ||
| 158 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {}, | ||
| 159 | barriers, {}); | ||
| 160 | }); | 201 | }); |
| 161 | } | 202 | } |
| 162 | 203 | ||
| 163 | VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_, | 204 | void BufferCacheRuntime::BindBuffer(VkBuffer buffer, u32 offset, u32 size) { |
| 164 | Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | 205 | update_descriptor_queue.AddBuffer(buffer, offset, size); |
| 165 | const Device& device_, MemoryAllocator& memory_allocator_, | 206 | } |
| 166 | VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_, | ||
| 167 | StagingBufferPool& staging_pool_) | ||
| 168 | : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_, | ||
| 169 | cpu_memory_, stream_buffer_}, | ||
| 170 | device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||
| 171 | staging_pool{staging_pool_} {} | ||
| 172 | 207 | ||
| 173 | VKBufferCache::~VKBufferCache() = default; | 208 | void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { |
| 209 | if (num_indices <= current_num_indices) { | ||
| 210 | return; | ||
| 211 | } | ||
| 212 | if (wait_for_idle) { | ||
| 213 | scheduler.Finish(); | ||
| 214 | } | ||
| 215 | current_num_indices = num_indices; | ||
| 216 | quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); | ||
| 174 | 217 | ||
| 175 | std::shared_ptr<Buffer> VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { | 218 | const u32 num_quads = num_indices / 4; |
| 176 | return std::make_shared<Buffer>(device, memory_allocator, scheduler, staging_pool, cpu_addr, | 219 | const u32 num_triangle_indices = num_quads * 6; |
| 177 | size); | 220 | const u32 num_first_offset_copies = 4; |
| 221 | const size_t bytes_per_index = BytesPerIndex(quad_array_lut_index_type); | ||
| 222 | const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | ||
| 223 | quad_array_lut = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 224 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 225 | .pNext = nullptr, | ||
| 226 | .flags = 0, | ||
| 227 | .size = size_bytes, | ||
| 228 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 229 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 230 | .queueFamilyIndexCount = 0, | ||
| 231 | .pQueueFamilyIndices = nullptr, | ||
| 232 | }); | ||
| 233 | if (device.HasDebuggingToolAttached()) { | ||
| 234 | quad_array_lut.SetObjectNameEXT("Quad LUT"); | ||
| 235 | } | ||
| 236 | quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); | ||
| 237 | |||
| 238 | const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||
| 239 | u8* staging_data = staging.mapped_span.data(); | ||
| 240 | const size_t quad_size = bytes_per_index * 6; | ||
| 241 | for (u32 first = 0; first < num_first_offset_copies; ++first) { | ||
| 242 | for (u32 quad = 0; quad < num_quads; ++quad) { | ||
| 243 | switch (quad_array_lut_index_type) { | ||
| 244 | case VK_INDEX_TYPE_UINT8_EXT: | ||
| 245 | std::memcpy(staging_data, MakeQuadIndices<u8>(quad, first).data(), quad_size); | ||
| 246 | break; | ||
| 247 | case VK_INDEX_TYPE_UINT16: | ||
| 248 | std::memcpy(staging_data, MakeQuadIndices<u16>(quad, first).data(), quad_size); | ||
| 249 | break; | ||
| 250 | case VK_INDEX_TYPE_UINT32: | ||
| 251 | std::memcpy(staging_data, MakeQuadIndices<u32>(quad, first).data(), quad_size); | ||
| 252 | break; | ||
| 253 | default: | ||
| 254 | UNREACHABLE(); | ||
| 255 | break; | ||
| 256 | } | ||
| 257 | staging_data += quad_size; | ||
| 258 | } | ||
| 259 | } | ||
| 260 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 261 | scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, | ||
| 262 | dst_buffer = *quad_array_lut, size_bytes](vk::CommandBuffer cmdbuf) { | ||
| 263 | const VkBufferCopy copy{ | ||
| 264 | .srcOffset = src_offset, | ||
| 265 | .dstOffset = 0, | ||
| 266 | .size = size_bytes, | ||
| 267 | }; | ||
| 268 | const VkBufferMemoryBarrier write_barrier{ | ||
| 269 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||
| 270 | .pNext = nullptr, | ||
| 271 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||
| 272 | .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||
| 273 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 274 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 275 | .buffer = dst_buffer, | ||
| 276 | .offset = 0, | ||
| 277 | .size = size_bytes, | ||
| 278 | }; | ||
| 279 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||
| 280 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | ||
| 281 | 0, write_barrier); | ||
| 282 | }); | ||
| 178 | } | 283 | } |
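ReserveQuadArrayLUT relies on two helpers, MakeQuadIndices and IndexTypeFromNumElements, that are defined earlier in vk_buffer_cache.cpp and therefore do not appear in this hunk. A plausible sketch of their behaviour, inferred from how they are used here (treat the exact bodies as assumptions):

    // Sketch only; the real definitions live above this hunk.
    template <typename T>
    std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
        std::array<T, 6> indices{0, 1, 2, 0, 2, 3};  // two triangles per quad
        for (T& index : indices) {
            index = static_cast<T>(first + index + quad * 4);  // shift into this quad's vertices
        }
        return indices;
    }

    VkIndexType IndexTypeFromNumElements(const Device& device, u32 num_elements) {
        // Pick the smallest index type able to address every generated index.
        if (num_elements <= 0xff && device.IsExtIndexTypeUint8Supported()) {
            return VK_INDEX_TYPE_UINT8_EXT;
        }
        if (num_elements <= 0xffff) {
            return VK_INDEX_TYPE_UINT16;
        }
        return VK_INDEX_TYPE_UINT32;
    }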
| 179 | 284 | ||
| 180 | VKBufferCache::BufferInfo VKBufferCache::GetEmptyBuffer(std::size_t size) { | 285 | void BufferCacheRuntime::ReserveNullIndexBuffer() { |
| 181 | size = std::max(size, std::size_t(4)); | 286 | if (null_index_buffer) { |
| 182 | const auto& empty = staging_pool.Request(size, MemoryUsage::DeviceLocal); | 287 | return; |
| 288 | } | ||
| 289 | null_index_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||
| 290 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 291 | .pNext = nullptr, | ||
| 292 | .flags = 0, | ||
| 293 | .size = 4, | ||
| 294 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 295 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 296 | .queueFamilyIndexCount = 0, | ||
| 297 | .pQueueFamilyIndices = nullptr, | ||
| 298 | }); | ||
| 299 | if (device.HasDebuggingToolAttached()) { | ||
| 300 | null_index_buffer.SetObjectNameEXT("Null index buffer"); | ||
| 301 | } | ||
| 302 | null_index_buffer_commit = memory_allocator.Commit(null_index_buffer, MemoryUsage::DeviceLocal); | ||
| 303 | |||
| 183 | scheduler.RequestOutsideRenderPassOperationContext(); | 304 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 184 | scheduler.Record([size, buffer = empty.buffer](vk::CommandBuffer cmdbuf) { | 305 | scheduler.Record([buffer = *null_index_buffer](vk::CommandBuffer cmdbuf) { |
| 185 | cmdbuf.FillBuffer(buffer, 0, size, 0); | 306 | cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0); |
| 186 | }); | 307 | }); |
| 187 | return {empty.buffer, 0, 0}; | ||
| 188 | } | 308 | } |
| 189 | 309 | ||
| 190 | } // namespace Vulkan | 310 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 41d577510..041e6515c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -4,69 +4,124 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <memory> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/buffer_cache/buffer_cache.h" | 7 | #include "video_core/buffer_cache/buffer_cache.h" |
| 8 | #include "video_core/engines/maxwell_3d.h" | ||
| 9 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 11 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 12 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 13 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 11 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 13 | ||
| 16 | namespace Vulkan { | 14 | namespace Vulkan { |
| 17 | 15 | ||
| 18 | class Device; | 16 | class Device; |
| 17 | class VKDescriptorPool; | ||
| 19 | class VKScheduler; | 18 | class VKScheduler; |
| 19 | class VKUpdateDescriptorQueue; | ||
| 20 | 20 | ||
| 21 | class Buffer final : public VideoCommon::BufferBlock { | 21 | class BufferCacheRuntime; |
| 22 | public: | ||
| 23 | explicit Buffer(const Device& device, MemoryAllocator& memory_allocator, VKScheduler& scheduler, | ||
| 24 | StagingBufferPool& staging_pool, VAddr cpu_addr_, std::size_t size_); | ||
| 25 | ~Buffer(); | ||
| 26 | |||
| 27 | void Upload(std::size_t offset, std::size_t data_size, const u8* data); | ||
| 28 | 22 | ||
| 29 | void Download(std::size_t offset, std::size_t data_size, u8* data); | 23 | class Buffer : public VideoCommon::BufferBase<VideoCore::RasterizerInterface> { |
| 30 | 24 | public: | |
| 31 | void CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst_offset, | 25 | explicit Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params); |
| 32 | std::size_t copy_size); | 26 | explicit Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 27 | VAddr cpu_addr_, u64 size_bytes_); | ||
| 33 | 28 | ||
| 34 | VkBuffer Handle() const { | 29 | [[nodiscard]] VkBuffer Handle() const noexcept { |
| 35 | return *buffer; | 30 | return *buffer; |
| 36 | } | 31 | } |
| 37 | 32 | ||
| 38 | u64 Address() const { | 33 | operator VkBuffer() const noexcept { |
| 39 | return 0; | 34 | return *buffer; |
| 40 | } | 35 | } |
| 41 | 36 | ||
| 42 | private: | 37 | private: |
| 43 | const Device& device; | ||
| 44 | VKScheduler& scheduler; | ||
| 45 | StagingBufferPool& staging_pool; | ||
| 46 | |||
| 47 | vk::Buffer buffer; | 38 | vk::Buffer buffer; |
| 48 | MemoryCommit commit; | 39 | MemoryCommit commit; |
| 49 | }; | 40 | }; |
| 50 | 41 | ||
| 51 | class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { | 42 | class BufferCacheRuntime { |
| 43 | friend Buffer; | ||
| 44 | |||
| 45 | using PrimitiveTopology = Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology; | ||
| 46 | using IndexFormat = Tegra::Engines::Maxwell3D::Regs::IndexFormat; | ||
| 47 | |||
| 52 | public: | 48 | public: |
| 53 | explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, | 49 | explicit BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_manager_, |
| 54 | Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, | 50 | VKScheduler& scheduler_, StagingBufferPool& staging_pool_, |
| 55 | const Device& device, MemoryAllocator& memory_allocator, | 51 | VKUpdateDescriptorQueue& update_descriptor_queue_, |
| 56 | VKScheduler& scheduler, VKStreamBuffer& stream_buffer, | 52 | VKDescriptorPool& descriptor_pool); |
| 57 | StagingBufferPool& staging_pool); | 53 | |
| 58 | ~VKBufferCache(); | 54 | void Finish(); |
| 55 | |||
| 56 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); | ||
| 57 | |||
| 58 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); | ||
| 59 | |||
| 60 | void CopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer, | ||
| 61 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 62 | |||
| 63 | void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, | ||
| 64 | u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); | ||
| 59 | 65 | ||
| 60 | BufferInfo GetEmptyBuffer(std::size_t size) override; | 66 | void BindQuadArrayIndexBuffer(u32 first, u32 count); |
| 61 | 67 | ||
| 62 | protected: | 68 | void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); |
| 63 | std::shared_ptr<Buffer> CreateBlock(VAddr cpu_addr, std::size_t size) override; | 69 | |
| 70 | void BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size); | ||
| 71 | |||
| 72 | std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage, | ||
| 73 | [[maybe_unused]] u32 binding_index, u32 size) { | ||
| 74 | const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); | ||
| 75 | BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size); | ||
| 76 | return ref.mapped_span; | ||
| 77 | } | ||
| 78 | |||
| 79 | void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) { | ||
| 80 | BindBuffer(buffer, offset, size); | ||
| 81 | } | ||
| 82 | |||
| 83 | void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size, | ||
| 84 | [[maybe_unused]] bool is_written) { | ||
| 85 | BindBuffer(buffer, offset, size); | ||
| 86 | } | ||
| 64 | 87 | ||
| 65 | private: | 88 | private: |
| 89 | void BindBuffer(VkBuffer buffer, u32 offset, u32 size); | ||
| 90 | |||
| 91 | void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); | ||
| 92 | |||
| 93 | void ReserveNullIndexBuffer(); | ||
| 94 | |||
| 66 | const Device& device; | 95 | const Device& device; |
| 67 | MemoryAllocator& memory_allocator; | 96 | MemoryAllocator& memory_allocator; |
| 68 | VKScheduler& scheduler; | 97 | VKScheduler& scheduler; |
| 69 | StagingBufferPool& staging_pool; | 98 | StagingBufferPool& staging_pool; |
| 99 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 100 | |||
| 101 | vk::Buffer quad_array_lut; | ||
| 102 | MemoryCommit quad_array_lut_commit; | ||
| 103 | VkIndexType quad_array_lut_index_type{}; | ||
| 104 | u32 current_num_indices = 0; | ||
| 105 | |||
| 106 | vk::Buffer null_index_buffer; | ||
| 107 | MemoryCommit null_index_buffer_commit; | ||
| 108 | |||
| 109 | Uint8Pass uint8_pass; | ||
| 110 | QuadIndexedPass quad_index_pass; | ||
| 70 | }; | 111 | }; |
| 71 | 112 | ||
| 113 | struct BufferCacheParams { | ||
| 114 | using Runtime = Vulkan::BufferCacheRuntime; | ||
| 115 | using Buffer = Vulkan::Buffer; | ||
| 116 | |||
| 117 | static constexpr bool IS_OPENGL = false; | ||
| 118 | static constexpr bool HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS = false; | ||
| 119 | static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; | ||
| 120 | static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | ||
| 121 | static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | ||
| 122 | static constexpr bool USE_MEMORY_MAPS = true; | ||
| 123 | }; | ||
| 124 | |||
| 125 | using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||
| 126 | |||
| 72 | } // namespace Vulkan | 127 | } // namespace Vulkan |
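BufferCacheParams is the trait bundle the shared VideoCommon::BufferCache template is instantiated with; USE_MEMORY_MAPS = true selects the staging-map upload path served by BindMappedUniformBuffer. The snippet below is a purely hypothetical illustration of how such a trait could be branched on inside a generic cache; the function and member names are assumptions, not lines from video_core/buffer_cache/buffer_cache.h.

    #include <cstring>
    #include <span>

    // Hypothetical trait-driven dispatch; names are illustrative only.
    template <class P>
    void UploadUniform(typename P::Runtime& runtime, size_t stage, u32 binding,
                       std::span<const u8> data) {
        if constexpr (P::USE_MEMORY_MAPS) {
            // Vulkan-style path: stream the bytes through a mapped staging allocation.
            const std::span<u8> staging =
                runtime.BindMappedUniformBuffer(stage, binding, static_cast<u32>(data.size()));
            std::memcpy(staging.data(), data.data(), data.size());
        } else {
            // Backends with persistent uniform bindings would bind a host buffer directly here.
        }
    }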
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 5eb6a54be..2f9a7b028 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp | |||
| @@ -10,7 +10,7 @@ | |||
| 10 | #include "common/alignment.h" | 10 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 11 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "video_core/host_shaders/vulkan_quad_array_comp_spv.h" | 13 | #include "common/div_ceil.h" |
| 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" | 14 | #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" |
| 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" | 15 | #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" |
| 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 16 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| @@ -22,30 +22,7 @@ | |||
| 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 22 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 23 | 23 | ||
| 24 | namespace Vulkan { | 24 | namespace Vulkan { |
| 25 | |||
| 26 | namespace { | 25 | namespace { |
| 27 | |||
| 28 | VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { | ||
| 29 | return { | ||
| 30 | .binding = 0, | ||
| 31 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 32 | .descriptorCount = 1, | ||
| 33 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | ||
| 34 | .pImmutableSamplers = nullptr, | ||
| 35 | }; | ||
| 36 | } | ||
| 37 | |||
| 38 | VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEntry() { | ||
| 39 | return { | ||
| 40 | .dstBinding = 0, | ||
| 41 | .dstArrayElement = 0, | ||
| 42 | .descriptorCount = 1, | ||
| 43 | .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, | ||
| 44 | .offset = 0, | ||
| 45 | .stride = sizeof(DescriptorUpdateEntry), | ||
| 46 | }; | ||
| 47 | } | ||
| 48 | |||
| 49 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | 26 | VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { |
| 50 | return { | 27 | return { |
| 51 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, | 28 | .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, |
| @@ -162,55 +139,6 @@ VkDescriptorSet VKComputePass::CommitDescriptorSet( | |||
| 162 | return set; | 139 | return set; |
| 163 | } | 140 | } |
| 164 | 141 | ||
| 165 | QuadArrayPass::QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||
| 166 | VKDescriptorPool& descriptor_pool_, | ||
| 167 | StagingBufferPool& staging_buffer_pool_, | ||
| 168 | VKUpdateDescriptorQueue& update_descriptor_queue_) | ||
| 169 | : VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(), | ||
| 170 | BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | ||
| 171 | BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV), | ||
| 172 | scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||
| 173 | update_descriptor_queue{update_descriptor_queue_} {} | ||
| 174 | |||
| 175 | QuadArrayPass::~QuadArrayPass() = default; | ||
| 176 | |||
| 177 | std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | ||
| 178 | const u32 num_triangle_vertices = (num_vertices / 4) * 6; | ||
| 179 | const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | ||
| 180 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||
| 181 | |||
| 182 | update_descriptor_queue.Acquire(); | ||
| 183 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | ||
| 184 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | ||
| 185 | |||
| 186 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 187 | |||
| 188 | ASSERT(num_vertices % 4 == 0); | ||
| 189 | const u32 num_quads = num_vertices / 4; | ||
| 190 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, | ||
| 191 | num_quads, first, set](vk::CommandBuffer cmdbuf) { | ||
| 192 | constexpr u32 dispatch_size = 1024; | ||
| 193 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||
| 194 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||
| 195 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(first), &first); | ||
| 196 | cmdbuf.Dispatch(Common::AlignUp(num_quads, dispatch_size) / dispatch_size, 1, 1); | ||
| 197 | |||
| 198 | VkBufferMemoryBarrier barrier; | ||
| 199 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 200 | barrier.pNext = nullptr; | ||
| 201 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 202 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 203 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 204 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 205 | barrier.buffer = buffer; | ||
| 206 | barrier.offset = 0; | ||
| 207 | barrier.size = static_cast<VkDeviceSize>(num_quads) * 6 * sizeof(u32); | ||
| 208 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||
| 209 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); | ||
| 210 | }); | ||
| 211 | return {staging_ref.buffer, 0}; | ||
| 212 | } | ||
| 213 | |||
| 214 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | 142 | Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, |
| 215 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, | 143 | VKDescriptorPool& descriptor_pool, StagingBufferPool& staging_buffer_pool_, |
| 216 | VKUpdateDescriptorQueue& update_descriptor_queue_) | 144 | VKUpdateDescriptorQueue& update_descriptor_queue_) |
| @@ -221,38 +149,33 @@ Uint8Pass::Uint8Pass(const Device& device, VKScheduler& scheduler_, | |||
| 221 | 149 | ||
| 222 | Uint8Pass::~Uint8Pass() = default; | 150 | Uint8Pass::~Uint8Pass() = default; |
| 223 | 151 | ||
| 224 | std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, | 152 | std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, |
| 225 | u64 src_offset) { | 153 | u32 src_offset) { |
| 226 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); | 154 | const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); |
| 227 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 155 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 228 | 156 | ||
| 229 | update_descriptor_queue.Acquire(); | 157 | update_descriptor_queue.Acquire(); |
| 230 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); | 158 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); |
| 231 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | 159 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 232 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 160 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 233 | 161 | ||
| 234 | scheduler.RequestOutsideRenderPassOperationContext(); | 162 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 235 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | 163 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 236 | num_vertices](vk::CommandBuffer cmdbuf) { | 164 | num_vertices](vk::CommandBuffer cmdbuf) { |
| 237 | constexpr u32 dispatch_size = 1024; | 165 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 166 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 167 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 168 | .pNext = nullptr, | ||
| 169 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 170 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||
| 171 | }; | ||
| 238 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 172 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 239 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 173 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 240 | cmdbuf.Dispatch(Common::AlignUp(num_vertices, dispatch_size) / dispatch_size, 1, 1); | 174 | cmdbuf.Dispatch(Common::DivCeil(num_vertices, DISPATCH_SIZE), 1, 1); |
| 241 | |||
| 242 | VkBufferMemoryBarrier barrier; | ||
| 243 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 244 | barrier.pNext = nullptr; | ||
| 245 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 246 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 247 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 248 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 249 | barrier.buffer = buffer; | ||
| 250 | barrier.offset = 0; | ||
| 251 | barrier.size = static_cast<VkDeviceSize>(num_vertices * sizeof(u16)); | ||
| 252 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 175 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 253 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 176 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| 254 | }); | 177 | }); |
| 255 | return {staging_ref.buffer, 0}; | 178 | return {staging.buffer, staging.offset}; |
| 256 | } | 179 | } |
| 257 | 180 | ||
| 258 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | 181 | QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, |
| @@ -267,9 +190,9 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, VKScheduler& scheduler_, | |||
| 267 | 190 | ||
| 268 | QuadIndexedPass::~QuadIndexedPass() = default; | 191 | QuadIndexedPass::~QuadIndexedPass() = default; |
| 269 | 192 | ||
| 270 | std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | 193 | std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( |
| 271 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | 194 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, |
| 272 | VkBuffer src_buffer, u64 src_offset) { | 195 | VkBuffer src_buffer, u32 src_offset) { |
| 273 | const u32 index_shift = [index_format] { | 196 | const u32 index_shift = [index_format] { |
| 274 | switch (index_format) { | 197 | switch (index_format) { |
| 275 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | 198 | case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: |
| @@ -286,38 +209,33 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | |||
| 286 | const u32 num_tri_vertices = (num_vertices / 4) * 6; | 209 | const u32 num_tri_vertices = (num_vertices / 4) * 6; |
| 287 | 210 | ||
| 288 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); | 211 | const std::size_t staging_size = num_tri_vertices * sizeof(u32); |
| 289 | const auto staging_ref = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | 212 | const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); |
| 290 | 213 | ||
| 291 | update_descriptor_queue.Acquire(); | 214 | update_descriptor_queue.Acquire(); |
| 292 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | 215 | update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); |
| 293 | update_descriptor_queue.AddBuffer(staging_ref.buffer, 0, staging_size); | 216 | update_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size); |
| 294 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); | 217 | const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); |
| 295 | 218 | ||
| 296 | scheduler.RequestOutsideRenderPassOperationContext(); | 219 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 297 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging_ref.buffer, set, | 220 | scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = staging.buffer, set, |
| 298 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | 221 | num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { |
| 299 | static constexpr u32 dispatch_size = 1024; | 222 | static constexpr u32 DISPATCH_SIZE = 1024; |
| 223 | static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||
| 224 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 225 | .pNext = nullptr, | ||
| 226 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||
| 227 | .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, | ||
| 228 | }; | ||
| 300 | const std::array push_constants = {base_vertex, index_shift}; | 229 | const std::array push_constants = {base_vertex, index_shift}; |
| 301 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | 230 | cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); |
| 302 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | 231 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); |
| 303 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | 232 | cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), |
| 304 | &push_constants); | 233 | &push_constants); |
| 305 | cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); | 234 | cmdbuf.Dispatch(Common::DivCeil(num_tri_vertices, DISPATCH_SIZE), 1, 1); |
| 306 | |||
| 307 | VkBufferMemoryBarrier barrier; | ||
| 308 | barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||
| 309 | barrier.pNext = nullptr; | ||
| 310 | barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||
| 311 | barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||
| 312 | barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 313 | barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||
| 314 | barrier.buffer = buffer; | ||
| 315 | barrier.offset = 0; | ||
| 316 | barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); | ||
| 317 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | 235 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, |
| 318 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | 236 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, WRITE_BARRIER); |
| 319 | }); | 237 | }); |
| 320 | return {staging_ref.buffer, 0}; | 238 | return {staging.buffer, staging.offset}; |
| 321 | } | 239 | } |
| 322 | 240 | ||
| 323 | } // namespace Vulkan | 241 | } // namespace Vulkan |
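Two details of the rewritten passes are worth noting. First, the workgroup count now comes from Common::DivCeil instead of the AlignUp-then-divide idiom; the two are equivalent for these inputs. Second, both passes now return and bind staging.offset instead of a hard-coded 0, reflecting that staging allocations can live at non-zero offsets within a pooled buffer. A minimal sketch of a ceiling-division helper consistent with the call sites above (the real one is in common/div_ceil.h, so the exact signature is an assumption):

    // Sketch of a ceiling-division helper equivalent to AlignUp(n, d) / d.
    template <typename N, typename D>
    constexpr N DivCeil(N number, D divisor) {
        return static_cast<N>((number + divisor - 1) / divisor);
    }

    static_assert(DivCeil(1024u, 1024u) == 1);  // exact multiple: one workgroup
    static_assert(DivCeil(1025u, 1024u) == 2);  // any remainder rounds up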
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index f5c6f5f17..17d781d99 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h | |||
| @@ -41,22 +41,6 @@ private: | |||
| 41 | vk::ShaderModule module; | 41 | vk::ShaderModule module; |
| 42 | }; | 42 | }; |
| 43 | 43 | ||
| 44 | class QuadArrayPass final : public VKComputePass { | ||
| 45 | public: | ||
| 46 | explicit QuadArrayPass(const Device& device_, VKScheduler& scheduler_, | ||
| 47 | VKDescriptorPool& descriptor_pool_, | ||
| 48 | StagingBufferPool& staging_buffer_pool_, | ||
| 49 | VKUpdateDescriptorQueue& update_descriptor_queue_); | ||
| 50 | ~QuadArrayPass(); | ||
| 51 | |||
| 52 | std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); | ||
| 53 | |||
| 54 | private: | ||
| 55 | VKScheduler& scheduler; | ||
| 56 | StagingBufferPool& staging_buffer_pool; | ||
| 57 | VKUpdateDescriptorQueue& update_descriptor_queue; | ||
| 58 | }; | ||
| 59 | |||
| 60 | class Uint8Pass final : public VKComputePass { | 44 | class Uint8Pass final : public VKComputePass { |
| 61 | public: | 45 | public: |
| 62 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, | 46 | explicit Uint8Pass(const Device& device_, VKScheduler& scheduler_, |
| @@ -64,7 +48,10 @@ public: | |||
| 64 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 48 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 65 | ~Uint8Pass(); | 49 | ~Uint8Pass(); |
| 66 | 50 | ||
| 67 | std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); | 51 | /// Assembles uint8 indices into a uint16 index buffer |
| 52 | /// Returns a pair of the staging buffer and the offset where the assembled data begins |
| 53 | std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, VkBuffer src_buffer, | ||
| 54 | u32 src_offset); | ||
| 68 | 55 | ||
| 69 | private: | 56 | private: |
| 70 | VKScheduler& scheduler; | 57 | VKScheduler& scheduler; |
| @@ -80,9 +67,9 @@ public: | |||
| 80 | VKUpdateDescriptorQueue& update_descriptor_queue_); | 67 | VKUpdateDescriptorQueue& update_descriptor_queue_); |
| 81 | ~QuadIndexedPass(); | 68 | ~QuadIndexedPass(); |
| 82 | 69 | ||
| 83 | std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | 70 | std::pair<VkBuffer, VkDeviceSize> Assemble( |
| 84 | u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | 71 | Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, |
| 85 | u64 src_offset); | 72 | u32 base_vertex, VkBuffer src_buffer, u32 src_offset); |
| 86 | 73 | ||
| 87 | private: | 74 | private: |
| 88 | VKScheduler& scheduler; | 75 | VKScheduler& scheduler; |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index 6cd00884d..3bec48d14 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp | |||
| @@ -45,8 +45,8 @@ void InnerFence::Wait() { | |||
| 45 | } | 45 | } |
| 46 | 46 | ||
| 47 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 47 | VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, |
| 48 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | 48 | TextureCache& texture_cache_, BufferCache& buffer_cache_, |
| 49 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 49 | VKQueryCache& query_cache_, const Device& device_, |
| 50 | VKScheduler& scheduler_) | 50 | VKScheduler& scheduler_) |
| 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | 51 | : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, |
| 52 | scheduler{scheduler_} {} | 52 | scheduler{scheduler_} {} |
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 9c5e5aa8f..2f8322d29 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h | |||
| @@ -22,7 +22,6 @@ class RasterizerInterface; | |||
| 22 | namespace Vulkan { | 22 | namespace Vulkan { |
| 23 | 23 | ||
| 24 | class Device; | 24 | class Device; |
| 25 | class VKBufferCache; | ||
| 26 | class VKQueryCache; | 25 | class VKQueryCache; |
| 27 | class VKScheduler; | 26 | class VKScheduler; |
| 28 | 27 | ||
| @@ -45,14 +44,14 @@ private: | |||
| 45 | using Fence = std::shared_ptr<InnerFence>; | 44 | using Fence = std::shared_ptr<InnerFence>; |
| 46 | 45 | ||
| 47 | using GenericFenceManager = | 46 | using GenericFenceManager = |
| 48 | VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>; | 47 | VideoCommon::FenceManager<Fence, TextureCache, BufferCache, VKQueryCache>; |
| 49 | 48 | ||
| 50 | class VKFenceManager final : public GenericFenceManager { | 49 | class VKFenceManager final : public GenericFenceManager { |
| 51 | public: | 50 | public: |
| 52 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, | 51 | explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, |
| 53 | Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_, | 52 | TextureCache& texture_cache, BufferCache& buffer_cache, |
| 54 | VKBufferCache& buffer_cache_, VKQueryCache& query_cache_, | 53 | VKQueryCache& query_cache, const Device& device, |
| 55 | VKScheduler& scheduler_); | 54 | VKScheduler& scheduler); |
| 56 | 55 | ||
| 57 | protected: | 56 | protected: |
| 58 | Fence CreateFence(u32 value, bool is_stubbed) override; | 57 | Fence CreateFence(u32 value, bool is_stubbed) override; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index f0a111829..684d4e3a6 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | #include <mutex> | 8 | #include <mutex> |
| 9 | #include <vector> | 9 | #include <vector> |
| 10 | 10 | ||
| 11 | #include <boost/container/static_vector.hpp> | ||
| 12 | |||
| 13 | #include "common/alignment.h" | 11 | #include "common/alignment.h" |
| 14 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 15 | #include "common/logging/log.h" | 13 | #include "common/logging/log.h" |
| @@ -24,7 +22,6 @@ | |||
| 24 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 22 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 25 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 23 | #include "video_core/renderer_vulkan/renderer_vulkan.h" |
| 26 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 27 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 28 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" | 25 | #include "video_core/renderer_vulkan/vk_compute_pipeline.h" |
| 29 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 30 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" | 27 | #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" |
| @@ -50,15 +47,16 @@ MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(25 | |||
| 50 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); | 47 | MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); |
| 51 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); | 48 | MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); |
| 52 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); | 49 | MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); |
| 53 | MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); | ||
| 54 | MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); | ||
| 55 | MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); | ||
| 56 | MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); | ||
| 57 | MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); | ||
| 58 | MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); | ||
| 59 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); | 50 | MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); |
| 60 | 51 | ||
| 61 | namespace { | 52 | namespace { |
| 53 | struct DrawParams { | ||
| 54 | u32 base_instance; | ||
| 55 | u32 num_instances; | ||
| 56 | u32 base_vertex; | ||
| 57 | u32 num_vertices; | ||
| 58 | bool is_indexed; | ||
| 59 | }; | ||
| 62 | 60 | ||
| 63 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); | 61 | constexpr auto COMPUTE_SHADER_INDEX = static_cast<size_t>(Tegra::Engines::ShaderType::Compute); |
| 64 | 62 | ||
| @@ -67,7 +65,6 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 67 | const float width = src.scale_x * 2.0f; | 65 | const float width = src.scale_x * 2.0f; |
| 68 | const float height = src.scale_y * 2.0f; | 66 | const float height = src.scale_y * 2.0f; |
| 69 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; | 67 | const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f; |
| 70 | |||
| 71 | VkViewport viewport{ | 68 | VkViewport viewport{ |
| 72 | .x = src.translate_x - src.scale_x, | 69 | .x = src.translate_x - src.scale_x, |
| 73 | .y = src.translate_y - src.scale_y, | 70 | .y = src.translate_y - src.scale_y, |
| @@ -76,12 +73,10 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in | |||
| 76 | .minDepth = src.translate_z - src.scale_z * reduce_z, | 73 | .minDepth = src.translate_z - src.scale_z * reduce_z, |
| 77 | .maxDepth = src.translate_z + src.scale_z, | 74 | .maxDepth = src.translate_z + src.scale_z, |
| 78 | }; | 75 | }; |
| 79 | |||
| 80 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { | 76 | if (!device.IsExtDepthRangeUnrestrictedSupported()) { |
| 81 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); | 77 | viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f); |
| 82 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); | 78 | viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f); |
| 83 | } | 79 | } |
| 84 | |||
| 85 | return viewport; | 80 | return viewport; |
| 86 | } | 81 | } |
| 87 | 82 | ||
| @@ -146,13 +141,6 @@ TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const | |||
| 146 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); | 141 | return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index); |
| 147 | } | 142 | } |
| 148 | 143 | ||
| 149 | template <size_t N> | ||
| 150 | std::array<VkDeviceSize, N> ExpandStrides(const std::array<u16, N>& strides) { | ||
| 151 | std::array<VkDeviceSize, N> expanded; | ||
| 152 | std::copy(strides.begin(), strides.end(), expanded.begin()); | ||
| 153 | return expanded; | ||
| 154 | } | ||
| 155 | |||
| 156 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { | 144 | ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) { |
| 157 | if (entry.is_buffer) { | 145 | if (entry.is_buffer) { |
| 158 | return ImageViewType::e2D; | 146 | return ImageViewType::e2D; |
| @@ -221,190 +209,25 @@ void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_ca | |||
| 221 | } | 209 | } |
| 222 | } | 210 | } |
| 223 | 211 | ||
| 224 | } // Anonymous namespace | 212 | DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, |
| 225 | 213 | bool is_indexed) { | |
| 226 | class BufferBindings final { | 214 | DrawParams params{ |
| 227 | public: | 215 | .base_instance = regs.vb_base_instance, |
| 228 | void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size, u32 stride) { | 216 | .num_instances = is_instanced ? num_instances : 1, |
| 229 | vertex.buffers[vertex.num_buffers] = buffer; | 217 | .base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first, |
| 230 | vertex.offsets[vertex.num_buffers] = offset; | 218 | .num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count, |
| 231 | vertex.sizes[vertex.num_buffers] = size; | 219 | .is_indexed = is_indexed, |
| 232 | vertex.strides[vertex.num_buffers] = static_cast<u16>(stride); | 220 | }; |
| 233 | ++vertex.num_buffers; | 221 | if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { |
| 234 | } | 222 | // 6 triangle vertices per quad, base vertex is part of the index |
| 235 | 223 | // See BindQuadArrayIndexBuffer for more details | |
| 236 | void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { | 224 | params.num_vertices = (params.num_vertices / 4) * 6; |
| 237 | index.buffer = buffer; | 225 | params.base_vertex = 0; |
| 238 | index.offset = offset; | 226 | params.is_indexed = true; |
| 239 | index.type = type; | ||
| 240 | } | ||
| 241 | |||
| 242 | void Bind(const Device& device, VKScheduler& scheduler) const { | ||
| 243 | // Use this large switch case to avoid dispatching more memory in the record lambda than | ||
| 244 | // what we need. It looks horrible, but it's the best we can do on standard C++. | ||
| 245 | switch (vertex.num_buffers) { | ||
| 246 | case 0: | ||
| 247 | return BindStatic<0>(device, scheduler); | ||
| 248 | case 1: | ||
| 249 | return BindStatic<1>(device, scheduler); | ||
| 250 | case 2: | ||
| 251 | return BindStatic<2>(device, scheduler); | ||
| 252 | case 3: | ||
| 253 | return BindStatic<3>(device, scheduler); | ||
| 254 | case 4: | ||
| 255 | return BindStatic<4>(device, scheduler); | ||
| 256 | case 5: | ||
| 257 | return BindStatic<5>(device, scheduler); | ||
| 258 | case 6: | ||
| 259 | return BindStatic<6>(device, scheduler); | ||
| 260 | case 7: | ||
| 261 | return BindStatic<7>(device, scheduler); | ||
| 262 | case 8: | ||
| 263 | return BindStatic<8>(device, scheduler); | ||
| 264 | case 9: | ||
| 265 | return BindStatic<9>(device, scheduler); | ||
| 266 | case 10: | ||
| 267 | return BindStatic<10>(device, scheduler); | ||
| 268 | case 11: | ||
| 269 | return BindStatic<11>(device, scheduler); | ||
| 270 | case 12: | ||
| 271 | return BindStatic<12>(device, scheduler); | ||
| 272 | case 13: | ||
| 273 | return BindStatic<13>(device, scheduler); | ||
| 274 | case 14: | ||
| 275 | return BindStatic<14>(device, scheduler); | ||
| 276 | case 15: | ||
| 277 | return BindStatic<15>(device, scheduler); | ||
| 278 | case 16: | ||
| 279 | return BindStatic<16>(device, scheduler); | ||
| 280 | case 17: | ||
| 281 | return BindStatic<17>(device, scheduler); | ||
| 282 | case 18: | ||
| 283 | return BindStatic<18>(device, scheduler); | ||
| 284 | case 19: | ||
| 285 | return BindStatic<19>(device, scheduler); | ||
| 286 | case 20: | ||
| 287 | return BindStatic<20>(device, scheduler); | ||
| 288 | case 21: | ||
| 289 | return BindStatic<21>(device, scheduler); | ||
| 290 | case 22: | ||
| 291 | return BindStatic<22>(device, scheduler); | ||
| 292 | case 23: | ||
| 293 | return BindStatic<23>(device, scheduler); | ||
| 294 | case 24: | ||
| 295 | return BindStatic<24>(device, scheduler); | ||
| 296 | case 25: | ||
| 297 | return BindStatic<25>(device, scheduler); | ||
| 298 | case 26: | ||
| 299 | return BindStatic<26>(device, scheduler); | ||
| 300 | case 27: | ||
| 301 | return BindStatic<27>(device, scheduler); | ||
| 302 | case 28: | ||
| 303 | return BindStatic<28>(device, scheduler); | ||
| 304 | case 29: | ||
| 305 | return BindStatic<29>(device, scheduler); | ||
| 306 | case 30: | ||
| 307 | return BindStatic<30>(device, scheduler); | ||
| 308 | case 31: | ||
| 309 | return BindStatic<31>(device, scheduler); | ||
| 310 | case 32: | ||
| 311 | return BindStatic<32>(device, scheduler); | ||
| 312 | } | ||
| 313 | UNREACHABLE(); | ||
| 314 | } | ||
| 315 | |||
| 316 | private: | ||
| 317 | // Some of these fields are intentionally left uninitialized to avoid initializing them twice. | ||
| 318 | struct { | ||
| 319 | size_t num_buffers = 0; | ||
| 320 | std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; | ||
| 321 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; | ||
| 322 | std::array<VkDeviceSize, Maxwell::NumVertexArrays> sizes; | ||
| 323 | std::array<u16, Maxwell::NumVertexArrays> strides; | ||
| 324 | } vertex; | ||
| 325 | |||
| 326 | struct { | ||
| 327 | VkBuffer buffer = nullptr; | ||
| 328 | VkDeviceSize offset; | ||
| 329 | VkIndexType type; | ||
| 330 | } index; | ||
| 331 | |||
| 332 | template <size_t N> | ||
| 333 | void BindStatic(const Device& device, VKScheduler& scheduler) const { | ||
| 334 | if (device.IsExtExtendedDynamicStateSupported()) { | ||
| 335 | if (index.buffer) { | ||
| 336 | BindStatic<N, true, true>(scheduler); | ||
| 337 | } else { | ||
| 338 | BindStatic<N, false, true>(scheduler); | ||
| 339 | } | ||
| 340 | } else { | ||
| 341 | if (index.buffer) { | ||
| 342 | BindStatic<N, true, false>(scheduler); | ||
| 343 | } else { | ||
| 344 | BindStatic<N, false, false>(scheduler); | ||
| 345 | } | ||
| 346 | } | ||
| 347 | } | ||
| 348 | |||
| 349 | template <size_t N, bool is_indexed, bool has_extended_dynamic_state> | ||
| 350 | void BindStatic(VKScheduler& scheduler) const { | ||
| 351 | static_assert(N <= Maxwell::NumVertexArrays); | ||
| 352 | if constexpr (N == 0) { | ||
| 353 | return; | ||
| 354 | } | ||
| 355 | |||
| 356 | std::array<VkBuffer, N> buffers; | ||
| 357 | std::array<VkDeviceSize, N> offsets; | ||
| 358 | std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); | ||
| 359 | std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); | ||
| 360 | |||
| 361 | if constexpr (has_extended_dynamic_state) { | ||
| 362 | // With extended dynamic states we can specify the length and stride of a vertex buffer | ||
| 363 | std::array<VkDeviceSize, N> sizes; | ||
| 364 | std::array<u16, N> strides; | ||
| 365 | std::copy(vertex.sizes.begin(), vertex.sizes.begin() + N, sizes.begin()); | ||
| 366 | std::copy(vertex.strides.begin(), vertex.strides.begin() + N, strides.begin()); | ||
| 367 | |||
| 368 | if constexpr (is_indexed) { | ||
| 369 | scheduler.Record( | ||
| 370 | [buffers, offsets, sizes, strides, index = index](vk::CommandBuffer cmdbuf) { | ||
| 371 | cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||
| 372 | cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||
| 373 | offsets.data(), sizes.data(), | ||
| 374 | ExpandStrides(strides).data()); | ||
| 375 | }); | ||
| 376 | } else { | ||
| 377 | scheduler.Record([buffers, offsets, sizes, strides](vk::CommandBuffer cmdbuf) { | ||
| 378 | cmdbuf.BindVertexBuffers2EXT(0, static_cast<u32>(N), buffers.data(), | ||
| 379 | offsets.data(), sizes.data(), | ||
| 380 | ExpandStrides(strides).data()); | ||
| 381 | }); | ||
| 382 | } | ||
| 383 | return; | ||
| 384 | } | ||
| 385 | |||
| 386 | if constexpr (is_indexed) { | ||
| 387 | // Indexed draw | ||
| 388 | scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { | ||
| 389 | cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); | ||
| 390 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||
| 391 | }); | ||
| 392 | } else { | ||
| 393 | // Array draw | ||
| 394 | scheduler.Record([buffers, offsets](vk::CommandBuffer cmdbuf) { | ||
| 395 | cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); | ||
| 396 | }); | ||
| 397 | } | ||
| 398 | } | ||
| 399 | }; | ||
| 400 | |||
| 401 | void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { | ||
| 402 | if (is_indexed) { | ||
| 403 | cmdbuf.DrawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance); | ||
| 404 | } else { | ||
| 405 | cmdbuf.Draw(num_vertices, num_instances, base_vertex, base_instance); | ||
| 406 | } | 227 | } |
| 228 | return params; | ||
| 407 | } | 229 | } |
| 230 | } // Anonymous namespace | ||
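A quick worked example for the quad path in MakeDrawParams; the register values are assumed for illustration only.

    // Assumed inputs: topology = Quads, non-indexed, non-instanced draw,
    // regs.vertex_buffer.count = 8, regs.vertex_buffer.first = 4.
    // MakeDrawParams(regs, 1, false, false) would then produce:
    //   num_vertices = (8 / 4) * 6 = 12  // two quads expand to four triangles
    //   base_vertex  = 0                 // the 'first' offset is folded into the quad LUT
    //   is_indexed   = true              // the draw goes through BindQuadArrayIndexBuffer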
| 408 | 231 | ||
| 409 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | 232 | RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |
| 410 | Tegra::MemoryManager& gpu_memory_, | 233 | Tegra::MemoryManager& gpu_memory_, |
| @@ -414,21 +237,19 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
| 414 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, | 237 | : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, |
| 415 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, | 238 | gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()}, |
| 416 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, | 239 | screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_}, |
| 417 | state_tracker{state_tracker_}, scheduler{scheduler_}, stream_buffer(device, scheduler), | 240 | state_tracker{state_tracker_}, scheduler{scheduler_}, |
| 418 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), | 241 | staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), |
| 419 | update_descriptor_queue(device, scheduler), | 242 | update_descriptor_queue(device, scheduler), |
| 420 | blit_image(device, scheduler, state_tracker, descriptor_pool), | 243 | blit_image(device, scheduler, state_tracker, descriptor_pool), |
| 421 | quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 422 | quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 423 | uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||
| 424 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, | 244 | texture_cache_runtime{device, scheduler, memory_allocator, staging_pool, blit_image}, |
| 425 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | 245 | texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), |
| 246 | buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, | ||
| 247 | update_descriptor_queue, descriptor_pool), | ||
| 248 | buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||
| 426 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | 249 | pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |
| 427 | descriptor_pool, update_descriptor_queue), | 250 | descriptor_pool, update_descriptor_queue), |
| 428 | buffer_cache(*this, gpu_memory, cpu_memory_, device, memory_allocator, scheduler, | ||
| 429 | stream_buffer, staging_pool), | ||
| 430 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, | 251 | query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, |
| 431 | fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, scheduler), | 252 | fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |
| 432 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | 253 | wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |
| 433 | scheduler.SetQueryCache(query_cache); | 254 | scheduler.SetQueryCache(query_cache); |
| 434 | if (device.UseAsynchronousShaders()) { | 255 | if (device.UseAsynchronousShaders()) { |
| @@ -449,22 +270,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 449 | GraphicsPipelineCacheKey key; | 270 | GraphicsPipelineCacheKey key; |
| 450 | key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); | 271 | key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); |
| 451 | 272 | ||
| 452 | buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); | 273 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 453 | |||
| 454 | BufferBindings buffer_bindings; | ||
| 455 | const DrawParameters draw_params = | ||
| 456 | SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); | ||
| 457 | 274 | ||
| 458 | auto lock = texture_cache.AcquireLock(); | ||
| 459 | texture_cache.SynchronizeGraphicsDescriptors(); | 275 | texture_cache.SynchronizeGraphicsDescriptors(); |
| 460 | |||
| 461 | texture_cache.UpdateRenderTargets(false); | 276 | texture_cache.UpdateRenderTargets(false); |
| 462 | 277 | ||
| 463 | const auto shaders = pipeline_cache.GetShaders(); | 278 | const auto shaders = pipeline_cache.GetShaders(); |
| 464 | key.shaders = GetShaderAddresses(shaders); | 279 | key.shaders = GetShaderAddresses(shaders); |
| 465 | SetupShaderDescriptors(shaders); | 280 | SetupShaderDescriptors(shaders, is_indexed); |
| 466 | |||
| 467 | buffer_cache.Unmap(); | ||
| 468 | 281 | ||
| 469 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | 282 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 470 | key.renderpass = framebuffer->RenderPass(); | 283 | key.renderpass = framebuffer->RenderPass(); |
| @@ -476,22 +289,29 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||
| 476 | return; | 289 | return; |
| 477 | } | 290 | } |
| 478 | 291 | ||
| 479 | buffer_bindings.Bind(device, scheduler); | ||
| 480 | |||
| 481 | BeginTransformFeedback(); | 292 | BeginTransformFeedback(); |
| 482 | 293 | ||
| 483 | scheduler.RequestRenderpass(framebuffer); | 294 | scheduler.RequestRenderpass(framebuffer); |
| 484 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); | 295 | scheduler.BindGraphicsPipeline(pipeline->GetHandle()); |
| 485 | UpdateDynamicStates(); | 296 | UpdateDynamicStates(); |
| 486 | 297 | ||
| 487 | const auto pipeline_layout = pipeline->GetLayout(); | 298 | const auto& regs = maxwell3d.regs; |
| 488 | const auto descriptor_set = pipeline->CommitDescriptorSet(); | 299 | const u32 num_instances = maxwell3d.mme_draw.instance_count; |
| 300 | const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed); | ||
| 301 | const VkPipelineLayout pipeline_layout = pipeline->GetLayout(); | ||
| 302 | const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet(); | ||
| 489 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { | 303 | scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { |
| 490 | if (descriptor_set) { | 304 | if (descriptor_set) { |
| 491 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, | 305 | cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, |
| 492 | DESCRIPTOR_SET, descriptor_set, {}); | 306 | DESCRIPTOR_SET, descriptor_set, nullptr); |
| 307 | } | ||
| 308 | if (draw_params.is_indexed) { | ||
| 309 | cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0, | ||
| 310 | draw_params.base_vertex, draw_params.base_instance); | ||
| 311 | } else { | ||
| 312 | cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances, | ||
| 313 | draw_params.base_vertex, draw_params.base_instance); | ||
| 493 | } | 314 | } |
| 494 | draw_params.Draw(cmdbuf); | ||
| 495 | }); | 315 | }); |
| 496 | 316 | ||
| 497 | EndTransformFeedback(); | 317 | EndTransformFeedback(); |
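Note: Draw() now guards the shared caches with a single std::scoped_lock over both mutexes. scoped_lock acquires multiple mutexes with a deadlock-avoidance algorithm and releases them when it leaves scope; a standalone illustration of the pattern:

    #include <mutex>

    std::mutex buffer_cache_mutex;
    std::mutex texture_cache_mutex;

    void TouchBothCaches() {
        // Both locks are taken together (std::lock semantics), avoiding the
        // lock-ordering deadlocks that taking them one at a time could cause.
        std::scoped_lock lock{buffer_cache_mutex, texture_cache_mutex};
        // ... operate on the buffer cache and texture cache ...
    }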
| @@ -515,7 +335,7 @@ void RasterizerVulkan::Clear() { | |||
| 515 | return; | 335 | return; |
| 516 | } | 336 | } |
| 517 | 337 | ||
| 518 | auto lock = texture_cache.AcquireLock(); | 338 | std::scoped_lock lock{texture_cache.mutex}; |
| 519 | texture_cache.UpdateRenderTargets(true); | 339 | texture_cache.UpdateRenderTargets(true); |
| 520 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); | 340 | const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); |
| 521 | const VkExtent2D render_area = framebuffer->RenderArea(); | 341 | const VkExtent2D render_area = framebuffer->RenderArea(); |
| @@ -559,7 +379,6 @@ void RasterizerVulkan::Clear() { | |||
| 559 | if (use_stencil) { | 379 | if (use_stencil) { |
| 560 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; | 380 | aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT; |
| 561 | } | 381 | } |
| 562 | |||
| 563 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, | 382 | scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, |
| 564 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { | 383 | clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { |
| 565 | VkClearAttachment attachment; | 384 | VkClearAttachment attachment; |
| @@ -580,12 +399,11 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 580 | auto& pipeline = pipeline_cache.GetComputePipeline({ | 399 | auto& pipeline = pipeline_cache.GetComputePipeline({ |
| 581 | .shader = code_addr, | 400 | .shader = code_addr, |
| 582 | .shared_memory_size = launch_desc.shared_alloc, | 401 | .shared_memory_size = launch_desc.shared_alloc, |
| 583 | .workgroup_size = | 402 | .workgroup_size{ |
| 584 | { | 403 | launch_desc.block_dim_x, |
| 585 | launch_desc.block_dim_x, | 404 | launch_desc.block_dim_y, |
| 586 | launch_desc.block_dim_y, | 405 | launch_desc.block_dim_z, |
| 587 | launch_desc.block_dim_z, | 406 | }, |
| 588 | }, | ||
| 589 | }); | 407 | }); |
| 590 | 408 | ||
| 591 | // Compute dispatches can't be executed inside a renderpass | 409 | // Compute dispatches can't be executed inside a renderpass |
| @@ -594,10 +412,21 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 594 | image_view_indices.clear(); | 412 | image_view_indices.clear(); |
| 595 | sampler_handles.clear(); | 413 | sampler_handles.clear(); |
| 596 | 414 | ||
| 597 | auto lock = texture_cache.AcquireLock(); | 415 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 598 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 599 | 416 | ||
| 600 | const auto& entries = pipeline.GetEntries(); | 417 | const auto& entries = pipeline.GetEntries(); |
| 418 | buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers); | ||
| 419 | buffer_cache.UnbindComputeStorageBuffers(); | ||
| 420 | u32 ssbo_index = 0; | ||
| 421 | for (const auto& buffer : entries.global_buffers) { | ||
| 422 | buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset, | ||
| 423 | buffer.is_written); | ||
| 424 | ++ssbo_index; | ||
| 425 | } | ||
| 426 | buffer_cache.UpdateComputeBuffers(); | ||
| 427 | |||
| 428 | texture_cache.SynchronizeComputeDescriptors(); | ||
| 429 | |||
| 601 | SetupComputeUniformTexels(entries); | 430 | SetupComputeUniformTexels(entries); |
| 602 | SetupComputeTextures(entries); | 431 | SetupComputeTextures(entries); |
| 603 | SetupComputeStorageTexels(entries); | 432 | SetupComputeStorageTexels(entries); |
| @@ -606,20 +435,15 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||
| 606 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 435 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 607 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); | 436 | texture_cache.FillComputeImageViews(indices_span, image_view_ids); |
| 608 | 437 | ||
| 609 | buffer_cache.Map(CalculateComputeStreamBufferSize()); | ||
| 610 | |||
| 611 | update_descriptor_queue.Acquire(); | 438 | update_descriptor_queue.Acquire(); |
| 612 | 439 | ||
| 613 | SetupComputeConstBuffers(entries); | 440 | buffer_cache.BindHostComputeBuffers(); |
| 614 | SetupComputeGlobalBuffers(entries); | ||
| 615 | 441 | ||
| 616 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | 442 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| 617 | VkSampler* sampler_ptr = sampler_handles.data(); | 443 | VkSampler* sampler_ptr = sampler_handles.data(); |
| 618 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | 444 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, |
| 619 | sampler_ptr); | 445 | sampler_ptr); |
| 620 | 446 | ||
| 621 | buffer_cache.Unmap(); | ||
| 622 | |||
| 623 | const VkPipeline pipeline_handle = pipeline.GetHandle(); | 447 | const VkPipeline pipeline_handle = pipeline.GetHandle(); |
| 624 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); | 448 | const VkPipelineLayout pipeline_layout = pipeline.GetLayout(); |
| 625 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); | 449 | const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet(); |
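Note: compute storage buffers are now declared to the shared buffer cache as (cbuf_index, cbuf_offset) pairs instead of being read and uploaded here. The removed SetupGlobalBuffer() shows the descriptor layout the cache is presumed to resolve: a 64-bit GPU address followed by a 32-bit size inside the constant buffer. A hedged sketch of that lookup (helper names are illustrative):

    // Sketch based on the removed SetupGlobalBuffer(); the shared buffer cache is
    // assumed to perform an equivalent read when resolving a storage buffer binding.
    struct StorageBufferDescriptor {
        GPUVAddr gpu_addr; // 64-bit pointer written by the guest
        u32 size;          // byte size stored right after the pointer
    };

    StorageBufferDescriptor ReadStorageBufferDescriptor(Tegra::MemoryManager& gpu_memory,
                                                        GPUVAddr cbuf_addr, u32 cbuf_offset) {
        const GPUVAddr desc_addr = cbuf_addr + cbuf_offset;
        return StorageBufferDescriptor{
            .gpu_addr = gpu_memory.Read<u64>(desc_addr),
            .size = gpu_memory.Read<u32>(desc_addr + 8),
        };
    }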
| @@ -644,6 +468,11 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 644 | query_cache.Query(gpu_addr, type, timestamp); | 468 | query_cache.Query(gpu_addr, type, timestamp); |
| 645 | } | 469 | } |
| 646 | 470 | ||
| 471 | void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||
| 472 | u32 size) { | ||
| 473 | buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size); | ||
| 474 | } | ||
| 475 | |||
| 647 | void RasterizerVulkan::FlushAll() {} | 476 | void RasterizerVulkan::FlushAll() {} |
| 648 | 477 | ||
| 649 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | 478 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { |
| @@ -651,19 +480,23 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { | |||
| 651 | return; | 480 | return; |
| 652 | } | 481 | } |
| 653 | { | 482 | { |
| 654 | auto lock = texture_cache.AcquireLock(); | 483 | std::scoped_lock lock{texture_cache.mutex}; |
| 655 | texture_cache.DownloadMemory(addr, size); | 484 | texture_cache.DownloadMemory(addr, size); |
| 656 | } | 485 | } |
| 657 | buffer_cache.FlushRegion(addr, size); | 486 | { |
| 487 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 488 | buffer_cache.DownloadMemory(addr, size); | ||
| 489 | } | ||
| 658 | query_cache.FlushRegion(addr, size); | 490 | query_cache.FlushRegion(addr, size); |
| 659 | } | 491 | } |
| 660 | 492 | ||
| 661 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { | 493 | bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) { |
| 494 | std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; | ||
| 662 | if (!Settings::IsGPULevelHigh()) { | 495 | if (!Settings::IsGPULevelHigh()) { |
| 663 | return buffer_cache.MustFlushRegion(addr, size); | 496 | return buffer_cache.IsRegionGpuModified(addr, size); |
| 664 | } | 497 | } |
| 665 | return texture_cache.IsRegionGpuModified(addr, size) || | 498 | return texture_cache.IsRegionGpuModified(addr, size) || |
| 666 | buffer_cache.MustFlushRegion(addr, size); | 499 | buffer_cache.IsRegionGpuModified(addr, size); |
| 667 | } | 500 | } |
| 668 | 501 | ||
| 669 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | 502 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| @@ -671,11 +504,14 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { | |||
| 671 | return; | 504 | return; |
| 672 | } | 505 | } |
| 673 | { | 506 | { |
| 674 | auto lock = texture_cache.AcquireLock(); | 507 | std::scoped_lock lock{texture_cache.mutex}; |
| 675 | texture_cache.WriteMemory(addr, size); | 508 | texture_cache.WriteMemory(addr, size); |
| 676 | } | 509 | } |
| 510 | { | ||
| 511 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 512 | buffer_cache.WriteMemory(addr, size); | ||
| 513 | } | ||
| 677 | pipeline_cache.InvalidateRegion(addr, size); | 514 | pipeline_cache.InvalidateRegion(addr, size); |
| 678 | buffer_cache.InvalidateRegion(addr, size); | ||
| 679 | query_cache.InvalidateRegion(addr, size); | 515 | query_cache.InvalidateRegion(addr, size); |
| 680 | } | 516 | } |
| 681 | 517 | ||
| @@ -683,25 +519,34 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
| 683 | if (addr == 0 || size == 0) { | 519 | if (addr == 0 || size == 0) { |
| 684 | return; | 520 | return; |
| 685 | } | 521 | } |
| 522 | pipeline_cache.OnCPUWrite(addr, size); | ||
| 686 | { | 523 | { |
| 687 | auto lock = texture_cache.AcquireLock(); | 524 | std::scoped_lock lock{texture_cache.mutex}; |
| 688 | texture_cache.WriteMemory(addr, size); | 525 | texture_cache.WriteMemory(addr, size); |
| 689 | } | 526 | } |
| 690 | pipeline_cache.OnCPUWrite(addr, size); | 527 | { |
| 691 | buffer_cache.OnCPUWrite(addr, size); | 528 | std::scoped_lock lock{buffer_cache.mutex}; |
| 529 | buffer_cache.CachedWriteMemory(addr, size); | ||
| 530 | } | ||
| 692 | } | 531 | } |
| 693 | 532 | ||
| 694 | void RasterizerVulkan::SyncGuestHost() { | 533 | void RasterizerVulkan::SyncGuestHost() { |
| 695 | buffer_cache.SyncGuestHost(); | ||
| 696 | pipeline_cache.SyncGuestHost(); | 534 | pipeline_cache.SyncGuestHost(); |
| 535 | { | ||
| 536 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 537 | buffer_cache.FlushCachedWrites(); | ||
| 538 | } | ||
| 697 | } | 539 | } |
| 698 | 540 | ||
| 699 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | 541 | void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { |
| 700 | { | 542 | { |
| 701 | auto lock = texture_cache.AcquireLock(); | 543 | std::scoped_lock lock{texture_cache.mutex}; |
| 702 | texture_cache.UnmapMemory(addr, size); | 544 | texture_cache.UnmapMemory(addr, size); |
| 703 | } | 545 | } |
| 704 | buffer_cache.OnCPUWrite(addr, size); | 546 | { |
| 547 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 548 | buffer_cache.WriteMemory(addr, size); | ||
| 549 | } | ||
| 705 | pipeline_cache.OnCPUWrite(addr, size); | 550 | pipeline_cache.OnCPUWrite(addr, size); |
| 706 | } | 551 | } |
| 707 | 552 | ||
| @@ -774,18 +619,21 @@ void RasterizerVulkan::TickFrame() { | |||
| 774 | draw_counter = 0; | 619 | draw_counter = 0; |
| 775 | update_descriptor_queue.TickFrame(); | 620 | update_descriptor_queue.TickFrame(); |
| 776 | fence_manager.TickFrame(); | 621 | fence_manager.TickFrame(); |
| 777 | buffer_cache.TickFrame(); | ||
| 778 | staging_pool.TickFrame(); | 622 | staging_pool.TickFrame(); |
| 779 | { | 623 | { |
| 780 | auto lock = texture_cache.AcquireLock(); | 624 | std::scoped_lock lock{texture_cache.mutex}; |
| 781 | texture_cache.TickFrame(); | 625 | texture_cache.TickFrame(); |
| 782 | } | 626 | } |
| 627 | { | ||
| 628 | std::scoped_lock lock{buffer_cache.mutex}; | ||
| 629 | buffer_cache.TickFrame(); | ||
| 630 | } | ||
| 783 | } | 631 | } |
| 784 | 632 | ||
| 785 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | 633 | bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |
| 786 | const Tegra::Engines::Fermi2D::Surface& dst, | 634 | const Tegra::Engines::Fermi2D::Surface& dst, |
| 787 | const Tegra::Engines::Fermi2D::Config& copy_config) { | 635 | const Tegra::Engines::Fermi2D::Config& copy_config) { |
| 788 | auto lock = texture_cache.AcquireLock(); | 636 | std::scoped_lock lock{texture_cache.mutex}; |
| 789 | texture_cache.BlitImage(dst, src, copy_config); | 637 | texture_cache.BlitImage(dst, src, copy_config); |
| 790 | return true; | 638 | return true; |
| 791 | } | 639 | } |
| @@ -795,13 +643,11 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 795 | if (!framebuffer_addr) { | 643 | if (!framebuffer_addr) { |
| 796 | return false; | 644 | return false; |
| 797 | } | 645 | } |
| 798 | 646 | std::scoped_lock lock{texture_cache.mutex}; | |
| 799 | auto lock = texture_cache.AcquireLock(); | ||
| 800 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); | 647 | ImageView* const image_view = texture_cache.TryFindFramebufferImageView(framebuffer_addr); |
| 801 | if (!image_view) { | 648 | if (!image_view) { |
| 802 | return false; | 649 | return false; |
| 803 | } | 650 | } |
| 804 | |||
| 805 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); | 651 | screen_info.image_view = image_view->Handle(VideoCommon::ImageViewType::e2D); |
| 806 | screen_info.width = image_view->size.width; | 652 | screen_info.width = image_view->size.width; |
| 807 | screen_info.height = image_view->size.height; | 653 | screen_info.height = image_view->size.height; |
| @@ -830,29 +676,8 @@ void RasterizerVulkan::FlushWork() { | |||
| 830 | draw_counter = 0; | 676 | draw_counter = 0; |
| 831 | } | 677 | } |
| 832 | 678 | ||
| 833 | RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, | ||
| 834 | BufferBindings& buffer_bindings, | ||
| 835 | bool is_indexed, | ||
| 836 | bool is_instanced) { | ||
| 837 | MICROPROFILE_SCOPE(Vulkan_Geometry); | ||
| 838 | |||
| 839 | const auto& regs = maxwell3d.regs; | ||
| 840 | |||
| 841 | SetupVertexArrays(buffer_bindings); | ||
| 842 | |||
| 843 | const u32 base_instance = regs.vb_base_instance; | ||
| 844 | const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; | ||
| 845 | const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; | ||
| 846 | const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; | ||
| 847 | |||
| 848 | DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; | ||
| 849 | SetupIndexBuffer(buffer_bindings, params, is_indexed); | ||
| 850 | |||
| 851 | return params; | ||
| 852 | } | ||
| 853 | |||
| 854 | void RasterizerVulkan::SetupShaderDescriptors( | 679 | void RasterizerVulkan::SetupShaderDescriptors( |
| 855 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) { | 680 | const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { |
| 856 | image_view_indices.clear(); | 681 | image_view_indices.clear(); |
| 857 | sampler_handles.clear(); | 682 | sampler_handles.clear(); |
| 858 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { | 683 | for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { |
| @@ -860,15 +685,27 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 860 | if (!shader) { | 685 | if (!shader) { |
| 861 | continue; | 686 | continue; |
| 862 | } | 687 | } |
| 863 | const auto& entries = shader->GetEntries(); | 688 | const ShaderEntries& entries = shader->GetEntries(); |
| 864 | SetupGraphicsUniformTexels(entries, stage); | 689 | SetupGraphicsUniformTexels(entries, stage); |
| 865 | SetupGraphicsTextures(entries, stage); | 690 | SetupGraphicsTextures(entries, stage); |
| 866 | SetupGraphicsStorageTexels(entries, stage); | 691 | SetupGraphicsStorageTexels(entries, stage); |
| 867 | SetupGraphicsImages(entries, stage); | 692 | SetupGraphicsImages(entries, stage); |
| 693 | |||
| 694 | buffer_cache.SetEnabledUniformBuffers(stage, entries.enabled_uniform_buffers); | ||
| 695 | buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||
| 696 | u32 ssbo_index = 0; | ||
| 697 | for (const auto& buffer : entries.global_buffers) { | ||
| 698 | buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index, | ||
| 699 | buffer.cbuf_offset, buffer.is_written); | ||
| 700 | ++ssbo_index; | ||
| 701 | } | ||
| 868 | } | 702 | } |
| 869 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | 703 | const std::span indices_span(image_view_indices.data(), image_view_indices.size()); |
| 704 | buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||
| 870 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | 705 | texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); |
| 871 | 706 | ||
| 707 | buffer_cache.BindHostGeometryBuffers(is_indexed); | ||
| 708 | |||
| 872 | update_descriptor_queue.Acquire(); | 709 | update_descriptor_queue.Acquire(); |
| 873 | 710 | ||
| 874 | ImageViewId* image_view_id_ptr = image_view_ids.data(); | 711 | ImageViewId* image_view_id_ptr = image_view_ids.data(); |
| @@ -879,11 +716,9 @@ void RasterizerVulkan::SetupShaderDescriptors( | |||
| 879 | if (!shader) { | 716 | if (!shader) { |
| 880 | continue; | 717 | continue; |
| 881 | } | 718 | } |
| 882 | const auto& entries = shader->GetEntries(); | 719 | buffer_cache.BindHostStageBuffers(stage); |
| 883 | SetupGraphicsConstBuffers(entries, stage); | 720 | PushImageDescriptors(shader->GetEntries(), texture_cache, update_descriptor_queue, |
| 884 | SetupGraphicsGlobalBuffers(entries, stage); | 721 | image_view_id_ptr, sampler_ptr); |
| 885 | PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr, | ||
| 886 | sampler_ptr); | ||
| 887 | } | 722 | } |
| 888 | } | 723 | } |
| 889 | 724 | ||
| @@ -916,27 +751,11 @@ void RasterizerVulkan::BeginTransformFeedback() { | |||
| 916 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); | 751 | LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported"); |
| 917 | return; | 752 | return; |
| 918 | } | 753 | } |
| 919 | |||
| 920 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || | 754 | UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || |
| 921 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || | 755 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || |
| 922 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); | 756 | regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); |
| 923 | 757 | scheduler.Record( | |
| 924 | UNIMPLEMENTED_IF(regs.tfb_bindings[1].buffer_enable); | 758 | [](vk::CommandBuffer cmdbuf) { cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 925 | UNIMPLEMENTED_IF(regs.tfb_bindings[2].buffer_enable); | ||
| 926 | UNIMPLEMENTED_IF(regs.tfb_bindings[3].buffer_enable); | ||
| 927 | |||
| 928 | const auto& binding = regs.tfb_bindings[0]; | ||
| 929 | UNIMPLEMENTED_IF(binding.buffer_enable == 0); | ||
| 930 | UNIMPLEMENTED_IF(binding.buffer_offset != 0); | ||
| 931 | |||
| 932 | const GPUVAddr gpu_addr = binding.Address(); | ||
| 933 | const VkDeviceSize size = static_cast<VkDeviceSize>(binding.buffer_size); | ||
| 934 | const auto info = buffer_cache.UploadMemory(gpu_addr, size, 4, true); | ||
| 935 | |||
| 936 | scheduler.Record([buffer = info.handle, offset = info.offset, size](vk::CommandBuffer cmdbuf) { | ||
| 937 | cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); | ||
| 938 | cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); | ||
| 939 | }); | ||
| 940 | } | 759 | } |
| 941 | 760 | ||
| 942 | void RasterizerVulkan::EndTransformFeedback() { | 761 | void RasterizerVulkan::EndTransformFeedback() { |
| @@ -947,104 +766,11 @@ void RasterizerVulkan::EndTransformFeedback() { | |||
| 947 | if (!device.IsExtTransformFeedbackSupported()) { | 766 | if (!device.IsExtTransformFeedbackSupported()) { |
| 948 | return; | 767 | return; |
| 949 | } | 768 | } |
| 950 | |||
| 951 | scheduler.Record( | 769 | scheduler.Record( |
| 952 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); | 770 | [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); }); |
| 953 | } | 771 | } |
| 954 | 772 | ||
| 955 | void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { | ||
| 956 | const auto& regs = maxwell3d.regs; | ||
| 957 | |||
| 958 | for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 959 | const auto& vertex_array = regs.vertex_array[index]; | ||
| 960 | if (!vertex_array.IsEnabled()) { | ||
| 961 | continue; | ||
| 962 | } | ||
| 963 | const GPUVAddr start{vertex_array.StartAddress()}; | ||
| 964 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 965 | |||
| 966 | ASSERT(end >= start); | ||
| 967 | const size_t size = end - start; | ||
| 968 | if (size == 0) { | ||
| 969 | buffer_bindings.AddVertexBinding(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE, 0); | ||
| 970 | continue; | ||
| 971 | } | ||
| 972 | const auto info = buffer_cache.UploadMemory(start, size); | ||
| 973 | buffer_bindings.AddVertexBinding(info.handle, info.offset, size, vertex_array.stride); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | |||
| 977 | void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, | ||
| 978 | bool is_indexed) { | ||
| 979 | if (params.num_vertices == 0) { | ||
| 980 | return; | ||
| 981 | } | ||
| 982 | const auto& regs = maxwell3d.regs; | ||
| 983 | switch (regs.draw.topology) { | ||
| 984 | case Maxwell::PrimitiveTopology::Quads: { | ||
| 985 | if (!params.is_indexed) { | ||
| 986 | const auto [buffer, offset] = | ||
| 987 | quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||
| 988 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 989 | params.base_vertex = 0; | ||
| 990 | params.num_vertices = params.num_vertices * 6 / 4; | ||
| 991 | params.is_indexed = true; | ||
| 992 | break; | ||
| 993 | } | ||
| 994 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 995 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 996 | VkBuffer buffer = info.handle; | ||
| 997 | u64 offset = info.offset; | ||
| 998 | std::tie(buffer, offset) = quad_indexed_pass.Assemble( | ||
| 999 | regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | ||
| 1000 | |||
| 1001 | buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||
| 1002 | params.num_vertices = (params.num_vertices / 4) * 6; | ||
| 1003 | params.base_vertex = 0; | ||
| 1004 | break; | ||
| 1005 | } | ||
| 1006 | default: { | ||
| 1007 | if (!is_indexed) { | ||
| 1008 | break; | ||
| 1009 | } | ||
| 1010 | const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||
| 1011 | const auto info = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||
| 1012 | VkBuffer buffer = info.handle; | ||
| 1013 | u64 offset = info.offset; | ||
| 1014 | |||
| 1015 | auto format = regs.index_array.format; | ||
| 1016 | const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; | ||
| 1017 | if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { | ||
| 1018 | std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); | ||
| 1019 | format = Maxwell::IndexFormat::UnsignedShort; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); | ||
| 1023 | break; | ||
| 1024 | } | ||
| 1025 | } | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, size_t stage) { | ||
| 1029 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 1030 | const auto& shader_stage = maxwell3d.state.shader_stages[stage]; | ||
| 1031 | for (const auto& entry : entries.const_buffers) { | ||
| 1032 | SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); | ||
| 1033 | } | ||
| 1034 | } | ||
| 1035 | |||
| 1036 | void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, size_t stage) { | ||
| 1037 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 1038 | const auto& cbufs{maxwell3d.state.shader_stages[stage]}; | ||
| 1039 | |||
| 1040 | for (const auto& entry : entries.global_buffers) { | ||
| 1041 | const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); | ||
| 1042 | SetupGlobalBuffer(entry, addr); | ||
| 1043 | } | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { | 773 | void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) { |
| 1047 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1048 | const auto& regs = maxwell3d.regs; | 774 | const auto& regs = maxwell3d.regs; |
| 1049 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 775 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1050 | for (const auto& entry : entries.uniform_texels) { | 776 | for (const auto& entry : entries.uniform_texels) { |
| @@ -1054,7 +780,6 @@ void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, | |||
| 1054 | } | 780 | } |
| 1055 | 781 | ||
| 1056 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { | 782 | void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) { |
| 1057 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1058 | const auto& regs = maxwell3d.regs; | 783 | const auto& regs = maxwell3d.regs; |
| 1059 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 784 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1060 | for (const auto& entry : entries.samplers) { | 785 | for (const auto& entry : entries.samplers) { |
| @@ -1070,7 +795,6 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_ | |||
| 1070 | } | 795 | } |
| 1071 | 796 | ||
| 1072 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { | 797 | void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) { |
| 1073 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1074 | const auto& regs = maxwell3d.regs; | 798 | const auto& regs = maxwell3d.regs; |
| 1075 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 799 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1076 | for (const auto& entry : entries.storage_texels) { | 800 | for (const auto& entry : entries.storage_texels) { |
| @@ -1080,7 +804,6 @@ void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, | |||
| 1080 | } | 804 | } |
| 1081 | 805 | ||
| 1082 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { | 806 | void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) { |
| 1083 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 1084 | const auto& regs = maxwell3d.regs; | 807 | const auto& regs = maxwell3d.regs; |
| 1085 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; | 808 | const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex; |
| 1086 | for (const auto& entry : entries.images) { | 809 | for (const auto& entry : entries.images) { |
| @@ -1089,32 +812,7 @@ void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t | |||
| 1089 | } | 812 | } |
| 1090 | } | 813 | } |
| 1091 | 814 | ||
| 1092 | void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { | ||
| 1093 | MICROPROFILE_SCOPE(Vulkan_ConstBuffers); | ||
| 1094 | const auto& launch_desc = kepler_compute.launch_description; | ||
| 1095 | for (const auto& entry : entries.const_buffers) { | ||
| 1096 | const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||
| 1097 | const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); | ||
| 1098 | const Tegra::Engines::ConstBufferInfo info{ | ||
| 1099 | .address = config.Address(), | ||
| 1100 | .size = config.size, | ||
| 1101 | .enabled = mask[entry.GetIndex()], | ||
| 1102 | }; | ||
| 1103 | SetupConstBuffer(entry, info); | ||
| 1104 | } | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { | ||
| 1108 | MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); | ||
| 1109 | const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; | ||
| 1110 | for (const auto& entry : entries.global_buffers) { | ||
| 1111 | const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||
| 1112 | SetupGlobalBuffer(entry, addr); | ||
| 1113 | } | ||
| 1114 | } | ||
| 1115 | |||
| 1116 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | 815 | void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { |
| 1117 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1118 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 816 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1119 | for (const auto& entry : entries.uniform_texels) { | 817 | for (const auto& entry : entries.uniform_texels) { |
| 1120 | const TextureHandle handle = | 818 | const TextureHandle handle = |
| @@ -1124,7 +822,6 @@ void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { | |||
| 1124 | } | 822 | } |
| 1125 | 823 | ||
| 1126 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | 824 | void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { |
| 1127 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1128 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 825 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1129 | for (const auto& entry : entries.samplers) { | 826 | for (const auto& entry : entries.samplers) { |
| 1130 | for (size_t index = 0; index < entry.size; ++index) { | 827 | for (size_t index = 0; index < entry.size; ++index) { |
| @@ -1139,7 +836,6 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { | |||
| 1139 | } | 836 | } |
| 1140 | 837 | ||
| 1141 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | 838 | void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { |
| 1142 | MICROPROFILE_SCOPE(Vulkan_Textures); | ||
| 1143 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 839 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1144 | for (const auto& entry : entries.storage_texels) { | 840 | for (const auto& entry : entries.storage_texels) { |
| 1145 | const TextureHandle handle = | 841 | const TextureHandle handle = |
| @@ -1149,7 +845,6 @@ void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { | |||
| 1149 | } | 845 | } |
| 1150 | 846 | ||
| 1151 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | 847 | void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { |
| 1152 | MICROPROFILE_SCOPE(Vulkan_Images); | ||
| 1153 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; | 848 | const bool via_header_index = kepler_compute.launch_description.linked_tsc; |
| 1154 | for (const auto& entry : entries.images) { | 849 | for (const auto& entry : entries.images) { |
| 1155 | const TextureHandle handle = | 850 | const TextureHandle handle = |
| @@ -1158,42 +853,6 @@ void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { | |||
| 1158 | } | 853 | } |
| 1159 | } | 854 | } |
| 1160 | 855 | ||
| 1161 | void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 1162 | const Tegra::Engines::ConstBufferInfo& buffer) { | ||
| 1163 | if (!buffer.enabled) { | ||
| 1164 | // Set values to zero to unbind buffers | ||
| 1165 | update_descriptor_queue.AddBuffer(DefaultBuffer(), 0, DEFAULT_BUFFER_SIZE); | ||
| 1166 | return; | ||
| 1167 | } | ||
| 1168 | // Align the size to avoid bad std140 interactions | ||
| 1169 | const size_t size = Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); | ||
| 1170 | ASSERT(size <= MaxConstbufferSize); | ||
| 1171 | |||
| 1172 | const u64 alignment = device.GetUniformBufferAlignment(); | ||
| 1173 | const auto info = buffer_cache.UploadMemory(buffer.address, size, alignment); | ||
| 1174 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||
| 1175 | } | ||
| 1176 | |||
| 1177 | void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { | ||
| 1178 | const u64 actual_addr = gpu_memory.Read<u64>(address); | ||
| 1179 | const u32 size = gpu_memory.Read<u32>(address + 8); | ||
| 1180 | |||
| 1181 | if (size == 0) { | ||
| 1182 | // Sometimes global memory pointers don't have a proper size. Upload a dummy entry | ||
| 1183 | // because Vulkan doesn't like empty buffers. | ||
| 1184 | // Note: Do *not* use DefaultBuffer() here, storage buffers can be written breaking the | ||
| 1185 | // default buffer. | ||
| 1186 | static constexpr size_t dummy_size = 4; | ||
| 1187 | const auto info = buffer_cache.GetEmptyBuffer(dummy_size); | ||
| 1188 | update_descriptor_queue.AddBuffer(info.handle, info.offset, dummy_size); | ||
| 1189 | return; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | const auto info = buffer_cache.UploadMemory( | ||
| 1193 | actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); | ||
| 1194 | update_descriptor_queue.AddBuffer(info.handle, info.offset, size); | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 856 | void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| 1198 | if (!state_tracker.TouchViewports()) { | 857 | if (!state_tracker.TouchViewports()) { |
| 1199 | return; | 858 | return; |
| @@ -1206,7 +865,8 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 1206 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), | 865 | GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), |
| 1207 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), | 866 | GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), |
| 1208 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), | 867 | GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), |
| 1209 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; | 868 | GetViewportState(device, regs, 14), GetViewportState(device, regs, 15), |
| 869 | }; | ||
| 1210 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | 870 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); |
| 1211 | } | 871 | } |
| 1212 | 872 | ||
| @@ -1214,13 +874,14 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 1214 | if (!state_tracker.TouchScissors()) { | 874 | if (!state_tracker.TouchScissors()) { |
| 1215 | return; | 875 | return; |
| 1216 | } | 876 | } |
| 1217 | const std::array scissors = { | 877 | const std::array scissors{ |
| 1218 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), | 878 | GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), |
| 1219 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), | 879 | GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), |
| 1220 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), | 880 | GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), |
| 1221 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), | 881 | GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), |
| 1222 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), | 882 | GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), |
| 1223 | GetScissorState(regs, 15)}; | 883 | GetScissorState(regs, 15), |
| 884 | }; | ||
| 1224 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | 885 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); |
| 1225 | } | 886 | } |
| 1226 | 887 | ||
| @@ -1385,73 +1046,4 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& | |||
| 1385 | }); | 1046 | }); |
| 1386 | } | 1047 | } |
| 1387 | 1048 | ||
| 1388 | size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { | ||
| 1389 | size_t size = CalculateVertexArraysSize(); | ||
| 1390 | if (is_indexed) { | ||
| 1391 | size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); | ||
| 1392 | } | ||
| 1393 | size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); | ||
| 1394 | return size; | ||
| 1395 | } | ||
| 1396 | |||
| 1397 | size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { | ||
| 1398 | return Tegra::Engines::KeplerCompute::NumConstBuffers * | ||
| 1399 | (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||
| 1400 | } | ||
| 1401 | |||
| 1402 | size_t RasterizerVulkan::CalculateVertexArraysSize() const { | ||
| 1403 | const auto& regs = maxwell3d.regs; | ||
| 1404 | |||
| 1405 | size_t size = 0; | ||
| 1406 | for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||
| 1407 | // This implementation assumes that all attributes are used in the shader. | ||
| 1408 | const GPUVAddr start{regs.vertex_array[index].StartAddress()}; | ||
| 1409 | const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; | ||
| 1410 | DEBUG_ASSERT(end >= start); | ||
| 1411 | |||
| 1412 | size += (end - start) * regs.vertex_array[index].enable; | ||
| 1413 | } | ||
| 1414 | return size; | ||
| 1415 | } | ||
| 1416 | |||
| 1417 | size_t RasterizerVulkan::CalculateIndexBufferSize() const { | ||
| 1418 | return static_cast<size_t>(maxwell3d.regs.index_array.count) * | ||
| 1419 | static_cast<size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | size_t RasterizerVulkan::CalculateConstBufferSize( | ||
| 1423 | const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { | ||
| 1424 | if (entry.IsIndirect()) { | ||
| 1425 | // Buffer is accessed indirectly, so upload the entire thing | ||
| 1426 | return buffer.size; | ||
| 1427 | } else { | ||
| 1428 | // Buffer is accessed directly, upload just what we use | ||
| 1429 | return entry.GetSize(); | ||
| 1430 | } | ||
| 1431 | } | ||
| 1432 | |||
| 1433 | VkBuffer RasterizerVulkan::DefaultBuffer() { | ||
| 1434 | if (default_buffer) { | ||
| 1435 | return *default_buffer; | ||
| 1436 | } | ||
| 1437 | default_buffer = device.GetLogical().CreateBuffer({ | ||
| 1438 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 1439 | .pNext = nullptr, | ||
| 1440 | .flags = 0, | ||
| 1441 | .size = DEFAULT_BUFFER_SIZE, | ||
| 1442 | .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | ||
| 1443 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, | ||
| 1444 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 1445 | .queueFamilyIndexCount = 0, | ||
| 1446 | .pQueueFamilyIndices = nullptr, | ||
| 1447 | }); | ||
| 1448 | default_buffer_commit = memory_allocator.Commit(default_buffer, MemoryUsage::DeviceLocal); | ||
| 1449 | |||
| 1450 | scheduler.RequestOutsideRenderPassOperationContext(); | ||
| 1451 | scheduler.Record([buffer = *default_buffer](vk::CommandBuffer cmdbuf) { | ||
| 1452 | cmdbuf.FillBuffer(buffer, 0, DEFAULT_BUFFER_SIZE, 0); | ||
| 1453 | }); | ||
| 1454 | return *default_buffer; | ||
| 1455 | } | ||
| 1456 | |||
| 1457 | } // namespace Vulkan | 1049 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 8e261b9bd..7fc6741da 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -18,14 +18,12 @@ | |||
| 18 | #include "video_core/renderer_vulkan/blit_image.h" | 18 | #include "video_core/renderer_vulkan/blit_image.h" |
| 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" | 19 | #include "video_core/renderer_vulkan/fixed_pipeline_state.h" |
| 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" | 20 | #include "video_core/renderer_vulkan/vk_buffer_cache.h" |
| 21 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | ||
| 22 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | 21 | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" |
| 23 | #include "video_core/renderer_vulkan/vk_fence_manager.h" | 22 | #include "video_core/renderer_vulkan/vk_fence_manager.h" |
| 24 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | 23 | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" |
| 25 | #include "video_core/renderer_vulkan/vk_query_cache.h" | 24 | #include "video_core/renderer_vulkan/vk_query_cache.h" |
| 26 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 25 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 27 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 26 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| 28 | #include "video_core/renderer_vulkan/vk_stream_buffer.h" | ||
| 29 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | 27 | #include "video_core/renderer_vulkan/vk_texture_cache.h" |
| 30 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" | 28 | #include "video_core/renderer_vulkan/vk_update_descriptor.h" |
| 31 | #include "video_core/shader/async_shaders.h" | 29 | #include "video_core/shader/async_shaders.h" |
| @@ -49,7 +47,6 @@ namespace Vulkan { | |||
| 49 | struct VKScreenInfo; | 47 | struct VKScreenInfo; |
| 50 | 48 | ||
| 51 | class StateTracker; | 49 | class StateTracker; |
| 52 | class BufferBindings; | ||
| 53 | 50 | ||
| 54 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | 51 | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { |
| 55 | public: | 52 | public: |
| @@ -65,6 +62,7 @@ public: | |||
| 65 | void DispatchCompute(GPUVAddr code_addr) override; | 62 | void DispatchCompute(GPUVAddr code_addr) override; |
| 66 | void ResetCounter(VideoCore::QueryType type) override; | 63 | void ResetCounter(VideoCore::QueryType type) override; |
| 67 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 64 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 65 | void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; | ||
| 68 | void FlushAll() override; | 66 | void FlushAll() override; |
| 69 | void FlushRegion(VAddr addr, u64 size) override; | 67 | void FlushRegion(VAddr addr, u64 size) override; |
| 70 | bool MustFlushRegion(VAddr addr, u64 size) override; | 68 | bool MustFlushRegion(VAddr addr, u64 size) override; |
| @@ -107,24 +105,11 @@ private: | |||
| 107 | 105 | ||
| 108 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); | 106 | static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float); |
| 109 | 107 | ||
| 110 | struct DrawParameters { | ||
| 111 | void Draw(vk::CommandBuffer cmdbuf) const; | ||
| 112 | |||
| 113 | u32 base_instance = 0; | ||
| 114 | u32 num_instances = 0; | ||
| 115 | u32 base_vertex = 0; | ||
| 116 | u32 num_vertices = 0; | ||
| 117 | bool is_indexed = 0; | ||
| 118 | }; | ||
| 119 | |||
| 120 | void FlushWork(); | 108 | void FlushWork(); |
| 121 | 109 | ||
| 122 | /// Setups geometry buffers and state. | ||
| 123 | DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, | ||
| 124 | bool is_indexed, bool is_instanced); | ||
| 125 | |||
| 126 | /// Setup descriptors in the graphics pipeline. | 110 | /// Setup descriptors in the graphics pipeline. |
| 127 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders); | 111 | void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, |
| 112 | bool is_indexed); | ||
| 128 | 113 | ||
| 129 | void UpdateDynamicStates(); | 114 | void UpdateDynamicStates(); |
| 130 | 115 | ||
| @@ -132,16 +117,6 @@ private: | |||
| 132 | 117 | ||
| 133 | void EndTransformFeedback(); | 118 | void EndTransformFeedback(); |
| 134 | 119 | ||
| 135 | void SetupVertexArrays(BufferBindings& buffer_bindings); | ||
| 136 | |||
| 137 | void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); | ||
| 138 | |||
| 139 | /// Setup constant buffers in the graphics pipeline. | ||
| 140 | void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 141 | |||
| 142 | /// Setup global buffers in the graphics pipeline. | ||
| 143 | void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); | ||
| 144 | |||
| 145 | /// Setup uniform texels in the graphics pipeline. | 120 | /// Setup uniform texels in the graphics pipeline. |
| 146 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); | 121 | void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage); |
| 147 | 122 | ||
| @@ -154,12 +129,6 @@ private: | |||
| 154 | /// Setup images in the graphics pipeline. | 129 | /// Setup images in the graphics pipeline. |
| 155 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); | 130 | void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); |
| 156 | 131 | ||
| 157 | /// Setup constant buffers in the compute pipeline. | ||
| 158 | void SetupComputeConstBuffers(const ShaderEntries& entries); | ||
| 159 | |||
| 160 | /// Setup global buffers in the compute pipeline. | ||
| 161 | void SetupComputeGlobalBuffers(const ShaderEntries& entries); | ||
| 162 | |||
| 163 | /// Setup texel buffers in the compute pipeline. | 132 | /// Setup texel buffers in the compute pipeline. |
| 164 | void SetupComputeUniformTexels(const ShaderEntries& entries); | 133 | void SetupComputeUniformTexels(const ShaderEntries& entries); |
| 165 | 134 | ||
| @@ -172,11 +141,6 @@ private: | |||
| 172 | /// Setup images in the compute pipeline. | 141 | /// Setup images in the compute pipeline. |
| 173 | void SetupComputeImages(const ShaderEntries& entries); | 142 | void SetupComputeImages(const ShaderEntries& entries); |
| 174 | 143 | ||
| 175 | void SetupConstBuffer(const ConstBufferEntry& entry, | ||
| 176 | const Tegra::Engines::ConstBufferInfo& buffer); | ||
| 177 | |||
| 178 | void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); | ||
| 179 | |||
| 180 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); | 144 | void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 181 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); | 145 | void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs); |
| 182 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); | 146 | void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs); |
| @@ -193,19 +157,6 @@ private: | |||
| 193 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); | 157 | void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); |
| 194 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); | 158 | void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); |
| 195 | 159 | ||
| 196 | size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; | ||
| 197 | |||
| 198 | size_t CalculateComputeStreamBufferSize() const; | ||
| 199 | |||
| 200 | size_t CalculateVertexArraysSize() const; | ||
| 201 | |||
| 202 | size_t CalculateIndexBufferSize() const; | ||
| 203 | |||
| 204 | size_t CalculateConstBufferSize(const ConstBufferEntry& entry, | ||
| 205 | const Tegra::Engines::ConstBufferInfo& buffer) const; | ||
| 206 | |||
| 207 | VkBuffer DefaultBuffer(); | ||
| 208 | |||
| 209 | Tegra::GPU& gpu; | 160 | Tegra::GPU& gpu; |
| 210 | Tegra::MemoryManager& gpu_memory; | 161 | Tegra::MemoryManager& gpu_memory; |
| 211 | Tegra::Engines::Maxwell3D& maxwell3d; | 162 | Tegra::Engines::Maxwell3D& maxwell3d; |
| @@ -217,24 +168,19 @@ private: | |||
| 217 | StateTracker& state_tracker; | 168 | StateTracker& state_tracker; |
| 218 | VKScheduler& scheduler; | 169 | VKScheduler& scheduler; |
| 219 | 170 | ||
| 220 | VKStreamBuffer stream_buffer; | ||
| 221 | StagingBufferPool staging_pool; | 171 | StagingBufferPool staging_pool; |
| 222 | VKDescriptorPool descriptor_pool; | 172 | VKDescriptorPool descriptor_pool; |
| 223 | VKUpdateDescriptorQueue update_descriptor_queue; | 173 | VKUpdateDescriptorQueue update_descriptor_queue; |
| 224 | BlitImageHelper blit_image; | 174 | BlitImageHelper blit_image; |
| 225 | QuadArrayPass quad_array_pass; | ||
| 226 | QuadIndexedPass quad_indexed_pass; | ||
| 227 | Uint8Pass uint8_pass; | ||
| 228 | 175 | ||
| 229 | TextureCacheRuntime texture_cache_runtime; | 176 | TextureCacheRuntime texture_cache_runtime; |
| 230 | TextureCache texture_cache; | 177 | TextureCache texture_cache; |
| 178 | BufferCacheRuntime buffer_cache_runtime; | ||
| 179 | BufferCache buffer_cache; | ||
| 231 | VKPipelineCache pipeline_cache; | 180 | VKPipelineCache pipeline_cache; |
| 232 | VKBufferCache buffer_cache; | ||
| 233 | VKQueryCache query_cache; | 181 | VKQueryCache query_cache; |
| 234 | VKFenceManager fence_manager; | 182 | VKFenceManager fence_manager; |
| 235 | 183 | ||
| 236 | vk::Buffer default_buffer; | ||
| 237 | MemoryCommit default_buffer_commit; | ||
| 238 | vk::Event wfi_event; | 184 | vk::Event wfi_event; |
| 239 | VideoCommon::Shader::AsyncShaders async_shaders; | 185 | VideoCommon::Shader::AsyncShaders async_shaders; |
| 240 | 186 | ||
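Note on the header change: buffer_cache_runtime is declared immediately before buffer_cache. C++ constructs non-static data members in declaration order (the constructor's initializer-list order does not matter), so the runtime object is guaranteed to exist before the cache that receives it at construction. A minimal illustration:

    struct Runtime {};

    struct Cache {
        explicit Cache(Runtime& runtime_) : runtime{runtime_} {}
        Runtime& runtime;
    };

    struct Holder {
        // Members are constructed top to bottom: runtime first, then cache, so the
        // reference handed to Cache is already valid.
        Runtime runtime;
        Cache cache{runtime};
    };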
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 66004f9c0..f35c120b0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp | |||
| @@ -52,18 +52,6 @@ VKScheduler::~VKScheduler() { | |||
| 52 | worker_thread.join(); | 52 | worker_thread.join(); |
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | u64 VKScheduler::CurrentTick() const noexcept { | ||
| 56 | return master_semaphore->CurrentTick(); | ||
| 57 | } | ||
| 58 | |||
| 59 | bool VKScheduler::IsFree(u64 tick) const noexcept { | ||
| 60 | return master_semaphore->IsFree(tick); | ||
| 61 | } | ||
| 62 | |||
| 63 | void VKScheduler::Wait(u64 tick) { | ||
| 64 | master_semaphore->Wait(tick); | ||
| 65 | } | ||
| 66 | |||
| 67 | void VKScheduler::Flush(VkSemaphore semaphore) { | 55 | void VKScheduler::Flush(VkSemaphore semaphore) { |
| 68 | SubmitExecution(semaphore); | 56 | SubmitExecution(semaphore); |
| 69 | AllocateNewContext(); | 57 | AllocateNewContext(); |
| @@ -269,7 +257,7 @@ void VKScheduler::EndRenderPass() { | |||
| 269 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | | 257 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | |
| 270 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | | 258 | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | |
| 271 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, | 259 | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, |
| 272 | VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, 0, nullptr, nullptr, | 260 | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, nullptr, nullptr, |
| 273 | vk::Span(barriers.data(), num_images)); | 261 | vk::Span(barriers.data(), num_images)); |
| 274 | }); | 262 | }); |
| 275 | state.renderpass = nullptr; | 263 | state.renderpass = nullptr; |
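The end-of-render-pass barrier above now targets VK_PIPELINE_STAGE_ALL_COMMANDS_BIT instead of ALL_GRAPHICS, so transfer and compute commands recorded after the pass also wait for the attachment writes, not just later graphics work. As a rough standalone illustration (not the project's code; the image handle and aspect mask are placeholder parameters supplied by a hypothetical caller), the same dependency can be expressed like this:

#include <vulkan/vulkan.h>

// Make color/depth attachment writes from a finished render pass visible to
// any later command (copies, compute dispatches, the next render pass).
void BarrierAfterRenderPass(VkCommandBuffer cmdbuf, VkImage image,
                            VkImageAspectFlags aspect_mask) {
    const VkImageMemoryBarrier barrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange{
            .aspectMask = aspect_mask,
            .baseMipLevel = 0,
            .levelCount = VK_REMAINING_MIP_LEVELS,
            .baseArrayLayer = 0,
            .layerCount = VK_REMAINING_ARRAY_LAYERS,
        },
    };
    vkCmdPipelineBarrier(cmdbuf,
                         VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                             VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
                         0, nullptr, 0, nullptr, 1, &barrier);
}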
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 15f2987eb..3ce48e9d2 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include "common/alignment.h" | 14 | #include "common/alignment.h" |
| 15 | #include "common/common_types.h" | 15 | #include "common/common_types.h" |
| 16 | #include "common/threadsafe_queue.h" | 16 | #include "common/threadsafe_queue.h" |
| 17 | #include "video_core/renderer_vulkan/vk_master_semaphore.h" | ||
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| @@ -21,7 +22,6 @@ namespace Vulkan { | |||
| 21 | class CommandPool; | 22 | class CommandPool; |
| 22 | class Device; | 23 | class Device; |
| 23 | class Framebuffer; | 24 | class Framebuffer; |
| 24 | class MasterSemaphore; | ||
| 25 | class StateTracker; | 25 | class StateTracker; |
| 26 | class VKQueryCache; | 26 | class VKQueryCache; |
| 27 | 27 | ||
| @@ -32,15 +32,6 @@ public: | |||
| 32 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); | 32 | explicit VKScheduler(const Device& device, StateTracker& state_tracker); |
| 33 | ~VKScheduler(); | 33 | ~VKScheduler(); |
| 34 | 34 | ||
| 35 | /// Returns the current command buffer tick. | ||
| 36 | [[nodiscard]] u64 CurrentTick() const noexcept; | ||
| 37 | |||
| 38 | /// Returns true when a tick has been triggered by the GPU. | ||
| 39 | [[nodiscard]] bool IsFree(u64 tick) const noexcept; | ||
| 40 | |||
| 41 | /// Waits for the given tick to trigger on the GPU. | ||
| 42 | void Wait(u64 tick); | ||
| 43 | |||
| 44 | /// Sends the current execution context to the GPU. | 35 | /// Sends the current execution context to the GPU. |
| 45 | void Flush(VkSemaphore semaphore = nullptr); | 36 | void Flush(VkSemaphore semaphore = nullptr); |
| 46 | 37 | ||
| @@ -82,6 +73,21 @@ public: | |||
| 82 | (void)chunk->Record(command); | 73 | (void)chunk->Record(command); |
| 83 | } | 74 | } |
| 84 | 75 | ||
| 76 | /// Returns the current command buffer tick. | ||
| 77 | [[nodiscard]] u64 CurrentTick() const noexcept { | ||
| 78 | return master_semaphore->CurrentTick(); | ||
| 79 | } | ||
| 80 | |||
| 81 | /// Returns true when a tick has been triggered by the GPU. | ||
| 82 | [[nodiscard]] bool IsFree(u64 tick) const noexcept { | ||
| 83 | return master_semaphore->IsFree(tick); | ||
| 84 | } | ||
| 85 | |||
| 86 | /// Waits for the given tick to trigger on the GPU. | ||
| 87 | void Wait(u64 tick) { | ||
| 88 | master_semaphore->Wait(tick); | ||
| 89 | } | ||
| 90 | |||
| 85 | /// Returns the master timeline semaphore. | 91 | /// Returns the master timeline semaphore. |
| 86 | [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { | 92 | [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { |
| 87 | return *master_semaphore; | 93 | return *master_semaphore; |
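With the tick helpers now inlined in the header, callers query the master timeline semaphore directly: CurrentTick() stamps work at submission time, and IsFree()/Wait() report whether the GPU has caught up to that stamp. A minimal sketch of the intended usage pattern, with SchedulerLike and BufferRecycler as stand-in names rather than yuzu types:

#include <cstdint>
#include <deque>
#include <utility>

// Stand-in for VKScheduler: cpu_tick advances per submission, gpu_tick is the
// last value signaled by the timeline semaphore.
struct SchedulerLike {
    std::uint64_t CurrentTick() const noexcept { return cpu_tick; }
    bool IsFree(std::uint64_t tick) const noexcept { return tick <= gpu_tick; }
    std::uint64_t cpu_tick = 0;
    std::uint64_t gpu_tick = 0;
};

class BufferRecycler {
public:
    explicit BufferRecycler(SchedulerLike& scheduler_) : scheduler{scheduler_} {}

    // Tag a buffer with the tick of the submission that last used it.
    void Retire(int buffer_handle) {
        pending.push_back({buffer_handle, scheduler.CurrentTick()});
    }

    // Hand a buffer back only once the GPU has passed its tick; otherwise the
    // caller allocates a fresh one instead of stalling.
    int Acquire() {
        if (!pending.empty() && scheduler.IsFree(pending.front().second)) {
            const int handle = pending.front().first;
            pending.pop_front();
            return handle;
        }
        return -1;
    }

private:
    SchedulerLike& scheduler;
    std::deque<std::pair<int, std::uint64_t>> pending;
};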
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 61d52b961..40e2e0d38 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | |||
| @@ -3106,7 +3106,11 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3106 | entries.const_buffers.emplace_back(cbuf.second, cbuf.first); | 3106 | entries.const_buffers.emplace_back(cbuf.second, cbuf.first); |
| 3107 | } | 3107 | } |
| 3108 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { | 3108 | for (const auto& [base, usage] : ir.GetGlobalMemory()) { |
| 3109 | entries.global_buffers.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_written); | 3109 | entries.global_buffers.emplace_back(GlobalBufferEntry{ |
| 3110 | .cbuf_index = base.cbuf_index, | ||
| 3111 | .cbuf_offset = base.cbuf_offset, | ||
| 3112 | .is_written = usage.is_written, | ||
| 3113 | }); | ||
| 3110 | } | 3114 | } |
| 3111 | for (const auto& sampler : ir.GetSamplers()) { | 3115 | for (const auto& sampler : ir.GetSamplers()) { |
| 3112 | if (sampler.is_buffer) { | 3116 | if (sampler.is_buffer) { |
| @@ -3127,6 +3131,9 @@ ShaderEntries GenerateShaderEntries(const VideoCommon::Shader::ShaderIR& ir) { | |||
| 3127 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); | 3131 | entries.attributes.insert(GetGenericAttributeLocation(attribute)); |
| 3128 | } | 3132 | } |
| 3129 | } | 3133 | } |
| 3134 | for (const auto& buffer : entries.const_buffers) { | ||
| 3135 | entries.enabled_uniform_buffers |= 1U << buffer.GetIndex(); | ||
| 3136 | } | ||
| 3130 | entries.clip_distances = ir.GetClipDistances(); | 3137 | entries.clip_distances = ir.GetClipDistances(); |
| 3131 | entries.shader_length = ir.GetLength(); | 3138 | entries.shader_length = ir.GetLength(); |
| 3132 | entries.uses_warps = ir.UsesWarps(); | 3139 | entries.uses_warps = ir.UsesWarps(); |
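The new enabled_uniform_buffers field collapses the list of constant buffers into a one-bit-per-slot mask. A small self-contained sketch of how such a mask can be built and walked, assuming slot indices below 32 (the index list here is made up):

#include <bit>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical constant-buffer indices reported for one shader stage.
    const std::vector<std::uint32_t> cbuf_indices{0, 2, 5};

    // Build the mask the same way the decompiler does: one bit per slot.
    std::uint32_t enabled = 0;
    for (const std::uint32_t index : cbuf_indices) {
        enabled |= 1U << index;
    }

    // Walk the set bits; std::countr_zero yields the lowest enabled slot.
    for (std::uint32_t mask = enabled; mask != 0; mask &= mask - 1) {
        const int slot = std::countr_zero(mask);
        std::printf("uniform buffer slot %d is enabled\n", slot);
    }
}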
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 26381e444..5d94132a5 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h | |||
| @@ -39,24 +39,7 @@ private: | |||
| 39 | u32 index{}; | 39 | u32 index{}; |
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | class GlobalBufferEntry { | 42 | struct GlobalBufferEntry { |
| 43 | public: | ||
| 44 | constexpr explicit GlobalBufferEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_written_) | ||
| 45 | : cbuf_index{cbuf_index_}, cbuf_offset{cbuf_offset_}, is_written{is_written_} {} | ||
| 46 | |||
| 47 | constexpr u32 GetCbufIndex() const { | ||
| 48 | return cbuf_index; | ||
| 49 | } | ||
| 50 | |||
| 51 | constexpr u32 GetCbufOffset() const { | ||
| 52 | return cbuf_offset; | ||
| 53 | } | ||
| 54 | |||
| 55 | constexpr bool IsWritten() const { | ||
| 56 | return is_written; | ||
| 57 | } | ||
| 58 | |||
| 59 | private: | ||
| 60 | u32 cbuf_index{}; | 43 | u32 cbuf_index{}; |
| 61 | u32 cbuf_offset{}; | 44 | u32 cbuf_offset{}; |
| 62 | bool is_written{}; | 45 | bool is_written{}; |
| @@ -78,6 +61,7 @@ struct ShaderEntries { | |||
| 78 | std::set<u32> attributes; | 61 | std::set<u32> attributes; |
| 79 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | 62 | std::array<bool, Maxwell::NumClipDistances> clip_distances{}; |
| 80 | std::size_t shader_length{}; | 63 | std::size_t shader_length{}; |
| 64 | u32 enabled_uniform_buffers{}; | ||
| 81 | bool uses_warps{}; | 65 | bool uses_warps{}; |
| 82 | }; | 66 | }; |
| 83 | 67 | ||
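GlobalBufferEntry loses its constructor and getters and becomes a plain aggregate, which is what allows the designated-initializer emplace_back shown in the decompiler diff above. A tiny sketch of the same idea with a hypothetical look-alike struct:

// A plain aggregate can be filled with designated initializers; a class with
// a user-provided constructor cannot.
struct GlobalBufferEntryLike {
    unsigned cbuf_index{};
    unsigned cbuf_offset{};
    bool is_written{};
};

int main() {
    const GlobalBufferEntryLike entry{
        .cbuf_index = 4,
        .cbuf_offset = 0x120,
        .is_written = true,
    };
    return entry.is_written ? 0 : 1;
}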
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 97fd41cc1..dfd8c8e5a 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include <fmt/format.h> | 9 | #include <fmt/format.h> |
| 10 | 10 | ||
| 11 | #include "common/alignment.h" | ||
| 11 | #include "common/assert.h" | 12 | #include "common/assert.h" |
| 12 | #include "common/bit_util.h" | 13 | #include "common/bit_util.h" |
| 13 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -17,18 +18,119 @@ | |||
| 17 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 18 | 19 | ||
| 19 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | namespace { | ||
| 22 | // Maximum potential alignment of a Vulkan buffer | ||
| 23 | constexpr VkDeviceSize MAX_ALIGNMENT = 256; | ||
| 24 | // Largest request size that is served from the stream buffer | ||
| 25 | constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8 * 1024 * 1024; | ||
| 26 | // Stream buffer size in bytes | ||
| 27 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||
| 28 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | ||
| 29 | |||
| 30 | constexpr VkMemoryPropertyFlags HOST_FLAGS = | ||
| 31 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 32 | constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | ||
| 33 | |||
| 34 | bool IsStreamHeap(VkMemoryHeap heap) noexcept { | ||
| 35 | return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; | ||
| 36 | } | ||
| 37 | |||
| 38 | std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||
| 39 | VkMemoryPropertyFlags flags) noexcept { | ||
| 40 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||
| 41 | if (((type_mask >> type_index) & 1) == 0) { | ||
| 42 | // Memory type is incompatible | ||
| 43 | continue; | ||
| 44 | } | ||
| 45 | const VkMemoryType& memory_type = props.memoryTypes[type_index]; | ||
| 46 | if ((memory_type.propertyFlags & flags) != flags) { | ||
| 47 | // Memory type doesn't have the flags we want | ||
| 48 | continue; | ||
| 49 | } | ||
| 50 | if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { | ||
| 51 | // Memory heap is not suitable for streaming | ||
| 52 | continue; | ||
| 53 | } | ||
| 54 | // Success! | ||
| 55 | return type_index; | ||
| 56 | } | ||
| 57 | return std::nullopt; | ||
| 58 | } | ||
| 59 | |||
| 60 | u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask) { | ||
| 61 | // Try to find a DEVICE_LOCAL_BIT type; Nvidia and AMD have a dedicated heap for this | ||
| 62 | std::optional<u32> type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); | ||
| 63 | if (type) { | ||
| 64 | return *type; | ||
| 65 | } | ||
| 66 | // Otherwise try without the DEVICE_LOCAL_BIT | ||
| 67 | type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); | ||
| 68 | if (type) { | ||
| 69 | return *type; | ||
| 70 | } | ||
| 71 | // This should never happen, and in case it does, signal it as an out of memory situation | ||
| 72 | throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | ||
| 73 | } | ||
| 74 | |||
| 75 | size_t Region(size_t iterator) noexcept { | ||
| 76 | return iterator / REGION_SIZE; | ||
| 77 | } | ||
| 78 | } // Anonymous namespace | ||
| 20 | 79 | ||
| 21 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 80 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 22 | VKScheduler& scheduler_) | 81 | VKScheduler& scheduler_) |
| 23 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {} | 82 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |
| 83 | const vk::Device& dev = device.GetLogical(); | ||
| 84 | stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | ||
| 85 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||
| 86 | .pNext = nullptr, | ||
| 87 | .flags = 0, | ||
| 88 | .size = STREAM_BUFFER_SIZE, | ||
| 89 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | ||
| 90 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, | ||
| 91 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||
| 92 | .queueFamilyIndexCount = 0, | ||
| 93 | .pQueueFamilyIndices = nullptr, | ||
| 94 | }); | ||
| 95 | if (device.HasDebuggingToolAttached()) { | ||
| 96 | stream_buffer.SetObjectNameEXT("Stream Buffer"); | ||
| 97 | } | ||
| 98 | VkMemoryDedicatedRequirements dedicated_reqs{ | ||
| 99 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, | ||
| 100 | .pNext = nullptr, | ||
| 101 | .prefersDedicatedAllocation = VK_FALSE, | ||
| 102 | .requiresDedicatedAllocation = VK_FALSE, | ||
| 103 | }; | ||
| 104 | const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); | ||
| 105 | const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || | ||
| 106 | dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; | ||
| 107 | const VkMemoryDedicatedAllocateInfo dedicated_info{ | ||
| 108 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, | ||
| 109 | .pNext = nullptr, | ||
| 110 | .image = nullptr, | ||
| 111 | .buffer = *stream_buffer, | ||
| 112 | }; | ||
| 113 | const auto memory_properties = device.GetPhysical().GetMemoryProperties(); | ||
| 114 | stream_memory = dev.AllocateMemory(VkMemoryAllocateInfo{ | ||
| 115 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||
| 116 | .pNext = make_dedicated ? &dedicated_info : nullptr, | ||
| 117 | .allocationSize = requirements.size, | ||
| 118 | .memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits), | ||
| 119 | }); | ||
| 120 | if (device.HasDebuggingToolAttached()) { | ||
| 121 | stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | ||
| 122 | } | ||
| 123 | stream_buffer.BindMemory(*stream_memory, 0); | ||
| 124 | stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | ||
| 125 | } | ||
| 24 | 126 | ||
| 25 | StagingBufferPool::~StagingBufferPool() = default; | 127 | StagingBufferPool::~StagingBufferPool() = default; |
| 26 | 128 | ||
| 27 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { | 129 | StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage) { |
| 28 | if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { | 130 | if (usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { |
| 29 | return *ref; | 131 | return GetStreamBuffer(size); |
| 30 | } | 132 | } |
| 31 | return CreateStagingBuffer(size, usage); | 133 | return GetStagingBuffer(size, usage); |
| 32 | } | 134 | } |
| 33 | 135 | ||
| 34 | void StagingBufferPool::TickFrame() { | 136 | void StagingBufferPool::TickFrame() { |
| @@ -39,6 +141,51 @@ void StagingBufferPool::TickFrame() { | |||
| 39 | ReleaseCache(MemoryUsage::Download); | 141 | ReleaseCache(MemoryUsage::Download); |
| 40 | } | 142 | } |
| 41 | 143 | ||
| 144 | StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | ||
| 145 | if (AreRegionsActive(Region(free_iterator) + 1, | ||
| 146 | std::min(Region(iterator + size) + 1, NUM_SYNCS))) { | ||
| 147 | // Avoid waiting for the previous usages to be free | ||
| 148 | return GetStagingBuffer(size, MemoryUsage::Upload); | ||
| 149 | } | ||
| 150 | const u64 current_tick = scheduler.CurrentTick(); | ||
| 151 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator), | ||
| 152 | current_tick); | ||
| 153 | used_iterator = iterator; | ||
| 154 | free_iterator = std::max(free_iterator, iterator + size); | ||
| 155 | |||
| 156 | if (iterator + size > STREAM_BUFFER_SIZE) { | ||
| 157 | std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, | ||
| 158 | current_tick); | ||
| 159 | used_iterator = 0; | ||
| 160 | iterator = 0; | ||
| 161 | free_iterator = size; | ||
| 162 | |||
| 163 | if (AreRegionsActive(0, Region(size) + 1)) { | ||
| 164 | // Avoid waiting for the previous usages to be free | ||
| 165 | return GetStagingBuffer(size, MemoryUsage::Upload); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | const size_t offset = iterator; | ||
| 169 | iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT); | ||
| 170 | return StagingBufferRef{ | ||
| 171 | .buffer = *stream_buffer, | ||
| 172 | .offset = static_cast<VkDeviceSize>(offset), | ||
| 173 | .mapped_span = std::span<u8>(stream_pointer + offset, size), | ||
| 174 | }; | ||
| 175 | } | ||
| 176 | |||
| 177 | bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const { | ||
| 178 | return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end, | ||
| 179 | [this](u64 sync_tick) { return !scheduler.IsFree(sync_tick); }); | ||
| 180 | } | ||
| 181 | |||
| 182 | StagingBufferRef StagingBufferPool::GetStagingBuffer(size_t size, MemoryUsage usage) { | ||
| 183 | if (const std::optional<StagingBufferRef> ref = TryGetReservedBuffer(size, usage)) { | ||
| 184 | return *ref; | ||
| 185 | } | ||
| 186 | return CreateStagingBuffer(size, usage); | ||
| 187 | } | ||
| 188 | |||
| 42 | std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, | 189 | std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t size, |
| 43 | MemoryUsage usage) { | 190 | MemoryUsage usage) { |
| 44 | StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; | 191 | StagingBuffers& cache_level = GetCache(usage)[Common::Log2Ceil64(size)]; |
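The upload path now serves small requests from a persistently mapped 128 MiB stream buffer split into NUM_SYNCS regions; each region remembers the scheduler tick that last wrote it, and when the needed regions are still in flight the pool falls back to an ordinary staging buffer instead of stalling. A simplified, self-contained sketch of that ring-with-ticks technique (not the pool's exact bookkeeping, which also tracks used/free iterators across submissions):

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

class StreamRingSketch {
public:
    static constexpr std::size_t NUM_SYNCS = 16;
    static constexpr std::size_t SIZE = 128 * 1024 * 1024;
    static constexpr std::size_t REGION = SIZE / NUM_SYNCS;
    static constexpr std::size_t ALIGNMENT = 256;

    // gpu_free(tick) should report whether the GPU has signaled that tick,
    // e.g. by forwarding to the scheduler's IsFree().
    template <typename IsFreeFn>
    std::optional<std::size_t> Allocate(std::size_t size, std::uint64_t current_tick,
                                        IsFreeFn&& gpu_free) {
        if (size == 0 || size > SIZE) {
            return std::nullopt;
        }
        if (iterator + size > SIZE) {
            iterator = 0; // wrap to the start of the ring
        }
        const std::size_t first = Region(iterator);
        const std::size_t last = std::min(Region(iterator + size - 1) + 1, NUM_SYNCS);
        for (std::size_t region = first; region < last; ++region) {
            // A region is reusable once the GPU finished with it, or when it
            // was last written by the submission we are still recording.
            if (ticks[region] != current_tick && !gpu_free(ticks[region])) {
                return std::nullopt; // still in flight: caller uses a staging buffer
            }
        }
        std::fill(ticks.begin() + first, ticks.begin() + last, current_tick);
        const std::size_t offset = iterator;
        iterator = ((iterator + size) + ALIGNMENT - 1) / ALIGNMENT * ALIGNMENT;
        return offset;
    }

private:
    static std::size_t Region(std::size_t offset) noexcept {
        return offset / REGION;
    }

    std::size_t iterator = 0;
    std::array<std::uint64_t, NUM_SYNCS> ticks{};
};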
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index d42918a47..69f7618de 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -19,11 +19,14 @@ class VKScheduler; | |||
| 19 | 19 | ||
| 20 | struct StagingBufferRef { | 20 | struct StagingBufferRef { |
| 21 | VkBuffer buffer; | 21 | VkBuffer buffer; |
| 22 | VkDeviceSize offset; | ||
| 22 | std::span<u8> mapped_span; | 23 | std::span<u8> mapped_span; |
| 23 | }; | 24 | }; |
| 24 | 25 | ||
| 25 | class StagingBufferPool { | 26 | class StagingBufferPool { |
| 26 | public: | 27 | public: |
| 28 | static constexpr size_t NUM_SYNCS = 16; | ||
| 29 | |||
| 27 | explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, | 30 | explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator, |
| 28 | VKScheduler& scheduler); | 31 | VKScheduler& scheduler); |
| 29 | ~StagingBufferPool(); | 32 | ~StagingBufferPool(); |
| @@ -33,6 +36,11 @@ public: | |||
| 33 | void TickFrame(); | 36 | void TickFrame(); |
| 34 | 37 | ||
| 35 | private: | 38 | private: |
| 39 | struct StreamBufferCommit { | ||
| 40 | size_t upper_bound; | ||
| 41 | u64 tick; | ||
| 42 | }; | ||
| 43 | |||
| 36 | struct StagingBuffer { | 44 | struct StagingBuffer { |
| 37 | vk::Buffer buffer; | 45 | vk::Buffer buffer; |
| 38 | MemoryCommit commit; | 46 | MemoryCommit commit; |
| @@ -42,6 +50,7 @@ private: | |||
| 42 | StagingBufferRef Ref() const noexcept { | 50 | StagingBufferRef Ref() const noexcept { |
| 43 | return { | 51 | return { |
| 44 | .buffer = *buffer, | 52 | .buffer = *buffer, |
| 53 | .offset = 0, | ||
| 45 | .mapped_span = mapped_span, | 54 | .mapped_span = mapped_span, |
| 46 | }; | 55 | }; |
| 47 | } | 56 | } |
| @@ -56,6 +65,12 @@ private: | |||
| 56 | static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; | 65 | static constexpr size_t NUM_LEVELS = sizeof(size_t) * CHAR_BIT; |
| 57 | using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; | 66 | using StagingBuffersCache = std::array<StagingBuffers, NUM_LEVELS>; |
| 58 | 67 | ||
| 68 | StagingBufferRef GetStreamBuffer(size_t size); | ||
| 69 | |||
| 70 | bool AreRegionsActive(size_t region_begin, size_t region_end) const; | ||
| 71 | |||
| 72 | StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage); | ||
| 73 | |||
| 59 | std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); | 74 | std::optional<StagingBufferRef> TryGetReservedBuffer(size_t size, MemoryUsage usage); |
| 60 | 75 | ||
| 61 | StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); | 76 | StagingBufferRef CreateStagingBuffer(size_t size, MemoryUsage usage); |
| @@ -70,6 +85,15 @@ private: | |||
| 70 | MemoryAllocator& memory_allocator; | 85 | MemoryAllocator& memory_allocator; |
| 71 | VKScheduler& scheduler; | 86 | VKScheduler& scheduler; |
| 72 | 87 | ||
| 88 | vk::Buffer stream_buffer; | ||
| 89 | vk::DeviceMemory stream_memory; | ||
| 90 | u8* stream_pointer = nullptr; | ||
| 91 | |||
| 92 | size_t iterator = 0; | ||
| 93 | size_t used_iterator = 0; | ||
| 94 | size_t free_iterator = 0; | ||
| 95 | std::array<u64, NUM_SYNCS> sync_ticks{}; | ||
| 96 | |||
| 73 | StagingBuffersCache device_local_cache; | 97 | StagingBuffersCache device_local_cache; |
| 74 | StagingBuffersCache upload_cache; | 98 | StagingBuffersCache upload_cache; |
| 75 | StagingBuffersCache download_cache; | 99 | StagingBuffersCache download_cache; |
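The stream buffer's backing memory is chosen in two passes in the constructor shown above: first a type that is DEVICE_LOCAL as well as HOST_VISIBLE | HOST_COHERENT (the small device-local, host-visible heap that Nvidia and AMD expose), then a plain host-visible type as a fallback. A sketch of that selection, omitting the heap-size check from the real code; PickStreamMemoryType is a hypothetical free function and type_mask comes from VkMemoryRequirements::memoryTypeBits:

#include <cstdint>
#include <optional>

#include <vulkan/vulkan.h>

std::optional<std::uint32_t> PickStreamMemoryType(
    const VkPhysicalDeviceMemoryProperties& props, std::uint32_t type_mask) {
    constexpr VkMemoryPropertyFlags host =
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    constexpr VkMemoryPropertyFlags preferred =
        host | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    for (const VkMemoryPropertyFlags wanted : {preferred, host}) {
        for (std::uint32_t index = 0; index < props.memoryTypeCount; ++index) {
            if (((type_mask >> index) & 1U) == 0) {
                continue; // incompatible with the buffer's requirements
            }
            if ((props.memoryTypes[index].propertyFlags & wanted) != wanted) {
                continue; // missing one of the requested property bits
            }
            return index;
        }
    }
    return std::nullopt; // treated as an out-of-device-memory situation by the caller
}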
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 1779a2e30..e81fad007 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp | |||
| @@ -30,15 +30,18 @@ using Table = Maxwell3D::DirtyState::Table; | |||
| 30 | using Flags = Maxwell3D::DirtyState::Flags; | 30 | using Flags = Maxwell3D::DirtyState::Flags; |
| 31 | 31 | ||
| 32 | Flags MakeInvalidationFlags() { | 32 | Flags MakeInvalidationFlags() { |
| 33 | static constexpr std::array INVALIDATION_FLAGS{ | 33 | static constexpr int INVALIDATION_FLAGS[]{ |
| 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, | 34 | Viewports, Scissors, DepthBias, BlendConstants, DepthBounds, |
| 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, | 35 | StencilProperties, CullMode, DepthBoundsEnable, DepthTestEnable, DepthWriteEnable, |
| 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, | 36 | DepthCompareOp, FrontFace, StencilOp, StencilTestEnable, VertexBuffers, |
| 37 | }; | 37 | }; |
| 38 | Flags flags{}; | 38 | Flags flags{}; |
| 39 | for (const int flag : INVALIDATION_FLAGS) { | 39 | for (const int flag : INVALIDATION_FLAGS) { |
| 40 | flags[flag] = true; | 40 | flags[flag] = true; |
| 41 | } | 41 | } |
| 42 | for (int index = VertexBuffer0; index <= VertexBuffer31; ++index) { | ||
| 43 | flags[index] = true; | ||
| 44 | } | ||
| 42 | return flags; | 45 | return flags; |
| 43 | } | 46 | } |
| 44 | 47 | ||
| @@ -130,7 +133,7 @@ void SetupDirtyStencilTestEnable(Tables& tables) { | |||
| 130 | StateTracker::StateTracker(Tegra::GPU& gpu) | 133 | StateTracker::StateTracker(Tegra::GPU& gpu) |
| 131 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { | 134 | : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { |
| 132 | auto& tables = gpu.Maxwell3D().dirty.tables; | 135 | auto& tables = gpu.Maxwell3D().dirty.tables; |
| 133 | SetupDirtyRenderTargets(tables); | 136 | SetupDirtyFlags(tables); |
| 134 | SetupDirtyViewports(tables); | 137 | SetupDirtyViewports(tables); |
| 135 | SetupDirtyScissors(tables); | 138 | SetupDirtyScissors(tables); |
| 136 | SetupDirtyDepthBias(tables); | 139 | SetupDirtyDepthBias(tables); |
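The invalidation mask now also covers the 32 per-binding vertex buffer flags, so every vertex buffer is re-bound after a command buffer is recycled. A compact sketch of the dirty-flag pattern with made-up state ids (Maxwell3D's real dirty table is much larger):

#include <bitset>
#include <cstddef>

namespace sketch {
enum : std::size_t {
    Viewports,
    Scissors,
    VertexBuffer0,
    VertexBuffer31 = VertexBuffer0 + 31,
    NumFlags,
};
using Flags = std::bitset<NumFlags>;

// Precompute the set of state that must be re-emitted after a reset.
Flags MakeInvalidationMask() {
    Flags mask;
    mask[Viewports] = true;
    mask[Scissors] = true;
    for (std::size_t index = VertexBuffer0; index <= VertexBuffer31; ++index) {
        mask[index] = true;
    }
    return mask;
}

// Applied whenever a new command buffer starts recording.
void InvalidateCommandBufferState(Flags& dirty_flags, const Flags& invalidation_mask) {
    dirty_flags |= invalidation_mask; // the next draw re-applies everything masked
}
} // namespace sketch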
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 725a2a05d..0b63bd6c8 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp | |||
| @@ -56,8 +56,11 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi | |||
| 56 | 56 | ||
| 57 | } // Anonymous namespace | 57 | } // Anonymous namespace |
| 58 | 58 | ||
| 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_) | 59 | VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const Device& device_, VKScheduler& scheduler_, |
| 60 | : surface{surface_}, device{device_}, scheduler{scheduler_} {} | 60 | u32 width, u32 height, bool srgb) |
| 61 | : surface{surface_}, device{device_}, scheduler{scheduler_} { | ||
| 62 | Create(width, height, srgb); | ||
| 63 | } | ||
| 61 | 64 | ||
| 62 | VKSwapchain::~VKSwapchain() = default; | 65 | VKSwapchain::~VKSwapchain() = default; |
| 63 | 66 | ||
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 2eadd62b3..a728511e0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h | |||
| @@ -20,7 +20,8 @@ class VKScheduler; | |||
| 20 | 20 | ||
| 21 | class VKSwapchain { | 21 | class VKSwapchain { |
| 22 | public: | 22 | public: |
| 23 | explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler); | 23 | explicit VKSwapchain(VkSurfaceKHR surface, const Device& device, VKScheduler& scheduler, |
| 24 | u32 width, u32 height, bool srgb); | ||
| 24 | ~VKSwapchain(); | 25 | ~VKSwapchain(); |
| 25 | 26 | ||
| 26 | /// Creates (or recreates) the swapchain with a given size. | 27 | /// Creates (or recreates) the swapchain with a given size. |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index aa7c5d7c6..22a1014a9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -426,46 +426,47 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 426 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, | 426 | void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, |
| 427 | VkImageAspectFlags aspect_mask, bool is_initialized, | 427 | VkImageAspectFlags aspect_mask, bool is_initialized, |
| 428 | std::span<const VkBufferImageCopy> copies) { | 428 | std::span<const VkBufferImageCopy> copies) { |
| 429 | static constexpr VkAccessFlags ACCESS_FLAGS = VK_ACCESS_SHADER_WRITE_BIT | | 429 | static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = |
| 430 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | 430 | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 431 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; | 431 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; |
| 432 | static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT | | ||
| 433 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 434 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; | ||
| 432 | const VkImageMemoryBarrier read_barrier{ | 435 | const VkImageMemoryBarrier read_barrier{ |
| 433 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 436 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 434 | .pNext = nullptr, | 437 | .pNext = nullptr, |
| 435 | .srcAccessMask = ACCESS_FLAGS, | 438 | .srcAccessMask = WRITE_ACCESS_FLAGS, |
| 436 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 439 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 437 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, | 440 | .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, |
| 438 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 441 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 439 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 442 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 440 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 443 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 441 | .image = image, | 444 | .image = image, |
| 442 | .subresourceRange = | 445 | .subresourceRange{ |
| 443 | { | 446 | .aspectMask = aspect_mask, |
| 444 | .aspectMask = aspect_mask, | 447 | .baseMipLevel = 0, |
| 445 | .baseMipLevel = 0, | 448 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 446 | .levelCount = VK_REMAINING_MIP_LEVELS, | 449 | .baseArrayLayer = 0, |
| 447 | .baseArrayLayer = 0, | 450 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 448 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 451 | }, |
| 449 | }, | ||
| 450 | }; | 452 | }; |
| 451 | const VkImageMemoryBarrier write_barrier{ | 453 | const VkImageMemoryBarrier write_barrier{ |
| 452 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 454 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 453 | .pNext = nullptr, | 455 | .pNext = nullptr, |
| 454 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 456 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 455 | .dstAccessMask = ACCESS_FLAGS, | 457 | .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, |
| 456 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 458 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| 457 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 459 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 458 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 460 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 459 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 461 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 460 | .image = image, | 462 | .image = image, |
| 461 | .subresourceRange = | 463 | .subresourceRange{ |
| 462 | { | 464 | .aspectMask = aspect_mask, |
| 463 | .aspectMask = aspect_mask, | 465 | .baseMipLevel = 0, |
| 464 | .baseMipLevel = 0, | 466 | .levelCount = VK_REMAINING_MIP_LEVELS, |
| 465 | .levelCount = VK_REMAINING_MIP_LEVELS, | 467 | .baseArrayLayer = 0, |
| 466 | .baseArrayLayer = 0, | 468 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 467 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 469 | }, |
| 468 | }, | ||
| 469 | }; | 470 | }; |
| 470 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, | 471 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, |
| 471 | read_barrier); | 472 | read_barrier); |
| @@ -569,20 +570,12 @@ void TextureCacheRuntime::Finish() { | |||
| 569 | scheduler.Finish(); | 570 | scheduler.Finish(); |
| 570 | } | 571 | } |
| 571 | 572 | ||
| 572 | ImageBufferMap TextureCacheRuntime::MapUploadBuffer(size_t size) { | 573 | StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { |
| 573 | const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Upload); | 574 | return staging_buffer_pool.Request(size, MemoryUsage::Upload); |
| 574 | return { | ||
| 575 | .handle = staging_ref.buffer, | ||
| 576 | .span = staging_ref.mapped_span, | ||
| 577 | }; | ||
| 578 | } | 575 | } |
| 579 | 576 | ||
| 580 | ImageBufferMap TextureCacheRuntime::MapDownloadBuffer(size_t size) { | 577 | StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { |
| 581 | const auto staging_ref = staging_buffer_pool.Request(size, MemoryUsage::Download); | 578 | return staging_buffer_pool.Request(size, MemoryUsage::Download); |
| 582 | return { | ||
| 583 | .handle = staging_ref.buffer, | ||
| 584 | .span = staging_ref.mapped_span, | ||
| 585 | }; | ||
| 586 | } | 579 | } |
| 587 | 580 | ||
| 588 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 581 | void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| @@ -754,7 +747,7 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 754 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | 747 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 755 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | 748 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 756 | VK_ACCESS_TRANSFER_WRITE_BIT, | 749 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 757 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 750 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, |
| 758 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 751 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 759 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | 752 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 760 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 753 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| @@ -765,12 +758,9 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | |||
| 765 | VkImageMemoryBarrier{ | 758 | VkImageMemoryBarrier{ |
| 766 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 759 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 767 | .pNext = nullptr, | 760 | .pNext = nullptr, |
| 768 | .srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | | 761 | .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | |
| 769 | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | | ||
| 770 | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||
| 771 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | | ||
| 772 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | 762 | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | |
| 773 | VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | 763 | VK_ACCESS_TRANSFER_WRITE_BIT, |
| 774 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 764 | .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 775 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | 765 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, |
| 776 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | 766 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, |
| @@ -828,12 +818,11 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ | |||
| 828 | } | 818 | } |
| 829 | } | 819 | } |
| 830 | 820 | ||
| 831 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 821 | void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 832 | std::span<const BufferImageCopy> copies) { | ||
| 833 | // TODO: Move this to another API | 822 | // TODO: Move this to another API |
| 834 | scheduler->RequestOutsideRenderPassOperationContext(); | 823 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 835 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 824 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 836 | const VkBuffer src_buffer = map.handle; | 825 | const VkBuffer src_buffer = map.buffer; |
| 837 | const VkImage vk_image = *image; | 826 | const VkImage vk_image = *image; |
| 838 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 827 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| 839 | const bool is_initialized = std::exchange(initialized, true); | 828 | const bool is_initialized = std::exchange(initialized, true); |
| @@ -843,12 +832,12 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 843 | }); | 832 | }); |
| 844 | } | 833 | } |
| 845 | 834 | ||
| 846 | void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 835 | void Image::UploadMemory(const StagingBufferRef& map, |
| 847 | std::span<const VideoCommon::BufferCopy> copies) { | 836 | std::span<const VideoCommon::BufferCopy> copies) { |
| 848 | // TODO: Move this to another API | 837 | // TODO: Move this to another API |
| 849 | scheduler->RequestOutsideRenderPassOperationContext(); | 838 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 850 | std::vector vk_copies = TransformBufferCopies(copies, buffer_offset); | 839 | std::vector vk_copies = TransformBufferCopies(copies, map.offset); |
| 851 | const VkBuffer src_buffer = map.handle; | 840 | const VkBuffer src_buffer = map.buffer; |
| 852 | const VkBuffer dst_buffer = *buffer; | 841 | const VkBuffer dst_buffer = *buffer; |
| 853 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { | 842 | scheduler->Record([src_buffer, dst_buffer, vk_copies](vk::CommandBuffer cmdbuf) { |
| 854 | // TODO: Barriers | 843 | // TODO: Barriers |
| @@ -856,13 +845,57 @@ void Image::UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | |||
| 856 | }); | 845 | }); |
| 857 | } | 846 | } |
| 858 | 847 | ||
| 859 | void Image::DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | 848 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 860 | std::span<const BufferImageCopy> copies) { | 849 | std::vector vk_copies = TransformBufferImageCopies(copies, map.offset, aspect_mask); |
| 861 | std::vector vk_copies = TransformBufferImageCopies(copies, buffer_offset, aspect_mask); | 850 | scheduler->Record([buffer = map.buffer, image = *image, aspect_mask = aspect_mask, |
| 862 | scheduler->Record([buffer = map.handle, image = *image, aspect_mask = aspect_mask, | ||
| 863 | vk_copies](vk::CommandBuffer cmdbuf) { | 851 | vk_copies](vk::CommandBuffer cmdbuf) { |
| 864 | // TODO: Barriers | 852 | const VkImageMemoryBarrier read_barrier{ |
| 865 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_GENERAL, buffer, vk_copies); | 853 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 854 | .pNext = nullptr, | ||
| 855 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 856 | .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||
| 857 | .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 858 | .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 859 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 860 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 861 | .image = image, | ||
| 862 | .subresourceRange{ | ||
| 863 | .aspectMask = aspect_mask, | ||
| 864 | .baseMipLevel = 0, | ||
| 865 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 866 | .baseArrayLayer = 0, | ||
| 867 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 868 | }, | ||
| 869 | }; | ||
| 870 | const VkImageMemoryBarrier image_write_barrier{ | ||
| 871 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||
| 872 | .pNext = nullptr, | ||
| 873 | .srcAccessMask = 0, | ||
| 874 | .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 875 | .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||
| 876 | .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||
| 877 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 878 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||
| 879 | .image = image, | ||
| 880 | .subresourceRange{ | ||
| 881 | .aspectMask = aspect_mask, | ||
| 882 | .baseMipLevel = 0, | ||
| 883 | .levelCount = VK_REMAINING_MIP_LEVELS, | ||
| 884 | .baseArrayLayer = 0, | ||
| 885 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | ||
| 886 | }, | ||
| 887 | }; | ||
| 888 | const VkMemoryBarrier memory_write_barrier{ | ||
| 889 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 890 | .pNext = nullptr, | ||
| 891 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 892 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 893 | }; | ||
| 894 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 895 | 0, read_barrier); | ||
| 896 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); | ||
| 897 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||
| 898 | 0, memory_write_barrier, nullptr, image_write_barrier); | ||
| 866 | }); | 899 | }); |
| 867 | } | 900 | } |
| 868 | 901 | ||
| @@ -1127,7 +1160,7 @@ Framebuffer::Framebuffer(TextureCacheRuntime& runtime, std::span<ImageView*, NUM | |||
| 1127 | .pAttachments = attachments.data(), | 1160 | .pAttachments = attachments.data(), |
| 1128 | .width = key.size.width, | 1161 | .width = key.size.width, |
| 1129 | .height = key.size.height, | 1162 | .height = key.size.height, |
| 1130 | .layers = static_cast<u32>(num_layers), | 1163 | .layers = static_cast<u32>(std::max(num_layers, 1)), |
| 1131 | }); | 1164 | }); |
| 1132 | if (runtime.device.HasDebuggingToolAttached()) { | 1165 | if (runtime.device.HasDebuggingToolAttached()) { |
| 1133 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); | 1166 | framebuffer.SetObjectNameEXT(VideoCommon::Name(key).c_str()); |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 8d29361a1..b08c23459 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <compare> | 7 | #include <compare> |
| 8 | #include <span> | 8 | #include <span> |
| 9 | 9 | ||
| 10 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | ||
| 10 | #include "video_core/texture_cache/texture_cache.h" | 11 | #include "video_core/texture_cache/texture_cache.h" |
| 11 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 12 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 12 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| @@ -53,19 +54,6 @@ struct hash<Vulkan::RenderPassKey> { | |||
| 53 | 54 | ||
| 54 | namespace Vulkan { | 55 | namespace Vulkan { |
| 55 | 56 | ||
| 56 | struct ImageBufferMap { | ||
| 57 | [[nodiscard]] VkBuffer Handle() const noexcept { | ||
| 58 | return handle; | ||
| 59 | } | ||
| 60 | |||
| 61 | [[nodiscard]] std::span<u8> Span() const noexcept { | ||
| 62 | return span; | ||
| 63 | } | ||
| 64 | |||
| 65 | VkBuffer handle; | ||
| 66 | std::span<u8> span; | ||
| 67 | }; | ||
| 68 | |||
| 69 | struct TextureCacheRuntime { | 57 | struct TextureCacheRuntime { |
| 70 | const Device& device; | 58 | const Device& device; |
| 71 | VKScheduler& scheduler; | 59 | VKScheduler& scheduler; |
| @@ -76,9 +64,9 @@ struct TextureCacheRuntime { | |||
| 76 | 64 | ||
| 77 | void Finish(); | 65 | void Finish(); |
| 78 | 66 | ||
| 79 | [[nodiscard]] ImageBufferMap MapUploadBuffer(size_t size); | 67 | [[nodiscard]] StagingBufferRef UploadStagingBuffer(size_t size); |
| 80 | 68 | ||
| 81 | [[nodiscard]] ImageBufferMap MapDownloadBuffer(size_t size); | 69 | [[nodiscard]] StagingBufferRef DownloadStagingBuffer(size_t size); |
| 82 | 70 | ||
| 83 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, | 71 | void BlitImage(Framebuffer* dst_framebuffer, ImageView& dst, ImageView& src, |
| 84 | const std::array<Offset2D, 2>& dst_region, | 72 | const std::array<Offset2D, 2>& dst_region, |
| @@ -94,7 +82,7 @@ struct TextureCacheRuntime { | |||
| 94 | return false; | 82 | return false; |
| 95 | } | 83 | } |
| 96 | 84 | ||
| 97 | void AccelerateImageUpload(Image&, const ImageBufferMap&, size_t, | 85 | void AccelerateImageUpload(Image&, const StagingBufferRef&, |
| 98 | std::span<const VideoCommon::SwizzleParameters>) { | 86 | std::span<const VideoCommon::SwizzleParameters>) { |
| 99 | UNREACHABLE(); | 87 | UNREACHABLE(); |
| 100 | } | 88 | } |
| @@ -112,13 +100,12 @@ public: | |||
| 112 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, | 100 | explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, |
| 113 | VAddr cpu_addr); | 101 | VAddr cpu_addr); |
| 114 | 102 | ||
| 115 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 103 | void UploadMemory(const StagingBufferRef& map, |
| 116 | std::span<const VideoCommon::BufferImageCopy> copies); | 104 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 117 | 105 | ||
| 118 | void UploadMemory(const ImageBufferMap& map, size_t buffer_offset, | 106 | void UploadMemory(const StagingBufferRef& map, std::span<const VideoCommon::BufferCopy> copies); |
| 119 | std::span<const VideoCommon::BufferCopy> copies); | ||
| 120 | 107 | ||
| 121 | void DownloadMemory(const ImageBufferMap& map, size_t buffer_offset, | 108 | void DownloadMemory(const StagingBufferRef& map, |
| 122 | std::span<const VideoCommon::BufferImageCopy> copies); | 109 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 123 | 110 | ||
| 124 | [[nodiscard]] VkImage Handle() const noexcept { | 111 | [[nodiscard]] VkImage Handle() const noexcept { |
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 0dbb1a31f..7fdff6e56 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h | |||
| @@ -9,16 +9,7 @@ | |||
| 9 | #include <shared_mutex> | 9 | #include <shared_mutex> |
| 10 | #include <thread> | 10 | #include <thread> |
| 11 | 11 | ||
| 12 | // This header includes both Vulkan and OpenGL headers, this has to be fixed | ||
| 13 | // Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues. | ||
| 14 | // Forcefully include glad early and undefine macros | ||
| 15 | #include <glad/glad.h> | 12 | #include <glad/glad.h> |
| 16 | #ifdef CreateEvent | ||
| 17 | #undef CreateEvent | ||
| 18 | #endif | ||
| 19 | #ifdef CreateSemaphore | ||
| 20 | #undef CreateSemaphore | ||
| 21 | #endif | ||
| 22 | 13 | ||
| 23 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| 24 | #include "video_core/renderer_opengl/gl_device.h" | 15 | #include "video_core/renderer_opengl/gl_device.h" |
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp index d3ea07aac..5f88537bc 100644 --- a/src/video_core/shader/decode/other.cpp +++ b/src/video_core/shader/decode/other.cpp | |||
| @@ -76,6 +76,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
| 76 | case SystemVariable::InvocationId: | 76 | case SystemVariable::InvocationId: |
| 77 | return Operation(OperationCode::InvocationId); | 77 | return Operation(OperationCode::InvocationId); |
| 78 | case SystemVariable::Ydirection: | 78 | case SystemVariable::Ydirection: |
| 79 | uses_y_negate = true; | ||
| 79 | return Operation(OperationCode::YNegate); | 80 | return Operation(OperationCode::YNegate); |
| 80 | case SystemVariable::InvocationInfo: | 81 | case SystemVariable::InvocationInfo: |
| 81 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); | 82 | LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete"); |
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 0c6ab0f07..1cd7c14d7 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h | |||
| @@ -139,6 +139,10 @@ public: | |||
| 139 | return uses_legacy_varyings; | 139 | return uses_legacy_varyings; |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | bool UsesYNegate() const { | ||
| 143 | return uses_y_negate; | ||
| 144 | } | ||
| 145 | |||
| 142 | bool UsesWarps() const { | 146 | bool UsesWarps() const { |
| 143 | return uses_warps; | 147 | return uses_warps; |
| 144 | } | 148 | } |
| @@ -465,6 +469,7 @@ private: | |||
| 465 | bool uses_instance_id{}; | 469 | bool uses_instance_id{}; |
| 466 | bool uses_vertex_id{}; | 470 | bool uses_vertex_id{}; |
| 467 | bool uses_legacy_varyings{}; | 471 | bool uses_legacy_varyings{}; |
| 472 | bool uses_y_negate{}; | ||
| 468 | bool uses_warps{}; | 473 | bool uses_warps{}; |
| 469 | bool uses_indexed_samplers{}; | 474 | bool uses_indexed_samplers{}; |
| 470 | 475 | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d1080300f..b1da69971 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -103,9 +103,6 @@ public: | |||
| 103 | /// Notify the cache that a new frame has been queued | 103 | /// Notify the cache that a new frame has been queued |
| 104 | void TickFrame(); | 104 | void TickFrame(); |
| 105 | 105 | ||
| 106 | /// Return an unique mutually exclusive lock for the cache | ||
| 107 | [[nodiscard]] std::unique_lock<std::mutex> AcquireLock(); | ||
| 108 | |||
| 109 | /// Return a constant reference to the given image view id | 106 | /// Return a constant reference to the given image view id |
| 110 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | 107 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; |
| 111 | 108 | ||
| @@ -179,6 +176,8 @@ public: | |||
| 179 | /// Return true when a CPU region is modified from the GPU | 176 | /// Return true when a CPU region is modified from the GPU |
| 180 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 177 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 181 | 178 | ||
| 179 | std::mutex mutex; | ||
| 180 | |||
| 182 | private: | 181 | private: |
| 183 | /// Iterate over all page indices in a range | 182 | /// Iterate over all page indices in a range |
| 184 | template <typename Func> | 183 | template <typename Func> |
| @@ -212,8 +211,8 @@ private: | |||
| 212 | void RefreshContents(Image& image); | 211 | void RefreshContents(Image& image); |
| 213 | 212 | ||
| 214 | /// Upload data from guest to an image | 213 | /// Upload data from guest to an image |
| 215 | template <typename MapBuffer> | 214 | template <typename StagingBuffer> |
| 216 | void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); | 215 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer); |
| 217 | 216 | ||
| 218 | /// Find or create an image view from a guest descriptor | 217 | /// Find or create an image view from a guest descriptor |
| 219 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | 218 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); |
| @@ -325,8 +324,6 @@ private: | |||
| 325 | 324 | ||
| 326 | RenderTargets render_targets; | 325 | RenderTargets render_targets; |
| 327 | 326 | ||
| 328 | std::mutex mutex; | ||
| 329 | |||
| 330 | std::unordered_map<TICEntry, ImageViewId> image_views; | 327 | std::unordered_map<TICEntry, ImageViewId> image_views; |
| 331 | std::unordered_map<TSCEntry, SamplerId> samplers; | 328 | std::unordered_map<TSCEntry, SamplerId> samplers; |
| 332 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | 329 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; |
| @@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() { | |||
| 386 | } | 383 | } |
| 387 | 384 | ||
| 388 | template <class P> | 385 | template <class P> |
| 389 | std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() { | ||
| 390 | return std::unique_lock{mutex}; | ||
| 391 | } | ||
| 392 | |||
| 393 | template <class P> | ||
| 394 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | 386 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { |
| 395 | return slot_image_views[id]; | 387 | return slot_image_views[id]; |
| 396 | } | 388 | } |
| @@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | |||
| 598 | }); | 590 | }); |
| 599 | for (const ImageId image_id : images) { | 591 | for (const ImageId image_id : images) { |
| 600 | Image& image = slot_images[image_id]; | 592 | Image& image = slot_images[image_id]; |
| 601 | auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); | 593 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); |
| 602 | const auto copies = FullDownloadCopies(image.info); | 594 | const auto copies = FullDownloadCopies(image.info); |
| 603 | image.DownloadMemory(map, 0, copies); | 595 | image.DownloadMemory(map, copies); |
| 604 | runtime.Finish(); | 596 | runtime.Finish(); |
| 605 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); | 597 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); |
| 606 | } | 598 | } |
| 607 | } | 599 | } |
| 608 | 600 | ||
| @@ -757,25 +749,25 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 757 | for (const ImageId image_id : download_ids) { | 749 | for (const ImageId image_id : download_ids) { |
| 758 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 750 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; |
| 759 | } | 751 | } |
| 760 | auto download_map = runtime.MapDownloadBuffer(total_size_bytes); | 752 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 761 | size_t buffer_offset = 0; | 753 | const size_t original_offset = download_map.offset; |
| 762 | for (const ImageId image_id : download_ids) { | 754 | for (const ImageId image_id : download_ids) { |
| 763 | Image& image = slot_images[image_id]; | 755 | Image& image = slot_images[image_id]; |
| 764 | const auto copies = FullDownloadCopies(image.info); | 756 | const auto copies = FullDownloadCopies(image.info); |
| 765 | image.DownloadMemory(download_map, buffer_offset, copies); | 757 | image.DownloadMemory(download_map, copies); |
| 766 | buffer_offset += image.unswizzled_size_bytes; | 758 | download_map.offset += image.unswizzled_size_bytes; |
| 767 | } | 759 | } |
| 768 | // Wait for downloads to finish | 760 | // Wait for downloads to finish |
| 769 | runtime.Finish(); | 761 | runtime.Finish(); |
| 770 | 762 | ||
| 771 | buffer_offset = 0; | 763 | download_map.offset = original_offset; |
| 772 | const std::span<u8> download_span = download_map.Span(); | 764 | std::span<u8> download_span = download_map.mapped_span; |
| 773 | for (const ImageId image_id : download_ids) { | 765 | for (const ImageId image_id : download_ids) { |
| 774 | const ImageBase& image = slot_images[image_id]; | 766 | const ImageBase& image = slot_images[image_id]; |
| 775 | const auto copies = FullDownloadCopies(image.info); | 767 | const auto copies = FullDownloadCopies(image.info); |
| 776 | const std::span<u8> image_download_span = download_span.subspan(buffer_offset); | 768 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); |
| 777 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, image_download_span); | 769 | download_map.offset += image.unswizzled_size_bytes; |
| 778 | buffer_offset += image.unswizzled_size_bytes; | 770 | download_span = download_span.subspan(image.unswizzled_size_bytes); |
| 779 | } | 771 | } |
| 780 | committed_downloads.pop(); | 772 | committed_downloads.pop(); |
| 781 | } | 773 | } |
| @@ -806,32 +798,32 @@ void TextureCache<P>::RefreshContents(Image& image) { | |||
| 806 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | 798 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 807 | return; | 799 | return; |
| 808 | } | 800 | } |
| 809 | auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); | 801 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); |
| 810 | UploadImageContents(image, map, 0); | 802 | UploadImageContents(image, staging); |
| 811 | runtime.InsertUploadMemoryBarrier(); | 803 | runtime.InsertUploadMemoryBarrier(); |
| 812 | } | 804 | } |
| 813 | 805 | ||
| 814 | template <class P> | 806 | template <class P> |
| 815 | template <typename MapBuffer> | 807 | template <typename StagingBuffer> |
| 816 | void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { | 808 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { |
| 817 | const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); | 809 | const std::span<u8> mapped_span = staging.mapped_span; |
| 818 | const GPUVAddr gpu_addr = image.gpu_addr; | 810 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 819 | 811 | ||
| 820 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | 812 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { |
| 821 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | 813 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); |
| 822 | const auto uploads = FullUploadSwizzles(image.info); | 814 | const auto uploads = FullUploadSwizzles(image.info); |
| 823 | runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); | 815 | runtime.AccelerateImageUpload(image, staging, uploads); |
| 824 | } else if (True(image.flags & ImageFlagBits::Converted)) { | 816 | } else if (True(image.flags & ImageFlagBits::Converted)) { |
| 825 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | 817 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); |
| 826 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | 818 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); |
| 827 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | 819 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); |
| 828 | image.UploadMemory(map, buffer_offset, copies); | 820 | image.UploadMemory(staging, copies); |
| 829 | } else if (image.info.type == ImageType::Buffer) { | 821 | } else if (image.info.type == ImageType::Buffer) { |
| 830 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; | 822 | const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; |
| 831 | image.UploadMemory(map, buffer_offset, copies); | 823 | image.UploadMemory(staging, copies); |
| 832 | } else { | 824 | } else { |
| 833 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | 825 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); |
| 834 | image.UploadMemory(map, buffer_offset, copies); | 826 | image.UploadMemory(staging, copies); |
| 835 | } | 827 | } |
| 836 | } | 828 | } |
| 837 | 829 | ||
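The texture cache hunks above drop the manual buffer_offset bookkeeping: each image now reads from the front of the staging buffer's mapped_span, and the span (or the map offset) is advanced by the image's unswizzled size before the next one. A minimal sketch of that consume-and-advance pattern, with a hypothetical PendingDownload type standing in for the cache's image bookkeeping:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <span>

    struct PendingDownload {
        std::size_t size_bytes; // unswizzled size of one image
    };

    // Walk a staging mapping front-to-back, giving each download its own window.
    void ConsumeDownloads(std::span<std::uint8_t> mapped,
                          std::span<const PendingDownload> downloads) {
        for (const PendingDownload& download : downloads) {
            std::span<std::uint8_t> window = mapped.first(download.size_bytes);
            std::fill(window.begin(), window.end(), std::uint8_t{0}); // placeholder for the real swizzle/copy
            mapped = mapped.subspan(download.size_bytes); // advance past the consumed bytes
        }
    }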
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 53444e945..e1b38c6ac 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp | |||
| @@ -38,19 +38,18 @@ namespace VideoCore { | |||
| 38 | 38 | ||
| 39 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { | 39 | std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { |
| 40 | const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); | 40 | const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); |
| 41 | std::unique_ptr<Tegra::GPU> gpu = std::make_unique<Tegra::GPU>( | 41 | const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); |
| 42 | system, Settings::values.use_asynchronous_gpu_emulation.GetValue(), use_nvdec); | 42 | auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); |
| 43 | |||
| 44 | auto context = emu_window.CreateSharedContext(); | 43 | auto context = emu_window.CreateSharedContext(); |
| 45 | const auto scope = context->Acquire(); | 44 | auto scope = context->Acquire(); |
| 46 | 45 | try { | |
| 47 | auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); | 46 | auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); |
| 48 | if (!renderer->Init()) { | 47 | gpu->BindRenderer(std::move(renderer)); |
| 48 | return gpu; | ||
| 49 | } catch (const std::runtime_error& exception) { | ||
| 50 | LOG_ERROR(HW_GPU, "Failed to initialize GPU: {}", exception.what()); | ||
| 49 | return nullptr; | 51 | return nullptr; |
| 50 | } | 52 | } |
| 51 | |||
| 52 | gpu->BindRenderer(std::move(renderer)); | ||
| 53 | return gpu; | ||
| 54 | } | 53 | } |
| 55 | 54 | ||
| 56 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { | 55 | u16 GetResolutionScaleFactor(const RendererBase& renderer) { |
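CreateGPU above now treats renderer construction as the failure point: CreateRenderer is expected to throw std::runtime_error when no backend can be brought up, and the single catch block preserves the old "return nullptr on failure" contract for callers. A reduced sketch of that control flow, with Renderer and MakeRenderer as stand-ins for the real types:

    #include <cstdio>
    #include <memory>
    #include <stdexcept>

    struct Renderer {};

    std::unique_ptr<Renderer> MakeRenderer(bool fail) {
        if (fail) {
            throw std::runtime_error("no suitable graphics device");
        }
        return std::make_unique<Renderer>();
    }

    std::unique_ptr<Renderer> CreateRendererOrNull(bool fail) {
        try {
            return MakeRenderer(fail);
        } catch (const std::runtime_error& exception) {
            std::fprintf(stderr, "Failed to initialize GPU: %s\n", exception.what());
            return nullptr; // callers keep checking for null, as before
        }
    }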
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 51f53bc39..34d396434 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -18,27 +18,22 @@ | |||
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 19 | ||
| 20 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | |||
| 22 | namespace { | 21 | namespace { |
| 23 | |||
| 24 | namespace Alternatives { | 22 | namespace Alternatives { |
| 25 | 23 | constexpr std::array DEPTH24_UNORM_STENCIL8_UINT{ | |
| 26 | constexpr std::array Depth24UnormS8_UINT{ | ||
| 27 | VK_FORMAT_D32_SFLOAT_S8_UINT, | 24 | VK_FORMAT_D32_SFLOAT_S8_UINT, |
| 28 | VK_FORMAT_D16_UNORM_S8_UINT, | 25 | VK_FORMAT_D16_UNORM_S8_UINT, |
| 29 | VkFormat{}, | 26 | VK_FORMAT_UNDEFINED, |
| 30 | }; | 27 | }; |
| 31 | 28 | ||
| 32 | constexpr std::array Depth16UnormS8_UINT{ | 29 | constexpr std::array DEPTH16_UNORM_STENCIL8_UINT{ |
| 33 | VK_FORMAT_D24_UNORM_S8_UINT, | 30 | VK_FORMAT_D24_UNORM_S8_UINT, |
| 34 | VK_FORMAT_D32_SFLOAT_S8_UINT, | 31 | VK_FORMAT_D32_SFLOAT_S8_UINT, |
| 35 | VkFormat{}, | 32 | VK_FORMAT_UNDEFINED, |
| 36 | }; | 33 | }; |
| 37 | |||
| 38 | } // namespace Alternatives | 34 | } // namespace Alternatives |
| 39 | 35 | ||
| 40 | constexpr std::array REQUIRED_EXTENSIONS{ | 36 | constexpr std::array REQUIRED_EXTENSIONS{ |
| 41 | VK_KHR_SWAPCHAIN_EXTENSION_NAME, | ||
| 42 | VK_KHR_MAINTENANCE1_EXTENSION_NAME, | 37 | VK_KHR_MAINTENANCE1_EXTENSION_NAME, |
| 43 | VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, | 38 | VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, |
| 44 | VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, | 39 | VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, |
| @@ -51,7 +46,14 @@ constexpr std::array REQUIRED_EXTENSIONS{ | |||
| 51 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | 46 | VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |
| 52 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | 47 | VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |
| 53 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | 48 | VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |
| 49 | VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, | ||
| 54 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | 50 | VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, |
| 51 | #ifdef _WIN32 | ||
| 52 | VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, | ||
| 53 | #endif | ||
| 54 | #ifdef __linux__ | ||
| 55 | VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, | ||
| 56 | #endif | ||
| 55 | }; | 57 | }; |
| 56 | 58 | ||
| 57 | template <typename T> | 59 | template <typename T> |
| @@ -63,9 +65,9 @@ void SetNext(void**& next, T& data) { | |||
| 63 | constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { | 65 | constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { |
| 64 | switch (format) { | 66 | switch (format) { |
| 65 | case VK_FORMAT_D24_UNORM_S8_UINT: | 67 | case VK_FORMAT_D24_UNORM_S8_UINT: |
| 66 | return Alternatives::Depth24UnormS8_UINT.data(); | 68 | return Alternatives::DEPTH24_UNORM_STENCIL8_UINT.data(); |
| 67 | case VK_FORMAT_D16_UNORM_S8_UINT: | 69 | case VK_FORMAT_D16_UNORM_S8_UINT: |
| 68 | return Alternatives::Depth16UnormS8_UINT.data(); | 70 | return Alternatives::DEPTH16_UNORM_STENCIL8_UINT.data(); |
| 69 | default: | 71 | default: |
| 70 | return nullptr; | 72 | return nullptr; |
| 71 | } | 73 | } |
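The renamed alternative-format tables are now terminated with VK_FORMAT_UNDEFINED instead of a zero-initialized VkFormat{}, which makes the sentinel explicit for callers that walk the array. A hedged sketch of how such a sentinel-terminated list can be scanned for the first acceptable fallback (the predicate stands in for the device's real format-support query):

    #include <vulkan/vulkan.h>

    // Returns the first alternative accepted by 'is_supported', or VK_FORMAT_UNDEFINED.
    // 'alternatives' must be null or point to a VK_FORMAT_UNDEFINED-terminated array.
    template <typename Predicate>
    VkFormat FirstSupportedAlternative(const VkFormat* alternatives, Predicate&& is_supported) {
        if (alternatives == nullptr) {
            return VK_FORMAT_UNDEFINED;
        }
        for (; *alternatives != VK_FORMAT_UNDEFINED; ++alternatives) {
            if (is_supported(*alternatives)) {
                return *alternatives;
            }
        }
        return VK_FORMAT_UNDEFINED;
    }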
| @@ -195,78 +197,77 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 195 | const vk::InstanceDispatch& dld_) | 197 | const vk::InstanceDispatch& dld_) |
| 196 | : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, | 198 | : instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()}, |
| 197 | format_properties{GetFormatProperties(physical)} { | 199 | format_properties{GetFormatProperties(physical)} { |
| 198 | CheckSuitability(); | 200 | CheckSuitability(surface != nullptr); |
| 199 | SetupFamilies(surface); | 201 | SetupFamilies(surface); |
| 200 | SetupFeatures(); | 202 | SetupFeatures(); |
| 201 | 203 | ||
| 202 | const auto queue_cis = GetDeviceQueueCreateInfos(); | 204 | const auto queue_cis = GetDeviceQueueCreateInfos(); |
| 203 | const std::vector extensions = LoadExtensions(); | 205 | const std::vector extensions = LoadExtensions(surface != nullptr); |
| 204 | 206 | ||
| 205 | VkPhysicalDeviceFeatures2 features2{ | 207 | VkPhysicalDeviceFeatures2 features2{ |
| 206 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, | 208 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, |
| 207 | .pNext = nullptr, | 209 | .pNext = nullptr, |
| 208 | .features{}, | 210 | .features{ |
| 211 | .robustBufferAccess = true, | ||
| 212 | .fullDrawIndexUint32 = false, | ||
| 213 | .imageCubeArray = true, | ||
| 214 | .independentBlend = true, | ||
| 215 | .geometryShader = true, | ||
| 216 | .tessellationShader = true, | ||
| 217 | .sampleRateShading = false, | ||
| 218 | .dualSrcBlend = false, | ||
| 219 | .logicOp = false, | ||
| 220 | .multiDrawIndirect = false, | ||
| 221 | .drawIndirectFirstInstance = false, | ||
| 222 | .depthClamp = true, | ||
| 223 | .depthBiasClamp = true, | ||
| 224 | .fillModeNonSolid = false, | ||
| 225 | .depthBounds = false, | ||
| 226 | .wideLines = false, | ||
| 227 | .largePoints = true, | ||
| 228 | .alphaToOne = false, | ||
| 229 | .multiViewport = true, | ||
| 230 | .samplerAnisotropy = true, | ||
| 231 | .textureCompressionETC2 = false, | ||
| 232 | .textureCompressionASTC_LDR = is_optimal_astc_supported, | ||
| 233 | .textureCompressionBC = false, | ||
| 234 | .occlusionQueryPrecise = true, | ||
| 235 | .pipelineStatisticsQuery = false, | ||
| 236 | .vertexPipelineStoresAndAtomics = true, | ||
| 237 | .fragmentStoresAndAtomics = true, | ||
| 238 | .shaderTessellationAndGeometryPointSize = false, | ||
| 239 | .shaderImageGatherExtended = true, | ||
| 240 | .shaderStorageImageExtendedFormats = false, | ||
| 241 | .shaderStorageImageMultisample = is_shader_storage_image_multisample, | ||
| 242 | .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, | ||
| 243 | .shaderStorageImageWriteWithoutFormat = true, | ||
| 244 | .shaderUniformBufferArrayDynamicIndexing = false, | ||
| 245 | .shaderSampledImageArrayDynamicIndexing = false, | ||
| 246 | .shaderStorageBufferArrayDynamicIndexing = false, | ||
| 247 | .shaderStorageImageArrayDynamicIndexing = false, | ||
| 248 | .shaderClipDistance = false, | ||
| 249 | .shaderCullDistance = false, | ||
| 250 | .shaderFloat64 = false, | ||
| 251 | .shaderInt64 = false, | ||
| 252 | .shaderInt16 = false, | ||
| 253 | .shaderResourceResidency = false, | ||
| 254 | .shaderResourceMinLod = false, | ||
| 255 | .sparseBinding = false, | ||
| 256 | .sparseResidencyBuffer = false, | ||
| 257 | .sparseResidencyImage2D = false, | ||
| 258 | .sparseResidencyImage3D = false, | ||
| 259 | .sparseResidency2Samples = false, | ||
| 260 | .sparseResidency4Samples = false, | ||
| 261 | .sparseResidency8Samples = false, | ||
| 262 | .sparseResidency16Samples = false, | ||
| 263 | .sparseResidencyAliased = false, | ||
| 264 | .variableMultisampleRate = false, | ||
| 265 | .inheritedQueries = false, | ||
| 266 | }, | ||
| 209 | }; | 267 | }; |
| 210 | const void* first_next = &features2; | 268 | const void* first_next = &features2; |
| 211 | void** next = &features2.pNext; | 269 | void** next = &features2.pNext; |
| 212 | 270 | ||
| 213 | features2.features = { | ||
| 214 | .robustBufferAccess = false, | ||
| 215 | .fullDrawIndexUint32 = false, | ||
| 216 | .imageCubeArray = true, | ||
| 217 | .independentBlend = true, | ||
| 218 | .geometryShader = true, | ||
| 219 | .tessellationShader = true, | ||
| 220 | .sampleRateShading = false, | ||
| 221 | .dualSrcBlend = false, | ||
| 222 | .logicOp = false, | ||
| 223 | .multiDrawIndirect = false, | ||
| 224 | .drawIndirectFirstInstance = false, | ||
| 225 | .depthClamp = true, | ||
| 226 | .depthBiasClamp = true, | ||
| 227 | .fillModeNonSolid = false, | ||
| 228 | .depthBounds = false, | ||
| 229 | .wideLines = false, | ||
| 230 | .largePoints = true, | ||
| 231 | .alphaToOne = false, | ||
| 232 | .multiViewport = true, | ||
| 233 | .samplerAnisotropy = true, | ||
| 234 | .textureCompressionETC2 = false, | ||
| 235 | .textureCompressionASTC_LDR = is_optimal_astc_supported, | ||
| 236 | .textureCompressionBC = false, | ||
| 237 | .occlusionQueryPrecise = true, | ||
| 238 | .pipelineStatisticsQuery = false, | ||
| 239 | .vertexPipelineStoresAndAtomics = true, | ||
| 240 | .fragmentStoresAndAtomics = true, | ||
| 241 | .shaderTessellationAndGeometryPointSize = false, | ||
| 242 | .shaderImageGatherExtended = true, | ||
| 243 | .shaderStorageImageExtendedFormats = false, | ||
| 244 | .shaderStorageImageMultisample = is_shader_storage_image_multisample, | ||
| 245 | .shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported, | ||
| 246 | .shaderStorageImageWriteWithoutFormat = true, | ||
| 247 | .shaderUniformBufferArrayDynamicIndexing = false, | ||
| 248 | .shaderSampledImageArrayDynamicIndexing = false, | ||
| 249 | .shaderStorageBufferArrayDynamicIndexing = false, | ||
| 250 | .shaderStorageImageArrayDynamicIndexing = false, | ||
| 251 | .shaderClipDistance = false, | ||
| 252 | .shaderCullDistance = false, | ||
| 253 | .shaderFloat64 = false, | ||
| 254 | .shaderInt64 = false, | ||
| 255 | .shaderInt16 = false, | ||
| 256 | .shaderResourceResidency = false, | ||
| 257 | .shaderResourceMinLod = false, | ||
| 258 | .sparseBinding = false, | ||
| 259 | .sparseResidencyBuffer = false, | ||
| 260 | .sparseResidencyImage2D = false, | ||
| 261 | .sparseResidencyImage3D = false, | ||
| 262 | .sparseResidency2Samples = false, | ||
| 263 | .sparseResidency4Samples = false, | ||
| 264 | .sparseResidency8Samples = false, | ||
| 265 | .sparseResidency16Samples = false, | ||
| 266 | .sparseResidencyAliased = false, | ||
| 267 | .variableMultisampleRate = false, | ||
| 268 | .inheritedQueries = false, | ||
| 269 | }; | ||
| 270 | VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ | 271 | VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ |
| 271 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, | 272 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, |
| 272 | .pNext = nullptr, | 273 | .pNext = nullptr, |
| @@ -379,20 +380,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 379 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); | 380 | LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state"); |
| 380 | } | 381 | } |
| 381 | 382 | ||
| 382 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2; | ||
| 383 | if (ext_robustness2) { | ||
| 384 | robustness2 = { | ||
| 385 | .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, | ||
| 386 | .pNext = nullptr, | ||
| 387 | .robustBufferAccess2 = false, | ||
| 388 | .robustImageAccess2 = true, | ||
| 389 | .nullDescriptor = true, | ||
| 390 | }; | ||
| 391 | SetNext(next, robustness2); | ||
| 392 | } else { | ||
| 393 | LOG_INFO(Render_Vulkan, "Device doesn't support robustness2"); | ||
| 394 | } | ||
| 395 | |||
| 396 | if (!ext_depth_range_unrestricted) { | 383 | if (!ext_depth_range_unrestricted) { |
| 397 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); | 384 | LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted"); |
| 398 | } | 385 | } |
| @@ -535,16 +522,18 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want | |||
| 535 | return (supported_usage & wanted_usage) == wanted_usage; | 522 | return (supported_usage & wanted_usage) == wanted_usage; |
| 536 | } | 523 | } |
| 537 | 524 | ||
| 538 | void Device::CheckSuitability() const { | 525 | void Device::CheckSuitability(bool requires_swapchain) const { |
| 539 | std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; | 526 | std::bitset<REQUIRED_EXTENSIONS.size()> available_extensions; |
| 527 | bool has_swapchain = false; | ||
| 540 | for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { | 528 | for (const VkExtensionProperties& property : physical.EnumerateDeviceExtensionProperties()) { |
| 541 | for (std::size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | 529 | const std::string_view name{property.extensionName}; |
| 530 | for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | ||
| 542 | if (available_extensions[i]) { | 531 | if (available_extensions[i]) { |
| 543 | continue; | 532 | continue; |
| 544 | } | 533 | } |
| 545 | const std::string_view name{property.extensionName}; | ||
| 546 | available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; | 534 | available_extensions[i] = name == REQUIRED_EXTENSIONS[i]; |
| 547 | } | 535 | } |
| 536 | has_swapchain = has_swapchain || name == VK_KHR_SWAPCHAIN_EXTENSION_NAME; | ||
| 548 | } | 537 | } |
| 549 | for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { | 538 | for (size_t i = 0; i < REQUIRED_EXTENSIONS.size(); ++i) { |
| 550 | if (available_extensions[i]) { | 539 | if (available_extensions[i]) { |
| @@ -553,6 +542,11 @@ void Device::CheckSuitability() const { | |||
| 553 | LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); | 542 | LOG_ERROR(Render_Vulkan, "Missing required extension: {}", REQUIRED_EXTENSIONS[i]); |
| 554 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | 543 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); |
| 555 | } | 544 | } |
| 545 | if (requires_swapchain && !has_swapchain) { | ||
| 546 | LOG_ERROR(Render_Vulkan, "Missing required extension: VK_KHR_swapchain"); | ||
| 547 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | ||
| 548 | } | ||
| 549 | |||
| 556 | struct LimitTuple { | 550 | struct LimitTuple { |
| 557 | u32 minimum; | 551 | u32 minimum; |
| 558 | u32 value; | 552 | u32 value; |
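With the surface made optional, VK_KHR_swapchain is checked separately from REQUIRED_EXTENSIONS and only enforced when a presentation surface exists. The test itself is a plain membership scan over the enumerated extension names; a reduced sketch with the enumeration result passed in directly:

    #include <string_view>
    #include <vector>

    bool HasExtension(const std::vector<std::string_view>& enumerated, std::string_view name) {
        for (const std::string_view candidate : enumerated) {
            if (candidate == name) {
                return true;
            }
        }
        return false;
    }

    // Swapchain support is only mandatory when the device will present to a surface.
    bool SwapchainRequirementMet(const std::vector<std::string_view>& enumerated,
                                 bool requires_swapchain) {
        return !requires_swapchain || HasExtension(enumerated, "VK_KHR_swapchain");
    }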
| @@ -572,9 +566,20 @@ void Device::CheckSuitability() const { | |||
| 572 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); | 566 | throw vk::Exception(VK_ERROR_FEATURE_NOT_PRESENT); |
| 573 | } | 567 | } |
| 574 | } | 568 | } |
| 575 | const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; | 569 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; |
| 570 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | ||
| 571 | |||
| 572 | VkPhysicalDeviceFeatures2 features2{}; | ||
| 573 | features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | ||
| 574 | features2.pNext = &robustness2; | ||
| 575 | |||
| 576 | physical.GetFeatures2KHR(features2); | ||
| 577 | |||
| 578 | const VkPhysicalDeviceFeatures& features{features2.features}; | ||
| 576 | const std::array feature_report{ | 579 | const std::array feature_report{ |
| 580 | std::make_pair(features.robustBufferAccess, "robustBufferAccess"), | ||
| 577 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), | 581 | std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"), |
| 582 | std::make_pair(features.robustBufferAccess, "robustBufferAccess"), | ||
| 578 | std::make_pair(features.imageCubeArray, "imageCubeArray"), | 583 | std::make_pair(features.imageCubeArray, "imageCubeArray"), |
| 579 | std::make_pair(features.independentBlend, "independentBlend"), | 584 | std::make_pair(features.independentBlend, "independentBlend"), |
| 580 | std::make_pair(features.depthClamp, "depthClamp"), | 585 | std::make_pair(features.depthClamp, "depthClamp"), |
| @@ -589,6 +594,9 @@ void Device::CheckSuitability() const { | |||
| 589 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | 594 | std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |
| 590 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, | 595 | std::make_pair(features.shaderStorageImageWriteWithoutFormat, |
| 591 | "shaderStorageImageWriteWithoutFormat"), | 596 | "shaderStorageImageWriteWithoutFormat"), |
| 597 | std::make_pair(robustness2.robustBufferAccess2, "robustBufferAccess2"), | ||
| 598 | std::make_pair(robustness2.robustImageAccess2, "robustImageAccess2"), | ||
| 599 | std::make_pair(robustness2.nullDescriptor, "nullDescriptor"), | ||
| 592 | }; | 600 | }; |
| 593 | for (const auto& [is_supported, name] : feature_report) { | 601 | for (const auto& [is_supported, name] : feature_report) { |
| 594 | if (is_supported) { | 602 | if (is_supported) { |
| @@ -599,17 +607,19 @@ void Device::CheckSuitability() const { | |||
| 599 | } | 607 | } |
| 600 | } | 608 | } |
| 601 | 609 | ||
| 602 | std::vector<const char*> Device::LoadExtensions() { | 610 | std::vector<const char*> Device::LoadExtensions(bool requires_surface) { |
| 603 | std::vector<const char*> extensions; | 611 | std::vector<const char*> extensions; |
| 604 | extensions.reserve(7 + REQUIRED_EXTENSIONS.size()); | 612 | extensions.reserve(8 + REQUIRED_EXTENSIONS.size()); |
| 605 | extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); | 613 | extensions.insert(extensions.begin(), REQUIRED_EXTENSIONS.begin(), REQUIRED_EXTENSIONS.end()); |
| 614 | if (requires_surface) { | ||
| 615 | extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | ||
| 616 | } | ||
| 606 | 617 | ||
| 607 | bool has_khr_shader_float16_int8{}; | 618 | bool has_khr_shader_float16_int8{}; |
| 608 | bool has_ext_subgroup_size_control{}; | 619 | bool has_ext_subgroup_size_control{}; |
| 609 | bool has_ext_transform_feedback{}; | 620 | bool has_ext_transform_feedback{}; |
| 610 | bool has_ext_custom_border_color{}; | 621 | bool has_ext_custom_border_color{}; |
| 611 | bool has_ext_extended_dynamic_state{}; | 622 | bool has_ext_extended_dynamic_state{}; |
| 612 | bool has_ext_robustness2{}; | ||
| 613 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { | 623 | for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { |
| 614 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, | 624 | const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, |
| 615 | bool push) { | 625 | bool push) { |
| @@ -637,14 +647,12 @@ std::vector<const char*> Device::LoadExtensions() { | |||
| 637 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); | 647 | test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false); |
| 638 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | 648 | test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); |
| 639 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | 649 | test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); |
| 640 | test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false); | ||
| 641 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | 650 | test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); |
| 642 | if (Settings::values.renderer_debug) { | 651 | if (Settings::values.renderer_debug) { |
| 643 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, | 652 | test(nv_device_diagnostics_config, VK_NV_DEVICE_DIAGNOSTICS_CONFIG_EXTENSION_NAME, |
| 644 | true); | 653 | true); |
| 645 | } | 654 | } |
| 646 | } | 655 | } |
| 647 | |||
| 648 | VkPhysicalDeviceFeatures2KHR features; | 656 | VkPhysicalDeviceFeatures2KHR features; |
| 649 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | 657 | features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; |
| 650 | 658 | ||
| @@ -661,7 +669,6 @@ std::vector<const char*> Device::LoadExtensions() { | |||
| 661 | is_float16_supported = float16_int8_features.shaderFloat16; | 669 | is_float16_supported = float16_int8_features.shaderFloat16; |
| 662 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); | 670 | extensions.push_back(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME); |
| 663 | } | 671 | } |
| 664 | |||
| 665 | if (has_ext_subgroup_size_control) { | 672 | if (has_ext_subgroup_size_control) { |
| 666 | VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; | 673 | VkPhysicalDeviceSubgroupSizeControlFeaturesEXT subgroup_features; |
| 667 | subgroup_features.sType = | 674 | subgroup_features.sType = |
| @@ -688,7 +695,6 @@ std::vector<const char*> Device::LoadExtensions() { | |||
| 688 | } else { | 695 | } else { |
| 689 | is_warp_potentially_bigger = true; | 696 | is_warp_potentially_bigger = true; |
| 690 | } | 697 | } |
| 691 | |||
| 692 | if (has_ext_transform_feedback) { | 698 | if (has_ext_transform_feedback) { |
| 693 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; | 699 | VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; |
| 694 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; | 700 | tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; |
| @@ -710,7 +716,6 @@ std::vector<const char*> Device::LoadExtensions() { | |||
| 710 | ext_transform_feedback = true; | 716 | ext_transform_feedback = true; |
| 711 | } | 717 | } |
| 712 | } | 718 | } |
| 713 | |||
| 714 | if (has_ext_custom_border_color) { | 719 | if (has_ext_custom_border_color) { |
| 715 | VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; | 720 | VkPhysicalDeviceCustomBorderColorFeaturesEXT border_features; |
| 716 | border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; | 721 | border_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT; |
| @@ -723,7 +728,6 @@ std::vector<const char*> Device::LoadExtensions() { | |||
| 723 | ext_custom_border_color = true; | 728 | ext_custom_border_color = true; |
| 724 | } | 729 | } |
| 725 | } | 730 | } |
| 726 | |||
| 727 | if (has_ext_extended_dynamic_state) { | 731 | if (has_ext_extended_dynamic_state) { |
| 728 | VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; | 732 | VkPhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state; |
| 729 | dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; | 733 | dynamic_state.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT; |
| @@ -736,19 +740,6 @@ std::vector<const char*> Device::LoadExtensions() { | |||
| 736 | ext_extended_dynamic_state = true; | 740 | ext_extended_dynamic_state = true; |
| 737 | } | 741 | } |
| 738 | } | 742 | } |
| 739 | |||
| 740 | if (has_ext_robustness2) { | ||
| 741 | VkPhysicalDeviceRobustness2FeaturesEXT robustness2; | ||
| 742 | robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | ||
| 743 | robustness2.pNext = nullptr; | ||
| 744 | features.pNext = &robustness2; | ||
| 745 | physical.GetFeatures2KHR(features); | ||
| 746 | if (robustness2.nullDescriptor && robustness2.robustImageAccess2) { | ||
| 747 | extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); | ||
| 748 | ext_robustness2 = true; | ||
| 749 | } | ||
| 750 | } | ||
| 751 | |||
| 752 | return extensions; | 743 | return extensions; |
| 753 | } | 744 | } |
| 754 | 745 | ||
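Instead of chaining VkPhysicalDeviceRobustness2FeaturesEXT only at device-creation time, the suitability check now queries it up front by hanging the struct off VkPhysicalDeviceFeatures2::pNext. A minimal sketch of that query against a raw VkPhysicalDevice; it assumes the core Vulkan 1.1 entry point, whereas the wrapper goes through the equivalent vkGetPhysicalDeviceFeatures2KHR:

    #include <vulkan/vulkan.h>

    // Fills core features and robustness2 features in a single chained query.
    void QueryRobustness2(VkPhysicalDevice physical, VkPhysicalDeviceFeatures2& features2,
                          VkPhysicalDeviceRobustness2FeaturesEXT& robustness2) {
        robustness2 = {};
        robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;

        features2 = {};
        features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
        features2.pNext = &robustness2;

        vkGetPhysicalDeviceFeatures2(physical, &features2);
        // robustBufferAccess2, robustImageAccess2 and nullDescriptor are now populated
        // and can be reported or required alongside the core features.
    }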
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 4b66dba7a..67d70cd22 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -23,7 +23,7 @@ enum class FormatType { Linear, Optimal, Buffer }; | |||
| 23 | const u32 GuestWarpSize = 32; | 23 | const u32 GuestWarpSize = 32; |
| 24 | 24 | ||
| 25 | /// Handles data specific to a physical device. | 25 | /// Handles data specific to a physical device. |
| 26 | class Device final { | 26 | class Device { |
| 27 | public: | 27 | public: |
| 28 | explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, | 28 | explicit Device(VkInstance instance, vk::PhysicalDevice physical, VkSurfaceKHR surface, |
| 29 | const vk::InstanceDispatch& dld); | 29 | const vk::InstanceDispatch& dld); |
| @@ -227,10 +227,10 @@ public: | |||
| 227 | 227 | ||
| 228 | private: | 228 | private: |
| 229 | /// Checks if the physical device is suitable. | 229 | /// Checks if the physical device is suitable. |
| 230 | void CheckSuitability() const; | 230 | void CheckSuitability(bool requires_swapchain) const; |
| 231 | 231 | ||
| 232 | /// Loads extensions into a vector and stores available ones in this object. | 232 | /// Loads extensions into a vector and stores available ones in this object. |
| 233 | std::vector<const char*> LoadExtensions(); | 233 | std::vector<const char*> LoadExtensions(bool requires_surface); |
| 234 | 234 | ||
| 235 | /// Sets up queue families. | 235 | /// Sets up queue families. |
| 236 | void SetupFamilies(VkSurfaceKHR surface); | 236 | void SetupFamilies(VkSurfaceKHR surface); |
| @@ -285,7 +285,6 @@ private: | |||
| 285 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. | 285 | bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback. |
| 286 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. | 286 | bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color. |
| 287 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. | 287 | bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. |
| 288 | bool ext_robustness2{}; ///< Support for VK_EXT_robustness2. | ||
| 289 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. | 288 | bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export. |
| 290 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. | 289 | bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. |
| 291 | bool has_renderdoc{}; ///< Has RenderDoc attached | 290 | bool has_renderdoc{}; ///< Has RenderDoc attached |
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index 889ecda0c..bfd6e6add 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | #include <algorithm> | 5 | #include <algorithm> |
| 6 | #include <future> | ||
| 6 | #include <optional> | 7 | #include <optional> |
| 7 | #include <span> | 8 | #include <span> |
| 8 | #include <utility> | 9 | #include <utility> |
| @@ -140,7 +141,10 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD | |||
| 140 | VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); | 141 | VK_VERSION_MAJOR(required_version), VK_VERSION_MINOR(required_version)); |
| 141 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | 142 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); |
| 142 | } | 143 | } |
| 143 | vk::Instance instance = vk::Instance::Create(required_version, layers, extensions, dld); | 144 | vk::Instance instance = |
| 145 | std::async([&] { | ||
| 146 | return vk::Instance::Create(required_version, layers, extensions, dld); | ||
| 147 | }).get(); | ||
| 144 | if (!vk::Load(*instance, dld)) { | 148 | if (!vk::Load(*instance, dld)) { |
| 145 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); | 149 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan instance function pointers"); |
| 146 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | 150 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
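The instance is now created through std::async followed by an immediate .get(), keeping CreateInstance synchronous while allowing the vkCreateInstance call to run off the calling thread. Note that std::async with the default policy only permits, not guarantees, a worker thread; a sketch that forces one would pass std::launch::async explicitly:

    #include <future>
    #include <utility>

    // Runs 'create' on a temporary worker thread and blocks for the result,
    // so the callable executes off the calling thread but the function stays synchronous.
    template <typename Create>
    auto CreateOnWorkerThread(Create&& create) {
        return std::async(std::launch::async, std::forward<Create>(create)).get();
    }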
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index d6eb3af31..2a8b7a907 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | 9 | ||
| 10 | #include <glad/glad.h> | ||
| 11 | |||
| 10 | #include "common/alignment.h" | 12 | #include "common/alignment.h" |
| 11 | #include "common/assert.h" | 13 | #include "common/assert.h" |
| 12 | #include "common/common_types.h" | 14 | #include "common/common_types.h" |
| @@ -55,10 +57,24 @@ struct Range { | |||
| 55 | 57 | ||
| 56 | class MemoryAllocation { | 58 | class MemoryAllocation { |
| 57 | public: | 59 | public: |
| 58 | explicit MemoryAllocation(const Device& device_, vk::DeviceMemory memory_, | 60 | explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties, |
| 59 | VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) | 61 | u64 allocation_size_, u32 type) |
| 60 | : device{device_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, | 62 | : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, |
| 61 | property_flags{properties}, shifted_memory_type{1U << type} {} | 63 | shifted_memory_type{1U << type} {} |
| 64 | |||
| 65 | #if defined(_WIN32) || defined(__linux__) | ||
| 66 | ~MemoryAllocation() { | ||
| 67 | if (owning_opengl_handle != 0) { | ||
| 68 | glDeleteMemoryObjectsEXT(1, &owning_opengl_handle); | ||
| 69 | } | ||
| 70 | } | ||
| 71 | #endif | ||
| 72 | |||
| 73 | MemoryAllocation& operator=(const MemoryAllocation&) = delete; | ||
| 74 | MemoryAllocation(const MemoryAllocation&) = delete; | ||
| 75 | |||
| 76 | MemoryAllocation& operator=(MemoryAllocation&&) = delete; | ||
| 77 | MemoryAllocation(MemoryAllocation&&) = delete; | ||
| 62 | 78 | ||
| 63 | [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { | 79 | [[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) { |
| 64 | const std::optional<u64> alloc = FindFreeRegion(size, alignment); | 80 | const std::optional<u64> alloc = FindFreeRegion(size, alignment); |
| @@ -88,6 +104,31 @@ public: | |||
| 88 | return memory_mapped_span; | 104 | return memory_mapped_span; |
| 89 | } | 105 | } |
| 90 | 106 | ||
| 107 | #ifdef _WIN32 | ||
| 108 | [[nodiscard]] u32 ExportOpenGLHandle() { | ||
| 109 | if (!owning_opengl_handle) { | ||
| 110 | glCreateMemoryObjectsEXT(1, &owning_opengl_handle); | ||
| 111 | glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size, | ||
| 112 | GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, | ||
| 113 | memory.GetMemoryWin32HandleKHR()); | ||
| 114 | } | ||
| 115 | return owning_opengl_handle; | ||
| 116 | } | ||
| 117 | #elif __linux__ | ||
| 118 | [[nodiscard]] u32 ExportOpenGLHandle() { | ||
| 119 | if (!owning_opengl_handle) { | ||
| 120 | glCreateMemoryObjectsEXT(1, &owning_opengl_handle); | ||
| 121 | glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, | ||
| 122 | memory.GetMemoryFdKHR()); | ||
| 123 | } | ||
| 124 | return owning_opengl_handle; | ||
| 125 | } | ||
| 126 | #else | ||
| 127 | [[nodiscard]] u32 ExportOpenGLHandle() { | ||
| 128 | return 0; | ||
| 129 | } | ||
| 130 | #endif | ||
| 131 | |||
| 91 | /// Returns whether this allocation is compatible with the arguments. | 132 | /// Returns whether this allocation is compatible with the arguments. |
| 92 | [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { | 133 | [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { |
| 93 | return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; | 134 | return (flags & property_flags) && (type_mask & shifted_memory_type) != 0; |
| @@ -118,13 +159,15 @@ private: | |||
| 118 | return candidate; | 159 | return candidate; |
| 119 | } | 160 | } |
| 120 | 161 | ||
| 121 | const Device& device; ///< Vulkan device. | ||
| 122 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. | 162 | const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. |
| 123 | const u64 allocation_size; ///< Size of this allocation. | 163 | const u64 allocation_size; ///< Size of this allocation. |
| 124 | const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. | 164 | const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. |
| 125 | const u32 shifted_memory_type; ///< Shifted Vulkan memory type. | 165 | const u32 shifted_memory_type; ///< Shifted Vulkan memory type. |
| 126 | std::vector<Range> commits; ///< All commit ranges done from this allocation. | 166 | std::vector<Range> commits; ///< All commit ranges done from this allocation. |
| 127 | std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. | 167 | std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. |
| 168 | #if defined(_WIN32) || defined(__linux__) | ||
| 169 | u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle. | ||
| 170 | #endif | ||
| 128 | }; | 171 | }; |
| 129 | 172 | ||
| 130 | MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, | 173 | MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, |
| @@ -156,14 +199,19 @@ std::span<u8> MemoryCommit::Map() { | |||
| 156 | return span; | 199 | return span; |
| 157 | } | 200 | } |
| 158 | 201 | ||
| 202 | u32 MemoryCommit::ExportOpenGLHandle() const { | ||
| 203 | return allocation->ExportOpenGLHandle(); | ||
| 204 | } | ||
| 205 | |||
| 159 | void MemoryCommit::Release() { | 206 | void MemoryCommit::Release() { |
| 160 | if (allocation) { | 207 | if (allocation) { |
| 161 | allocation->Free(begin); | 208 | allocation->Free(begin); |
| 162 | } | 209 | } |
| 163 | } | 210 | } |
| 164 | 211 | ||
| 165 | MemoryAllocator::MemoryAllocator(const Device& device_) | 212 | MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) |
| 166 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()} {} | 213 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties()}, |
| 214 | export_allocations{export_allocations_} {} | ||
| 167 | 215 | ||
| 168 | MemoryAllocator::~MemoryAllocator() = default; | 216 | MemoryAllocator::~MemoryAllocator() = default; |
| 169 | 217 | ||
| @@ -196,14 +244,24 @@ MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) | |||
| 196 | 244 | ||
| 197 | void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { | 245 | void MemoryAllocator::AllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { |
| 198 | const u32 type = FindType(flags, type_mask).value(); | 246 | const u32 type = FindType(flags, type_mask).value(); |
| 247 | const VkExportMemoryAllocateInfo export_allocate_info{ | ||
| 248 | .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, | ||
| 249 | .pNext = nullptr, | ||
| 250 | #ifdef _WIN32 | ||
| 251 | .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, | ||
| 252 | #elif __linux__ | ||
| 253 | .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, | ||
| 254 | #else | ||
| 255 | .handleTypes = 0, | ||
| 256 | #endif | ||
| 257 | }; | ||
| 199 | vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ | 258 | vk::DeviceMemory memory = device.GetLogical().AllocateMemory({ |
| 200 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | 259 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| 201 | .pNext = nullptr, | 260 | .pNext = export_allocations ? &export_allocate_info : nullptr, |
| 202 | .allocationSize = size, | 261 | .allocationSize = size, |
| 203 | .memoryTypeIndex = type, | 262 | .memoryTypeIndex = type, |
| 204 | }); | 263 | }); |
| 205 | allocations.push_back( | 264 | allocations.push_back(std::make_unique<MemoryAllocation>(std::move(memory), flags, size, type)); |
| 206 | std::make_unique<MemoryAllocation>(device, std::move(memory), flags, size, type)); | ||
| 207 | } | 265 | } |
| 208 | 266 | ||
| 209 | std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, | 267 | std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, |
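Allocations can now be tagged as exportable (VkExportMemoryAllocateInfo chained into VkMemoryAllocateInfo) and later imported into OpenGL as memory objects for Vulkan/OpenGL interop. A platform-reduced sketch of the Linux import path, assuming glad was generated with GL_EXT_memory_object and GL_EXT_memory_object_fd and that 'fd' came from vkGetMemoryFdKHR on an exportable allocation:

    #include <cstdint>

    #include <glad/glad.h>

    // Imports an exported Vulkan allocation into OpenGL; ownership of the fd
    // transfers to the GL implementation on success (GL_EXT_memory_object_fd).
    GLuint ImportVulkanMemoryFd(int fd, std::uint64_t allocation_size) {
        GLuint memory_object = 0;
        glCreateMemoryObjectsEXT(1, &memory_object);
        glImportMemoryFdEXT(memory_object, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd);
        return memory_object;
    }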
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index 9e6cfabf9..d1ce29450 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h | |||
| @@ -43,6 +43,9 @@ public: | |||
| 43 | /// It will map the backing allocation if it hasn't been mapped before. | 43 | /// It will map the backing allocation if it hasn't been mapped before. |
| 44 | std::span<u8> Map(); | 44 | std::span<u8> Map(); |
| 45 | 45 | ||
| 46 | /// Returns a non-owning OpenGL handle, creating one if it doesn't exist. | ||
| 47 | u32 ExportOpenGLHandle() const; | ||
| 48 | |||
| 46 | /// Returns the Vulkan memory handler. | 49 | /// Returns the Vulkan memory handler. |
| 47 | VkDeviceMemory Memory() const { | 50 | VkDeviceMemory Memory() const { |
| 48 | return memory; | 51 | return memory; |
| @@ -67,7 +70,15 @@ private: | |||
| 67 | /// Allocates and releases memory allocations on demand. | 70 | /// Allocates and releases memory allocations on demand. |
| 68 | class MemoryAllocator { | 71 | class MemoryAllocator { |
| 69 | public: | 72 | public: |
| 70 | explicit MemoryAllocator(const Device& device_); | 73 | /** |
| 74 | * Construct memory allocator | ||
| 75 | * | ||
| 76 | * @param device_ Device to allocate from | ||
| 77 | * @param export_allocations_ True when allocations have to be exported | ||
| 78 | * | ||
| 79 | * @throw vk::Exception on failure | ||
| 80 | */ | ||
| 81 | explicit MemoryAllocator(const Device& device_, bool export_allocations_); | ||
| 71 | ~MemoryAllocator(); | 82 | ~MemoryAllocator(); |
| 72 | 83 | ||
| 73 | MemoryAllocator& operator=(const MemoryAllocator&) = delete; | 84 | MemoryAllocator& operator=(const MemoryAllocator&) = delete; |
| @@ -106,8 +117,9 @@ private: | |||
| 106 | /// Returns index to the fastest memory type compatible with the passed requirements. | 117 | /// Returns index to the fastest memory type compatible with the passed requirements. |
| 107 | std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; | 118 | std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; |
| 108 | 119 | ||
| 109 | const Device& device; ///< Device handle. | 120 | const Device& device; ///< Device handle. |
| 110 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. | 121 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. |
| 122 | const bool export_allocations; ///< True when memory allocations have to be exported. | ||
| 111 | std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. | 123 | std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. |
| 112 | }; | 124 | }; |
| 113 | 125 | ||
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 5e15ad607..2aa0ffbe6 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -168,11 +168,15 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | |||
| 168 | X(vkFreeCommandBuffers); | 168 | X(vkFreeCommandBuffers); |
| 169 | X(vkFreeDescriptorSets); | 169 | X(vkFreeDescriptorSets); |
| 170 | X(vkFreeMemory); | 170 | X(vkFreeMemory); |
| 171 | X(vkGetBufferMemoryRequirements); | 171 | X(vkGetBufferMemoryRequirements2); |
| 172 | X(vkGetDeviceQueue); | 172 | X(vkGetDeviceQueue); |
| 173 | X(vkGetEventStatus); | 173 | X(vkGetEventStatus); |
| 174 | X(vkGetFenceStatus); | 174 | X(vkGetFenceStatus); |
| 175 | X(vkGetImageMemoryRequirements); | 175 | X(vkGetImageMemoryRequirements); |
| 176 | X(vkGetMemoryFdKHR); | ||
| 177 | #ifdef _WIN32 | ||
| 178 | X(vkGetMemoryWin32HandleKHR); | ||
| 179 | #endif | ||
| 176 | X(vkGetQueryPoolResults); | 180 | X(vkGetQueryPoolResults); |
| 177 | X(vkGetSemaphoreCounterValueKHR); | 181 | X(vkGetSemaphoreCounterValueKHR); |
| 178 | X(vkMapMemory); | 182 | X(vkMapMemory); |
| @@ -505,6 +509,32 @@ void ImageView::SetObjectNameEXT(const char* name) const { | |||
| 505 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); | 509 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name); |
| 506 | } | 510 | } |
| 507 | 511 | ||
| 512 | int DeviceMemory::GetMemoryFdKHR() const { | ||
| 513 | const VkMemoryGetFdInfoKHR get_fd_info{ | ||
| 514 | .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, | ||
| 515 | .pNext = nullptr, | ||
| 516 | .memory = handle, | ||
| 517 | .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, | ||
| 518 | }; | ||
| 519 | int fd; | ||
| 520 | Check(dld->vkGetMemoryFdKHR(owner, &get_fd_info, &fd)); | ||
| 521 | return fd; | ||
| 522 | } | ||
| 523 | |||
| 524 | #ifdef _WIN32 | ||
| 525 | HANDLE DeviceMemory::GetMemoryWin32HandleKHR() const { | ||
| 526 | const VkMemoryGetWin32HandleInfoKHR get_win32_handle_info{ | ||
| 527 | .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, | ||
| 528 | .pNext = nullptr, | ||
| 529 | .memory = handle, | ||
| 530 | .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, | ||
| 531 | }; | ||
| 532 | HANDLE win32_handle; | ||
| 533 | Check(dld->vkGetMemoryWin32HandleKHR(owner, &get_win32_handle_info, &win32_handle)); | ||
| 534 | return win32_handle; | ||
| 535 | } | ||
| 536 | #endif | ||
| 537 | |||
| 508 | void DeviceMemory::SetObjectNameEXT(const char* name) const { | 538 | void DeviceMemory::SetObjectNameEXT(const char* name) const { |
| 509 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); | 539 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_DEVICE_MEMORY, name); |
| 510 | } | 540 | } |
| @@ -756,10 +786,20 @@ DeviceMemory Device::AllocateMemory(const VkMemoryAllocateInfo& ai) const { | |||
| 756 | return DeviceMemory(memory, handle, *dld); | 786 | return DeviceMemory(memory, handle, *dld); |
| 757 | } | 787 | } |
| 758 | 788 | ||
| 759 | VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer) const noexcept { | 789 | VkMemoryRequirements Device::GetBufferMemoryRequirements(VkBuffer buffer, |
| 760 | VkMemoryRequirements requirements; | 790 | void* pnext) const noexcept { |
| 761 | dld->vkGetBufferMemoryRequirements(handle, buffer, &requirements); | 791 | const VkBufferMemoryRequirementsInfo2 info{ |
| 762 | return requirements; | 792 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, |
| 793 | .pNext = nullptr, | ||
| 794 | .buffer = buffer, | ||
| 795 | }; | ||
| 796 | VkMemoryRequirements2 requirements{ | ||
| 797 | .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, | ||
| 798 | .pNext = pnext, | ||
| 799 | .memoryRequirements{}, | ||
| 800 | }; | ||
| 801 | dld->vkGetBufferMemoryRequirements2(handle, &info, &requirements); | ||
| 802 | return requirements.memoryRequirements; | ||
| 763 | } | 803 | } |
| 764 | 804 | ||
| 765 | VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { | 805 | VkMemoryRequirements Device::GetImageMemoryRequirements(VkImage image) const noexcept { |
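GetBufferMemoryRequirements now routes through vkGetBufferMemoryRequirements2, so callers can chain extra output structures through the new pnext parameter. A raw-Vulkan sketch of the same query with VkMemoryDedicatedRequirements chained in (a standard Vulkan 1.1 output struct; the wrapper's pnext argument would carry the same pointer):

    #include <vulkan/vulkan.h>

    // Queries buffer memory requirements and dedicated-allocation preferences in one call.
    VkMemoryRequirements QueryBufferRequirements(VkDevice device, VkBuffer buffer,
                                                 VkMemoryDedicatedRequirements& dedicated) {
        dedicated = {};
        dedicated.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;

        const VkBufferMemoryRequirementsInfo2 info{
            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
            .pNext = nullptr,
            .buffer = buffer,
        };
        VkMemoryRequirements2 requirements{
            .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            .pNext = &dedicated,
            .memoryRequirements{},
        };
        vkGetBufferMemoryRequirements2(device, &info, &requirements);
        return requirements.memoryRequirements;
    }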
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 9689de0cb..3e36d356a 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -15,8 +15,19 @@ | |||
| 15 | #include <vector> | 15 | #include <vector> |
| 16 | 16 | ||
| 17 | #define VK_NO_PROTOTYPES | 17 | #define VK_NO_PROTOTYPES |
| 18 | #ifdef _WIN32 | ||
| 19 | #define VK_USE_PLATFORM_WIN32_KHR | ||
| 20 | #endif | ||
| 18 | #include <vulkan/vulkan.h> | 21 | #include <vulkan/vulkan.h> |
| 19 | 22 | ||
| 23 | // Sanitize macros | ||
| 24 | #ifdef CreateEvent | ||
| 25 | #undef CreateEvent | ||
| 26 | #endif | ||
| 27 | #ifdef CreateSemaphore | ||
| 28 | #undef CreateSemaphore | ||
| 29 | #endif | ||
| 30 | |||
| 20 | #include "common/common_types.h" | 31 | #include "common/common_types.h" |
| 21 | 32 | ||
| 22 | #ifdef _MSC_VER | 33 | #ifdef _MSC_VER |
| @@ -174,7 +185,7 @@ struct InstanceDispatch { | |||
| 174 | }; | 185 | }; |
| 175 | 186 | ||
| 176 | /// Table holding Vulkan device function pointers. | 187 | /// Table holding Vulkan device function pointers. |
| 177 | struct DeviceDispatch : public InstanceDispatch { | 188 | struct DeviceDispatch : InstanceDispatch { |
| 178 | PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{}; | 189 | PFN_vkAcquireNextImageKHR vkAcquireNextImageKHR{}; |
| 179 | PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{}; | 190 | PFN_vkAllocateCommandBuffers vkAllocateCommandBuffers{}; |
| 180 | PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{}; | 191 | PFN_vkAllocateDescriptorSets vkAllocateDescriptorSets{}; |
| @@ -272,11 +283,15 @@ struct DeviceDispatch : public InstanceDispatch { | |||
| 272 | PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; | 283 | PFN_vkFreeCommandBuffers vkFreeCommandBuffers{}; |
| 273 | PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; | 284 | PFN_vkFreeDescriptorSets vkFreeDescriptorSets{}; |
| 274 | PFN_vkFreeMemory vkFreeMemory{}; | 285 | PFN_vkFreeMemory vkFreeMemory{}; |
| 275 | PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements{}; | 286 | PFN_vkGetBufferMemoryRequirements2 vkGetBufferMemoryRequirements2{}; |
| 276 | PFN_vkGetDeviceQueue vkGetDeviceQueue{}; | 287 | PFN_vkGetDeviceQueue vkGetDeviceQueue{}; |
| 277 | PFN_vkGetEventStatus vkGetEventStatus{}; | 288 | PFN_vkGetEventStatus vkGetEventStatus{}; |
| 278 | PFN_vkGetFenceStatus vkGetFenceStatus{}; | 289 | PFN_vkGetFenceStatus vkGetFenceStatus{}; |
| 279 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; | 290 | PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements{}; |
| 291 | PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR{}; | ||
| 292 | #ifdef _WIN32 | ||
| 293 | PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR{}; | ||
| 294 | #endif | ||
| 280 | PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; | 295 | PFN_vkGetQueryPoolResults vkGetQueryPoolResults{}; |
| 281 | PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; | 296 | PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR{}; |
| 282 | PFN_vkMapMemory vkMapMemory{}; | 297 | PFN_vkMapMemory vkMapMemory{}; |
| @@ -344,6 +359,9 @@ public: | |||
| 344 | /// Construct an empty handle. | 359 | /// Construct an empty handle. |
| 345 | Handle() = default; | 360 | Handle() = default; |
| 346 | 361 | ||
| 362 | /// Construct an empty handle. | ||
| 363 | Handle(std::nullptr_t) {} | ||
| 364 | |||
| 347 | /// Copying Vulkan objects is not supported and will never be. | 365 | /// Copying Vulkan objects is not supported and will never be. |
| 348 | Handle(const Handle&) = delete; | 366 | Handle(const Handle&) = delete; |
| 349 | Handle& operator=(const Handle&) = delete; | 367 | Handle& operator=(const Handle&) = delete; |
| @@ -659,6 +677,12 @@ class DeviceMemory : public Handle<VkDeviceMemory, VkDevice, DeviceDispatch> { | |||
| 659 | using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; | 677 | using Handle<VkDeviceMemory, VkDevice, DeviceDispatch>::Handle; |
| 660 | 678 | ||
| 661 | public: | 679 | public: |
| 680 | int GetMemoryFdKHR() const; | ||
| 681 | |||
| 682 | #ifdef _WIN32 | ||
| 683 | HANDLE GetMemoryWin32HandleKHR() const; | ||
| 684 | #endif | ||
| 685 | |||
| 662 | /// Set object name. | 686 | /// Set object name. |
| 663 | void SetObjectNameEXT(const char* name) const; | 687 | void SetObjectNameEXT(const char* name) const; |
| 664 | 688 | ||
| @@ -847,7 +871,8 @@ public: | |||
| 847 | 871 | ||
| 848 | DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; | 872 | DeviceMemory AllocateMemory(const VkMemoryAllocateInfo& ai) const; |
| 849 | 873 | ||
| 850 | VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer) const noexcept; | 874 | VkMemoryRequirements GetBufferMemoryRequirements(VkBuffer buffer, |
| 875 | void* pnext = nullptr) const noexcept; | ||
| 851 | 876 | ||
| 852 | VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; | 877 | VkMemoryRequirements GetImageMemoryRequirements(VkImage image) const noexcept; |
| 853 | 878 | ||
| @@ -1033,6 +1058,12 @@ public: | |||
| 1033 | 1058 | ||
| 1034 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | 1059 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, |
| 1035 | VkDependencyFlags dependency_flags, | 1060 | VkDependencyFlags dependency_flags, |
| 1061 | const VkMemoryBarrier& memory_barrier) const noexcept { | ||
| 1062 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, memory_barrier, {}, {}); | ||
| 1063 | } | ||
| 1064 | |||
| 1065 | void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, | ||
| 1066 | VkDependencyFlags dependency_flags, | ||
| 1036 | const VkBufferMemoryBarrier& buffer_barrier) const noexcept { | 1067 | const VkBufferMemoryBarrier& buffer_barrier) const noexcept { |
| 1037 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); | 1068 | PipelineBarrier(src_stage_mask, dst_stage_mask, dependency_flags, {}, buffer_barrier, {}); |
| 1038 | } | 1069 | } |
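The added PipelineBarrier overload covers the common case of a single global VkMemoryBarrier without spelling out empty buffer and image barrier lists. A raw-command-buffer sketch of the equivalent call, assuming a valid command buffer in the recording state:

    #include <vulkan/vulkan.h>

    // Records one global memory barrier; no buffer or image barriers are attached.
    void RecordGlobalBarrier(VkCommandBuffer cmdbuf, VkPipelineStageFlags src_stage,
                             VkPipelineStageFlags dst_stage, const VkMemoryBarrier& barrier) {
        vkCmdPipelineBarrier(cmdbuf, src_stage, dst_stage, /*dependencyFlags=*/0,
                             1, &barrier,  // global memory barrier
                             0, nullptr,   // buffer memory barriers
                             0, nullptr);  // image memory barriers
    }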
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index d9a3035cb..1c61d419d 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp | |||
| @@ -64,7 +64,7 @@ void EmuThread::run() { | |||
| 64 | 64 | ||
| 65 | emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); | 65 | emit LoadProgress(VideoCore::LoadCallbackStage::Prepare, 0, 0); |
| 66 | 66 | ||
| 67 | system.Renderer().Rasterizer().LoadDiskResources( | 67 | system.Renderer().ReadRasterizer()->LoadDiskResources( |
| 68 | system.CurrentProcess()->GetTitleID(), stop_run, | 68 | system.CurrentProcess()->GetTitleID(), stop_run, |
| 69 | [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { | 69 | [this](VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { |
| 70 | emit LoadProgress(stage, value, total); | 70 | emit LoadProgress(stage, value, total); |
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 8f7458119..0635d13d0 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp | |||
| @@ -782,14 +782,14 @@ void Config::ReadRendererValues() { | |||
| 782 | ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); | 782 | ReadSettingGlobal(Settings::values.frame_limit, QStringLiteral("frame_limit"), 100); |
| 783 | ReadSettingGlobal(Settings::values.use_disk_shader_cache, | 783 | ReadSettingGlobal(Settings::values.use_disk_shader_cache, |
| 784 | QStringLiteral("use_disk_shader_cache"), true); | 784 | QStringLiteral("use_disk_shader_cache"), true); |
| 785 | ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 0); | 785 | ReadSettingGlobal(Settings::values.gpu_accuracy, QStringLiteral("gpu_accuracy"), 1); |
| 786 | ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, | 786 | ReadSettingGlobal(Settings::values.use_asynchronous_gpu_emulation, |
| 787 | QStringLiteral("use_asynchronous_gpu_emulation"), true); | 787 | QStringLiteral("use_asynchronous_gpu_emulation"), true); |
| 788 | ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), | 788 | ReadSettingGlobal(Settings::values.use_nvdec_emulation, QStringLiteral("use_nvdec_emulation"), |
| 789 | true); | 789 | true); |
| 790 | ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); | 790 | ReadSettingGlobal(Settings::values.use_vsync, QStringLiteral("use_vsync"), true); |
| 791 | ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), | 791 | ReadSettingGlobal(Settings::values.use_assembly_shaders, QStringLiteral("use_assembly_shaders"), |
| 792 | true); | 792 | false); |
| 793 | ReadSettingGlobal(Settings::values.use_asynchronous_shaders, | 793 | ReadSettingGlobal(Settings::values.use_asynchronous_shaders, |
| 794 | QStringLiteral("use_asynchronous_shaders"), false); | 794 | QStringLiteral("use_asynchronous_shaders"), false); |
| 795 | ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), | 795 | ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), |
| @@ -1351,14 +1351,14 @@ void Config::SaveRendererValues() { | |||
| 1351 | Settings::values.use_disk_shader_cache, true); | 1351 | Settings::values.use_disk_shader_cache, true); |
| 1352 | WriteSettingGlobal(QStringLiteral("gpu_accuracy"), | 1352 | WriteSettingGlobal(QStringLiteral("gpu_accuracy"), |
| 1353 | static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), | 1353 | static_cast<int>(Settings::values.gpu_accuracy.GetValue(global)), |
| 1354 | Settings::values.gpu_accuracy.UsingGlobal(), 0); | 1354 | Settings::values.gpu_accuracy.UsingGlobal(), 1); |
| 1355 | WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), | 1355 | WriteSettingGlobal(QStringLiteral("use_asynchronous_gpu_emulation"), |
| 1356 | Settings::values.use_asynchronous_gpu_emulation, true); | 1356 | Settings::values.use_asynchronous_gpu_emulation, true); |
| 1357 | WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, | 1357 | WriteSettingGlobal(QStringLiteral("use_nvdec_emulation"), Settings::values.use_nvdec_emulation, |
| 1358 | true); | 1358 | true); |
| 1359 | WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); | 1359 | WriteSettingGlobal(QStringLiteral("use_vsync"), Settings::values.use_vsync, true); |
| 1360 | WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), | 1360 | WriteSettingGlobal(QStringLiteral("use_assembly_shaders"), |
| 1361 | Settings::values.use_assembly_shaders, true); | 1361 | Settings::values.use_assembly_shaders, false); |
| 1362 | WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), | 1362 | WriteSettingGlobal(QStringLiteral("use_asynchronous_shaders"), |
| 1363 | Settings::values.use_asynchronous_shaders, false); | 1363 | Settings::values.use_asynchronous_shaders, false); |
| 1364 | WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, | 1364 | WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, |
diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index b78a5dff0..9ff32aec4 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp | |||
| @@ -2,6 +2,9 @@ | |||
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| 5 | // Include this early to include Vulkan headers how we want to | ||
| 6 | #include "video_core/vulkan_common/vulkan_wrapper.h" | ||
| 7 | |||
| 5 | #include <QColorDialog> | 8 | #include <QColorDialog> |
| 6 | #include <QComboBox> | 9 | #include <QComboBox> |
| 7 | #include <QVulkanInstance> | 10 | #include <QVulkanInstance> |
| @@ -11,7 +14,8 @@ | |||
| 11 | #include "core/core.h" | 14 | #include "core/core.h" |
| 12 | #include "core/settings.h" | 15 | #include "core/settings.h" |
| 13 | #include "ui_configure_graphics.h" | 16 | #include "ui_configure_graphics.h" |
| 14 | #include "video_core/renderer_vulkan/renderer_vulkan.h" | 17 | #include "video_core/vulkan_common/vulkan_instance.h" |
| 18 | #include "video_core/vulkan_common/vulkan_library.h" | ||
| 15 | #include "yuzu/configuration/configuration_shared.h" | 19 | #include "yuzu/configuration/configuration_shared.h" |
| 16 | #include "yuzu/configuration/configure_graphics.h" | 20 | #include "yuzu/configuration/configure_graphics.h" |
| 17 | 21 | ||
| @@ -212,11 +216,23 @@ void ConfigureGraphics::UpdateDeviceComboBox() { | |||
| 212 | ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); | 216 | ui->device->setEnabled(enabled && !Core::System::GetInstance().IsPoweredOn()); |
| 213 | } | 217 | } |
| 214 | 218 | ||
| 215 | void ConfigureGraphics::RetrieveVulkanDevices() { | 219 | void ConfigureGraphics::RetrieveVulkanDevices() try { |
| 220 | using namespace Vulkan; | ||
| 221 | |||
| 222 | vk::InstanceDispatch dld; | ||
| 223 | const Common::DynamicLibrary library = OpenLibrary(); | ||
| 224 | const vk::Instance instance = CreateInstance(library, dld, VK_API_VERSION_1_0); | ||
| 225 | const std::vector<VkPhysicalDevice> physical_devices = instance.EnumeratePhysicalDevices(); | ||
| 226 | |||
| 216 | vulkan_devices.clear(); | 227 | vulkan_devices.clear(); |
| 217 | for (const auto& name : Vulkan::RendererVulkan::EnumerateDevices()) { | 228 | vulkan_devices.reserve(physical_devices.size()); |
| 229 | for (const VkPhysicalDevice device : physical_devices) { | ||
| 230 | const char* const name = vk::PhysicalDevice(device, dld).GetProperties().deviceName; | ||
| 218 | vulkan_devices.push_back(QString::fromStdString(name)); | 231 | vulkan_devices.push_back(QString::fromStdString(name)); |
| 219 | } | 232 | } |
| 233 | |||
| 234 | } catch (const Vulkan::vk::Exception& exception) { | ||
| 235 | LOG_ERROR(Frontend, "Failed to enumerate devices with error: {}", exception.what()); | ||
| 220 | } | 236 | } |
| 221 | 237 | ||
| 222 | Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { | 238 | Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { |
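RetrieveVulkanDevices() now builds its own instance through the video_core/vulkan_common wrappers instead of calling RendererVulkan::EnumerateDevices(), and the whole function body sits in a function-try-block so a failing driver only logs an error instead of taking down the configuration dialog. A rough equivalent of the same enumerate-and-name pattern using the raw Vulkan C API, included purely as an illustration under the assumption that a VkInstance already exists (the vk:: wrapper types from the diff are not reproduced here):

    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <vector>
    #include <vulkan/vulkan.h>

    // Sketch: collect the device names visible from an already-created VkInstance.
    // The function-try-block turns any thrown error into an empty result, mirroring
    // the log-and-continue behaviour of the hunk above.
    std::vector<std::string> EnumerateDeviceNames(VkInstance instance) try {
        std::uint32_t count = 0;
        if (vkEnumeratePhysicalDevices(instance, &count, nullptr) != VK_SUCCESS) {
            throw std::runtime_error("vkEnumeratePhysicalDevices failed");
        }
        std::vector<VkPhysicalDevice> devices(count);
        if (vkEnumeratePhysicalDevices(instance, &count, devices.data()) != VK_SUCCESS) {
            throw std::runtime_error("vkEnumeratePhysicalDevices failed");
        }
        std::vector<std::string> names;
        names.reserve(devices.size());
        for (const VkPhysicalDevice device : devices) {
            VkPhysicalDeviceProperties properties{};
            vkGetPhysicalDeviceProperties(device, &properties);
            names.emplace_back(properties.deviceName);
        }
        return names;
    } catch (const std::exception&) {
        // On failure, report no devices rather than crashing the settings dialog.
        return {};
    }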
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index f76102459..aa0a9f288 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp | |||
| @@ -388,7 +388,7 @@ void Config::ReadValues() { | |||
| 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100))); | 388 | static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100))); |
| 389 | Settings::values.use_disk_shader_cache.SetValue( | 389 | Settings::values.use_disk_shader_cache.SetValue( |
| 390 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); | 390 | sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false)); |
| 391 | const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0); | 391 | const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 1); |
| 392 | Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); | 392 | Settings::values.gpu_accuracy.SetValue(static_cast<Settings::GPUAccuracy>(gpu_accuracy_level)); |
| 393 | Settings::values.use_asynchronous_gpu_emulation.SetValue( | 393 | Settings::values.use_asynchronous_gpu_emulation.SetValue( |
| 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); | 394 | sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", true)); |
diff --git a/src/yuzu_cmd/yuzu.cpp b/src/yuzu_cmd/yuzu.cpp index 0e1f3bdb3..982c41785 100644 --- a/src/yuzu_cmd/yuzu.cpp +++ b/src/yuzu_cmd/yuzu.cpp | |||
| @@ -215,7 +215,7 @@ int main(int argc, char** argv) { | |||
| 215 | // Core is loaded, start the GPU (makes the GPU contexts current to this thread) | 215 | // Core is loaded, start the GPU (makes the GPU contexts current to this thread) |
| 216 | system.GPU().Start(); | 216 | system.GPU().Start(); |
| 217 | 217 | ||
| 218 | system.Renderer().Rasterizer().LoadDiskResources( | 218 | system.Renderer().ReadRasterizer()->LoadDiskResources( |
| 219 | system.CurrentProcess()->GetTitleID(), false, | 219 | system.CurrentProcess()->GetTitleID(), false, |
| 220 | [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); | 220 | [](VideoCore::LoadCallbackStage, size_t value, size_t total) {}); |
| 221 | 221 | ||
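In the command-line frontend the progress callback is deliberately a no-op, while the Qt frontend near the top of this section forwards the same three arguments to a LoadProgress signal. A frontend that wanted console feedback could pass something along these lines instead; this is a hypothetical example based only on the callback signature visible in the diff, not part of the change:

    // Assumed example: print shader-cache loading progress to stdout. Requires
    // <cstdio>, <cstddef>, and the video_core header defining LoadCallbackStage.
    const auto print_progress = [](VideoCore::LoadCallbackStage stage, std::size_t value,
                                   std::size_t total) {
        if (total != 0) {
            std::printf("Loading shaders: stage %d, %zu/%zu\n", static_cast<int>(stage), value,
                        total);
        }
    };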