diff options
| -rw-r--r-- | CMakeLists.txt | 10 | ||||
| -rw-r--r-- | src/common/CMakeLists.txt | 2 | ||||
| -rw-r--r-- | src/common/uint128.cpp | 41 | ||||
| -rw-r--r-- | src/common/uint128.h | 14 | ||||
| -rw-r--r-- | src/core/arm/dynarmic/arm_dynarmic.cpp | 5 | ||||
| -rw-r--r-- | src/core/core_timing_util.cpp | 6 | ||||
| -rw-r--r-- | src/core/core_timing_util.h | 3 | ||||
| -rw-r--r-- | src/core/hle/kernel/scheduler.cpp | 6 | ||||
| -rw-r--r-- | src/core/hle/kernel/svc.cpp | 32 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.cpp | 13 | ||||
| -rw-r--r-- | src/core/hle/kernel/thread.h | 13 | ||||
| -rw-r--r-- | src/core/memory.cpp | 81 | ||||
| -rw-r--r-- | src/core/memory.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 3 |
14 files changed, 124 insertions, 111 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 32cfa8580..9cc24cba6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt | |||
| @@ -163,12 +163,6 @@ else() | |||
| 163 | set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) | 163 | set(CMAKE_EXE_LINKER_FLAGS_RELEASE "/DEBUG /MANIFEST:NO /INCREMENTAL:NO /OPT:REF,ICF" CACHE STRING "" FORCE) |
| 164 | endif() | 164 | endif() |
| 165 | 165 | ||
| 166 | # Fix GCC C++17 and Boost.ICL incompatibility (needed to build dynarmic) | ||
| 167 | # See https://bugzilla.redhat.com/show_bug.cgi?id=1485641#c1 | ||
| 168 | if (CMAKE_COMPILER_IS_GNUCC) | ||
| 169 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-new-ttp-matching") | ||
| 170 | endif() | ||
| 171 | |||
| 172 | # Set file offset size to 64 bits. | 166 | # Set file offset size to 64 bits. |
| 173 | # | 167 | # |
| 174 | # On modern Unixes, this is typically already the case. The lone exception is | 168 | # On modern Unixes, this is typically already the case. The lone exception is |
| @@ -185,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY | |||
| 185 | # System imported libraries | 179 | # System imported libraries |
| 186 | # ====================== | 180 | # ====================== |
| 187 | 181 | ||
| 188 | find_package(Boost 1.63.0 QUIET) | 182 | find_package(Boost 1.64.0 QUIET) |
| 189 | if (NOT Boost_FOUND) | 183 | if (NOT Boost_FOUND) |
| 190 | message(STATUS "Boost 1.63.0 or newer not found, falling back to externals") | 184 | message(STATUS "Boost 1.64.0 or newer not found, falling back to externals") |
| 191 | 185 | ||
| 192 | set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost") | 186 | set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost") |
| 193 | set(Boost_NO_SYSTEM_PATHS OFF) | 187 | set(Boost_NO_SYSTEM_PATHS OFF) |
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 3d30f0e3e..c538c6415 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt | |||
| @@ -114,6 +114,8 @@ add_library(common STATIC | |||
| 114 | threadsafe_queue.h | 114 | threadsafe_queue.h |
| 115 | timer.cpp | 115 | timer.cpp |
| 116 | timer.h | 116 | timer.h |
| 117 | uint128.cpp | ||
| 118 | uint128.h | ||
| 117 | vector_math.h | 119 | vector_math.h |
| 118 | web_result.h | 120 | web_result.h |
| 119 | ) | 121 | ) |
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp new file mode 100644 index 000000000..2238a52c5 --- /dev/null +++ b/src/common/uint128.cpp | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | #ifdef _MSC_VER | ||
| 2 | #include <intrin.h> | ||
| 3 | |||
| 4 | #pragma intrinsic(_umul128) | ||
| 5 | #endif | ||
| 6 | #include <cstring> | ||
| 7 | #include "common/uint128.h" | ||
| 8 | |||
| 9 | namespace Common { | ||
| 10 | |||
| 11 | u128 Multiply64Into128(u64 a, u64 b) { | ||
| 12 | u128 result; | ||
| 13 | #ifdef _MSC_VER | ||
| 14 | result[0] = _umul128(a, b, &result[1]); | ||
| 15 | #else | ||
| 16 | unsigned __int128 tmp = a; | ||
| 17 | tmp *= b; | ||
| 18 | std::memcpy(&result, &tmp, sizeof(u128)); | ||
| 19 | #endif | ||
| 20 | return result; | ||
| 21 | } | ||
| 22 | |||
| 23 | std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) { | ||
| 24 | u64 remainder = dividend[0] % divisor; | ||
| 25 | u64 accum = dividend[0] / divisor; | ||
| 26 | if (dividend[1] == 0) | ||
| 27 | return {accum, remainder}; | ||
| 28 | // We ignore dividend[1] / divisor as that overflows | ||
| 29 | const u64 first_segment = (dividend[1] % divisor) << 32; | ||
| 30 | accum += (first_segment / divisor) << 32; | ||
| 31 | const u64 second_segment = (first_segment % divisor) << 32; | ||
| 32 | accum += (second_segment / divisor); | ||
| 33 | remainder += second_segment % divisor; | ||
| 34 | if (remainder >= divisor) { | ||
| 35 | accum++; | ||
| 36 | remainder -= divisor; | ||
| 37 | } | ||
| 38 | return {accum, remainder}; | ||
| 39 | } | ||
| 40 | |||
| 41 | } // namespace Common | ||
diff --git a/src/common/uint128.h b/src/common/uint128.h new file mode 100644 index 000000000..52e6b46eb --- /dev/null +++ b/src/common/uint128.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | |||
| 2 | #include <utility> | ||
| 3 | #include "common/common_types.h" | ||
| 4 | |||
| 5 | namespace Common { | ||
| 6 | |||
| 7 | // This function multiplies 2 u64 values and produces a u128 value; | ||
| 8 | u128 Multiply64Into128(u64 a, u64 b); | ||
| 9 | |||
| 10 | // This function divides a u128 by a u32 value and produces two u64 values: | ||
| 11 | // the result of division and the remainder | ||
| 12 | std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor); | ||
| 13 | |||
| 14 | } // namespace Common | ||
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 9b7ca4030..4fdc12f11 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "core/core.h" | 12 | #include "core/core.h" |
| 13 | #include "core/core_cpu.h" | 13 | #include "core/core_cpu.h" |
| 14 | #include "core/core_timing.h" | 14 | #include "core/core_timing.h" |
| 15 | #include "core/core_timing_util.h" | ||
| 15 | #include "core/gdbstub/gdbstub.h" | 16 | #include "core/gdbstub/gdbstub.h" |
| 16 | #include "core/hle/kernel/process.h" | 17 | #include "core/hle/kernel/process.h" |
| 17 | #include "core/hle/kernel/svc.h" | 18 | #include "core/hle/kernel/svc.h" |
| @@ -119,7 +120,7 @@ public: | |||
| 119 | return std::max(parent.core_timing.GetDowncount(), 0); | 120 | return std::max(parent.core_timing.GetDowncount(), 0); |
| 120 | } | 121 | } |
| 121 | u64 GetCNTPCT() override { | 122 | u64 GetCNTPCT() override { |
| 122 | return parent.core_timing.GetTicks(); | 123 | return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks()); |
| 123 | } | 124 | } |
| 124 | 125 | ||
| 125 | ARM_Dynarmic& parent; | 126 | ARM_Dynarmic& parent; |
| @@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const { | |||
| 151 | config.tpidr_el0 = &cb->tpidr_el0; | 152 | config.tpidr_el0 = &cb->tpidr_el0; |
| 152 | config.dczid_el0 = 4; | 153 | config.dczid_el0 = 4; |
| 153 | config.ctr_el0 = 0x8444c004; | 154 | config.ctr_el0 = 0x8444c004; |
| 154 | config.cntfrq_el0 = 19200000; // Value from fusee. | 155 | config.cntfrq_el0 = Timing::CNTFREQ; |
| 155 | 156 | ||
| 156 | // Unpredictable instructions | 157 | // Unpredictable instructions |
| 157 | config.define_unpredictable_behaviour = true; | 158 | config.define_unpredictable_behaviour = true; |
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp index 88ff70233..7942f30d6 100644 --- a/src/core/core_timing_util.cpp +++ b/src/core/core_timing_util.cpp | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <cinttypes> | 7 | #include <cinttypes> |
| 8 | #include <limits> | 8 | #include <limits> |
| 9 | #include "common/logging/log.h" | 9 | #include "common/logging/log.h" |
| 10 | #include "common/uint128.h" | ||
| 10 | 11 | ||
| 11 | namespace Core::Timing { | 12 | namespace Core::Timing { |
| 12 | 13 | ||
| @@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) { | |||
| 60 | return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; | 61 | return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; |
| 61 | } | 62 | } |
| 62 | 63 | ||
| 64 | u64 CpuCyclesToClockCycles(u64 ticks) { | ||
| 65 | const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ); | ||
| 66 | return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first; | ||
| 67 | } | ||
| 68 | |||
| 63 | } // namespace Core::Timing | 69 | } // namespace Core::Timing |
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h index 513cfac1b..679aa3123 100644 --- a/src/core/core_timing_util.h +++ b/src/core/core_timing_util.h | |||
| @@ -11,6 +11,7 @@ namespace Core::Timing { | |||
| 11 | // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz | 11 | // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz |
| 12 | // The exact value used is of course unverified. | 12 | // The exact value used is of course unverified. |
| 13 | constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked | 13 | constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked |
| 14 | constexpr u64 CNTFREQ = 19200000; // Value from fusee. | ||
| 14 | 15 | ||
| 15 | inline s64 msToCycles(int ms) { | 16 | inline s64 msToCycles(int ms) { |
| 16 | // since ms is int there is no way to overflow | 17 | // since ms is int there is no way to overflow |
| @@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) { | |||
| 61 | return cycles * 1000 / BASE_CLOCK_RATE; | 62 | return cycles * 1000 / BASE_CLOCK_RATE; |
| 62 | } | 63 | } |
| 63 | 64 | ||
| 65 | u64 CpuCyclesToClockCycles(u64 ticks); | ||
| 66 | |||
| 64 | } // namespace Core::Timing | 67 | } // namespace Core::Timing |
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 5fccfd9f4..e524509df 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp | |||
| @@ -199,8 +199,7 @@ void Scheduler::YieldWithoutLoadBalancing(Thread* thread) { | |||
| 199 | ASSERT(thread->GetPriority() < THREADPRIO_COUNT); | 199 | ASSERT(thread->GetPriority() < THREADPRIO_COUNT); |
| 200 | 200 | ||
| 201 | // Yield this thread -- sleep for zero time and force reschedule to different thread | 201 | // Yield this thread -- sleep for zero time and force reschedule to different thread |
| 202 | WaitCurrentThread_Sleep(); | 202 | GetCurrentThread()->Sleep(0); |
| 203 | GetCurrentThread()->WakeAfterDelay(0); | ||
| 204 | } | 203 | } |
| 205 | 204 | ||
| 206 | void Scheduler::YieldWithLoadBalancing(Thread* thread) { | 205 | void Scheduler::YieldWithLoadBalancing(Thread* thread) { |
| @@ -215,8 +214,7 @@ void Scheduler::YieldWithLoadBalancing(Thread* thread) { | |||
| 215 | ASSERT(priority < THREADPRIO_COUNT); | 214 | ASSERT(priority < THREADPRIO_COUNT); |
| 216 | 215 | ||
| 217 | // Sleep for zero time to be able to force reschedule to different thread | 216 | // Sleep for zero time to be able to force reschedule to different thread |
| 218 | WaitCurrentThread_Sleep(); | 217 | GetCurrentThread()->Sleep(0); |
| 219 | GetCurrentThread()->WakeAfterDelay(0); | ||
| 220 | 218 | ||
| 221 | Thread* suggested_thread = nullptr; | 219 | Thread* suggested_thread = nullptr; |
| 222 | 220 | ||
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 77d0e3d96..047fa0c19 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp | |||
| @@ -1284,10 +1284,14 @@ static ResultCode StartThread(Handle thread_handle) { | |||
| 1284 | 1284 | ||
| 1285 | /// Called when a thread exits | 1285 | /// Called when a thread exits |
| 1286 | static void ExitThread() { | 1286 | static void ExitThread() { |
| 1287 | LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC()); | 1287 | auto& system = Core::System::GetInstance(); |
| 1288 | 1288 | ||
| 1289 | ExitCurrentThread(); | 1289 | LOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", system.CurrentArmInterface().GetPC()); |
| 1290 | Core::System::GetInstance().PrepareReschedule(); | 1290 | |
| 1291 | auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); | ||
| 1292 | current_thread->Stop(); | ||
| 1293 | system.CurrentScheduler().RemoveThread(current_thread); | ||
| 1294 | system.PrepareReschedule(); | ||
| 1291 | } | 1295 | } |
| 1292 | 1296 | ||
| 1293 | /// Sleep the current thread | 1297 | /// Sleep the current thread |
| @@ -1300,32 +1304,32 @@ static void SleepThread(s64 nanoseconds) { | |||
| 1300 | YieldAndWaitForLoadBalancing = -2, | 1304 | YieldAndWaitForLoadBalancing = -2, |
| 1301 | }; | 1305 | }; |
| 1302 | 1306 | ||
| 1307 | auto& system = Core::System::GetInstance(); | ||
| 1308 | auto& scheduler = system.CurrentScheduler(); | ||
| 1309 | auto* const current_thread = scheduler.GetCurrentThread(); | ||
| 1310 | |||
| 1303 | if (nanoseconds <= 0) { | 1311 | if (nanoseconds <= 0) { |
| 1304 | auto& scheduler{Core::System::GetInstance().CurrentScheduler()}; | ||
| 1305 | switch (static_cast<SleepType>(nanoseconds)) { | 1312 | switch (static_cast<SleepType>(nanoseconds)) { |
| 1306 | case SleepType::YieldWithoutLoadBalancing: | 1313 | case SleepType::YieldWithoutLoadBalancing: |
| 1307 | scheduler.YieldWithoutLoadBalancing(GetCurrentThread()); | 1314 | scheduler.YieldWithoutLoadBalancing(current_thread); |
| 1308 | break; | 1315 | break; |
| 1309 | case SleepType::YieldWithLoadBalancing: | 1316 | case SleepType::YieldWithLoadBalancing: |
| 1310 | scheduler.YieldWithLoadBalancing(GetCurrentThread()); | 1317 | scheduler.YieldWithLoadBalancing(current_thread); |
| 1311 | break; | 1318 | break; |
| 1312 | case SleepType::YieldAndWaitForLoadBalancing: | 1319 | case SleepType::YieldAndWaitForLoadBalancing: |
| 1313 | scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread()); | 1320 | scheduler.YieldAndWaitForLoadBalancing(current_thread); |
| 1314 | break; | 1321 | break; |
| 1315 | default: | 1322 | default: |
| 1316 | UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); | 1323 | UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); |
| 1317 | } | 1324 | } |
| 1318 | } else { | 1325 | } else { |
| 1319 | // Sleep current thread and check for next thread to schedule | 1326 | current_thread->Sleep(nanoseconds); |
| 1320 | WaitCurrentThread_Sleep(); | ||
| 1321 | |||
| 1322 | // Create an event to wake the thread up after the specified nanosecond delay has passed | ||
| 1323 | GetCurrentThread()->WakeAfterDelay(nanoseconds); | ||
| 1324 | } | 1327 | } |
| 1325 | 1328 | ||
| 1326 | // Reschedule all CPU cores | 1329 | // Reschedule all CPU cores |
| 1327 | for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) | 1330 | for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) { |
| 1328 | Core::System::GetInstance().CpuCore(i).PrepareReschedule(); | 1331 | system.CpuCore(i).PrepareReschedule(); |
| 1332 | } | ||
| 1329 | } | 1333 | } |
| 1330 | 1334 | ||
| 1331 | /// Wait process wide key atomic | 1335 | /// Wait process wide key atomic |
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index eb54d6651..2e712c9cb 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp | |||
| @@ -68,11 +68,6 @@ void Thread::Stop() { | |||
| 68 | owner_process->FreeTLSSlot(tls_address); | 68 | owner_process->FreeTLSSlot(tls_address); |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | void WaitCurrentThread_Sleep() { | ||
| 72 | Thread* thread = GetCurrentThread(); | ||
| 73 | thread->SetStatus(ThreadStatus::WaitSleep); | ||
| 74 | } | ||
| 75 | |||
| 76 | void ExitCurrentThread() { | 71 | void ExitCurrentThread() { |
| 77 | Thread* thread = GetCurrentThread(); | 72 | Thread* thread = GetCurrentThread(); |
| 78 | thread->Stop(); | 73 | thread->Stop(); |
| @@ -391,6 +386,14 @@ void Thread::SetActivity(ThreadActivity value) { | |||
| 391 | } | 386 | } |
| 392 | } | 387 | } |
| 393 | 388 | ||
| 389 | void Thread::Sleep(s64 nanoseconds) { | ||
| 390 | // Sleep current thread and check for next thread to schedule | ||
| 391 | SetStatus(ThreadStatus::WaitSleep); | ||
| 392 | |||
| 393 | // Create an event to wake the thread up after the specified nanosecond delay has passed | ||
| 394 | WakeAfterDelay(nanoseconds); | ||
| 395 | } | ||
| 396 | |||
| 394 | //////////////////////////////////////////////////////////////////////////////////////////////////// | 397 | //////////////////////////////////////////////////////////////////////////////////////////////////// |
| 395 | 398 | ||
| 396 | /** | 399 | /** |
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index c48b21aba..ccdefeecc 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h | |||
| @@ -383,6 +383,9 @@ public: | |||
| 383 | 383 | ||
| 384 | void SetActivity(ThreadActivity value); | 384 | void SetActivity(ThreadActivity value); |
| 385 | 385 | ||
| 386 | /// Sleeps this thread for the given amount of nanoseconds. | ||
| 387 | void Sleep(s64 nanoseconds); | ||
| 388 | |||
| 386 | private: | 389 | private: |
| 387 | explicit Thread(KernelCore& kernel); | 390 | explicit Thread(KernelCore& kernel); |
| 388 | ~Thread() override; | 391 | ~Thread() override; |
| @@ -460,14 +463,4 @@ private: | |||
| 460 | */ | 463 | */ |
| 461 | Thread* GetCurrentThread(); | 464 | Thread* GetCurrentThread(); |
| 462 | 465 | ||
| 463 | /** | ||
| 464 | * Waits the current thread on a sleep | ||
| 465 | */ | ||
| 466 | void WaitCurrentThread_Sleep(); | ||
| 467 | |||
| 468 | /** | ||
| 469 | * Stops the current thread and removes it from the thread_list | ||
| 470 | */ | ||
| 471 | void ExitCurrentThread(); | ||
| 472 | |||
| 473 | } // namespace Kernel | 466 | } // namespace Kernel |
diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 4fde53033..e0cc5175f 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include "core/hle/lock.h" | 18 | #include "core/hle/lock.h" |
| 19 | #include "core/memory.h" | 19 | #include "core/memory.h" |
| 20 | #include "core/memory_setup.h" | 20 | #include "core/memory_setup.h" |
| 21 | #include "video_core/gpu.h" | ||
| 21 | #include "video_core/renderer_base.h" | 22 | #include "video_core/renderer_base.h" |
| 22 | 23 | ||
| 23 | namespace Memory { | 24 | namespace Memory { |
| @@ -69,8 +70,8 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa | |||
| 69 | 70 | ||
| 70 | // During boot, current_page_table might not be set yet, in which case we need not flush | 71 | // During boot, current_page_table might not be set yet, in which case we need not flush |
| 71 | if (current_page_table) { | 72 | if (current_page_table) { |
| 72 | RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, | 73 | Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS, |
| 73 | FlushMode::FlushAndInvalidate); | 74 | size * PAGE_SIZE); |
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | VAddr end = base + size; | 77 | VAddr end = base + size; |
| @@ -183,10 +184,10 @@ T Read(const VAddr vaddr) { | |||
| 183 | ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); | 184 | ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); |
| 184 | break; | 185 | break; |
| 185 | case PageType::RasterizerCachedMemory: { | 186 | case PageType::RasterizerCachedMemory: { |
| 186 | RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); | 187 | auto host_ptr{GetPointerFromVMA(vaddr)}; |
| 187 | 188 | Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T)); | |
| 188 | T value; | 189 | T value; |
| 189 | std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); | 190 | std::memcpy(&value, host_ptr, sizeof(T)); |
| 190 | return value; | 191 | return value; |
| 191 | } | 192 | } |
| 192 | default: | 193 | default: |
| @@ -214,8 +215,9 @@ void Write(const VAddr vaddr, const T data) { | |||
| 214 | ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); | 215 | ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); |
| 215 | break; | 216 | break; |
| 216 | case PageType::RasterizerCachedMemory: { | 217 | case PageType::RasterizerCachedMemory: { |
| 217 | RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); | 218 | auto host_ptr{GetPointerFromVMA(vaddr)}; |
| 218 | std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); | 219 | Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T)); |
| 220 | std::memcpy(host_ptr, &data, sizeof(T)); | ||
| 219 | break; | 221 | break; |
| 220 | } | 222 | } |
| 221 | default: | 223 | default: |
| @@ -338,47 +340,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) { | |||
| 338 | } | 340 | } |
| 339 | } | 341 | } |
| 340 | 342 | ||
| 341 | void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | ||
| 342 | auto& system_instance = Core::System::GetInstance(); | ||
| 343 | |||
| 344 | // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be | ||
| 345 | // null here | ||
| 346 | if (!system_instance.IsPoweredOn()) { | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | |||
| 350 | const VAddr end = start + size; | ||
| 351 | |||
| 352 | const auto CheckRegion = [&](VAddr region_start, VAddr region_end) { | ||
| 353 | if (start >= region_end || end <= region_start) { | ||
| 354 | // No overlap with region | ||
| 355 | return; | ||
| 356 | } | ||
| 357 | |||
| 358 | const VAddr overlap_start = std::max(start, region_start); | ||
| 359 | const VAddr overlap_end = std::min(end, region_end); | ||
| 360 | const VAddr overlap_size = overlap_end - overlap_start; | ||
| 361 | |||
| 362 | auto& gpu = system_instance.GPU(); | ||
| 363 | switch (mode) { | ||
| 364 | case FlushMode::Flush: | ||
| 365 | gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); | ||
| 366 | break; | ||
| 367 | case FlushMode::Invalidate: | ||
| 368 | gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); | ||
| 369 | break; | ||
| 370 | case FlushMode::FlushAndInvalidate: | ||
| 371 | gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); | ||
| 372 | break; | ||
| 373 | } | ||
| 374 | }; | ||
| 375 | |||
| 376 | const auto& vm_manager = Core::CurrentProcess()->VMManager(); | ||
| 377 | |||
| 378 | CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress()); | ||
| 379 | CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress()); | ||
| 380 | } | ||
| 381 | |||
| 382 | u8 Read8(const VAddr addr) { | 343 | u8 Read8(const VAddr addr) { |
| 383 | return Read<u8>(addr); | 344 | return Read<u8>(addr); |
| 384 | } | 345 | } |
| @@ -424,9 +385,9 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_ | |||
| 424 | break; | 385 | break; |
| 425 | } | 386 | } |
| 426 | case PageType::RasterizerCachedMemory: { | 387 | case PageType::RasterizerCachedMemory: { |
| 427 | RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), | 388 | const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)}; |
| 428 | FlushMode::Flush); | 389 | Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); |
| 429 | std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); | 390 | std::memcpy(dest_buffer, host_ptr, copy_amount); |
| 430 | break; | 391 | break; |
| 431 | } | 392 | } |
| 432 | default: | 393 | default: |
| @@ -487,9 +448,9 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi | |||
| 487 | break; | 448 | break; |
| 488 | } | 449 | } |
| 489 | case PageType::RasterizerCachedMemory: { | 450 | case PageType::RasterizerCachedMemory: { |
| 490 | RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), | 451 | const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)}; |
| 491 | FlushMode::Invalidate); | 452 | Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); |
| 492 | std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); | 453 | std::memcpy(host_ptr, src_buffer, copy_amount); |
| 493 | break; | 454 | break; |
| 494 | } | 455 | } |
| 495 | default: | 456 | default: |
| @@ -533,9 +494,9 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std: | |||
| 533 | break; | 494 | break; |
| 534 | } | 495 | } |
| 535 | case PageType::RasterizerCachedMemory: { | 496 | case PageType::RasterizerCachedMemory: { |
| 536 | RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), | 497 | const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)}; |
| 537 | FlushMode::Invalidate); | 498 | Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount); |
| 538 | std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); | 499 | std::memset(host_ptr, 0, copy_amount); |
| 539 | break; | 500 | break; |
| 540 | } | 501 | } |
| 541 | default: | 502 | default: |
| @@ -575,9 +536,9 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr, | |||
| 575 | break; | 536 | break; |
| 576 | } | 537 | } |
| 577 | case PageType::RasterizerCachedMemory: { | 538 | case PageType::RasterizerCachedMemory: { |
| 578 | RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), | 539 | const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)}; |
| 579 | FlushMode::Flush); | 540 | Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount); |
| 580 | WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); | 541 | WriteBlock(process, dest_addr, host_ptr, copy_amount); |
| 581 | break; | 542 | break; |
| 582 | } | 543 | } |
| 583 | default: | 544 | default: |
diff --git a/src/core/memory.h b/src/core/memory.h index 1acf5ce8c..c2c6643ee 100644 --- a/src/core/memory.h +++ b/src/core/memory.h | |||
| @@ -161,10 +161,4 @@ enum class FlushMode { | |||
| 161 | */ | 161 | */ |
| 162 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); | 162 | void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); |
| 163 | 163 | ||
| 164 | /** | ||
| 165 | * Flushes and invalidates any externally cached rasterizer resources touching the given virtual | ||
| 166 | * address region. | ||
| 167 | */ | ||
| 168 | void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode); | ||
| 169 | |||
| 170 | } // namespace Memory | 164 | } // namespace Memory |
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b97576309..5e3d862c6 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp | |||
| @@ -164,8 +164,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf | |||
| 164 | // Reset the screen info's display texture to its own permanent texture | 164 | // Reset the screen info's display texture to its own permanent texture |
| 165 | screen_info.display_texture = screen_info.texture.resource.handle; | 165 | screen_info.display_texture = screen_info.texture.resource.handle; |
| 166 | 166 | ||
| 167 | Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, | 167 | rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes); |
| 168 | Memory::FlushMode::Flush); | ||
| 169 | 168 | ||
| 170 | constexpr u32 linear_bpp = 4; | 169 | constexpr u32 linear_bpp = 4; |
| 171 | VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, | 170 | VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, |