summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt4
m---------externals/opus0
-rw-r--r--src/common/CMakeLists.txt6
-rw-r--r--src/common/memory_hook.cpp (renamed from src/core/memory_hook.cpp)6
-rw-r--r--src/common/memory_hook.h (renamed from src/core/memory_hook.h)4
-rw-r--r--src/common/page_table.cpp29
-rw-r--r--src/common/page_table.h80
-rw-r--r--src/common/uint128.cpp41
-rw-r--r--src/common/uint128.h14
-rw-r--r--src/core/CMakeLists.txt2
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.cpp5
-rw-r--r--src/core/arm/dynarmic/arm_dynarmic.h4
-rw-r--r--src/core/core_timing_util.cpp6
-rw-r--r--src/core/core_timing_util.h3
-rw-r--r--src/core/hle/ipc_helpers.h30
-rw-r--r--src/core/hle/kernel/process.cpp2
-rw-r--r--src/core/hle/kernel/scheduler.cpp2
-rw-r--r--src/core/hle/kernel/thread.cpp58
-rw-r--r--src/core/hle/kernel/thread.h10
-rw-r--r--src/core/hle/kernel/vm_manager.cpp6
-rw-r--r--src/core/hle/kernel/vm_manager.h9
-rw-r--r--src/core/memory.cpp215
-rw-r--r--src/core/memory.h80
-rw-r--r--src/core/memory_setup.h19
-rw-r--r--src/tests/core/arm/arm_test_common.cpp3
-rw-r--r--src/tests/core/arm/arm_test_common.h8
-rw-r--r--src/video_core/dma_pusher.cpp7
-rw-r--r--src/video_core/engines/kepler_memory.cpp15
-rw-r--r--src/video_core/engines/maxwell_3d.cpp41
-rw-r--r--src/video_core/engines/maxwell_dma.cpp28
-rw-r--r--src/video_core/gpu.cpp13
-rw-r--r--src/video_core/memory_manager.cpp55
-rw-r--r--src/video_core/memory_manager.h17
-rw-r--r--src/video_core/morton.cpp15
-rw-r--r--src/video_core/morton.h2
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp33
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.h6
-rw-r--r--src/video_core/renderer_opengl/gl_primitive_assembler.cpp7
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.cpp39
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer_cache.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp37
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h8
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp3
-rw-r--r--src/video_core/textures/decoders.cpp32
-rw-r--r--src/video_core/textures/decoders.h13
-rw-r--r--src/yuzu/debugger/graphics/graphics_surface.cpp5
48 files changed, 581 insertions, 459 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9cc24cba6..a4914f37d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -179,9 +179,9 @@ set_property(DIRECTORY APPEND PROPERTY
179# System imported libraries 179# System imported libraries
180# ====================== 180# ======================
181 181
182find_package(Boost 1.64.0 QUIET) 182find_package(Boost 1.66.0 QUIET)
183if (NOT Boost_FOUND) 183if (NOT Boost_FOUND)
184 message(STATUS "Boost 1.64.0 or newer not found, falling back to externals") 184 message(STATUS "Boost 1.66.0 or newer not found, falling back to externals")
185 185
186 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost") 186 set(BOOST_ROOT "${PROJECT_SOURCE_DIR}/externals/boost")
187 set(Boost_NO_SYSTEM_PATHS OFF) 187 set(Boost_NO_SYSTEM_PATHS OFF)
diff --git a/externals/opus b/externals/opus
Subproject b2871922a12abb49579512d604cabc471a59ad9 Subproject 562f8ba555c4181e1b57e82e496e4a959b9c019
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 3d30f0e3e..43ae8a9e7 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -92,10 +92,14 @@ add_library(common STATIC
92 logging/text_formatter.cpp 92 logging/text_formatter.cpp
93 logging/text_formatter.h 93 logging/text_formatter.h
94 math_util.h 94 math_util.h
95 memory_hook.cpp
96 memory_hook.h
95 microprofile.cpp 97 microprofile.cpp
96 microprofile.h 98 microprofile.h
97 microprofileui.h 99 microprofileui.h
98 misc.cpp 100 misc.cpp
101 page_table.cpp
102 page_table.h
99 param_package.cpp 103 param_package.cpp
100 param_package.h 104 param_package.h
101 quaternion.h 105 quaternion.h
@@ -114,6 +118,8 @@ add_library(common STATIC
114 threadsafe_queue.h 118 threadsafe_queue.h
115 timer.cpp 119 timer.cpp
116 timer.h 120 timer.h
121 uint128.cpp
122 uint128.h
117 vector_math.h 123 vector_math.h
118 web_result.h 124 web_result.h
119) 125)
diff --git a/src/core/memory_hook.cpp b/src/common/memory_hook.cpp
index c61c6c1fb..3986986d6 100644
--- a/src/core/memory_hook.cpp
+++ b/src/common/memory_hook.cpp
@@ -2,10 +2,10 @@
2// Licensed under GPLv2 or any later version 2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included. 3// Refer to the license.txt file included.
4 4
5#include "core/memory_hook.h" 5#include "common/memory_hook.h"
6 6
7namespace Memory { 7namespace Common {
8 8
9MemoryHook::~MemoryHook() = default; 9MemoryHook::~MemoryHook() = default;
10 10
11} // namespace Memory 11} // namespace Common
diff --git a/src/core/memory_hook.h b/src/common/memory_hook.h
index 940777107..adaa4c2c5 100644
--- a/src/core/memory_hook.h
+++ b/src/common/memory_hook.h
@@ -9,7 +9,7 @@
9 9
10#include "common/common_types.h" 10#include "common/common_types.h"
11 11
12namespace Memory { 12namespace Common {
13 13
14/** 14/**
15 * Memory hooks have two purposes: 15 * Memory hooks have two purposes:
@@ -44,4 +44,4 @@ public:
44}; 44};
45 45
46using MemoryHookPointer = std::shared_ptr<MemoryHook>; 46using MemoryHookPointer = std::shared_ptr<MemoryHook>;
47} // namespace Memory 47} // namespace Common
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
new file mode 100644
index 000000000..8eba1c3f1
--- /dev/null
+++ b/src/common/page_table.cpp
@@ -0,0 +1,29 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#include "common/page_table.h"
6
7namespace Common {
8
9PageTable::PageTable(std::size_t page_size_in_bits) : page_size_in_bits{page_size_in_bits} {}
10
11PageTable::~PageTable() = default;
12
13void PageTable::Resize(std::size_t address_space_width_in_bits) {
14 const std::size_t num_page_table_entries = 1ULL
15 << (address_space_width_in_bits - page_size_in_bits);
16
17 pointers.resize(num_page_table_entries);
18 attributes.resize(num_page_table_entries);
19
20 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
21 // vector size is subsequently decreased (via resize), the vector might not automatically
22 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
23 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
24
25 pointers.shrink_to_fit();
26 attributes.shrink_to_fit();
27}
28
29} // namespace Common
diff --git a/src/common/page_table.h b/src/common/page_table.h
new file mode 100644
index 000000000..8339f2890
--- /dev/null
+++ b/src/common/page_table.h
@@ -0,0 +1,80 @@
1// Copyright 2019 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <vector>
8#include <boost/icl/interval_map.hpp>
9#include "common/common_types.h"
10#include "common/memory_hook.h"
11
12namespace Common {
13
14enum class PageType : u8 {
15 /// Page is unmapped and should cause an access error.
16 Unmapped,
17 /// Page is mapped to regular memory. This is the only type you can get pointers to.
18 Memory,
19 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
20 /// invalidation
21 RasterizerCachedMemory,
22 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
23 Special,
24};
25
26struct SpecialRegion {
27 enum class Type {
28 DebugHook,
29 IODevice,
30 } type;
31
32 MemoryHookPointer handler;
33
34 bool operator<(const SpecialRegion& other) const {
35 return std::tie(type, handler) < std::tie(other.type, other.handler);
36 }
37
38 bool operator==(const SpecialRegion& other) const {
39 return std::tie(type, handler) == std::tie(other.type, other.handler);
40 }
41};
42
43/**
44 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
45 * mimics the way a real CPU page table works.
46 */
47struct PageTable {
48 explicit PageTable(std::size_t page_size_in_bits);
49 ~PageTable();
50
51 /**
52 * Resizes the page table to be able to accomodate enough pages within
53 * a given address space.
54 *
55 * @param address_space_width_in_bits The address size width in bits.
56 */
57 void Resize(std::size_t address_space_width_in_bits);
58
59 /**
60 * Vector of memory pointers backing each page. An entry can only be non-null if the
61 * corresponding entry in the `attributes` vector is of type `Memory`.
62 */
63 std::vector<u8*> pointers;
64
65 /**
66 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
67 * of type `Special`.
68 */
69 boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
70
71 /**
72 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
73 * the corresponding entry in `pointers` MUST be set to null.
74 */
75 std::vector<PageType> attributes;
76
77 const std::size_t page_size_in_bits{};
78};
79
80} // namespace Common
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..2238a52c5
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,41 @@
1#ifdef _MSC_VER
2#include <intrin.h>
3
4#pragma intrinsic(_umul128)
5#endif
6#include <cstring>
7#include "common/uint128.h"
8
9namespace Common {
10
11u128 Multiply64Into128(u64 a, u64 b) {
12 u128 result;
13#ifdef _MSC_VER
14 result[0] = _umul128(a, b, &result[1]);
15#else
16 unsigned __int128 tmp = a;
17 tmp *= b;
18 std::memcpy(&result, &tmp, sizeof(u128));
19#endif
20 return result;
21}
22
23std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
24 u64 remainder = dividend[0] % divisor;
25 u64 accum = dividend[0] / divisor;
26 if (dividend[1] == 0)
27 return {accum, remainder};
28 // We ignore dividend[1] / divisor as that overflows
29 const u64 first_segment = (dividend[1] % divisor) << 32;
30 accum += (first_segment / divisor) << 32;
31 const u64 second_segment = (first_segment % divisor) << 32;
32 accum += (second_segment / divisor);
33 remainder += second_segment % divisor;
34 if (remainder >= divisor) {
35 accum++;
36 remainder -= divisor;
37 }
38 return {accum, remainder};
39}
40
41} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..52e6b46eb
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,14 @@
1
2#include <utility>
3#include "common/common_types.h"
4
5namespace Common {
6
7// This function multiplies 2 u64 values and produces a u128 value;
8u128 Multiply64Into128(u64 a, u64 b);
9
10// This function divides a u128 by a u32 value and produces two u64 values:
11// the result of division and the remainder
12std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
13
14} // namespace Common
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 8ccb2d5f0..aee8bc27d 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -437,8 +437,6 @@ add_library(core STATIC
437 loader/xci.h 437 loader/xci.h
438 memory.cpp 438 memory.cpp
439 memory.h 439 memory.h
440 memory_hook.cpp
441 memory_hook.h
442 memory_setup.h 440 memory_setup.h
443 perf_stats.cpp 441 perf_stats.cpp
444 perf_stats.h 442 perf_stats.h
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 9b7ca4030..4fdc12f11 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
12#include "core/core.h" 12#include "core/core.h"
13#include "core/core_cpu.h" 13#include "core/core_cpu.h"
14#include "core/core_timing.h" 14#include "core/core_timing.h"
15#include "core/core_timing_util.h"
15#include "core/gdbstub/gdbstub.h" 16#include "core/gdbstub/gdbstub.h"
16#include "core/hle/kernel/process.h" 17#include "core/hle/kernel/process.h"
17#include "core/hle/kernel/svc.h" 18#include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
119 return std::max(parent.core_timing.GetDowncount(), 0); 120 return std::max(parent.core_timing.GetDowncount(), 0);
120 } 121 }
121 u64 GetCNTPCT() override { 122 u64 GetCNTPCT() override {
122 return parent.core_timing.GetTicks(); 123 return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
123 } 124 }
124 125
125 ARM_Dynarmic& parent; 126 ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
151 config.tpidr_el0 = &cb->tpidr_el0; 152 config.tpidr_el0 = &cb->tpidr_el0;
152 config.dczid_el0 = 4; 153 config.dczid_el0 = 4;
153 config.ctr_el0 = 0x8444c004; 154 config.ctr_el0 = 0x8444c004;
154 config.cntfrq_el0 = 19200000; // Value from fusee. 155 config.cntfrq_el0 = Timing::CNTFREQ;
155 156
156 // Unpredictable instructions 157 // Unpredictable instructions
157 config.define_unpredictable_behaviour = true; 158 config.define_unpredictable_behaviour = true;
diff --git a/src/core/arm/dynarmic/arm_dynarmic.h b/src/core/arm/dynarmic/arm_dynarmic.h
index 6cc458296..aada1e862 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.h
+++ b/src/core/arm/dynarmic/arm_dynarmic.h
@@ -12,7 +12,7 @@
12#include "core/arm/exclusive_monitor.h" 12#include "core/arm/exclusive_monitor.h"
13#include "core/arm/unicorn/arm_unicorn.h" 13#include "core/arm/unicorn/arm_unicorn.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
@@ -70,7 +70,7 @@ private:
70 Timing::CoreTiming& core_timing; 70 Timing::CoreTiming& core_timing;
71 DynarmicExclusiveMonitor& exclusive_monitor; 71 DynarmicExclusiveMonitor& exclusive_monitor;
72 72
73 Memory::PageTable* current_page_table = nullptr; 73 Common::PageTable* current_page_table = nullptr;
74}; 74};
75 75
76class DynarmicExclusiveMonitor final : public ExclusiveMonitor { 76class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 88ff70233..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
7#include <cinttypes> 7#include <cinttypes>
8#include <limits> 8#include <limits>
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/uint128.h"
10 11
11namespace Core::Timing { 12namespace Core::Timing {
12 13
@@ -60,4 +61,9 @@ s64 nsToCycles(u64 ns) {
60 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000; 61 return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
61} 62}
62 63
64u64 CpuCyclesToClockCycles(u64 ticks) {
65 const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
66 return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
67}
68
63} // namespace Core::Timing 69} // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 513cfac1b..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz 11// The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
12// The exact value used is of course unverified. 12// The exact value used is of course unverified.
13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked 13constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
14constexpr u64 CNTFREQ = 19200000; // Value from fusee.
14 15
15inline s64 msToCycles(int ms) { 16inline s64 msToCycles(int ms) {
16 // since ms is int there is no way to overflow 17 // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
61 return cycles * 1000 / BASE_CLOCK_RATE; 62 return cycles * 1000 / BASE_CLOCK_RATE;
62} 63}
63 64
65u64 CpuCyclesToClockCycles(u64 ticks);
66
64} // namespace Core::Timing 67} // namespace Core::Timing
diff --git a/src/core/hle/ipc_helpers.h b/src/core/hle/ipc_helpers.h
index a1e4be070..68406eb63 100644
--- a/src/core/hle/ipc_helpers.h
+++ b/src/core/hle/ipc_helpers.h
@@ -275,6 +275,20 @@ inline void ResponseBuilder::Push(u64 value) {
275} 275}
276 276
277template <> 277template <>
278inline void ResponseBuilder::Push(float value) {
279 u32 integral;
280 std::memcpy(&integral, &value, sizeof(u32));
281 Push(integral);
282}
283
284template <>
285inline void ResponseBuilder::Push(double value) {
286 u64 integral;
287 std::memcpy(&integral, &value, sizeof(u64));
288 Push(integral);
289}
290
291template <>
278inline void ResponseBuilder::Push(bool value) { 292inline void ResponseBuilder::Push(bool value) {
279 Push(static_cast<u8>(value)); 293 Push(static_cast<u8>(value));
280} 294}
@@ -416,6 +430,22 @@ inline s64 RequestParser::Pop() {
416} 430}
417 431
418template <> 432template <>
433inline float RequestParser::Pop() {
434 const u32 value = Pop<u32>();
435 float real;
436 std::memcpy(&real, &value, sizeof(real));
437 return real;
438}
439
440template <>
441inline double RequestParser::Pop() {
442 const u64 value = Pop<u64>();
443 float real;
444 std::memcpy(&real, &value, sizeof(real));
445 return real;
446}
447
448template <>
419inline bool RequestParser::Pop() { 449inline bool RequestParser::Pop() {
420 return Pop<u8>() != 0; 450 return Pop<u8>() != 0;
421} 451}
diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp
index 49fced7b1..65c51003d 100644
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@@ -31,7 +31,7 @@ namespace {
31 */ 31 */
32void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) { 32void SetupMainThread(Process& owner_process, KernelCore& kernel, VAddr entry_point, u32 priority) {
33 // Setup page table so we can write to memory 33 // Setup page table so we can write to memory
34 SetCurrentPageTable(&owner_process.VMManager().page_table); 34 Memory::SetCurrentPageTable(&owner_process.VMManager().page_table);
35 35
36 // Initialize new "main" thread 36 // Initialize new "main" thread
37 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress(); 37 const VAddr stack_top = owner_process.VMManager().GetTLSIORegionEndAddress();
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index e524509df..cc189cc64 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -96,7 +96,7 @@ void Scheduler::SwitchContext(Thread* new_thread) {
96 auto* const thread_owner_process = current_thread->GetOwnerProcess(); 96 auto* const thread_owner_process = current_thread->GetOwnerProcess();
97 if (previous_process != thread_owner_process) { 97 if (previous_process != thread_owner_process) {
98 system.Kernel().MakeCurrentProcess(thread_owner_process); 98 system.Kernel().MakeCurrentProcess(thread_owner_process);
99 SetCurrentPageTable(&thread_owner_process->VMManager().page_table); 99 Memory::SetCurrentPageTable(&thread_owner_process->VMManager().page_table);
100 } 100 }
101 101
102 cpu_core.LoadContext(new_thread->GetContext()); 102 cpu_core.LoadContext(new_thread->GetContext());
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 2e712c9cb..d9ffebc3f 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -7,8 +7,6 @@
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9 9
10#include <boost/range/algorithm_ext/erase.hpp>
11
12#include "common/assert.h" 10#include "common/assert.h"
13#include "common/common_types.h" 11#include "common/common_types.h"
14#include "common/logging/log.h" 12#include "common/logging/log.h"
@@ -68,12 +66,6 @@ void Thread::Stop() {
68 owner_process->FreeTLSSlot(tls_address); 66 owner_process->FreeTLSSlot(tls_address);
69} 67}
70 68
71void ExitCurrentThread() {
72 Thread* thread = GetCurrentThread();
73 thread->Stop();
74 Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
75}
76
77void Thread::WakeAfterDelay(s64 nanoseconds) { 69void Thread::WakeAfterDelay(s64 nanoseconds) {
78 // Don't schedule a wakeup if the thread wants to wait forever 70 // Don't schedule a wakeup if the thread wants to wait forever
79 if (nanoseconds == -1) 71 if (nanoseconds == -1)
@@ -264,8 +256,8 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
264 if (thread->lock_owner == this) { 256 if (thread->lock_owner == this) {
265 // If the thread is already waiting for this thread to release the mutex, ensure that the 257 // If the thread is already waiting for this thread to release the mutex, ensure that the
266 // waiters list is consistent and return without doing anything. 258 // waiters list is consistent and return without doing anything.
267 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 259 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
268 ASSERT(itr != wait_mutex_threads.end()); 260 ASSERT(iter != wait_mutex_threads.end());
269 return; 261 return;
270 } 262 }
271 263
@@ -273,11 +265,16 @@ void Thread::AddMutexWaiter(SharedPtr<Thread> thread) {
273 ASSERT(thread->lock_owner == nullptr); 265 ASSERT(thread->lock_owner == nullptr);
274 266
275 // Ensure that the thread is not already in the list of mutex waiters 267 // Ensure that the thread is not already in the list of mutex waiters
276 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 268 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
277 ASSERT(itr == wait_mutex_threads.end()); 269 ASSERT(iter == wait_mutex_threads.end());
278 270
271 // Keep the list in an ordered fashion
272 const auto insertion_point = std::find_if(
273 wait_mutex_threads.begin(), wait_mutex_threads.end(),
274 [&thread](const auto& entry) { return entry->GetPriority() > thread->GetPriority(); });
275 wait_mutex_threads.insert(insertion_point, thread);
279 thread->lock_owner = this; 276 thread->lock_owner = this;
280 wait_mutex_threads.emplace_back(std::move(thread)); 277
281 UpdatePriority(); 278 UpdatePriority();
282} 279}
283 280
@@ -285,32 +282,43 @@ void Thread::RemoveMutexWaiter(SharedPtr<Thread> thread) {
285 ASSERT(thread->lock_owner == this); 282 ASSERT(thread->lock_owner == this);
286 283
287 // Ensure that the thread is in the list of mutex waiters 284 // Ensure that the thread is in the list of mutex waiters
288 auto itr = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread); 285 const auto iter = std::find(wait_mutex_threads.begin(), wait_mutex_threads.end(), thread);
289 ASSERT(itr != wait_mutex_threads.end()); 286 ASSERT(iter != wait_mutex_threads.end());
287
288 wait_mutex_threads.erase(iter);
290 289
291 boost::remove_erase(wait_mutex_threads, thread);
292 thread->lock_owner = nullptr; 290 thread->lock_owner = nullptr;
293 UpdatePriority(); 291 UpdatePriority();
294} 292}
295 293
296void Thread::UpdatePriority() { 294void Thread::UpdatePriority() {
297 // Find the highest priority among all the threads that are waiting for this thread's lock 295 // If any of the threads waiting on the mutex have a higher priority
296 // (taking into account priority inheritance), then this thread inherits
297 // that thread's priority.
298 u32 new_priority = nominal_priority; 298 u32 new_priority = nominal_priority;
299 for (const auto& thread : wait_mutex_threads) { 299 if (!wait_mutex_threads.empty()) {
300 if (thread->nominal_priority < new_priority) 300 if (wait_mutex_threads.front()->current_priority < new_priority) {
301 new_priority = thread->nominal_priority; 301 new_priority = wait_mutex_threads.front()->current_priority;
302 }
302 } 303 }
303 304
304 if (new_priority == current_priority) 305 if (new_priority == current_priority) {
305 return; 306 return;
307 }
306 308
307 scheduler->SetThreadPriority(this, new_priority); 309 scheduler->SetThreadPriority(this, new_priority);
308
309 current_priority = new_priority; 310 current_priority = new_priority;
310 311
312 if (!lock_owner) {
313 return;
314 }
315
316 // Ensure that the thread is within the correct location in the waiting list.
317 lock_owner->RemoveMutexWaiter(this);
318 lock_owner->AddMutexWaiter(this);
319
311 // Recursively update the priority of the thread that depends on the priority of this one. 320 // Recursively update the priority of the thread that depends on the priority of this one.
312 if (lock_owner) 321 lock_owner->UpdatePriority();
313 lock_owner->UpdatePriority();
314} 322}
315 323
316void Thread::ChangeCore(u32 core, u64 mask) { 324void Thread::ChangeCore(u32 core, u64 mask) {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index ccdefeecc..faad5f391 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -401,8 +401,14 @@ private:
401 VAddr entry_point = 0; 401 VAddr entry_point = 0;
402 VAddr stack_top = 0; 402 VAddr stack_top = 0;
403 403
404 u32 nominal_priority = 0; ///< Nominal thread priority, as set by the emulated application 404 /// Nominal thread priority, as set by the emulated application.
405 u32 current_priority = 0; ///< Current thread priority, can be temporarily changed 405 /// The nominal priority is the thread priority without priority
406 /// inheritance taken into account.
407 u32 nominal_priority = 0;
408
409 /// Current thread priority. This may change over the course of the
410 /// thread's lifetime in order to facilitate priority inheritance.
411 u32 current_priority = 0;
406 412
407 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. 413 u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks.
408 u64 last_running_ticks = 0; ///< CPU tick when thread was last running 414 u64 last_running_ticks = 0; ///< CPU tick when thread was last running
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 05c59af34..3def3e52c 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -7,13 +7,13 @@
7#include <utility> 7#include <utility>
8#include "common/assert.h" 8#include "common/assert.h"
9#include "common/logging/log.h" 9#include "common/logging/log.h"
10#include "common/memory_hook.h"
10#include "core/arm/arm_interface.h" 11#include "core/arm/arm_interface.h"
11#include "core/core.h" 12#include "core/core.h"
12#include "core/file_sys/program_metadata.h" 13#include "core/file_sys/program_metadata.h"
13#include "core/hle/kernel/errors.h" 14#include "core/hle/kernel/errors.h"
14#include "core/hle/kernel/vm_manager.h" 15#include "core/hle/kernel/vm_manager.h"
15#include "core/memory.h" 16#include "core/memory.h"
16#include "core/memory_hook.h"
17#include "core/memory_setup.h" 17#include "core/memory_setup.h"
18 18
19namespace Kernel { 19namespace Kernel {
@@ -177,7 +177,7 @@ ResultVal<VAddr> VMManager::FindFreeRegion(u64 size) const {
177 177
178ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size, 178ResultVal<VMManager::VMAHandle> VMManager::MapMMIO(VAddr target, PAddr paddr, u64 size,
179 MemoryState state, 179 MemoryState state,
180 Memory::MemoryHookPointer mmio_handler) { 180 Common::MemoryHookPointer mmio_handler) {
181 // This is the appropriately sized VMA that will turn into our allocation. 181 // This is the appropriately sized VMA that will turn into our allocation.
182 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size)); 182 CASCADE_RESULT(VMAIter vma_handle, CarveVMA(target, size));
183 VirtualMemoryArea& final_vma = vma_handle->second; 183 VirtualMemoryArea& final_vma = vma_handle->second;
@@ -624,7 +624,7 @@ void VMManager::ClearPageTable() {
624 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); 624 std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr);
625 page_table.special_regions.clear(); 625 page_table.special_regions.clear();
626 std::fill(page_table.attributes.begin(), page_table.attributes.end(), 626 std::fill(page_table.attributes.begin(), page_table.attributes.end(),
627 Memory::PageType::Unmapped); 627 Common::PageType::Unmapped);
628} 628}
629 629
630VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask, 630VMManager::CheckResults VMManager::CheckRangeState(VAddr address, u64 size, MemoryState state_mask,
diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h
index 88e0b3c02..b96980f8f 100644
--- a/src/core/hle/kernel/vm_manager.h
+++ b/src/core/hle/kernel/vm_manager.h
@@ -9,9 +9,10 @@
9#include <tuple> 9#include <tuple>
10#include <vector> 10#include <vector>
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
13#include "common/page_table.h"
12#include "core/hle/result.h" 14#include "core/hle/result.h"
13#include "core/memory.h" 15#include "core/memory.h"
14#include "core/memory_hook.h"
15 16
16namespace FileSys { 17namespace FileSys {
17enum class ProgramAddressSpaceType : u8; 18enum class ProgramAddressSpaceType : u8;
@@ -290,7 +291,7 @@ struct VirtualMemoryArea {
290 // Settings for type = MMIO 291 // Settings for type = MMIO
291 /// Physical address of the register area this VMA maps to. 292 /// Physical address of the register area this VMA maps to.
292 PAddr paddr = 0; 293 PAddr paddr = 0;
293 Memory::MemoryHookPointer mmio_handler = nullptr; 294 Common::MemoryHookPointer mmio_handler = nullptr;
294 295
295 /// Tests if this area can be merged to the right with `next`. 296 /// Tests if this area can be merged to the right with `next`.
296 bool CanBeMergedWith(const VirtualMemoryArea& next) const; 297 bool CanBeMergedWith(const VirtualMemoryArea& next) const;
@@ -368,7 +369,7 @@ public:
368 * @param mmio_handler The handler that will implement read and write for this MMIO region. 369 * @param mmio_handler The handler that will implement read and write for this MMIO region.
369 */ 370 */
370 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state, 371 ResultVal<VMAHandle> MapMMIO(VAddr target, PAddr paddr, u64 size, MemoryState state,
371 Memory::MemoryHookPointer mmio_handler); 372 Common::MemoryHookPointer mmio_handler);
372 373
373 /// Unmaps a range of addresses, splitting VMAs as necessary. 374 /// Unmaps a range of addresses, splitting VMAs as necessary.
374 ResultCode UnmapRange(VAddr target, u64 size); 375 ResultCode UnmapRange(VAddr target, u64 size);
@@ -509,7 +510,7 @@ public:
509 510
510 /// Each VMManager has its own page table, which is set as the main one when the owning process 511 /// Each VMManager has its own page table, which is set as the main one when the owning process
511 /// is scheduled. 512 /// is scheduled.
512 Memory::PageTable page_table; 513 Common::PageTable page_table{Memory::PAGE_BITS};
513 514
514private: 515private:
515 using VMAIter = VMAMap::iterator; 516 using VMAIter = VMAMap::iterator;
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 4fde53033..365ac82b4 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -10,6 +10,7 @@
10#include "common/assert.h" 10#include "common/assert.h"
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/logging/log.h" 12#include "common/logging/log.h"
13#include "common/page_table.h"
13#include "common/swap.h" 14#include "common/swap.h"
14#include "core/arm/arm_interface.h" 15#include "core/arm/arm_interface.h"
15#include "core/core.h" 16#include "core/core.h"
@@ -18,13 +19,14 @@
18#include "core/hle/lock.h" 19#include "core/hle/lock.h"
19#include "core/memory.h" 20#include "core/memory.h"
20#include "core/memory_setup.h" 21#include "core/memory_setup.h"
22#include "video_core/gpu.h"
21#include "video_core/renderer_base.h" 23#include "video_core/renderer_base.h"
22 24
23namespace Memory { 25namespace Memory {
24 26
25static PageTable* current_page_table = nullptr; 27static Common::PageTable* current_page_table = nullptr;
26 28
27void SetCurrentPageTable(PageTable* page_table) { 29void SetCurrentPageTable(Common::PageTable* page_table) {
28 current_page_table = page_table; 30 current_page_table = page_table;
29 31
30 auto& system = Core::System::GetInstance(); 32 auto& system = Core::System::GetInstance();
@@ -36,41 +38,19 @@ void SetCurrentPageTable(PageTable* page_table) {
36 } 38 }
37} 39}
38 40
39PageTable* GetCurrentPageTable() { 41Common::PageTable* GetCurrentPageTable() {
40 return current_page_table; 42 return current_page_table;
41} 43}
42 44
43PageTable::PageTable() = default; 45static void MapPages(Common::PageTable& page_table, VAddr base, u64 size, u8* memory,
44 46 Common::PageType type) {
45PageTable::PageTable(std::size_t address_space_width_in_bits) {
46 Resize(address_space_width_in_bits);
47}
48
49PageTable::~PageTable() = default;
50
51void PageTable::Resize(std::size_t address_space_width_in_bits) {
52 const std::size_t num_page_table_entries = 1ULL << (address_space_width_in_bits - PAGE_BITS);
53
54 pointers.resize(num_page_table_entries);
55 attributes.resize(num_page_table_entries);
56
57 // The default is a 39-bit address space, which causes an initial 1GB allocation size. If the
58 // vector size is subsequently decreased (via resize), the vector might not automatically
59 // actually reallocate/resize its underlying allocation, which wastes up to ~800 MB for
60 // 36-bit titles. Call shrink_to_fit to reduce capacity to what's actually in use.
61
62 pointers.shrink_to_fit();
63 attributes.shrink_to_fit();
64}
65
66static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, PageType type) {
67 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, 47 LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE,
68 (base + size) * PAGE_SIZE); 48 (base + size) * PAGE_SIZE);
69 49
70 // During boot, current_page_table might not be set yet, in which case we need not flush 50 // During boot, current_page_table might not be set yet, in which case we need not flush
71 if (current_page_table) { 51 if (current_page_table) {
72 RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, 52 Core::System::GetInstance().GPU().FlushAndInvalidateRegion(base << PAGE_BITS,
73 FlushMode::FlushAndInvalidate); 53 size * PAGE_SIZE);
74 } 54 }
75 55
76 VAddr end = base + size; 56 VAddr end = base + size;
@@ -91,41 +71,47 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa
91 } 71 }
92} 72}
93 73
94void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target) { 74void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target) {
95 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 75 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
96 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 76 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
97 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, PageType::Memory); 77 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
98} 78}
99 79
100void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler) { 80void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
81 Common::MemoryHookPointer mmio_handler) {
101 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 82 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
102 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 83 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
103 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Special); 84 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Special);
104 85
105 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 86 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
106 SpecialRegion region{SpecialRegion::Type::IODevice, std::move(mmio_handler)}; 87 Common::SpecialRegion region{Common::SpecialRegion::Type::IODevice, std::move(mmio_handler)};
107 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 88 page_table.special_regions.add(
89 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
108} 90}
109 91
110void UnmapRegion(PageTable& page_table, VAddr base, u64 size) { 92void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
111 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); 93 ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
112 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); 94 ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
113 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, PageType::Unmapped); 95 MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, nullptr, Common::PageType::Unmapped);
114 96
115 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 97 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
116 page_table.special_regions.erase(interval); 98 page_table.special_regions.erase(interval);
117} 99}
118 100
119void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 101void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
102 Common::MemoryHookPointer hook) {
120 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 103 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
121 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 104 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
122 page_table.special_regions.add(std::make_pair(interval, std::set<SpecialRegion>{region})); 105 page_table.special_regions.add(
106 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
123} 107}
124 108
125void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook) { 109void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
110 Common::MemoryHookPointer hook) {
126 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1); 111 auto interval = boost::icl::discrete_interval<VAddr>::closed(base, base + size - 1);
127 SpecialRegion region{SpecialRegion::Type::DebugHook, std::move(hook)}; 112 Common::SpecialRegion region{Common::SpecialRegion::Type::DebugHook, std::move(hook)};
128 page_table.special_regions.subtract(std::make_pair(interval, std::set<SpecialRegion>{region})); 113 page_table.special_regions.subtract(
114 std::make_pair(interval, std::set<Common::SpecialRegion>{region}));
129} 115}
130 116
131/** 117/**
@@ -174,19 +160,19 @@ T Read(const VAddr vaddr) {
174 return value; 160 return value;
175 } 161 }
176 162
177 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 163 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
178 switch (type) { 164 switch (type) {
179 case PageType::Unmapped: 165 case Common::PageType::Unmapped:
180 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr); 166 LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, vaddr);
181 return 0; 167 return 0;
182 case PageType::Memory: 168 case Common::PageType::Memory:
183 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 169 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
184 break; 170 break;
185 case PageType::RasterizerCachedMemory: { 171 case Common::PageType::RasterizerCachedMemory: {
186 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Flush); 172 auto host_ptr{GetPointerFromVMA(vaddr)};
187 173 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
188 T value; 174 T value;
189 std::memcpy(&value, GetPointerFromVMA(vaddr), sizeof(T)); 175 std::memcpy(&value, host_ptr, sizeof(T));
190 return value; 176 return value;
191 } 177 }
192 default: 178 default:
@@ -204,18 +190,19 @@ void Write(const VAddr vaddr, const T data) {
204 return; 190 return;
205 } 191 }
206 192
207 PageType type = current_page_table->attributes[vaddr >> PAGE_BITS]; 193 Common::PageType type = current_page_table->attributes[vaddr >> PAGE_BITS];
208 switch (type) { 194 switch (type) {
209 case PageType::Unmapped: 195 case Common::PageType::Unmapped:
210 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, 196 LOG_ERROR(HW_Memory, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8,
211 static_cast<u32>(data), vaddr); 197 static_cast<u32>(data), vaddr);
212 return; 198 return;
213 case PageType::Memory: 199 case Common::PageType::Memory:
214 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr); 200 ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", vaddr);
215 break; 201 break;
216 case PageType::RasterizerCachedMemory: { 202 case Common::PageType::RasterizerCachedMemory: {
217 RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate); 203 auto host_ptr{GetPointerFromVMA(vaddr)};
218 std::memcpy(GetPointerFromVMA(vaddr), &data, sizeof(T)); 204 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
205 std::memcpy(host_ptr, &data, sizeof(T));
219 break; 206 break;
220 } 207 }
221 default: 208 default:
@@ -230,10 +217,10 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
230 if (page_pointer) 217 if (page_pointer)
231 return true; 218 return true;
232 219
233 if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) 220 if (page_table.attributes[vaddr >> PAGE_BITS] == Common::PageType::RasterizerCachedMemory)
234 return true; 221 return true;
235 222
236 if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) 223 if (page_table.attributes[vaddr >> PAGE_BITS] != Common::PageType::Special)
237 return false; 224 return false;
238 225
239 return false; 226 return false;
@@ -253,7 +240,8 @@ u8* GetPointer(const VAddr vaddr) {
253 return page_pointer + (vaddr & PAGE_MASK); 240 return page_pointer + (vaddr & PAGE_MASK);
254 } 241 }
255 242
256 if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) { 243 if (current_page_table->attributes[vaddr >> PAGE_BITS] ==
244 Common::PageType::RasterizerCachedMemory) {
257 return GetPointerFromVMA(vaddr); 245 return GetPointerFromVMA(vaddr);
258 } 246 }
259 247
@@ -287,20 +275,20 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
287 275
288 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1; 276 u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
289 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) { 277 for (unsigned i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
290 PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; 278 Common::PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS];
291 279
292 if (cached) { 280 if (cached) {
293 // Switch page type to cached if now cached 281 // Switch page type to cached if now cached
294 switch (page_type) { 282 switch (page_type) {
295 case PageType::Unmapped: 283 case Common::PageType::Unmapped:
296 // It is not necessary for a process to have this region mapped into its address 284 // It is not necessary for a process to have this region mapped into its address
297 // space, for example, a system module need not have a VRAM mapping. 285 // space, for example, a system module need not have a VRAM mapping.
298 break; 286 break;
299 case PageType::Memory: 287 case Common::PageType::Memory:
300 page_type = PageType::RasterizerCachedMemory; 288 page_type = Common::PageType::RasterizerCachedMemory;
301 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; 289 current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr;
302 break; 290 break;
303 case PageType::RasterizerCachedMemory: 291 case Common::PageType::RasterizerCachedMemory:
304 // There can be more than one GPU region mapped per CPU region, so it's common that 292 // There can be more than one GPU region mapped per CPU region, so it's common that
305 // this area is already marked as cached. 293 // this area is already marked as cached.
306 break; 294 break;
@@ -310,23 +298,23 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
310 } else { 298 } else {
311 // Switch page type to uncached if now uncached 299 // Switch page type to uncached if now uncached
312 switch (page_type) { 300 switch (page_type) {
313 case PageType::Unmapped: 301 case Common::PageType::Unmapped:
314 // It is not necessary for a process to have this region mapped into its address 302 // It is not necessary for a process to have this region mapped into its address
315 // space, for example, a system module need not have a VRAM mapping. 303 // space, for example, a system module need not have a VRAM mapping.
316 break; 304 break;
317 case PageType::Memory: 305 case Common::PageType::Memory:
318 // There can be more than one GPU region mapped per CPU region, so it's common that 306 // There can be more than one GPU region mapped per CPU region, so it's common that
319 // this area is already unmarked as cached. 307 // this area is already unmarked as cached.
320 break; 308 break;
321 case PageType::RasterizerCachedMemory: { 309 case Common::PageType::RasterizerCachedMemory: {
322 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); 310 u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK);
323 if (pointer == nullptr) { 311 if (pointer == nullptr) {
324 // It's possible that this function has been called while updating the pagetable 312 // It's possible that this function has been called while updating the pagetable
325 // after unmapping a VMA. In that case the underlying VMA will no longer exist, 313 // after unmapping a VMA. In that case the underlying VMA will no longer exist,
326 // and we should just leave the pagetable entry blank. 314 // and we should just leave the pagetable entry blank.
327 page_type = PageType::Unmapped; 315 page_type = Common::PageType::Unmapped;
328 } else { 316 } else {
329 page_type = PageType::Memory; 317 page_type = Common::PageType::Memory;
330 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer; 318 current_page_table->pointers[vaddr >> PAGE_BITS] = pointer;
331 } 319 }
332 break; 320 break;
@@ -338,47 +326,6 @@ void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
338 } 326 }
339} 327}
340 328
341void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) {
342 auto& system_instance = Core::System::GetInstance();
343
344 // Since pages are unmapped on shutdown after video core is shutdown, the renderer may be
345 // null here
346 if (!system_instance.IsPoweredOn()) {
347 return;
348 }
349
350 const VAddr end = start + size;
351
352 const auto CheckRegion = [&](VAddr region_start, VAddr region_end) {
353 if (start >= region_end || end <= region_start) {
354 // No overlap with region
355 return;
356 }
357
358 const VAddr overlap_start = std::max(start, region_start);
359 const VAddr overlap_end = std::min(end, region_end);
360 const VAddr overlap_size = overlap_end - overlap_start;
361
362 auto& gpu = system_instance.GPU();
363 switch (mode) {
364 case FlushMode::Flush:
365 gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
366 break;
367 case FlushMode::Invalidate:
368 gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
369 break;
370 case FlushMode::FlushAndInvalidate:
371 gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size);
372 break;
373 }
374 };
375
376 const auto& vm_manager = Core::CurrentProcess()->VMManager();
377
378 CheckRegion(vm_manager.GetCodeRegionBaseAddress(), vm_manager.GetCodeRegionEndAddress());
379 CheckRegion(vm_manager.GetHeapRegionBaseAddress(), vm_manager.GetHeapRegionEndAddress());
380}
381
382u8 Read8(const VAddr addr) { 329u8 Read8(const VAddr addr) {
383 return Read<u8>(addr); 330 return Read<u8>(addr);
384} 331}
@@ -409,24 +356,24 @@ void ReadBlock(const Kernel::Process& process, const VAddr src_addr, void* dest_
409 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 356 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
410 357
411 switch (page_table.attributes[page_index]) { 358 switch (page_table.attributes[page_index]) {
412 case PageType::Unmapped: { 359 case Common::PageType::Unmapped: {
413 LOG_ERROR(HW_Memory, 360 LOG_ERROR(HW_Memory,
414 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 361 "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
415 current_vaddr, src_addr, size); 362 current_vaddr, src_addr, size);
416 std::memset(dest_buffer, 0, copy_amount); 363 std::memset(dest_buffer, 0, copy_amount);
417 break; 364 break;
418 } 365 }
419 case PageType::Memory: { 366 case Common::PageType::Memory: {
420 DEBUG_ASSERT(page_table.pointers[page_index]); 367 DEBUG_ASSERT(page_table.pointers[page_index]);
421 368
422 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 369 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
423 std::memcpy(dest_buffer, src_ptr, copy_amount); 370 std::memcpy(dest_buffer, src_ptr, copy_amount);
424 break; 371 break;
425 } 372 }
426 case PageType::RasterizerCachedMemory: { 373 case Common::PageType::RasterizerCachedMemory: {
427 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 374 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
428 FlushMode::Flush); 375 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
429 std::memcpy(dest_buffer, GetPointerFromVMA(process, current_vaddr), copy_amount); 376 std::memcpy(dest_buffer, host_ptr, copy_amount);
430 break; 377 break;
431 } 378 }
432 default: 379 default:
@@ -473,23 +420,23 @@ void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const voi
473 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 420 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
474 421
475 switch (page_table.attributes[page_index]) { 422 switch (page_table.attributes[page_index]) {
476 case PageType::Unmapped: { 423 case Common::PageType::Unmapped: {
477 LOG_ERROR(HW_Memory, 424 LOG_ERROR(HW_Memory,
478 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 425 "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
479 current_vaddr, dest_addr, size); 426 current_vaddr, dest_addr, size);
480 break; 427 break;
481 } 428 }
482 case PageType::Memory: { 429 case Common::PageType::Memory: {
483 DEBUG_ASSERT(page_table.pointers[page_index]); 430 DEBUG_ASSERT(page_table.pointers[page_index]);
484 431
485 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 432 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
486 std::memcpy(dest_ptr, src_buffer, copy_amount); 433 std::memcpy(dest_ptr, src_buffer, copy_amount);
487 break; 434 break;
488 } 435 }
489 case PageType::RasterizerCachedMemory: { 436 case Common::PageType::RasterizerCachedMemory: {
490 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 437 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
491 FlushMode::Invalidate); 438 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
492 std::memcpy(GetPointerFromVMA(process, current_vaddr), src_buffer, copy_amount); 439 std::memcpy(host_ptr, src_buffer, copy_amount);
493 break; 440 break;
494 } 441 }
495 default: 442 default:
@@ -519,23 +466,23 @@ void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std:
519 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 466 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
520 467
521 switch (page_table.attributes[page_index]) { 468 switch (page_table.attributes[page_index]) {
522 case PageType::Unmapped: { 469 case Common::PageType::Unmapped: {
523 LOG_ERROR(HW_Memory, 470 LOG_ERROR(HW_Memory,
524 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 471 "Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
525 current_vaddr, dest_addr, size); 472 current_vaddr, dest_addr, size);
526 break; 473 break;
527 } 474 }
528 case PageType::Memory: { 475 case Common::PageType::Memory: {
529 DEBUG_ASSERT(page_table.pointers[page_index]); 476 DEBUG_ASSERT(page_table.pointers[page_index]);
530 477
531 u8* dest_ptr = page_table.pointers[page_index] + page_offset; 478 u8* dest_ptr = page_table.pointers[page_index] + page_offset;
532 std::memset(dest_ptr, 0, copy_amount); 479 std::memset(dest_ptr, 0, copy_amount);
533 break; 480 break;
534 } 481 }
535 case PageType::RasterizerCachedMemory: { 482 case Common::PageType::RasterizerCachedMemory: {
536 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 483 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
537 FlushMode::Invalidate); 484 Core::System::GetInstance().GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
538 std::memset(GetPointerFromVMA(process, current_vaddr), 0, copy_amount); 485 std::memset(host_ptr, 0, copy_amount);
539 break; 486 break;
540 } 487 }
541 default: 488 default:
@@ -561,23 +508,23 @@ void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
561 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset); 508 const VAddr current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
562 509
563 switch (page_table.attributes[page_index]) { 510 switch (page_table.attributes[page_index]) {
564 case PageType::Unmapped: { 511 case Common::PageType::Unmapped: {
565 LOG_ERROR(HW_Memory, 512 LOG_ERROR(HW_Memory,
566 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})", 513 "Unmapped CopyBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
567 current_vaddr, src_addr, size); 514 current_vaddr, src_addr, size);
568 ZeroBlock(process, dest_addr, copy_amount); 515 ZeroBlock(process, dest_addr, copy_amount);
569 break; 516 break;
570 } 517 }
571 case PageType::Memory: { 518 case Common::PageType::Memory: {
572 DEBUG_ASSERT(page_table.pointers[page_index]); 519 DEBUG_ASSERT(page_table.pointers[page_index]);
573 const u8* src_ptr = page_table.pointers[page_index] + page_offset; 520 const u8* src_ptr = page_table.pointers[page_index] + page_offset;
574 WriteBlock(process, dest_addr, src_ptr, copy_amount); 521 WriteBlock(process, dest_addr, src_ptr, copy_amount);
575 break; 522 break;
576 } 523 }
577 case PageType::RasterizerCachedMemory: { 524 case Common::PageType::RasterizerCachedMemory: {
578 RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount), 525 const auto& host_ptr{GetPointerFromVMA(process, current_vaddr)};
579 FlushMode::Flush); 526 Core::System::GetInstance().GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
580 WriteBlock(process, dest_addr, GetPointerFromVMA(process, current_vaddr), copy_amount); 527 WriteBlock(process, dest_addr, host_ptr, copy_amount);
581 break; 528 break;
582 } 529 }
583 default: 530 default:
diff --git a/src/core/memory.h b/src/core/memory.h
index 1acf5ce8c..3f60d868c 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -10,7 +10,10 @@
10#include <vector> 10#include <vector>
11#include <boost/icl/interval_map.hpp> 11#include <boost/icl/interval_map.hpp>
12#include "common/common_types.h" 12#include "common/common_types.h"
13#include "core/memory_hook.h" 13
14namespace Common {
15struct PageTable;
16}
14 17
15namespace Kernel { 18namespace Kernel {
16class Process; 19class Process;
@@ -26,71 +29,6 @@ constexpr std::size_t PAGE_BITS = 12;
26constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS; 29constexpr u64 PAGE_SIZE = 1ULL << PAGE_BITS;
27constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 30constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
28 31
29enum class PageType : u8 {
30 /// Page is unmapped and should cause an access error.
31 Unmapped,
32 /// Page is mapped to regular memory. This is the only type you can get pointers to.
33 Memory,
34 /// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
35 /// invalidation
36 RasterizerCachedMemory,
37 /// Page is mapped to a I/O region. Writing and reading to this page is handled by functions.
38 Special,
39};
40
41struct SpecialRegion {
42 enum class Type {
43 DebugHook,
44 IODevice,
45 } type;
46
47 MemoryHookPointer handler;
48
49 bool operator<(const SpecialRegion& other) const {
50 return std::tie(type, handler) < std::tie(other.type, other.handler);
51 }
52
53 bool operator==(const SpecialRegion& other) const {
54 return std::tie(type, handler) == std::tie(other.type, other.handler);
55 }
56};
57
58/**
59 * A (reasonably) fast way of allowing switchable and remappable process address spaces. It loosely
60 * mimics the way a real CPU page table works.
61 */
62struct PageTable {
63 explicit PageTable();
64 explicit PageTable(std::size_t address_space_width_in_bits);
65 ~PageTable();
66
67 /**
68 * Resizes the page table to be able to accomodate enough pages within
69 * a given address space.
70 *
71 * @param address_space_width_in_bits The address size width in bits.
72 */
73 void Resize(std::size_t address_space_width_in_bits);
74
75 /**
76 * Vector of memory pointers backing each page. An entry can only be non-null if the
77 * corresponding entry in the `attributes` vector is of type `Memory`.
78 */
79 std::vector<u8*> pointers;
80
81 /**
82 * Contains MMIO handlers that back memory regions whose entries in the `attribute` vector is
83 * of type `Special`.
84 */
85 boost::icl::interval_map<VAddr, std::set<SpecialRegion>> special_regions;
86
87 /**
88 * Vector of fine grained page attributes. If it is set to any value other than `Memory`, then
89 * the corresponding entry in `pointers` MUST be set to null.
90 */
91 std::vector<PageType> attributes;
92};
93
94/// Virtual user-space memory regions 32/// Virtual user-space memory regions
95enum : VAddr { 33enum : VAddr {
96 /// Read-only page containing kernel and system configuration values. 34 /// Read-only page containing kernel and system configuration values.
@@ -116,8 +54,8 @@ enum : VAddr {
116}; 54};
117 55
118/// Currently active page table 56/// Currently active page table
119void SetCurrentPageTable(PageTable* page_table); 57void SetCurrentPageTable(Common::PageTable* page_table);
120PageTable* GetCurrentPageTable(); 58Common::PageTable* GetCurrentPageTable();
121 59
122/// Determines if the given VAddr is valid for the specified process. 60/// Determines if the given VAddr is valid for the specified process.
123bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr); 61bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
@@ -161,10 +99,4 @@ enum class FlushMode {
161 */ 99 */
162void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached); 100void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
163 101
164/**
165 * Flushes and invalidates any externally cached rasterizer resources touching the given virtual
166 * address region.
167 */
168void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode);
169
170} // namespace Memory 102} // namespace Memory
diff --git a/src/core/memory_setup.h b/src/core/memory_setup.h
index 9a1a4f4be..5225ee8e2 100644
--- a/src/core/memory_setup.h
+++ b/src/core/memory_setup.h
@@ -5,7 +5,11 @@
5#pragma once 5#pragma once
6 6
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "core/memory_hook.h" 8#include "common/memory_hook.h"
9
10namespace Common {
11struct PageTable;
12}
9 13
10namespace Memory { 14namespace Memory {
11 15
@@ -17,7 +21,7 @@ namespace Memory {
17 * @param size The amount of bytes to map. Must be page-aligned. 21 * @param size The amount of bytes to map. Must be page-aligned.
18 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`. 22 * @param target Buffer with the memory backing the mapping. Must be of length at least `size`.
19 */ 23 */
20void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target); 24void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, u8* target);
21 25
22/** 26/**
23 * Maps a region of the emulated process address space as a IO region. 27 * Maps a region of the emulated process address space as a IO region.
@@ -26,11 +30,14 @@ void MapMemoryRegion(PageTable& page_table, VAddr base, u64 size, u8* target);
26 * @param size The amount of bytes to map. Must be page-aligned. 30 * @param size The amount of bytes to map. Must be page-aligned.
27 * @param mmio_handler The handler that backs the mapping. 31 * @param mmio_handler The handler that backs the mapping.
28 */ 32 */
29void MapIoRegion(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer mmio_handler); 33void MapIoRegion(Common::PageTable& page_table, VAddr base, u64 size,
34 Common::MemoryHookPointer mmio_handler);
30 35
31void UnmapRegion(PageTable& page_table, VAddr base, u64 size); 36void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size);
32 37
33void AddDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 38void AddDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
34void RemoveDebugHook(PageTable& page_table, VAddr base, u64 size, MemoryHookPointer hook); 39 Common::MemoryHookPointer hook);
40void RemoveDebugHook(Common::PageTable& page_table, VAddr base, u64 size,
41 Common::MemoryHookPointer hook);
35 42
36} // namespace Memory 43} // namespace Memory
diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp
index 6fe56833d..3e1a735c3 100644
--- a/src/tests/core/arm/arm_test_common.cpp
+++ b/src/tests/core/arm/arm_test_common.cpp
@@ -4,6 +4,7 @@
4 4
5#include <algorithm> 5#include <algorithm>
6 6
7#include "common/page_table.h"
7#include "core/core.h" 8#include "core/core.h"
8#include "core/hle/kernel/process.h" 9#include "core/hle/kernel/process.h"
9#include "core/memory.h" 10#include "core/memory.h"
@@ -22,7 +23,7 @@ TestEnvironment::TestEnvironment(bool mutable_memory_)
22 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr); 23 std::fill(page_table->pointers.begin(), page_table->pointers.end(), nullptr);
23 page_table->special_regions.clear(); 24 page_table->special_regions.clear();
24 std::fill(page_table->attributes.begin(), page_table->attributes.end(), 25 std::fill(page_table->attributes.begin(), page_table->attributes.end(),
25 Memory::PageType::Unmapped); 26 Common::PageType::Unmapped);
26 27
27 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); 28 Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory);
28 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); 29 Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory);
diff --git a/src/tests/core/arm/arm_test_common.h b/src/tests/core/arm/arm_test_common.h
index 0b7539601..d145dbfcc 100644
--- a/src/tests/core/arm/arm_test_common.h
+++ b/src/tests/core/arm/arm_test_common.h
@@ -9,10 +9,10 @@
9#include <vector> 9#include <vector>
10 10
11#include "common/common_types.h" 11#include "common/common_types.h"
12#include "common/memory_hook.h"
12#include "core/hle/kernel/kernel.h" 13#include "core/hle/kernel/kernel.h"
13#include "core/memory_hook.h"
14 14
15namespace Memory { 15namespace Common {
16struct PageTable; 16struct PageTable;
17} 17}
18 18
@@ -58,7 +58,7 @@ public:
58 58
59private: 59private:
60 friend struct TestMemory; 60 friend struct TestMemory;
61 struct TestMemory final : Memory::MemoryHook { 61 struct TestMemory final : Common::MemoryHook {
62 explicit TestMemory(TestEnvironment* env_) : env(env_) {} 62 explicit TestMemory(TestEnvironment* env_) : env(env_) {}
63 TestEnvironment* env; 63 TestEnvironment* env;
64 64
@@ -86,7 +86,7 @@ private:
86 bool mutable_memory; 86 bool mutable_memory;
87 std::shared_ptr<TestMemory> test_memory; 87 std::shared_ptr<TestMemory> test_memory;
88 std::vector<WriteRecord> write_records; 88 std::vector<WriteRecord> write_records;
89 Memory::PageTable* page_table = nullptr; 89 Common::PageTable* page_table = nullptr;
90 Kernel::KernelCore kernel; 90 Kernel::KernelCore kernel;
91}; 91};
92 92
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index bff1a37ff..8b1bea1ae 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -55,12 +55,9 @@ bool DmaPusher::Step() {
55 } 55 }
56 56
57 // Push buffer non-empty, read a word 57 // Push buffer non-empty, read a word
58 const auto address = gpu.MemoryManager().GpuToCpuAddress(dma_get);
59 ASSERT_MSG(address, "Invalid GPU address");
60
61 command_headers.resize(command_list_header.size); 58 command_headers.resize(command_list_header.size);
62 59 gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
63 Memory::ReadBlock(*address, command_headers.data(), command_list_header.size * sizeof(u32)); 60 command_list_header.size * sizeof(u32));
64 61
65 for (const CommandHeader& command_header : command_headers) { 62 for (const CommandHeader& command_header : command_headers) {
66 63
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index daefa43a6..0931b9626 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -41,18 +41,13 @@ void KeplerMemory::ProcessData(u32 data) {
41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); 41 ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); 42 ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
43 43
44 const GPUVAddr address = regs.dest.Address();
45 const auto dest_address =
46 memory_manager.GpuToCpuAddress(address + state.write_offset * sizeof(u32));
47 ASSERT_MSG(dest_address, "Invalid GPU address");
48
49 // We have to invalidate the destination region to evict any outdated surfaces from the cache. 44 // We have to invalidate the destination region to evict any outdated surfaces from the cache.
50 // We do this before actually writing the new data because the destination address might contain 45 // We do this before actually writing the new data because the destination address might
51 // a dirty surface that will have to be written back to memory. 46 // contain a dirty surface that will have to be written back to memory.
52 system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), 47 const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
53 sizeof(u32)); 48 rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
49 memory_manager.Write32(address, data);
54 50
55 Memory::Write32(*dest_address, data);
56 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); 51 system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
57 52
58 state.write_offset++; 53 state.write_offset++;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 49979694e..c5d5be4ef 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -270,11 +270,9 @@ void Maxwell3D::ProcessMacroBind(u32 data) {
270} 270}
271 271
272void Maxwell3D::ProcessQueryGet() { 272void Maxwell3D::ProcessQueryGet() {
273 GPUVAddr sequence_address = regs.query.QueryAddress(); 273 const GPUVAddr sequence_address{regs.query.QueryAddress()};
274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application 274 // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
275 // VAddr before writing. 275 // VAddr before writing.
276 const auto address = memory_manager.GpuToCpuAddress(sequence_address);
277 ASSERT_MSG(address, "Invalid GPU address");
278 276
279 // TODO(Subv): Support the other query units. 277 // TODO(Subv): Support the other query units.
280 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, 278 ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -309,7 +307,7 @@ void Maxwell3D::ProcessQueryGet() {
309 // Write the current query sequence to the sequence address. 307 // Write the current query sequence to the sequence address.
310 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short 308 // TODO(Subv): Find out what happens if you use a long query type but mark it as a short
311 // query. 309 // query.
312 Memory::Write32(*address, sequence); 310 memory_manager.Write32(sequence_address, sequence);
313 } else { 311 } else {
314 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast 312 // Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
315 // GPU, this command may actually take a while to complete in real hardware due to GPU 313 // GPU, this command may actually take a while to complete in real hardware due to GPU
@@ -318,7 +316,7 @@ void Maxwell3D::ProcessQueryGet() {
318 query_result.value = result; 316 query_result.value = result;
319 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming 317 // TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
320 query_result.timestamp = system.CoreTiming().GetTicks(); 318 query_result.timestamp = system.CoreTiming().GetTicks();
321 Memory::WriteBlock(*address, &query_result, sizeof(query_result)); 319 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
322 } 320 }
323 dirty_flags.OnMemoryWrite(); 321 dirty_flags.OnMemoryWrite();
324 break; 322 break;
@@ -393,12 +391,11 @@ void Maxwell3D::ProcessCBData(u32 value) {
393 // Don't allow writing past the end of the buffer. 391 // Don't allow writing past the end of the buffer.
394 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size); 392 ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
395 393
396 const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); 394 const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
397 ASSERT_MSG(address, "Invalid GPU address");
398 395
399 u8* ptr{Memory::GetPointer(*address)}; 396 u8* ptr{memory_manager.GetPointer(address)};
400 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); 397 rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
401 std::memcpy(ptr, &value, sizeof(u32)); 398 memory_manager.Write32(address, value);
402 399
403 dirty_flags.OnMemoryWrite(); 400 dirty_flags.OnMemoryWrite();
404 401
@@ -407,14 +404,10 @@ void Maxwell3D::ProcessCBData(u32 value) {
407} 404}
408 405
409Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 406Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
410 const GPUVAddr tic_base_address = regs.tic.TICAddress(); 407 const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
411
412 const GPUVAddr tic_address_gpu = tic_base_address + tic_index * sizeof(Texture::TICEntry);
413 const auto tic_address_cpu = memory_manager.GpuToCpuAddress(tic_address_gpu);
414 ASSERT_MSG(tic_address_cpu, "Invalid GPU address");
415 408
416 Texture::TICEntry tic_entry; 409 Texture::TICEntry tic_entry;
417 Memory::ReadBlock(*tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry)); 410 memory_manager.ReadBlock(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
418 411
419 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear || 412 ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
420 tic_entry.header_version == Texture::TICHeaderVersion::Pitch, 413 tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
@@ -432,14 +425,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
432} 425}
433 426
434Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 427Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
435 const GPUVAddr tsc_base_address = regs.tsc.TSCAddress(); 428 const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
436
437 const GPUVAddr tsc_address_gpu = tsc_base_address + tsc_index * sizeof(Texture::TSCEntry);
438 const auto tsc_address_cpu = memory_manager.GpuToCpuAddress(tsc_address_gpu);
439 ASSERT_MSG(tsc_address_cpu, "Invalid GPU address");
440 429
441 Texture::TSCEntry tsc_entry; 430 Texture::TSCEntry tsc_entry;
442 Memory::ReadBlock(*tsc_address_cpu, &tsc_entry, sizeof(Texture::TSCEntry)); 431 memory_manager.ReadBlock(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
443 return tsc_entry; 432 return tsc_entry;
444} 433}
445 434
@@ -458,10 +447,7 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
458 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset; 447 for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
459 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) { 448 current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
460 449
461 const auto address = memory_manager.GpuToCpuAddress(current_texture); 450 const Texture::TextureHandle tex_handle{memory_manager.Read32(current_texture)};
462 ASSERT_MSG(address, "Invalid GPU address");
463
464 const Texture::TextureHandle tex_handle{Memory::Read32(*address)};
465 451
466 Texture::FullTextureInfo tex_info{}; 452 Texture::FullTextureInfo tex_info{};
467 // TODO(Subv): Use the shader to determine which textures are actually accessed. 453 // TODO(Subv): Use the shader to determine which textures are actually accessed.
@@ -496,10 +482,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
496 482
497 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size); 483 ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
498 484
499 const auto tex_address_cpu = memory_manager.GpuToCpuAddress(tex_info_address); 485 const Texture::TextureHandle tex_handle{memory_manager.Read32(tex_info_address)};
500 ASSERT_MSG(tex_address_cpu, "Invalid GPU address");
501
502 const Texture::TextureHandle tex_handle{Memory::Read32(*tex_address_cpu)};
503 486
504 Texture::FullTextureInfo tex_info{}; 487 Texture::FullTextureInfo tex_info{};
505 tex_info.index = static_cast<u32>(offset); 488 tex_info.index = static_cast<u32>(offset);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 415a6319a..a0ded4c25 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -43,11 +43,6 @@ void MaxwellDMA::HandleCopy() {
43 const GPUVAddr source = regs.src_address.Address(); 43 const GPUVAddr source = regs.src_address.Address();
44 const GPUVAddr dest = regs.dst_address.Address(); 44 const GPUVAddr dest = regs.dst_address.Address();
45 45
46 const auto source_cpu = memory_manager.GpuToCpuAddress(source);
47 const auto dest_cpu = memory_manager.GpuToCpuAddress(dest);
48 ASSERT_MSG(source_cpu, "Invalid source GPU address");
49 ASSERT_MSG(dest_cpu, "Invalid destination GPU address");
50
51 // TODO(Subv): Perform more research and implement all features of this engine. 46 // TODO(Subv): Perform more research and implement all features of this engine.
52 ASSERT(regs.exec.enable_swizzle == 0); 47 ASSERT(regs.exec.enable_swizzle == 0);
53 ASSERT(regs.exec.query_mode == Regs::QueryMode::None); 48 ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
@@ -70,7 +65,7 @@ void MaxwellDMA::HandleCopy() {
70 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count, 65 // buffer of length `x_count`, otherwise we copy a 2D image of dimensions (x_count,
71 // y_count). 66 // y_count).
72 if (!regs.exec.enable_2d) { 67 if (!regs.exec.enable_2d) {
73 Memory::CopyBlock(*dest_cpu, *source_cpu, regs.x_count); 68 memory_manager.CopyBlock(dest, source, regs.x_count);
74 return; 69 return;
75 } 70 }
76 71
@@ -79,9 +74,9 @@ void MaxwellDMA::HandleCopy() {
79 // rectangle. There is no need to manually flush/invalidate the regions because 74 // rectangle. There is no need to manually flush/invalidate the regions because
80 // CopyBlock does that for us. 75 // CopyBlock does that for us.
81 for (u32 line = 0; line < regs.y_count; ++line) { 76 for (u32 line = 0; line < regs.y_count; ++line) {
82 const VAddr source_line = *source_cpu + line * regs.src_pitch; 77 const GPUVAddr source_line = source + line * regs.src_pitch;
83 const VAddr dest_line = *dest_cpu + line * regs.dst_pitch; 78 const GPUVAddr dest_line = dest + line * regs.dst_pitch;
84 Memory::CopyBlock(dest_line, source_line, regs.x_count); 79 memory_manager.CopyBlock(dest_line, source_line, regs.x_count);
85 } 80 }
86 return; 81 return;
87 } 82 }
@@ -90,17 +85,18 @@ void MaxwellDMA::HandleCopy() {
90 85
91 const std::size_t copy_size = regs.x_count * regs.y_count; 86 const std::size_t copy_size = regs.x_count * regs.y_count;
92 87
88 auto source_ptr{memory_manager.GetPointer(source)};
89 auto dst_ptr{memory_manager.GetPointer(dest)};
90
93 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { 91 const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) {
94 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated 92 // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
95 // copying. 93 // copying.
96 Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( 94 rasterizer.FlushRegion(ToCacheAddr(source_ptr), src_size);
97 ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size);
98 95
99 // We have to invalidate the destination region to evict any outdated surfaces from the 96 // We have to invalidate the destination region to evict any outdated surfaces from the
100 // cache. We do this before actually writing the new data because the destination address 97 // cache. We do this before actually writing the new data because the destination address
101 // might contain a dirty surface that will have to be written back to memory. 98 // might contain a dirty surface that will have to be written back to memory.
102 Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( 99 rasterizer.InvalidateRegion(ToCacheAddr(dst_ptr), dst_size);
103 ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size);
104 }; 100 };
105 101
106 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { 102 if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
@@ -113,8 +109,8 @@ void MaxwellDMA::HandleCopy() {
113 copy_size * src_bytes_per_pixel); 109 copy_size * src_bytes_per_pixel);
114 110
115 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, 111 Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
116 regs.src_params.size_x, src_bytes_per_pixel, *source_cpu, 112 regs.src_params.size_x, src_bytes_per_pixel, source_ptr, dst_ptr,
117 *dest_cpu, regs.src_params.BlockHeight(), regs.src_params.pos_x, 113 regs.src_params.BlockHeight(), regs.src_params.pos_x,
118 regs.src_params.pos_y); 114 regs.src_params.pos_y);
119 } else { 115 } else {
120 ASSERT(regs.dst_params.size_z == 1); 116 ASSERT(regs.dst_params.size_z == 1);
@@ -127,7 +123,7 @@ void MaxwellDMA::HandleCopy() {
127 123
128 // If the input is linear and the output is tiled, swizzle the input and copy it over. 124 // If the input is linear and the output is tiled, swizzle the input and copy it over.
129 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, 125 Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
130 src_bpp, *dest_cpu, *source_cpu, regs.dst_params.BlockHeight()); 126 src_bpp, dst_ptr, source_ptr, regs.dst_params.BlockHeight());
131 } 127 }
132} 128}
133 129
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 08abf8ac9..66c690494 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -274,7 +274,6 @@ void GPU::ProcessSemaphoreTriggerMethod() {
274 const auto op = 274 const auto op =
275 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask); 275 static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
276 if (op == GpuSemaphoreOperation::WriteLong) { 276 if (op == GpuSemaphoreOperation::WriteLong) {
277 auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
278 struct Block { 277 struct Block {
279 u32 sequence; 278 u32 sequence;
280 u32 zeros = 0; 279 u32 zeros = 0;
@@ -286,11 +285,9 @@ void GPU::ProcessSemaphoreTriggerMethod() {
286 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of 285 // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
287 // CoreTiming 286 // CoreTiming
288 block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks(); 287 block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
289 Memory::WriteBlock(*address, &block, sizeof(block)); 288 memory_manager->WriteBlock(regs.smaphore_address.SmaphoreAddress(), &block, sizeof(block));
290 } else { 289 } else {
291 const auto address = 290 const u32 word{memory_manager->Read32(regs.smaphore_address.SmaphoreAddress())};
292 memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress());
293 const u32 word = Memory::Read32(*address);
294 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || 291 if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
295 (op == GpuSemaphoreOperation::AcquireGequal && 292 (op == GpuSemaphoreOperation::AcquireGequal &&
296 static_cast<s32>(word - regs.semaphore_sequence) > 0) || 293 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
@@ -317,13 +314,11 @@ void GPU::ProcessSemaphoreTriggerMethod() {
317} 314}
318 315
319void GPU::ProcessSemaphoreRelease() { 316void GPU::ProcessSemaphoreRelease() {
320 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); 317 memory_manager->Write32(regs.smaphore_address.SmaphoreAddress(), regs.semaphore_release);
321 Memory::Write32(*address, regs.semaphore_release);
322} 318}
323 319
324void GPU::ProcessSemaphoreAcquire() { 320void GPU::ProcessSemaphoreAcquire() {
325 const auto address = memory_manager->GpuToCpuAddress(regs.smaphore_address.SmaphoreAddress()); 321 const u32 word = memory_manager->Read32(regs.smaphore_address.SmaphoreAddress());
326 const u32 word = Memory::Read32(*address);
327 const auto value = regs.semaphore_acquire; 322 const auto value = regs.semaphore_acquire;
328 if (word != value) { 323 if (word != value) {
329 regs.acquire_active = true; 324 regs.acquire_active = true;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 54abe5298..8e8f36f28 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -5,6 +5,7 @@
5#include "common/alignment.h" 5#include "common/alignment.h"
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "core/memory.h"
8#include "video_core/memory_manager.h" 9#include "video_core/memory_manager.h"
9 10
10namespace Tegra { 11namespace Tegra {
@@ -162,15 +163,51 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) {
162 return base_addr + (gpu_addr & PAGE_MASK); 163 return base_addr + (gpu_addr & PAGE_MASK);
163} 164}
164 165
165std::vector<GPUVAddr> MemoryManager::CpuToGpuAddress(VAddr cpu_addr) const { 166u8 MemoryManager::Read8(GPUVAddr addr) {
166 std::vector<GPUVAddr> results; 167 return Memory::Read8(*GpuToCpuAddress(addr));
167 for (const auto& region : mapped_regions) { 168}
168 if (cpu_addr >= region.cpu_addr && cpu_addr < (region.cpu_addr + region.size)) { 169
169 const u64 offset{cpu_addr - region.cpu_addr}; 170u16 MemoryManager::Read16(GPUVAddr addr) {
170 results.push_back(region.gpu_addr + offset); 171 return Memory::Read16(*GpuToCpuAddress(addr));
171 } 172}
172 } 173
173 return results; 174u32 MemoryManager::Read32(GPUVAddr addr) {
175 return Memory::Read32(*GpuToCpuAddress(addr));
176}
177
178u64 MemoryManager::Read64(GPUVAddr addr) {
179 return Memory::Read64(*GpuToCpuAddress(addr));
180}
181
182void MemoryManager::Write8(GPUVAddr addr, u8 data) {
183 Memory::Write8(*GpuToCpuAddress(addr), data);
184}
185
186void MemoryManager::Write16(GPUVAddr addr, u16 data) {
187 Memory::Write16(*GpuToCpuAddress(addr), data);
188}
189
190void MemoryManager::Write32(GPUVAddr addr, u32 data) {
191 Memory::Write32(*GpuToCpuAddress(addr), data);
192}
193
194void MemoryManager::Write64(GPUVAddr addr, u64 data) {
195 Memory::Write64(*GpuToCpuAddress(addr), data);
196}
197
198u8* MemoryManager::GetPointer(GPUVAddr addr) {
199 return Memory::GetPointer(*GpuToCpuAddress(addr));
200}
201
202void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) {
203 std::memcpy(dest_buffer, GetPointer(src_addr), size);
204}
205void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) {
206 std::memcpy(GetPointer(dest_addr), src_buffer, size);
207}
208
209void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size) {
210 std::memcpy(GetPointer(dest_addr), GetPointer(src_addr), size);
174} 211}
175 212
176VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) { 213VAddr& MemoryManager::PageSlot(GPUVAddr gpu_addr) {
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index fb03497ca..425e2f31c 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -27,12 +27,27 @@ public:
27 GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size); 27 GPUVAddr UnmapBuffer(GPUVAddr gpu_addr, u64 size);
28 GPUVAddr GetRegionEnd(GPUVAddr region_start) const; 28 GPUVAddr GetRegionEnd(GPUVAddr region_start) const;
29 std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr); 29 std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr);
30 std::vector<GPUVAddr> CpuToGpuAddress(VAddr cpu_addr) const;
31 30
32 static constexpr u64 PAGE_BITS = 16; 31 static constexpr u64 PAGE_BITS = 16;
33 static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS; 32 static constexpr u64 PAGE_SIZE = 1 << PAGE_BITS;
34 static constexpr u64 PAGE_MASK = PAGE_SIZE - 1; 33 static constexpr u64 PAGE_MASK = PAGE_SIZE - 1;
35 34
35 u8 Read8(GPUVAddr addr);
36 u16 Read16(GPUVAddr addr);
37 u32 Read32(GPUVAddr addr);
38 u64 Read64(GPUVAddr addr);
39
40 void Write8(GPUVAddr addr, u8 data);
41 void Write16(GPUVAddr addr, u16 data);
42 void Write32(GPUVAddr addr, u32 data);
43 void Write64(GPUVAddr addr, u64 data);
44
45 u8* GetPointer(GPUVAddr vaddr);
46
47 void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size);
48 void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size);
49 void CopyBlock(VAddr dest_addr, VAddr src_addr, std::size_t size);
50
36private: 51private:
37 enum class PageStatus : u64 { 52 enum class PageStatus : u64 {
38 Unmapped = 0xFFFFFFFFFFFFFFFFULL, 53 Unmapped = 0xFFFFFFFFFFFFFFFFULL,
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 9692ce143..3e91cbc83 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "core/memory.h"
10#include "video_core/morton.h" 9#include "video_core/morton.h"
11#include "video_core/surface.h" 10#include "video_core/surface.h"
12#include "video_core/textures/decoders.h" 11#include "video_core/textures/decoders.h"
@@ -16,12 +15,12 @@ namespace VideoCore {
16using Surface::GetBytesPerPixel; 15using Surface::GetBytesPerPixel;
17using Surface::PixelFormat; 16using Surface::PixelFormat;
18 17
19using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, VAddr); 18using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
20using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>; 19using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
21 20
22template <bool morton_to_linear, PixelFormat format> 21template <bool morton_to_linear, PixelFormat format>
23static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth, 22static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
24 u32 tile_width_spacing, u8* buffer, VAddr addr) { 23 u32 tile_width_spacing, u8* buffer, u8* addr) {
25 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format); 24 constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
26 25
27 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual 26 // With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
@@ -34,10 +33,10 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
34 stride, height, depth, block_height, block_depth, 33 stride, height, depth, block_height, block_depth,
35 tile_width_spacing); 34 tile_width_spacing);
36 } else { 35 } else {
37 Tegra::Texture::CopySwizzledData( 36 Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
38 (stride + tile_size_x - 1) / tile_size_x, (height + tile_size_y - 1) / tile_size_y, 37 (height + tile_size_y - 1) / tile_size_y, depth,
39 depth, bytes_per_pixel, bytes_per_pixel, Memory::GetPointer(addr), buffer, false, 38 bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
40 block_height, block_depth, tile_width_spacing); 39 block_height, block_depth, tile_width_spacing);
41 } 40 }
42} 41}
43 42
@@ -282,7 +281,7 @@ static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
282 281
283void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride, 282void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
284 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 283 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
285 u8* buffer, VAddr addr) { 284 u8* buffer, u8* addr) {
286 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth, 285 GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
287 tile_width_spacing, buffer, addr); 286 tile_width_spacing, buffer, addr);
288} 287}
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index b565204b5..ee5b45555 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -13,7 +13,7 @@ enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
13 13
14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride, 14void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing, 15 u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
16 u8* buffer, VAddr addr); 16 u8* buffer, u8* addr);
17 17
18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel, 18void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data); 19 u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index a4eea61a6..5048ed6ce 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -24,14 +24,12 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
24GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, 24GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size,
25 std::size_t alignment, bool cache) { 25 std::size_t alignment, bool cache) {
26 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 26 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
27 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
28 ASSERT_MSG(cpu_addr, "Invalid GPU address");
29 27
30 // Cache management is a big overhead, so only cache entries with a given size. 28 // Cache management is a big overhead, so only cache entries with a given size.
31 // TODO: Figure out which size is the best for given games. 29 // TODO: Figure out which size is the best for given games.
32 cache &= size >= 2048; 30 cache &= size >= 2048;
33 31
34 const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; 32 const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
35 if (cache) { 33 if (cache) {
36 auto entry = TryGet(host_ptr); 34 auto entry = TryGet(host_ptr);
37 if (entry) { 35 if (entry) {
@@ -54,8 +52,8 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
54 buffer_offset += size; 52 buffer_offset += size;
55 53
56 if (cache) { 54 if (cache) {
57 auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, 55 auto entry = std::make_shared<CachedBufferEntry>(
58 alignment, host_ptr); 56 *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr);
59 Register(entry); 57 Register(entry);
60 } 58 }
61 59
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index a2c509c24..c8dbcacbd 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -7,7 +7,6 @@
7#include "common/assert.h" 7#include "common/assert.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/memory.h"
11#include "video_core/renderer_opengl/gl_global_cache.h" 10#include "video_core/renderer_opengl/gl_global_cache.h"
12#include "video_core/renderer_opengl/gl_rasterizer.h" 11#include "video_core/renderer_opengl/gl_rasterizer.h"
13#include "video_core/renderer_opengl/gl_shader_decompiler.h" 12#include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -39,7 +38,7 @@ void CachedGlobalRegion::Reload(u32 size_) {
39 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); 38 glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW);
40} 39}
41 40
42GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { 41GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const {
43 const auto search{reserve.find(addr)}; 42 const auto search{reserve.find(addr)};
44 if (search == reserve.end()) { 43 if (search == reserve.end()) {
45 return {}; 44 return {};
@@ -47,11 +46,14 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32
47 return search->second; 46 return search->second;
48} 47}
49 48
50GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { 49GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size,
51 GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; 50 u8* host_ptr) {
51 GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)};
52 if (!region) { 52 if (!region) {
53 // No reserved surface available, create a new one and reserve it 53 // No reserved surface available, create a new one and reserve it
54 region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); 54 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
55 const auto cpu_addr = *memory_manager.GpuToCpuAddress(addr);
56 region = std::make_shared<CachedGlobalRegion>(cpu_addr, size, host_ptr);
55 ReserveGlobalRegion(region); 57 ReserveGlobalRegion(region);
56 } 58 }
57 region->Reload(size); 59 region->Reload(size);
@@ -59,7 +61,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si
59} 61}
60 62
61void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { 63void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) {
62 reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); 64 reserve.insert_or_assign(region->GetCacheAddr(), std::move(region));
63} 65}
64 66
65GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) 67GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
@@ -70,23 +72,20 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
70 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { 72 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
71 73
72 auto& gpu{Core::System::GetInstance().GPU()}; 74 auto& gpu{Core::System::GetInstance().GPU()};
73 const auto cbufs = gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]; 75 auto& memory_manager{gpu.MemoryManager()};
74 const auto cbuf_addr = gpu.MemoryManager().GpuToCpuAddress( 76 const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<u64>(stage)]};
75 cbufs.const_buffers[global_region.GetCbufIndex()].address + global_region.GetCbufOffset()); 77 const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address +
76 ASSERT(cbuf_addr); 78 global_region.GetCbufOffset()};
77 79 const auto actual_addr{memory_manager.Read64(addr)};
78 const auto actual_addr_gpu = Memory::Read64(*cbuf_addr); 80 const auto size{memory_manager.Read32(addr + 8)};
79 const auto size = Memory::Read32(*cbuf_addr + 8);
80 const auto actual_addr = gpu.MemoryManager().GpuToCpuAddress(actual_addr_gpu);
81 ASSERT(actual_addr);
82 81
83 // Look up global region in the cache based on address 82 // Look up global region in the cache based on address
84 const auto& host_ptr{Memory::GetPointer(*actual_addr)}; 83 const auto& host_ptr{memory_manager.GetPointer(actual_addr)};
85 GlobalRegion region{TryGet(host_ptr)}; 84 GlobalRegion region{TryGet(host_ptr)};
86 85
87 if (!region) { 86 if (!region) {
88 // No global region found - create a new one 87 // No global region found - create a new one
89 region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); 88 region = GetUncachedGlobalRegion(actual_addr, size, host_ptr);
90 Register(region); 89 Register(region);
91 } 90 }
92 91
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
index e497a0619..a840491f7 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.h
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -65,11 +65,11 @@ public:
65 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); 65 Tegra::Engines::Maxwell3D::Regs::ShaderStage stage);
66 66
67private: 67private:
68 GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; 68 GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const;
69 GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); 69 GlobalRegion GetUncachedGlobalRegion(Tegra::GPUVAddr addr, u32 size, u8* host_ptr);
70 void ReserveGlobalRegion(GlobalRegion region); 70 void ReserveGlobalRegion(GlobalRegion region);
71 71
72 std::unordered_map<VAddr, GlobalRegion> reserve; 72 std::unordered_map<CacheAddr, GlobalRegion> reserve;
73}; 73};
74 74
75} // namespace OpenGL 75} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
index 77d5cedd2..75d816795 100644
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
@@ -46,10 +46,7 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); 46 auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
47 47
48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); 48 auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
49 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; 49 const u8* source{memory_manager.GetPointer(gpu_addr)};
50 ASSERT_MSG(cpu_addr, "Invalid GPU address");
51
52 const u8* source{Memory::GetPointer(*cpu_addr)};
53 50
54 for (u32 primitive = 0; primitive < count / 4; ++primitive) { 51 for (u32 primitive = 0; primitive < count / 4; ++primitive) {
55 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { 52 for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
@@ -64,4 +61,4 @@ GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size
64 return index_offset; 61 return index_offset;
65} 62}
66 63
67} // namespace OpenGL \ No newline at end of file 64} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bb6de5477..198c54872 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -749,11 +749,17 @@ void RasterizerOpenGL::FlushAll() {}
749 749
750void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { 750void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
751 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 751 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
752 if (!addr || !size) {
753 return;
754 }
752 res_cache.FlushRegion(addr, size); 755 res_cache.FlushRegion(addr, size);
753} 756}
754 757
755void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { 758void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
756 MICROPROFILE_SCOPE(OpenGL_CacheManagement); 759 MICROPROFILE_SCOPE(OpenGL_CacheManagement);
760 if (!addr || !size) {
761 return;
762 }
757 res_cache.InvalidateRegion(addr, size); 763 res_cache.InvalidateRegion(addr, size);
758 shader_cache.InvalidateRegion(addr, size); 764 shader_cache.InvalidateRegion(addr, size);
759 global_cache.InvalidateRegion(addr, size); 765 global_cache.InvalidateRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 451de00e8..57329cd61 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -57,11 +57,9 @@ static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) {
57 57
58void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { 58void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; 59 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
60 const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
61 60
62 addr = cpu_addr ? *cpu_addr : 0;
63 gpu_addr = gpu_addr_; 61 gpu_addr = gpu_addr_;
64 host_ptr = Memory::GetPointer(addr); 62 host_ptr = memory_manager.GetPointer(gpu_addr_);
65 size_in_bytes = SizeInBytesRaw(); 63 size_in_bytes = SizeInBytesRaw();
66 64
67 if (IsPixelFormatASTC(pixel_format)) { 65 if (IsPixelFormatASTC(pixel_format)) {
@@ -447,7 +445,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
447 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 445 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
448 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 446 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
449 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, 447 params.MipBlockDepth(mip_level), 1, params.tile_width_spacing,
450 gl_buffer.data() + offset_gl, params.addr + offset); 448 gl_buffer.data() + offset_gl, params.host_ptr + offset);
451 offset += layer_size; 449 offset += layer_size;
452 offset_gl += gl_size; 450 offset_gl += gl_size;
453 } 451 }
@@ -456,7 +454,7 @@ void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params,
456 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), 454 MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level),
457 params.MipBlockHeight(mip_level), params.MipHeight(mip_level), 455 params.MipBlockHeight(mip_level), params.MipHeight(mip_level),
458 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, 456 params.MipBlockDepth(mip_level), depth, params.tile_width_spacing,
459 gl_buffer.data(), params.addr + offset); 457 gl_buffer.data(), params.host_ptr + offset);
460 } 458 }
461} 459}
462 460
@@ -514,9 +512,9 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac
514 "reinterpretation but the texture is tiled."); 512 "reinterpretation but the texture is tiled.");
515 } 513 }
516 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; 514 const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes;
517 515 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
518 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, 516 glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size,
519 Memory::GetPointer(dst_params.addr + src_params.size_in_bytes)); 517 memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes));
520 } 518 }
521 519
522 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); 520 glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -604,7 +602,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
604 602
605 ApplyTextureDefaults(texture.handle, params.max_mip_level); 603 ApplyTextureDefaults(texture.handle, params.max_mip_level);
606 604
607 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.addr, params.IdentityString()); 605 OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString());
608 606
609 // Clamp size to mapped GPU memory region 607 // Clamp size to mapped GPU memory region
610 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000 608 // TODO(bunnei): Super Mario Odyssey maps a 0x40000 byte region and then uses it for a 0x80000
@@ -617,6 +615,8 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
617 LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size); 615 LOG_ERROR(HW_GPU, "Surface size {} exceeds region size {}", params.size_in_bytes, max_size);
618 cached_size_in_bytes = max_size; 616 cached_size_in_bytes = max_size;
619 } 617 }
618
619 cpu_addr = *memory_manager.GpuToCpuAddress(params.gpu_addr);
620} 620}
621 621
622MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); 622MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64));
@@ -925,7 +925,7 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
925} 925}
926 926
927Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { 927Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) {
928 if (params.addr == 0 || params.height * params.width == 0) { 928 if (params.gpu_addr == 0 || params.height * params.width == 0) {
929 return {}; 929 return {};
930 } 930 }
931 931
@@ -979,14 +979,16 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface,
979 const Surface& dst_surface) { 979 const Surface& dst_surface) {
980 const auto& init_params{src_surface->GetSurfaceParams()}; 980 const auto& init_params{src_surface->GetSurfaceParams()};
981 const auto& dst_params{dst_surface->GetSurfaceParams()}; 981 const auto& dst_params{dst_surface->GetSurfaceParams()};
982 VAddr address = init_params.addr; 982 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
983 const std::size_t layer_size = dst_params.LayerMemorySize(); 983 Tegra::GPUVAddr address{init_params.gpu_addr};
984 const std::size_t layer_size{dst_params.LayerMemorySize()};
984 for (u32 layer = 0; layer < dst_params.depth; layer++) { 985 for (u32 layer = 0; layer < dst_params.depth; layer++) {
985 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { 986 for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) {
986 const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); 987 const Tegra::GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)};
987 const Surface& copy = TryGet(Memory::GetPointer(sub_address)); 988 const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))};
988 if (!copy) 989 if (!copy) {
989 continue; 990 continue;
991 }
990 const auto& src_params{copy->GetSurfaceParams()}; 992 const auto& src_params{copy->GetSurfaceParams()};
991 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; 993 const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))};
992 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; 994 const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))};
@@ -1242,9 +1244,10 @@ static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfacePar
1242 return {}; 1244 return {};
1243} 1245}
1244 1246
1245static std::optional<u32> TryFindBestLayer(VAddr addr, const SurfaceParams params, u32 mipmap) { 1247static std::optional<u32> TryFindBestLayer(Tegra::GPUVAddr addr, const SurfaceParams params,
1246 const std::size_t size = params.LayerMemorySize(); 1248 u32 mipmap) {
1247 VAddr start = params.addr + params.GetMipmapLevelOffset(mipmap); 1249 const std::size_t size{params.LayerMemorySize()};
1250 Tegra::GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)};
1248 for (u32 i = 0; i < params.depth; i++) { 1251 for (u32 i = 0; i < params.depth; i++) {
1249 if (start == addr) { 1252 if (start == addr) {
1250 return {i}; 1253 return {i};
@@ -1266,7 +1269,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa
1266 src_params.height == dst_params.MipHeight(*level) && 1269 src_params.height == dst_params.MipHeight(*level) &&
1267 src_params.block_height >= dst_params.MipBlockHeight(*level)) { 1270 src_params.block_height >= dst_params.MipBlockHeight(*level)) {
1268 const std::optional<u32> slot = 1271 const std::optional<u32> slot =
1269 TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); 1272 TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level);
1270 if (slot.has_value()) { 1273 if (slot.has_value()) {
1271 glCopyImageSubData(render_surface->Texture().handle, 1274 glCopyImageSubData(render_surface->Texture().handle,
1272 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, 1275 SurfaceTargetToGL(src_params.target), 0, 0, 0, 0,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index b3afad139..9366f47f2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -296,7 +296,6 @@ struct SurfaceParams {
296 bool is_array; 296 bool is_array;
297 bool srgb_conversion; 297 bool srgb_conversion;
298 // Parameters used for caching 298 // Parameters used for caching
299 VAddr addr;
300 u8* host_ptr; 299 u8* host_ptr;
301 Tegra::GPUVAddr gpu_addr; 300 Tegra::GPUVAddr gpu_addr;
302 std::size_t size_in_bytes; 301 std::size_t size_in_bytes;
@@ -349,7 +348,7 @@ public:
349 explicit CachedSurface(const SurfaceParams& params); 348 explicit CachedSurface(const SurfaceParams& params);
350 349
351 VAddr GetCpuAddr() const override { 350 VAddr GetCpuAddr() const override {
352 return params.addr; 351 return cpu_addr;
353 } 352 }
354 353
355 std::size_t GetSizeInBytes() const override { 354 std::size_t GetSizeInBytes() const override {
@@ -433,6 +432,7 @@ private:
433 std::size_t memory_size; 432 std::size_t memory_size;
434 bool reinterpreted = false; 433 bool reinterpreted = false;
435 bool must_reload = false; 434 bool must_reload = false;
435 VAddr cpu_addr{};
436}; 436};
437 437
438class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { 438class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 60a04e146..1ed740877 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -32,13 +32,10 @@ struct UnspecializedShader {
32namespace { 32namespace {
33 33
34/// Gets the address for the specified shader stage program 34/// Gets the address for the specified shader stage program
35VAddr GetShaderAddress(Maxwell::ShaderProgram program) { 35Tegra::GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
36 const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); 36 const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
37 const auto& shader_config = gpu.regs.shader_config[static_cast<std::size_t>(program)]; 37 const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
38 const auto address = gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() + 38 return gpu.regs.code_address.CodeAddress() + shader_config.offset;
39 shader_config.offset);
40 ASSERT_MSG(address, "Invalid GPU address");
41 return *address;
42} 39}
43 40
44/// Gets the shader program code from memory for the specified address 41/// Gets the shader program code from memory for the specified address
@@ -214,11 +211,11 @@ std::set<GLenum> GetSupportedFormats() {
214 211
215} // namespace 212} // namespace
216 213
217CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, 214CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
218 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 215 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
219 const PrecompiledPrograms& precompiled_programs, 216 const PrecompiledPrograms& precompiled_programs,
220 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) 217 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr)
221 : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, 218 : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier},
222 program_type{program_type}, disk_cache{disk_cache}, 219 program_type{program_type}, disk_cache{disk_cache},
223 precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { 220 precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} {
224 221
@@ -244,11 +241,11 @@ CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier,
244 disk_cache.SaveRaw(raw); 241 disk_cache.SaveRaw(raw);
245} 242}
246 243
247CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, 244CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier,
248 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 245 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
249 const PrecompiledPrograms& precompiled_programs, 246 const PrecompiledPrograms& precompiled_programs,
250 GLShader::ProgramResult result, u8* host_ptr) 247 GLShader::ProgramResult result, u8* host_ptr)
251 : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, 248 : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type},
252 disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ 249 disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{
253 host_ptr} { 250 host_ptr} {
254 251
@@ -273,7 +270,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive
273 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 270 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
274 } 271 }
275 272
276 LabelGLObject(GL_PROGRAM, program->handle, guest_addr); 273 LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
277 } 274 }
278 275
279 handle = program->handle; 276 handle = program->handle;
@@ -325,7 +322,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind
325 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); 322 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings));
326 } 323 }
327 324
328 LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); 325 LabelGLObject(GL_PROGRAM, target_program->handle, cpu_addr, debug_name);
329 326
330 return target_program->handle; 327 return target_program->handle;
331}; 328};
@@ -488,31 +485,31 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
488 return last_shaders[static_cast<u32>(program)]; 485 return last_shaders[static_cast<u32>(program)];
489 } 486 }
490 487
491 const VAddr program_addr{GetShaderAddress(program)}; 488 auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
489 const Tegra::GPUVAddr program_addr{GetShaderAddress(program)};
492 490
493 // Look up shader in the cache based on address 491 // Look up shader in the cache based on address
494 const auto& host_ptr{Memory::GetPointer(program_addr)}; 492 const auto& host_ptr{memory_manager.GetPointer(program_addr)};
495 Shader shader{TryGet(host_ptr)}; 493 Shader shader{TryGet(host_ptr)};
496 494
497 if (!shader) { 495 if (!shader) {
498 // No shader found - create a new one 496 // No shader found - create a new one
499 const auto& host_ptr{Memory::GetPointer(program_addr)};
500 ProgramCode program_code{GetShaderCode(host_ptr)}; 497 ProgramCode program_code{GetShaderCode(host_ptr)};
501 ProgramCode program_code_b; 498 ProgramCode program_code_b;
502 if (program == Maxwell::ShaderProgram::VertexA) { 499 if (program == Maxwell::ShaderProgram::VertexA) {
503 program_code_b = GetShaderCode( 500 program_code_b = GetShaderCode(
504 Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); 501 memory_manager.GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
505 } 502 }
506 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); 503 const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
507 504 const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
508 const auto found = precompiled_shaders.find(unique_identifier); 505 const auto found = precompiled_shaders.find(unique_identifier);
509 if (found != precompiled_shaders.end()) { 506 if (found != precompiled_shaders.end()) {
510 shader = 507 shader =
511 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, 508 std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
512 precompiled_programs, found->second, host_ptr); 509 precompiled_programs, found->second, host_ptr);
513 } else { 510 } else {
514 shader = std::make_shared<CachedShader>( 511 shader = std::make_shared<CachedShader>(
515 program_addr, unique_identifier, program, disk_cache, precompiled_programs, 512 cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
516 std::move(program_code), std::move(program_code_b), host_ptr); 513 std::move(program_code), std::move(program_code_b), host_ptr);
517 } 514 }
518 Register(shader); 515 Register(shader);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 81fe716b4..fd1c85115 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
39 39
40class CachedShader final : public RasterizerCacheObject { 40class CachedShader final : public RasterizerCacheObject {
41public: 41public:
42 explicit CachedShader(VAddr guest_addr, u64 unique_identifier, 42 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
43 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 43 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
44 const PrecompiledPrograms& precompiled_programs, 44 const PrecompiledPrograms& precompiled_programs,
45 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); 45 ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr);
46 46
47 explicit CachedShader(VAddr guest_addr, u64 unique_identifier, 47 explicit CachedShader(VAddr cpu_addr, u64 unique_identifier,
48 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, 48 Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache,
49 const PrecompiledPrograms& precompiled_programs, 49 const PrecompiledPrograms& precompiled_programs,
50 GLShader::ProgramResult result, u8* host_ptr); 50 GLShader::ProgramResult result, u8* host_ptr);
51 51
52 VAddr GetCpuAddr() const override { 52 VAddr GetCpuAddr() const override {
53 return guest_addr; 53 return cpu_addr;
54 } 54 }
55 55
56 std::size_t GetSizeInBytes() const override { 56 std::size_t GetSizeInBytes() const override {
@@ -92,7 +92,7 @@ private:
92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; 92 ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const;
93 93
94 u8* host_ptr{}; 94 u8* host_ptr{};
95 VAddr guest_addr{}; 95 VAddr cpu_addr{};
96 u64 unique_identifier{}; 96 u64 unique_identifier{};
97 Maxwell::ShaderProgram program_type{}; 97 Maxwell::ShaderProgram program_type{};
98 ShaderDiskCacheOpenGL& disk_cache; 98 ShaderDiskCacheOpenGL& disk_cache;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index b97576309..5e3d862c6 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -164,8 +164,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
164 // Reset the screen info's display texture to its own permanent texture 164 // Reset the screen info's display texture to its own permanent texture
165 screen_info.display_texture = screen_info.texture.resource.handle; 165 screen_info.display_texture = screen_info.texture.resource.handle;
166 166
167 Memory::RasterizerFlushVirtualRegion(framebuffer_addr, size_in_bytes, 167 rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
168 Memory::FlushMode::Flush);
169 168
170 constexpr u32 linear_bpp = 4; 169 constexpr u32 linear_bpp = 4;
171 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear, 170 VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index cad7340f5..995d0e068 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -6,7 +6,6 @@
6#include <cstring> 6#include <cstring>
7#include "common/alignment.h" 7#include "common/alignment.h"
8#include "common/assert.h" 8#include "common/assert.h"
9#include "core/memory.h"
10#include "video_core/gpu.h" 9#include "video_core/gpu.h"
11#include "video_core/textures/decoders.h" 10#include "video_core/textures/decoders.h"
12#include "video_core/textures/texture.h" 11#include "video_core/textures/texture.h"
@@ -230,18 +229,18 @@ u32 BytesPerPixel(TextureFormat format) {
230 } 229 }
231} 230}
232 231
233void UnswizzleTexture(u8* const unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 232void UnswizzleTexture(u8* const unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
234 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height, 233 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, u32 block_height,
235 u32 block_depth, u32 width_spacing) { 234 u32 block_depth, u32 width_spacing) {
236 CopySwizzledData((width + tile_size_x - 1) / tile_size_x, 235 CopySwizzledData((width + tile_size_x - 1) / tile_size_x,
237 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel, 236 (height + tile_size_y - 1) / tile_size_y, depth, bytes_per_pixel,
238 bytes_per_pixel, Memory::GetPointer(address), unswizzled_data, true, 237 bytes_per_pixel, address, unswizzled_data, true, block_height, block_depth,
239 block_height, block_depth, width_spacing); 238 width_spacing);
240} 239}
241 240
242std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 241std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
243 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 242 u32 width, u32 height, u32 depth, u32 block_height,
244 u32 block_height, u32 block_depth, u32 width_spacing) { 243 u32 block_depth, u32 width_spacing) {
245 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel); 244 std::vector<u8> unswizzled_data(width * height * depth * bytes_per_pixel);
246 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel, 245 UnswizzleTexture(unswizzled_data.data(), address, tile_size_x, tile_size_y, bytes_per_pixel,
247 width, height, depth, block_height, block_depth, width_spacing); 246 width, height, depth, block_height, block_depth, width_spacing);
@@ -249,8 +248,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y
249} 248}
250 249
251void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 250void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
252 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 251 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height) {
253 u32 block_height) {
254 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) / 252 const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
255 gob_size_x}; 253 gob_size_x};
256 for (u32 line = 0; line < subrect_height; ++line) { 254 for (u32 line = 0; line < subrect_height; ++line) {
@@ -262,17 +260,17 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32
262 const u32 gob_address = 260 const u32 gob_address =
263 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height; 261 gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
264 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x]; 262 const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
265 const VAddr source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel; 263 u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
266 const VAddr dest_addr = swizzled_data + swizzled_offset; 264 u8* dest_addr = swizzled_data + swizzled_offset;
267 265
268 Memory::CopyBlock(dest_addr, source_line, bytes_per_pixel); 266 std::memcpy(dest_addr, source_line, bytes_per_pixel);
269 } 267 }
270 } 268 }
271} 269}
272 270
273void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 271void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
274 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 272 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
275 u32 block_height, u32 offset_x, u32 offset_y) { 273 u32 offset_x, u32 offset_y) {
276 for (u32 line = 0; line < subrect_height; ++line) { 274 for (u32 line = 0; line < subrect_height; ++line) {
277 const u32 y2 = line + offset_y; 275 const u32 y2 = line + offset_y;
278 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height + 276 const u32 gob_address_y = (y2 / (gob_size_y * block_height)) * gob_size * block_height +
@@ -282,10 +280,10 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32
282 const u32 x2 = (x + offset_x) * bytes_per_pixel; 280 const u32 x2 = (x + offset_x) * bytes_per_pixel;
283 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height; 281 const u32 gob_address = gob_address_y + (x2 / gob_size_x) * gob_size * block_height;
284 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x]; 282 const u32 swizzled_offset = gob_address + table[x2 % gob_size_x];
285 const VAddr dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel; 283 u8* dest_line = unswizzled_data + line * dest_pitch + x * bytes_per_pixel;
286 const VAddr source_addr = swizzled_data + swizzled_offset; 284 u8* source_addr = swizzled_data + swizzled_offset;
287 285
288 Memory::CopyBlock(dest_line, source_addr, bytes_per_pixel); 286 std::memcpy(dest_line, source_addr, bytes_per_pixel);
289 } 287 }
290 } 288 }
291} 289}
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 65df86890..e078fa274 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -17,14 +17,14 @@ inline std::size_t GetGOBSize() {
17} 17}
18 18
19/// Unswizzles a swizzled texture without changing its format. 19/// Unswizzles a swizzled texture without changing its format.
20void UnswizzleTexture(u8* unswizzled_data, VAddr address, u32 tile_size_x, u32 tile_size_y, 20void UnswizzleTexture(u8* unswizzled_data, u8* address, u32 tile_size_x, u32 tile_size_y,
21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 21 u32 bytes_per_pixel, u32 width, u32 height, u32 depth,
22 u32 block_height = TICEntry::DefaultBlockHeight, 22 u32 block_height = TICEntry::DefaultBlockHeight,
23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0); 23 u32 block_depth = TICEntry::DefaultBlockHeight, u32 width_spacing = 0);
24 24
25/// Unswizzles a swizzled texture without changing its format. 25/// Unswizzles a swizzled texture without changing its format.
26std::vector<u8> UnswizzleTexture(VAddr address, u32 tile_size_x, u32 tile_size_y, 26std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y, u32 bytes_per_pixel,
27 u32 bytes_per_pixel, u32 width, u32 height, u32 depth, 27 u32 width, u32 height, u32 depth,
28 u32 block_height = TICEntry::DefaultBlockHeight, 28 u32 block_height = TICEntry::DefaultBlockHeight,
29 u32 block_depth = TICEntry::DefaultBlockHeight, 29 u32 block_depth = TICEntry::DefaultBlockHeight,
30 u32 width_spacing = 0); 30 u32 width_spacing = 0);
@@ -44,12 +44,11 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
44 44
45/// Copies an untiled subrectangle into a tiled surface. 45/// Copies an untiled subrectangle into a tiled surface.
46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width, 46void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
47 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 47 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
48 u32 block_height);
49 48
50/// Copies a tiled subrectangle into a linear surface. 49/// Copies a tiled subrectangle into a linear surface.
51void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, 50void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
52 u32 bytes_per_pixel, VAddr swizzled_data, VAddr unswizzled_data, 51 u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
53 u32 block_height, u32 offset_x, u32 offset_y); 52 u32 offset_x, u32 offset_y);
54 53
55} // namespace Tegra::Texture 54} // namespace Tegra::Texture
diff --git a/src/yuzu/debugger/graphics/graphics_surface.cpp b/src/yuzu/debugger/graphics/graphics_surface.cpp
index 71683da8e..29f01dfb2 100644
--- a/src/yuzu/debugger/graphics/graphics_surface.cpp
+++ b/src/yuzu/debugger/graphics/graphics_surface.cpp
@@ -383,13 +383,12 @@ void GraphicsSurfaceWidget::OnUpdate() {
383 // TODO: Implement a good way to visualize alpha components! 383 // TODO: Implement a good way to visualize alpha components!
384 384
385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32); 385 QImage decoded_image(surface_width, surface_height, QImage::Format_ARGB32);
386 std::optional<VAddr> address = gpu.MemoryManager().GpuToCpuAddress(surface_address);
387 386
388 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles. 387 // TODO(bunnei): Will not work with BCn formats that swizzle 4x4 tiles.
389 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it. 388 // Needs to be fixed if we plan to use this feature more, otherwise we may remove it.
390 auto unswizzled_data = Tegra::Texture::UnswizzleTexture( 389 auto unswizzled_data = Tegra::Texture::UnswizzleTexture(
391 *address, 1, 1, Tegra::Texture::BytesPerPixel(surface_format), surface_width, 390 gpu.MemoryManager().GetPointer(surface_address), 1, 1,
392 surface_height, 1U); 391 Tegra::Texture::BytesPerPixel(surface_format), surface_width, surface_height, 1U);
393 392
394 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format, 393 auto texture_data = Tegra::Texture::DecodeTexture(unswizzled_data, surface_format,
395 surface_width, surface_height); 394 surface_width, surface_height);