summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/device/device_session.cpp6
-rw-r--r--src/audio_core/renderer/command/data_source/decode.cpp21
-rw-r--r--src/audio_core/renderer/command/effect/aux_.cpp82
-rw-r--r--src/common/page_table.cpp1
-rw-r--r--src/common/page_table.h1
-rw-r--r--src/core/core_timing.cpp3
-rw-r--r--src/core/core_timing.h2
-rw-r--r--src/core/hle/service/hle_ipc.cpp32
-rw-r--r--src/core/memory.cpp54
-rw-r--r--src/core/memory.h212
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h7
-rw-r--r--src/video_core/dma_pusher.cpp26
-rw-r--r--src/video_core/engines/engine_upload.cpp28
-rw-r--r--src/video_core/engines/kepler_compute.cpp1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp4
-rw-r--r--src/video_core/engines/maxwell_dma.cpp87
-rw-r--r--src/video_core/engines/sw_blitter/blitter.cpp29
-rw-r--r--src/video_core/memory_manager.cpp30
-rw-r--r--src/video_core/memory_manager.h18
-rw-r--r--src/video_core/texture_cache/texture_cache.h24
-rw-r--r--src/video_core/texture_cache/util.cpp26
-rw-r--r--src/video_core/texture_cache/util.h3
22 files changed, 463 insertions, 234 deletions
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index 86811fcb8..c41d9d1ea 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -92,9 +92,9 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) {
92 if (type == Sink::StreamType::In) { 92 if (type == Sink::StreamType::In) {
93 stream->AppendBuffer(new_buffer, tmp_samples); 93 stream->AppendBuffer(new_buffer, tmp_samples);
94 } else { 94 } else {
95 system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(), 95 Core::Memory::CpuGuestMemory<s16, Core::Memory::GuestMemoryFlags::UnsafeRead> samples(
96 buffer.size); 96 system.ApplicationMemory(), buffer.samples, buffer.size / sizeof(s16));
97 stream->AppendBuffer(new_buffer, tmp_samples); 97 stream->AppendBuffer(new_buffer, samples);
98 } 98 }
99 } 99 }
100} 100}
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index f45933203..257aa866e 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -28,7 +28,6 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4};
28template <typename T> 28template <typename T>
29static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, 29static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
30 const DecodeArg& req) { 30 const DecodeArg& req) {
31 std::array<T, TempBufferSize> tmp_samples{};
32 constexpr s32 min{std::numeric_limits<s16>::min()}; 31 constexpr s32 min{std::numeric_limits<s16>::min()};
33 constexpr s32 max{std::numeric_limits<s16>::max()}; 32 constexpr s32 max{std::numeric_limits<s16>::max()};
34 33
@@ -49,19 +48,18 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
49 const VAddr source{req.buffer + 48 const VAddr source{req.buffer +
50 (((req.start_offset + req.offset) * channel_count) * sizeof(T))}; 49 (((req.start_offset + req.offset) * channel_count) * sizeof(T))};
51 const u64 size{channel_count * samples_to_decode}; 50 const u64 size{channel_count * samples_to_decode};
52 const u64 size_bytes{size * sizeof(T)};
53
54 memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes);
55 51
52 Core::Memory::CpuGuestMemory<T, Core::Memory::GuestMemoryFlags::UnsafeRead> samples(
53 memory, source, size);
56 if constexpr (std::is_floating_point_v<T>) { 54 if constexpr (std::is_floating_point_v<T>) {
57 for (u32 i = 0; i < samples_to_decode; i++) { 55 for (u32 i = 0; i < samples_to_decode; i++) {
58 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] * 56 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] *
59 std::numeric_limits<s16>::max())}; 57 std::numeric_limits<s16>::max())};
60 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); 58 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
61 } 59 }
62 } else { 60 } else {
63 for (u32 i = 0; i < samples_to_decode; i++) { 61 for (u32 i = 0; i < samples_to_decode; i++) {
64 out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; 62 out_buffer[i] = samples[i * channel_count + req.target_channel];
65 } 63 }
66 } 64 }
67 } break; 65 } break;
@@ -74,16 +72,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
74 } 72 }
75 73
76 const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; 74 const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))};
77 memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); 75 Core::Memory::CpuGuestMemory<T, Core::Memory::GuestMemoryFlags::UnsafeRead> samples(
76 memory, source, samples_to_decode);
78 77
79 if constexpr (std::is_floating_point_v<T>) { 78 if constexpr (std::is_floating_point_v<T>) {
80 for (u32 i = 0; i < samples_to_decode; i++) { 79 for (u32 i = 0; i < samples_to_decode; i++) {
81 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] * 80 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] *
82 std::numeric_limits<s16>::max())}; 81 std::numeric_limits<s16>::max())};
83 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); 82 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
84 } 83 }
85 } else { 84 } else {
86 std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); 85 std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16));
87 } 86 }
88 break; 87 break;
89 } 88 }
@@ -101,7 +100,6 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
101 */ 100 */
102static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, 101static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
103 const DecodeArg& req) { 102 const DecodeArg& req) {
104 std::array<u8, TempBufferSize> wavebuffer{};
105 constexpr u32 SamplesPerFrame{14}; 103 constexpr u32 SamplesPerFrame{14};
106 constexpr u32 NibblesPerFrame{16}; 104 constexpr u32 NibblesPerFrame{16};
107 105
@@ -139,7 +137,8 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
139 } 137 }
140 138
141 const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; 139 const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)};
142 memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); 140 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> wavebuffer(
141 memory, req.buffer + position_in_frame / 2, size);
143 142
144 auto context{req.adpcm_context}; 143 auto context{req.adpcm_context};
145 auto header{context->header}; 144 auto header{context->header};
diff --git a/src/audio_core/renderer/command/effect/aux_.cpp b/src/audio_core/renderer/command/effect/aux_.cpp
index c5650effa..a3e12b3e7 100644
--- a/src/audio_core/renderer/command/effect/aux_.cpp
+++ b/src/audio_core/renderer/command/effect/aux_.cpp
@@ -21,23 +21,13 @@ static void ResetAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr aux_in
21 } 21 }
22 22
23 AuxInfo::AuxInfoDsp info{}; 23 AuxInfo::AuxInfoDsp info{};
24 auto info_ptr{&info}; 24 memory.ReadBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp));
25 bool host_safe{(aux_info & Core::Memory::YUZU_PAGEMASK) <=
26 (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp))};
27 25
28 if (host_safe) [[likely]] { 26 info.read_offset = 0;
29 info_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(aux_info); 27 info.write_offset = 0;
30 } else { 28 info.total_sample_count = 0;
31 memory.ReadBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp));
32 }
33 29
34 info_ptr->read_offset = 0; 30 memory.WriteBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp));
35 info_ptr->write_offset = 0;
36 info_ptr->total_sample_count = 0;
37
38 if (!host_safe) [[unlikely]] {
39 memory.WriteBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp));
40 }
41} 31}
42 32
43/** 33/**
@@ -86,17 +76,9 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
86 } 76 }
87 77
88 AuxInfo::AuxInfoDsp send_info{}; 78 AuxInfo::AuxInfoDsp send_info{};
89 auto send_ptr = &send_info; 79 memory.ReadBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp));
90 bool host_safe = (send_info_ & Core::Memory::YUZU_PAGEMASK) <=
91 (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp));
92
93 if (host_safe) [[likely]] {
94 send_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(send_info_);
95 } else {
96 memory.ReadBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp));
97 }
98 80
99 u32 target_write_offset{send_ptr->write_offset + write_offset}; 81 u32 target_write_offset{send_info.write_offset + write_offset};
100 if (target_write_offset > count_max) { 82 if (target_write_offset > count_max) {
101 return 0; 83 return 0;
102 } 84 }
@@ -105,15 +87,9 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
105 u32 read_pos{0}; 87 u32 read_pos{0};
106 while (write_count > 0) { 88 while (write_count > 0) {
107 u32 to_write{std::min(count_max - target_write_offset, write_count)}; 89 u32 to_write{std::min(count_max - target_write_offset, write_count)};
108 const auto write_addr = send_buffer + target_write_offset * sizeof(s32); 90 if (to_write > 0) {
109 bool write_safe{(write_addr & Core::Memory::YUZU_PAGEMASK) <= 91 const auto write_addr = send_buffer + target_write_offset * sizeof(s32);
110 (Core::Memory::YUZU_PAGESIZE - (write_addr + to_write * sizeof(s32)))}; 92 memory.WriteBlockUnsafe(write_addr, &input[read_pos], to_write * sizeof(s32));
111 if (write_safe) [[likely]] {
112 auto ptr = memory.GetPointer(write_addr);
113 std::memcpy(ptr, &input[read_pos], to_write * sizeof(s32));
114 } else {
115 memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32),
116 &input[read_pos], to_write * sizeof(s32));
117 } 93 }
118 target_write_offset = (target_write_offset + to_write) % count_max; 94 target_write_offset = (target_write_offset + to_write) % count_max;
119 write_count -= to_write; 95 write_count -= to_write;
@@ -121,13 +97,10 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
121 } 97 }
122 98
123 if (update_count) { 99 if (update_count) {
124 send_ptr->write_offset = (send_ptr->write_offset + update_count) % count_max; 100 send_info.write_offset = (send_info.write_offset + update_count) % count_max;
125 }
126
127 if (!host_safe) [[unlikely]] {
128 memory.WriteBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp));
129 } 101 }
130 102
103 memory.WriteBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp));
131 return write_count_; 104 return write_count_;
132} 105}
133 106
@@ -174,17 +147,9 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
174 } 147 }
175 148
176 AuxInfo::AuxInfoDsp return_info{}; 149 AuxInfo::AuxInfoDsp return_info{};
177 auto return_ptr = &return_info; 150 memory.ReadBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp));
178 bool host_safe = (return_info_ & Core::Memory::YUZU_PAGEMASK) <=
179 (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp));
180 151
181 if (host_safe) [[likely]] { 152 u32 target_read_offset{return_info.read_offset + read_offset};
182 return_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(return_info_);
183 } else {
184 memory.ReadBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp));
185 }
186
187 u32 target_read_offset{return_ptr->read_offset + read_offset};
188 if (target_read_offset > count_max) { 153 if (target_read_offset > count_max) {
189 return 0; 154 return 0;
190 } 155 }
@@ -193,15 +158,9 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
193 u32 write_pos{0}; 158 u32 write_pos{0};
194 while (read_count > 0) { 159 while (read_count > 0) {
195 u32 to_read{std::min(count_max - target_read_offset, read_count)}; 160 u32 to_read{std::min(count_max - target_read_offset, read_count)};
196 const auto read_addr = return_buffer + target_read_offset * sizeof(s32); 161 if (to_read > 0) {
197 bool read_safe{(read_addr & Core::Memory::YUZU_PAGEMASK) <= 162 const auto read_addr = return_buffer + target_read_offset * sizeof(s32);
198 (Core::Memory::YUZU_PAGESIZE - (read_addr + to_read * sizeof(s32)))}; 163 memory.ReadBlockUnsafe(read_addr, &output[write_pos], to_read * sizeof(s32));
199 if (read_safe) [[likely]] {
200 auto ptr = memory.GetPointer(read_addr);
201 std::memcpy(&output[write_pos], ptr, to_read * sizeof(s32));
202 } else {
203 memory.ReadBlockUnsafe(return_buffer + target_read_offset * sizeof(s32),
204 &output[write_pos], to_read * sizeof(s32));
205 } 164 }
206 target_read_offset = (target_read_offset + to_read) % count_max; 165 target_read_offset = (target_read_offset + to_read) % count_max;
207 read_count -= to_read; 166 read_count -= to_read;
@@ -209,13 +168,10 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
209 } 168 }
210 169
211 if (update_count) { 170 if (update_count) {
212 return_ptr->read_offset = (return_ptr->read_offset + update_count) % count_max; 171 return_info.read_offset = (return_info.read_offset + update_count) % count_max;
213 }
214
215 if (!host_safe) [[unlikely]] {
216 memory.WriteBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp));
217 } 172 }
218 173
174 memory.WriteBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp));
219 return read_count_; 175 return read_count_;
220} 176}
221 177
diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp
index b744b68ce..4b1690269 100644
--- a/src/common/page_table.cpp
+++ b/src/common/page_table.cpp
@@ -66,6 +66,7 @@ void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page
66 << (address_space_width_in_bits - page_size_in_bits)}; 66 << (address_space_width_in_bits - page_size_in_bits)};
67 pointers.resize(num_page_table_entries); 67 pointers.resize(num_page_table_entries);
68 backing_addr.resize(num_page_table_entries); 68 backing_addr.resize(num_page_table_entries);
69 blocks.resize(num_page_table_entries);
69 current_address_space_width_in_bits = address_space_width_in_bits; 70 current_address_space_width_in_bits = address_space_width_in_bits;
70 page_size = 1ULL << page_size_in_bits; 71 page_size = 1ULL << page_size_in_bits;
71} 72}
diff --git a/src/common/page_table.h b/src/common/page_table.h
index 1ad3a9f8b..fec8378f3 100644
--- a/src/common/page_table.h
+++ b/src/common/page_table.h
@@ -122,6 +122,7 @@ struct PageTable {
122 * corresponding attribute element is of type `Memory`. 122 * corresponding attribute element is of type `Memory`.
123 */ 123 */
124 VirtualBuffer<PageInfo> pointers; 124 VirtualBuffer<PageInfo> pointers;
125 VirtualBuffer<u64> blocks;
125 126
126 VirtualBuffer<u64> backing_addr; 127 VirtualBuffer<u64> backing_addr;
127 128
diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp
index e6112a3c9..b98a0cb33 100644
--- a/src/core/core_timing.cpp
+++ b/src/core/core_timing.cpp
@@ -70,7 +70,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
70 -> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; 70 -> std::optional<std::chrono::nanoseconds> { return std::nullopt; };
71 ev_lost = CreateEvent("_lost_event", empty_timed_callback); 71 ev_lost = CreateEvent("_lost_event", empty_timed_callback);
72 if (is_multicore) { 72 if (is_multicore) {
73 timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this)); 73 timer_thread = std::make_unique<std::jthread>(ThreadEntry, std::ref(*this));
74 } 74 }
75} 75}
76 76
@@ -255,7 +255,6 @@ void CoreTiming::ThreadLoop() {
255#ifdef _WIN32 255#ifdef _WIN32
256 while (!paused && !event.IsSet() && wait_time > 0) { 256 while (!paused && !event.IsSet() && wait_time > 0) {
257 wait_time = *next_time - GetGlobalTimeNs().count(); 257 wait_time = *next_time - GetGlobalTimeNs().count();
258
259 if (wait_time >= timer_resolution_ns) { 258 if (wait_time >= timer_resolution_ns) {
260 Common::Windows::SleepForOneTick(); 259 Common::Windows::SleepForOneTick();
261 } else { 260 } else {
diff --git a/src/core/core_timing.h b/src/core/core_timing.h
index 5bca1c78d..c20e906fb 100644
--- a/src/core/core_timing.h
+++ b/src/core/core_timing.h
@@ -163,7 +163,7 @@ private:
163 Common::Event pause_event{}; 163 Common::Event pause_event{};
164 std::mutex basic_lock; 164 std::mutex basic_lock;
165 std::mutex advance_lock; 165 std::mutex advance_lock;
166 std::unique_ptr<std::thread> timer_thread; 166 std::unique_ptr<std::jthread> timer_thread;
167 std::atomic<bool> paused{}; 167 std::atomic<bool> paused{};
168 std::atomic<bool> paused_set{}; 168 std::atomic<bool> paused_set{};
169 std::atomic<bool> wait_set{}; 169 std::atomic<bool> wait_set{};
diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp
index 2290df705..f6a1e54f2 100644
--- a/src/core/hle/service/hle_ipc.cpp
+++ b/src/core/hle/service/hle_ipc.cpp
@@ -329,8 +329,22 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons
329} 329}
330 330
331std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { 331std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
332 static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_a; 332 static thread_local std::array read_buffer_a{
333 static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_x; 333 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
334 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
335 };
336 static thread_local std::array read_buffer_data_a{
337 Common::ScratchBuffer<u8>(),
338 Common::ScratchBuffer<u8>(),
339 };
340 static thread_local std::array read_buffer_x{
341 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
342 Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
343 };
344 static thread_local std::array read_buffer_data_x{
345 Common::ScratchBuffer<u8>(),
346 Common::ScratchBuffer<u8>(),
347 };
334 348
335 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && 349 const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
336 BufferDescriptorA()[buffer_index].Size()}; 350 BufferDescriptorA()[buffer_index].Size()};
@@ -339,19 +353,17 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons
339 BufferDescriptorA().size() > buffer_index, { return {}; }, 353 BufferDescriptorA().size() > buffer_index, { return {}; },
340 "BufferDescriptorA invalid buffer_index {}", buffer_index); 354 "BufferDescriptorA invalid buffer_index {}", buffer_index);
341 auto& read_buffer = read_buffer_a[buffer_index]; 355 auto& read_buffer = read_buffer_a[buffer_index];
342 read_buffer.resize_destructive(BufferDescriptorA()[buffer_index].Size()); 356 return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(),
343 memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), read_buffer.data(), 357 BufferDescriptorA()[buffer_index].Size(),
344 read_buffer.size()); 358 &read_buffer_data_a[buffer_index]);
345 return read_buffer;
346 } else { 359 } else {
347 ASSERT_OR_EXECUTE_MSG( 360 ASSERT_OR_EXECUTE_MSG(
348 BufferDescriptorX().size() > buffer_index, { return {}; }, 361 BufferDescriptorX().size() > buffer_index, { return {}; },
349 "BufferDescriptorX invalid buffer_index {}", buffer_index); 362 "BufferDescriptorX invalid buffer_index {}", buffer_index);
350 auto& read_buffer = read_buffer_x[buffer_index]; 363 auto& read_buffer = read_buffer_x[buffer_index];
351 read_buffer.resize_destructive(BufferDescriptorX()[buffer_index].Size()); 364 return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(),
352 memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), read_buffer.data(), 365 BufferDescriptorX()[buffer_index].Size(),
353 read_buffer.size()); 366 &read_buffer_data_x[buffer_index]);
354 return read_buffer;
355 } 367 }
356} 368}
357 369
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 257406f09..805963178 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -266,6 +266,22 @@ struct Memory::Impl {
266 ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size); 266 ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size);
267 } 267 }
268 268
269 const u8* GetSpan(const VAddr src_addr, const std::size_t size) const {
270 if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] ==
271 current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) {
272 return GetPointerSilent(src_addr);
273 }
274 return nullptr;
275 }
276
277 u8* GetSpan(const VAddr src_addr, const std::size_t size) {
278 if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] ==
279 current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) {
280 return GetPointerSilent(src_addr);
281 }
282 return nullptr;
283 }
284
269 template <bool UNSAFE> 285 template <bool UNSAFE>
270 void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr, 286 void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr,
271 const void* src_buffer, const std::size_t size) { 287 const void* src_buffer, const std::size_t size) {
@@ -559,7 +575,7 @@ struct Memory::Impl {
559 } 575 }
560 } 576 }
561 577
562 const Common::ProcessAddress end = base + size; 578 const auto end = base + size;
563 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", 579 ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
564 base + page_table.pointers.size()); 580 base + page_table.pointers.size());
565 581
@@ -570,14 +586,18 @@ struct Memory::Impl {
570 while (base != end) { 586 while (base != end) {
571 page_table.pointers[base].Store(nullptr, type); 587 page_table.pointers[base].Store(nullptr, type);
572 page_table.backing_addr[base] = 0; 588 page_table.backing_addr[base] = 0;
573 589 page_table.blocks[base] = 0;
574 base += 1; 590 base += 1;
575 } 591 }
576 } else { 592 } else {
593 auto orig_base = base;
577 while (base != end) { 594 while (base != end) {
578 page_table.pointers[base].Store( 595 auto host_ptr =
579 system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS), type); 596 system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS);
580 page_table.backing_addr[base] = GetInteger(target) - (base << YUZU_PAGEBITS); 597 auto backing = GetInteger(target) - (base << YUZU_PAGEBITS);
598 page_table.pointers[base].Store(host_ptr, type);
599 page_table.backing_addr[base] = backing;
600 page_table.blocks[base] = orig_base << YUZU_PAGEBITS;
581 601
582 ASSERT_MSG(page_table.pointers[base].Pointer(), 602 ASSERT_MSG(page_table.pointers[base].Pointer(),
583 "memory mapping base yield a nullptr within the table"); 603 "memory mapping base yield a nullptr within the table");
@@ -747,6 +767,14 @@ struct Memory::Impl {
747 VAddr last_address; 767 VAddr last_address;
748 }; 768 };
749 769
770 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
771 system.GPU().InvalidateRegion(GetInteger(dest_addr), size);
772 }
773
774 void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
775 system.GPU().FlushRegion(GetInteger(dest_addr), size);
776 }
777
750 Core::System& system; 778 Core::System& system;
751 Common::PageTable* current_page_table = nullptr; 779 Common::PageTable* current_page_table = nullptr;
752 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> 780 std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
@@ -881,6 +909,14 @@ void Memory::ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_b
881 impl->ReadBlockUnsafe(src_addr, dest_buffer, size); 909 impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
882} 910}
883 911
912const u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) const {
913 return impl->GetSpan(src_addr, size);
914}
915
916u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) {
917 return impl->GetSpan(src_addr, size);
918}
919
884void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer, 920void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer,
885 const std::size_t size) { 921 const std::size_t size) {
886 impl->WriteBlock(dest_addr, src_buffer, size); 922 impl->WriteBlock(dest_addr, src_buffer, size);
@@ -924,4 +960,12 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
924 impl->MarkRegionDebug(GetInteger(vaddr), size, debug); 960 impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
925} 961}
926 962
963void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
964 impl->InvalidateRegion(dest_addr, size);
965}
966
967void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
968 impl->FlushRegion(dest_addr, size);
969}
970
927} // namespace Core::Memory 971} // namespace Core::Memory
diff --git a/src/core/memory.h b/src/core/memory.h
index ea01824f8..ea33c769c 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -5,8 +5,12 @@
5 5
6#include <cstddef> 6#include <cstddef>
7#include <memory> 7#include <memory>
8#include <optional>
8#include <span> 9#include <span>
9#include <string> 10#include <string>
11#include <vector>
12
13#include "common/scratch_buffer.h"
10#include "common/typed_address.h" 14#include "common/typed_address.h"
11#include "core/hle/result.h" 15#include "core/hle/result.h"
12 16
@@ -24,6 +28,10 @@ class PhysicalMemory;
24class KProcess; 28class KProcess;
25} // namespace Kernel 29} // namespace Kernel
26 30
31namespace Tegra {
32class MemoryManager;
33}
34
27namespace Core::Memory { 35namespace Core::Memory {
28 36
29/** 37/**
@@ -343,6 +351,9 @@ public:
343 */ 351 */
344 void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size); 352 void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size);
345 353
354 const u8* GetSpan(const VAddr src_addr, const std::size_t size) const;
355 u8* GetSpan(const VAddr src_addr, const std::size_t size);
356
346 /** 357 /**
347 * Writes a range of bytes into the current process' address space at the specified 358 * Writes a range of bytes into the current process' address space at the specified
348 * virtual address. 359 * virtual address.
@@ -461,6 +472,8 @@ public:
461 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); 472 void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
462 473
463 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); 474 void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
475 void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
476 void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
464 477
465private: 478private:
466 Core::System& system; 479 Core::System& system;
@@ -469,4 +482,203 @@ private:
469 std::unique_ptr<Impl> impl; 482 std::unique_ptr<Impl> impl;
470}; 483};
471 484
485enum GuestMemoryFlags : u32 {
486 Read = 1 << 0,
487 Write = 1 << 1,
488 Safe = 1 << 2,
489 Cached = 1 << 3,
490
491 SafeRead = Read | Safe,
492 SafeWrite = Write | Safe,
493 SafeReadWrite = SafeRead | SafeWrite,
494 SafeReadCachedWrite = SafeReadWrite | Cached,
495
496 UnsafeRead = Read,
497 UnsafeWrite = Write,
498 UnsafeReadWrite = UnsafeRead | UnsafeWrite,
499 UnsafeReadCachedWrite = UnsafeReadWrite | Cached,
500};
501
502namespace {
503template <typename M, typename T, GuestMemoryFlags FLAGS>
504class GuestMemory {
505 using iterator = T*;
506 using const_iterator = const T*;
507 using value_type = T;
508 using element_type = T;
509 using iterator_category = std::contiguous_iterator_tag;
510
511public:
512 GuestMemory() = delete;
513 explicit GuestMemory(M& memory_, u64 addr_, std::size_t size_,
514 Common::ScratchBuffer<T>* backup = nullptr)
515 : memory{memory_}, addr{addr_}, size{size_} {
516 static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
517 if constexpr (FLAGS & GuestMemoryFlags::Read) {
518 Read(addr, size, backup);
519 }
520 }
521
522 ~GuestMemory() = default;
523
524 T* data() noexcept {
525 return data_span.data();
526 }
527
528 const T* data() const noexcept {
529 return data_span.data();
530 }
531
532 [[nodiscard]] T* begin() noexcept {
533 return data();
534 }
535
536 [[nodiscard]] const T* begin() const noexcept {
537 return data();
538 }
539
540 [[nodiscard]] T* end() noexcept {
541 return data() + size;
542 }
543
544 [[nodiscard]] const T* end() const noexcept {
545 return data() + size;
546 }
547
548 T& operator[](size_t index) noexcept {
549 return data_span[index];
550 }
551
552 const T& operator[](size_t index) const noexcept {
553 return data_span[index];
554 }
555
556 void SetAddressAndSize(u64 addr_, std::size_t size_) noexcept {
557 addr = addr_;
558 size = size_;
559 addr_changed = true;
560 }
561
562 std::span<T> Read(u64 addr_, std::size_t size_,
563 Common::ScratchBuffer<T>* backup = nullptr) noexcept {
564 addr = addr_;
565 size = size_;
566 if (size == 0) {
567 is_data_copy = true;
568 return {};
569 }
570
571 if (TrySetSpan()) {
572 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
573 memory.FlushRegion(addr, size * sizeof(T));
574 }
575 } else {
576 if (backup) {
577 backup->resize_destructive(size);
578 data_span = *backup;
579 } else {
580 data_copy.resize(size);
581 data_span = std::span(data_copy);
582 }
583 is_data_copy = true;
584 span_valid = true;
585 if constexpr (FLAGS & GuestMemoryFlags::Safe) {
586 memory.ReadBlock(addr, data_span.data(), size * sizeof(T));
587 } else {
588 memory.ReadBlockUnsafe(addr, data_span.data(), size * sizeof(T));
589 }
590 }
591 return data_span;
592 }
593
594 void Write(std::span<T> write_data) noexcept {
595 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
596 memory.WriteBlockCached(addr, write_data.data(), size * sizeof(T));
597 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
598 memory.WriteBlock(addr, write_data.data(), size * sizeof(T));
599 } else {
600 memory.WriteBlockUnsafe(addr, write_data.data(), size * sizeof(T));
601 }
602 }
603
604 bool TrySetSpan() noexcept {
605 if (u8* ptr = memory.GetSpan(addr, size * sizeof(T)); ptr) {
606 data_span = {reinterpret_cast<T*>(ptr), size};
607 span_valid = true;
608 return true;
609 }
610 return false;
611 }
612
613protected:
614 bool IsDataCopy() const noexcept {
615 return is_data_copy;
616 }
617
618 bool AddressChanged() const noexcept {
619 return addr_changed;
620 }
621
622 M& memory;
623 u64 addr;
624 size_t size;
625 std::span<T> data_span{};
626 std::vector<T> data_copy;
627 bool span_valid{false};
628 bool is_data_copy{false};
629 bool addr_changed{false};
630};
631
632template <typename M, typename T, GuestMemoryFlags FLAGS>
633class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
634public:
635 GuestMemoryScoped() = delete;
636 explicit GuestMemoryScoped(M& memory_, u64 addr_, std::size_t size_,
637 Common::ScratchBuffer<T>* backup = nullptr)
638 : GuestMemory<M, T, FLAGS>(memory_, addr_, size_, backup) {
639 if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
640 if (!this->TrySetSpan()) {
641 if (backup) {
642 this->data_span = *backup;
643 this->span_valid = true;
644 this->is_data_copy = true;
645 }
646 }
647 }
648 }
649
650 ~GuestMemoryScoped() {
651 if constexpr (FLAGS & GuestMemoryFlags::Write) {
652 if (this->size == 0) [[unlikely]] {
653 return;
654 }
655
656 if (this->AddressChanged() || this->IsDataCopy()) {
657 ASSERT(this->span_valid);
658 if constexpr (FLAGS & GuestMemoryFlags::Cached) {
659 this->memory.WriteBlockCached(this->addr, this->data_span.data(),
660 this->size * sizeof(T));
661 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
662 this->memory.WriteBlock(this->addr, this->data_span.data(),
663 this->size * sizeof(T));
664 } else {
665 this->memory.WriteBlockUnsafe(this->addr, this->data_span.data(),
666 this->size * sizeof(T));
667 }
668 } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
669 this->memory.InvalidateRegion(this->addr, this->size * sizeof(T));
670 }
671 }
672 }
673};
674} // namespace
675
676template <typename T, GuestMemoryFlags FLAGS>
677using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
678template <typename T, GuestMemoryFlags FLAGS>
679using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
680template <typename T, GuestMemoryFlags FLAGS>
681using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
682template <typename T, GuestMemoryFlags FLAGS>
683using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
472} // namespace Core::Memory 684} // namespace Core::Memory
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b5ed3380f..6ed4b78f2 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -234,9 +234,10 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
234 if (has_new_downloads) { 234 if (has_new_downloads) {
235 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 235 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
236 } 236 }
237 tmp_buffer.resize_destructive(amount); 237
238 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); 238 Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp(
239 cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); 239 cpu_memory, *cpu_src_address, amount, &tmp_buffer);
240 tmp.SetAddressAndSize(*cpu_dest_address, amount);
240 return true; 241 return true;
241} 242}
242 243
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 551929824..9f1b340a9 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -5,6 +5,7 @@
5#include "common/microprofile.h" 5#include "common/microprofile.h"
6#include "common/settings.h" 6#include "common/settings.h"
7#include "core/core.h" 7#include "core/core.h"
8#include "core/memory.h"
8#include "video_core/dma_pusher.h" 9#include "video_core/dma_pusher.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/gpu.h" 11#include "video_core/gpu.h"
@@ -12,6 +13,8 @@
12 13
13namespace Tegra { 14namespace Tegra {
14 15
16constexpr u32 MacroRegistersStart = 0xE00;
17
15DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, 18DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
16 Control::ChannelState& channel_state_) 19 Control::ChannelState& channel_state_)
17 : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, 20 : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
@@ -74,25 +77,16 @@ bool DmaPusher::Step() {
74 } 77 }
75 78
76 // Push buffer non-empty, read a word 79 // Push buffer non-empty, read a word
77 command_headers.resize_destructive(command_list_header.size); 80 if (dma_state.method >= MacroRegistersStart) {
78 constexpr u32 MacroRegistersStart = 0xE00;
79 if (dma_state.method < MacroRegistersStart) {
80 if (Settings::IsGPULevelHigh()) {
81 memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
82 command_list_header.size * sizeof(u32));
83 } else {
84 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
85 command_list_header.size * sizeof(u32));
86 }
87 } else {
88 const size_t copy_size = command_list_header.size * sizeof(u32);
89 if (subchannels[dma_state.subchannel]) { 81 if (subchannels[dma_state.subchannel]) {
90 subchannels[dma_state.subchannel]->current_dirty = 82 subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty(
91 memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); 83 dma_state.dma_get, command_list_header.size * sizeof(u32));
92 } 84 }
93 memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
94 } 85 }
95 ProcessCommands(command_headers); 86 Core::Memory::GpuGuestMemory<Tegra::CommandHeader,
87 Core::Memory::GuestMemoryFlags::UnsafeRead>
88 headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers);
89 ProcessCommands(headers);
96 } 90 }
97 91
98 return true; 92 return true;
diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp
index 7f5a0c29d..bc64d4486 100644
--- a/src/video_core/engines/engine_upload.cpp
+++ b/src/video_core/engines/engine_upload.cpp
@@ -5,6 +5,7 @@
5 5
6#include "common/algorithm.h" 6#include "common/algorithm.h"
7#include "common/assert.h" 7#include "common/assert.h"
8#include "core/memory.h"
8#include "video_core/engines/engine_upload.h" 9#include "video_core/engines/engine_upload.h"
9#include "video_core/memory_manager.h" 10#include "video_core/memory_manager.h"
10#include "video_core/rasterizer_interface.h" 11#include "video_core/rasterizer_interface.h"
@@ -46,15 +47,11 @@ void State::ProcessData(const u32* data, size_t num_data) {
46void State::ProcessData(std::span<const u8> read_buffer) { 47void State::ProcessData(std::span<const u8> read_buffer) {
47 const GPUVAddr address{regs.dest.Address()}; 48 const GPUVAddr address{regs.dest.Address()};
48 if (is_linear) { 49 if (is_linear) {
49 if (regs.line_count == 1) { 50 for (size_t line = 0; line < regs.line_count; ++line) {
50 rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); 51 const GPUVAddr dest_line = address + line * regs.dest.pitch;
51 } else { 52 std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in,
52 for (size_t line = 0; line < regs.line_count; ++line) { 53 regs.line_length_in);
53 const GPUVAddr dest_line = address + line * regs.dest.pitch; 54 rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
54 std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in,
55 regs.line_length_in);
56 rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
57 }
58 } 55 }
59 } else { 56 } else {
60 u32 width = regs.dest.width; 57 u32 width = regs.dest.width;
@@ -70,13 +67,14 @@ void State::ProcessData(std::span<const u8> read_buffer) {
70 const std::size_t dst_size = Tegra::Texture::CalculateSize( 67 const std::size_t dst_size = Tegra::Texture::CalculateSize(
71 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, 68 true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
72 regs.dest.BlockHeight(), regs.dest.BlockDepth()); 69 regs.dest.BlockHeight(), regs.dest.BlockDepth());
73 tmp_buffer.resize_destructive(dst_size); 70
74 memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); 71 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
75 Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, 72 tmp(memory_manager, address, dst_size, &tmp_buffer);
76 regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, 73
77 x_elements, regs.line_count, regs.dest.BlockHeight(), 74 Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height,
75 regs.dest.depth, x_offset, regs.dest.y, x_elements,
76 regs.line_count, regs.dest.BlockHeight(),
78 regs.dest.BlockDepth(), regs.line_length_in); 77 regs.dest.BlockDepth(), regs.line_length_in);
79 memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
80 } 78 }
81} 79}
82 80
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 601095f03..a38d9528a 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -84,7 +84,6 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
84 84
85 Texture::TICEntry tic_entry; 85 Texture::TICEntry tic_entry;
86 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 86 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
87
88 return tic_entry; 87 return tic_entry;
89} 88}
90 89
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 62d70e9f3..c3696096d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -9,6 +9,7 @@
9#include "common/settings.h" 9#include "common/settings.h"
10#include "core/core.h" 10#include "core/core.h"
11#include "core/core_timing.h" 11#include "core/core_timing.h"
12#include "core/memory.h"
12#include "video_core/dirty_flags.h" 13#include "video_core/dirty_flags.h"
13#include "video_core/engines/draw_manager.h" 14#include "video_core/engines/draw_manager.h"
14#include "video_core/engines/maxwell_3d.h" 15#include "video_core/engines/maxwell_3d.h"
@@ -679,17 +680,14 @@ void Maxwell3D::ProcessCBData(u32 value) {
679Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { 680Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
680 const GPUVAddr tic_address_gpu{regs.tex_header.Address() + 681 const GPUVAddr tic_address_gpu{regs.tex_header.Address() +
681 tic_index * sizeof(Texture::TICEntry)}; 682 tic_index * sizeof(Texture::TICEntry)};
682
683 Texture::TICEntry tic_entry; 683 Texture::TICEntry tic_entry;
684 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); 684 memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
685
686 return tic_entry; 685 return tic_entry;
687} 686}
688 687
689Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { 688Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
690 const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + 689 const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() +
691 tsc_index * sizeof(Texture::TSCEntry)}; 690 tsc_index * sizeof(Texture::TSCEntry)};
692
693 Texture::TSCEntry tsc_entry; 691 Texture::TSCEntry tsc_entry;
694 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); 692 memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
695 return tsc_entry; 693 return tsc_entry;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index f8598fd98..cd8e24b0b 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -7,6 +7,7 @@
7#include "common/microprofile.h" 7#include "common/microprofile.h"
8#include "common/settings.h" 8#include "common/settings.h"
9#include "core/core.h" 9#include "core/core.h"
10#include "core/memory.h"
10#include "video_core/engines/maxwell_3d.h" 11#include "video_core/engines/maxwell_3d.h"
11#include "video_core/engines/maxwell_dma.h" 12#include "video_core/engines/maxwell_dma.h"
12#include "video_core/memory_manager.h" 13#include "video_core/memory_manager.h"
@@ -130,11 +131,12 @@ void MaxwellDMA::Launch() {
130 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 131 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
131 read_buffer.resize_destructive(16); 132 read_buffer.resize_destructive(16);
132 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 133 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
133 memory_manager.ReadBlock( 134 Core::Memory::GpuGuestMemoryScoped<
134 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 135 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
135 read_buffer.data(), read_buffer.size()); 136 tmp_write_buffer(memory_manager,
136 memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), 137 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
137 read_buffer.size()); 138 16, &read_buffer);
139 tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16);
138 } 140 }
139 } else if (is_src_pitch && !is_dst_pitch) { 141 } else if (is_src_pitch && !is_dst_pitch) {
140 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 142 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -142,20 +144,19 @@ void MaxwellDMA::Launch() {
142 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 144 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
143 read_buffer.resize_destructive(16); 145 read_buffer.resize_destructive(16);
144 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 146 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
145 memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), 147 Core::Memory::GpuGuestMemoryScoped<
146 read_buffer.size()); 148 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
147 memory_manager.WriteBlockCached( 149 tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer);
148 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 150 tmp_write_buffer.SetAddressAndSize(
149 read_buffer.data(), read_buffer.size()); 151 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16);
150 } 152 }
151 } else { 153 } else {
152 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { 154 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
153 read_buffer.resize_destructive(regs.line_length_in); 155 Core::Memory::GpuGuestMemoryScoped<
154 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), 156 u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
155 regs.line_length_in, 157 tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in,
156 VideoCommon::CacheType::NoBufferCache); 158 &read_buffer);
157 memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), 159 tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in);
158 regs.line_length_in);
159 } 160 }
160 } 161 }
161 } 162 }
@@ -222,17 +223,15 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
222 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 223 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
223 224
224 const size_t dst_size = dst_operand.pitch * regs.line_count; 225 const size_t dst_size = dst_operand.pitch * regs.line_count;
225 read_buffer.resize_destructive(src_size);
226 write_buffer.resize_destructive(dst_size);
227 226
228 memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); 227 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
229 memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size); 228 memory_manager, src_operand.address, src_size, &read_buffer);
229 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
230 tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer);
230 231
231 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, 232 UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth,
232 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 233 x_offset, src_params.origin.y, x_elements, regs.line_count, block_height,
233 dst_operand.pitch); 234 block_depth, dst_operand.pitch);
234
235 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
236} 235}
237 236
238void MaxwellDMA::CopyPitchToBlockLinear() { 237void MaxwellDMA::CopyPitchToBlockLinear() {
@@ -287,18 +286,17 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
287 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 286 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
288 const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; 287 const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
289 288
290 read_buffer.resize_destructive(src_size); 289 GPUVAddr src_addr = regs.offset_in;
291 write_buffer.resize_destructive(dst_size); 290 GPUVAddr dst_addr = regs.offset_out;
292 291 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
293 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); 292 memory_manager, src_addr, src_size, &read_buffer);
294 memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); 293 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
295 294 tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer);
296 // If the input is linear and the output is tiled, swizzle the input and copy it over. 295
297 SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, 296 // If the input is linear and the output is tiled, swizzle the input and copy it over.
298 dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 297 SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth,
299 regs.pitch_in); 298 x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height,
300 299 block_depth, regs.pitch_in);
301 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
302} 300}
303 301
304void MaxwellDMA::CopyBlockLinearToBlockLinear() { 302void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@@ -342,23 +340,20 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
342 const u32 pitch = x_elements * bytes_per_pixel; 340 const u32 pitch = x_elements * bytes_per_pixel;
343 const size_t mid_buffer_size = pitch * regs.line_count; 341 const size_t mid_buffer_size = pitch * regs.line_count;
344 342
345 read_buffer.resize_destructive(src_size);
346 write_buffer.resize_destructive(dst_size);
347
348 intermediate_buffer.resize_destructive(mid_buffer_size); 343 intermediate_buffer.resize_destructive(mid_buffer_size);
349 344
350 memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); 345 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
351 memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); 346 memory_manager, regs.offset_in, src_size, &read_buffer);
347 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
348 tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer);
352 349
353 UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, 350 UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height,
354 src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, 351 src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count,
355 src.block_size.height, src.block_size.depth, pitch); 352 src.block_size.height, src.block_size.depth, pitch);
356 353
357 SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, 354 SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
358 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, 355 dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
359 dst.block_size.height, dst.block_size.depth, pitch); 356 dst.block_size.height, dst.block_size.depth, pitch);
360
361 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
362} 357}
363 358
364void MaxwellDMA::ReleaseSemaphore() { 359void MaxwellDMA::ReleaseSemaphore() {
diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp
index ff88cd03d..3a599f466 100644
--- a/src/video_core/engines/sw_blitter/blitter.cpp
+++ b/src/video_core/engines/sw_blitter/blitter.cpp
@@ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
159 const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); 159 const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
160 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); 160 const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
161 const size_t src_size = get_surface_size(src, src_bytes_per_pixel); 161 const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
162 impl->tmp_buffer.resize_destructive(src_size);
163 memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
164 162
165 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; 163 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer(
164 memory_manager, src.Address(), src_size, &impl->tmp_buffer);
166 165
166 const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
167 const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; 167 const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
168 168
169 impl->src_buffer.resize_destructive(src_copy_size); 169 impl->src_buffer.resize_destructive(src_copy_size);
@@ -200,12 +200,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
200 200
201 impl->dst_buffer.resize_destructive(dst_copy_size); 201 impl->dst_buffer.resize_destructive(dst_copy_size);
202 if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { 202 if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
203 UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, 203 UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height,
204 src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, 204 src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y,
205 src_extent_y, src.block_height, src.block_depth, 205 src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel);
206 src_extent_x * src_bytes_per_pixel);
207 } else { 206 } else {
208 process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, 207 process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
209 src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); 208 src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
210 } 209 }
211 210
@@ -221,20 +220,18 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
221 } 220 }
222 221
223 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); 222 const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
224 impl->tmp_buffer.resize_destructive(dst_size); 223 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite>
225 memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); 224 tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer);
226 225
227 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { 226 if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
228 SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, 227 SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height,
229 dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, 228 dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y,
230 dst_extent_y, dst.block_height, dst.block_depth, 229 dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel);
231 dst_extent_x * dst_bytes_per_pixel);
232 } else { 230 } else {
233 process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, 231 process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y,
234 dst.pitch, config.dst_x0, config.dst_y0, 232 dst.pitch, config.dst_x0, config.dst_y0,
235 static_cast<size_t>(dst_bytes_per_pixel)); 233 static_cast<size_t>(dst_bytes_per_pixel));
236 } 234 }
237 memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
238 return true; 235 return true;
239} 236}
240 237
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 45141e488..d16040613 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -10,13 +10,13 @@
10#include "core/device_memory.h" 10#include "core/device_memory.h"
11#include "core/hle/kernel/k_page_table.h" 11#include "core/hle/kernel/k_page_table.h"
12#include "core/hle/kernel/k_process.h" 12#include "core/hle/kernel/k_process.h"
13#include "core/memory.h"
14#include "video_core/invalidation_accumulator.h" 13#include "video_core/invalidation_accumulator.h"
15#include "video_core/memory_manager.h" 14#include "video_core/memory_manager.h"
16#include "video_core/rasterizer_interface.h" 15#include "video_core/rasterizer_interface.h"
17#include "video_core/renderer_base.h" 16#include "video_core/renderer_base.h"
18 17
19namespace Tegra { 18namespace Tegra {
19using Core::Memory::GuestMemoryFlags;
20 20
21std::atomic<size_t> MemoryManager::unique_identifier_generator{}; 21std::atomic<size_t> MemoryManager::unique_identifier_generator{};
22 22
@@ -587,13 +587,10 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
587 587
588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, 588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
589 VideoCommon::CacheType which) { 589 VideoCommon::CacheType which) {
590 tmp_buffer.resize_destructive(size); 590 Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data(
591 ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); 591 *this, gpu_src_addr, size);
592 592 data.SetAddressAndSize(gpu_dest_addr, size);
593 // The output block must be flushed in case it has data modified from the GPU.
594 // Fixes NPC geometry in Zombie Panic in Wonderland DX
595 FlushRegion(gpu_dest_addr, size, which); 593 FlushRegion(gpu_dest_addr, size, which);
596 WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which);
597} 594}
598 595
599bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { 596bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
@@ -758,4 +755,23 @@ void MemoryManager::FlushCaching() {
758 accumulator->Clear(); 755 accumulator->Clear();
759} 756}
760 757
// Read-only overload: translates src_addr with GpuToCpuAddress and, when the
// translation yields an address, returns memory.GetSpan(*cpu_addr, size).
// Returns nullptr when the GPU address has no CPU translation.
// NOTE(review): unlike the non-const overload, this one does not call
// IsContinuousRange(src_addr, size) first - confirm that the difference is intended.
758const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const {
759 auto cpu_addr = GpuToCpuAddress(src_addr);
760 if (cpu_addr) {
761 return memory.GetSpan(*cpu_addr, size);
762 }
763 return nullptr;
764}
765
// Mutable overload: returns nullptr unless IsContinuousRange(src_addr, size) holds,
// then translates src_addr with GpuToCpuAddress and returns
// memory.GetSpan(*cpu_addr, size). Also returns nullptr when the GPU address has no
// CPU translation.
766u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) {
// Reject ranges that IsContinuousRange does not accept before translating.
767 if (!IsContinuousRange(src_addr, size)) {
768 return nullptr;
769 }
770 auto cpu_addr = GpuToCpuAddress(src_addr);
771 if (cpu_addr) {
772 return memory.GetSpan(*cpu_addr, size);
773 }
774 return nullptr;
775}
776
761} // namespace Tegra 777} // namespace Tegra
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 4202c26ff..9b311b9e5 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -15,6 +15,7 @@
15#include "common/range_map.h" 15#include "common/range_map.h"
16#include "common/scratch_buffer.h" 16#include "common/scratch_buffer.h"
17#include "common/virtual_buffer.h" 17#include "common/virtual_buffer.h"
18#include "core/memory.h"
18#include "video_core/cache_types.h" 19#include "video_core/cache_types.h"
19#include "video_core/pte_kind.h" 20#include "video_core/pte_kind.h"
20 21
@@ -62,6 +63,20 @@ public:
62 [[nodiscard]] u8* GetPointer(GPUVAddr addr); 63 [[nodiscard]] u8* GetPointer(GPUVAddr addr);
63 [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; 64 [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
64 65
// Typed pointer lookup: translates addr with GpuToCpuAddress and returns the result
// of memory.GetPointer(*address). Returns a value-initialized (null) pointer when the
// GPU address has no CPU translation.
// NOTE(review): T is not passed on to memory.GetPointer, so whether the returned
// pointer converts to T* depends on that function's declaration, which is not
// visible here - confirm this compiles for the intended T.
66 template <typename T>
67 [[nodiscard]] T* GetPointer(GPUVAddr addr) {
68 const auto address{GpuToCpuAddress(addr)};
69 if (!address) {
70 return {};
71 }
72 return memory.GetPointer(*address);
73 }
74
// Const counterpart of the typed pointer lookup; forwards to GetPointer with the
// same address.
// NOTE(review): the forwarded template argument is T* rather than T, and because
// this member is const the call can only select a const overload - possibly this
// template itself. Looks like it would not produce a const T* if instantiated;
// confirm and fix in the real header.
75 template <typename T>
76 [[nodiscard]] const T* GetPointer(GPUVAddr addr) const {
77 return GetPointer<T*>(addr);
78 }
79
65 /** 80 /**
66 * ReadBlock and WriteBlock are full read and write operations over virtual 81 * ReadBlock and WriteBlock are full read and write operations over virtual
67 * GPU Memory. It's important to use these when GPU memory may not be continuous 82 * GPU Memory. It's important to use these when GPU memory may not be continuous
@@ -139,6 +154,9 @@ public:
139 154
140 void FlushCaching(); 155 void FlushCaching();
141 156
157 const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const;
158 u8* GetSpan(const GPUVAddr src_addr, const std::size_t size);
159
142private: 160private:
143 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> 161 template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
144 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, 162 inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 79f158db4..a1457798a 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -8,6 +8,7 @@
8 8
9#include "common/alignment.h" 9#include "common/alignment.h"
10#include "common/settings.h" 10#include "common/settings.h"
11#include "core/memory.h"
11#include "video_core/control/channel_state.h" 12#include "video_core/control/channel_state.h"
12#include "video_core/dirty_flags.h" 13#include "video_core/dirty_flags.h"
13#include "video_core/engines/kepler_compute.h" 14#include "video_core/engines/kepler_compute.h"
@@ -1022,19 +1023,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
1022 runtime.AccelerateImageUpload(image, staging, uploads); 1023 runtime.AccelerateImageUpload(image, staging, uploads);
1023 return; 1024 return;
1024 } 1025 }
1025 const size_t guest_size_bytes = image.guest_size_bytes; 1026
1026 swizzle_data_buffer.resize_destructive(guest_size_bytes); 1027 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
1027 gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); 1028 *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
1028 1029
1029 if (True(image.flags & ImageFlagBits::Converted)) { 1030 if (True(image.flags & ImageFlagBits::Converted)) {
1030 unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); 1031 unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
1031 auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, 1032 auto copies =
1032 unswizzle_data_buffer); 1033 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
1033 ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); 1034 ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
1034 image.UploadMemory(staging, copies); 1035 image.UploadMemory(staging, copies);
1035 } else { 1036 } else {
1036 const auto copies = 1037 const auto copies =
1037 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); 1038 UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
1038 image.UploadMemory(staging, copies); 1039 image.UploadMemory(staging, copies);
1039 } 1040 }
1040} 1041}
@@ -1227,11 +1228,12 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
1227 decode->image_id = image_id; 1228 decode->image_id = image_id;
1228 async_decodes.push_back(std::move(decode)); 1229 async_decodes.push_back(std::move(decode));
1229 1230
1230 Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes); 1231 static Common::ScratchBuffer<u8> local_unswizzle_data_buffer;
1231 const size_t guest_size_bytes = image.guest_size_bytes; 1232 local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
1232 swizzle_data_buffer.resize_destructive(guest_size_bytes); 1233 Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data(
1233 gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); 1234 *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer);
1234 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, 1235
1236 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
1235 local_unswizzle_data_buffer); 1237 local_unswizzle_data_buffer);
1236 const size_t out_size = MapSizeBytes(image); 1238 const size_t out_size = MapSizeBytes(image);
1237 1239
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 0de6ed09d..a83f5d41c 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -20,6 +20,7 @@
20#include "common/div_ceil.h" 20#include "common/div_ceil.h"
21#include "common/scratch_buffer.h" 21#include "common/scratch_buffer.h"
22#include "common/settings.h" 22#include "common/settings.h"
23#include "core/memory.h"
23#include "video_core/compatible_formats.h" 24#include "video_core/compatible_formats.h"
24#include "video_core/engines/maxwell_3d.h" 25#include "video_core/engines/maxwell_3d.h"
25#include "video_core/memory_manager.h" 26#include "video_core/memory_manager.h"
@@ -544,17 +545,15 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
544 tile_size.height, info.tile_width_spacing); 545 tile_size.height, info.tile_width_spacing);
545 const size_t subresource_size = sizes[level]; 546 const size_t subresource_size = sizes[level];
546 547
547 tmp_buffer.resize_destructive(subresource_size);
548 const std::span<u8> dst(tmp_buffer);
549
550 for (s32 layer = 0; layer < info.resources.layers; ++layer) { 548 for (s32 layer = 0; layer < info.resources.layers; ++layer) {
551 const std::span<const u8> src = input.subspan(host_offset); 549 const std::span<const u8> src = input.subspan(host_offset);
552 gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); 550 {
553 551 Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite>
554 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, 552 dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);
555 num_tiles.depth, block.height, block.depth);
556 553
557 gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); 554 SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
555 num_tiles.depth, block.height, block.depth);
556 }
558 557
559 host_offset += host_bytes_per_layer; 558 host_offset += host_bytes_per_layer;
560 guest_offset += layer_stride; 559 guest_offset += layer_stride;
@@ -837,6 +836,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
837 const Extent3D size = info.size; 836 const Extent3D size = info.size;
838 837
839 if (info.type == ImageType::Linear) { 838 if (info.type == ImageType::Linear) {
839 ASSERT(output.size_bytes() >= guest_size_bytes);
840 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); 840 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
841 841
842 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); 842 ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
@@ -904,16 +904,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
904 return copies; 904 return copies;
905} 905}
906 906
907BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
908 const ImageBase& image, std::span<u8> output) {
909 gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
910 return BufferCopy{
911 .src_offset = 0,
912 .dst_offset = 0,
913 .size = image.guest_size_bytes,
914 };
915}
916
917void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 907void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
918 std::span<BufferImageCopy> copies) { 908 std::span<BufferImageCopy> copies) {
919 u32 output_offset = 0; 909 u32 output_offset = 0;
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index ab45a43c4..5a0649d24 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -66,9 +66,6 @@ struct OverlapResult {
66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
67 std::span<const u8> input, std::span<u8> output); 67 std::span<const u8> input, std::span<u8> output);
68 68
69[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
70 const ImageBase& image, std::span<u8> output);
71
72void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 69void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
73 std::span<BufferImageCopy> copies); 70 std::span<BufferImageCopy> copies);
74 71