diff options
| -rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 6 | ||||
| -rw-r--r-- | src/video_core/memory_manager.cpp | 91 | ||||
| -rw-r--r-- | src/video_core/memory_manager.h | 5 |
3 files changed, 47 insertions, 55 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 262d0fc6e..eb2f9ecf2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -53,7 +53,7 @@ public: | |||
| 53 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { | 53 | if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { |
| 54 | auto& memory_manager = system.GPU().MemoryManager(); | 54 | auto& memory_manager = system.GPU().MemoryManager(); |
| 55 | if (use_fast_cbuf) { | 55 | if (use_fast_cbuf) { |
| 56 | if (Tegra::MemoryManager::IsGranularRange(gpu_addr, size)) { | 56 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 57 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 57 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
| 58 | return ConstBufferUpload(host_ptr, size); | 58 | return ConstBufferUpload(host_ptr, size); |
| 59 | } else { | 59 | } else { |
| @@ -62,7 +62,7 @@ public: | |||
| 62 | return ConstBufferUpload(staging_buffer.data(), size); | 62 | return ConstBufferUpload(staging_buffer.data(), size); |
| 63 | } | 63 | } |
| 64 | } else { | 64 | } else { |
| 65 | if (Tegra::MemoryManager::IsGranularRange(gpu_addr, size)) { | 65 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 66 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 66 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
| 67 | return StreamBufferUpload(host_ptr, size, alignment); | 67 | return StreamBufferUpload(host_ptr, size, alignment); |
| 68 | } else { | 68 | } else { |
| @@ -228,7 +228,7 @@ private: | |||
| 228 | auto& memory_manager = system.GPU().MemoryManager(); | 228 | auto& memory_manager = system.GPU().MemoryManager(); |
| 229 | const VAddr cpu_addr_end = cpu_addr + size; | 229 | const VAddr cpu_addr_end = cpu_addr + size; |
| 230 | MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); | 230 | MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); |
| 231 | if (Tegra::MemoryManager::IsGranularRange(gpu_addr, size)) { | 231 | if (memory_manager.IsGranularRange(gpu_addr, size)) { |
| 232 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); | 232 | u8* host_ptr = memory_manager.GetPointer(gpu_addr); |
| 233 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); | 233 | UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); |
| 234 | } else { | 234 | } else { |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index cddef8d86..eb934ad5e 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -139,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const { | |||
| 139 | return {}; | 139 | return {}; |
| 140 | } | 140 | } |
| 141 | 141 | ||
| 142 | const u8* page_pointer{page_table.pointers[addr >> page_bits]}; | 142 | const u8* page_pointer{GetPointer(addr)}; |
| 143 | if (page_pointer) { | 143 | if (page_pointer) { |
| 144 | // NOTE: Avoid adding any extra logic to this fast-path block | 144 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 145 | T value; | 145 | T value; |
| 146 | std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T)); | 146 | std::memcpy(&value, page_pointer, sizeof(T)); |
| 147 | return value; | 147 | return value; |
| 148 | } | 148 | } |
| 149 | 149 | ||
| @@ -166,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) { | |||
| 166 | return; | 166 | return; |
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | u8* page_pointer{page_table.pointers[addr >> page_bits]}; | 169 | u8* page_pointer{GetPointer(addr)}; |
| 170 | if (page_pointer) { | 170 | if (page_pointer) { |
| 171 | // NOTE: Avoid adding any extra logic to this fast-path block | 171 | // NOTE: Avoid adding any extra logic to this fast-path block |
| 172 | std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T)); | 172 | std::memcpy(page_pointer, &data, sizeof(T)); |
| 173 | return; | 173 | return; |
| 174 | } | 174 | } |
| 175 | 175 | ||
| @@ -200,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { | |||
| 200 | return {}; | 200 | return {}; |
| 201 | } | 201 | } |
| 202 | 202 | ||
| 203 | u8* const page_pointer{page_table.pointers[addr >> page_bits]}; | 203 | auto& memory = system.Memory(); |
| 204 | if (page_pointer != nullptr) { | 204 | |
| 205 | return page_pointer + (addr & page_mask); | 205 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; |
| 206 | |||
| 207 | if (page_addr != 0) { | ||
| 208 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 206 | } | 209 | } |
| 207 | 210 | ||
| 208 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 211 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); |
| @@ -214,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const { | |||
| 214 | return {}; | 217 | return {}; |
| 215 | } | 218 | } |
| 216 | 219 | ||
| 217 | const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; | 220 | const auto& memory = system.Memory(); |
| 218 | if (page_pointer != nullptr) { | 221 | |
| 219 | return page_pointer + (addr & page_mask); | 222 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; |
| 223 | |||
| 224 | if (page_addr != 0) { | ||
| 225 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 220 | } | 226 | } |
| 221 | 227 | ||
| 222 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 228 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); |
| @@ -237,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s | |||
| 237 | std::size_t page_index{src_addr >> page_bits}; | 243 | std::size_t page_index{src_addr >> page_bits}; |
| 238 | std::size_t page_offset{src_addr & page_mask}; | 244 | std::size_t page_offset{src_addr & page_mask}; |
| 239 | 245 | ||
| 246 | auto& memory = system.Memory(); | ||
| 247 | |||
| 240 | while (remaining_size > 0) { | 248 | while (remaining_size > 0) { |
| 241 | const std::size_t copy_amount{ | 249 | const std::size_t copy_amount{ |
| 242 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 250 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 243 | 251 | ||
| 244 | switch (page_table.attributes[page_index]) { | 252 | switch (page_table.attributes[page_index]) { |
| 245 | case Common::PageType::Memory: { | 253 | case Common::PageType::Memory: { |
| 246 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | 254 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; |
| 247 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | 255 | // Flush must happen on the rasterizer interface, such that memory is always synchronous |
| 248 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. | 256 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. |
| 249 | rasterizer.FlushRegion(page_table.backing_addr[page_index] + page_offset, copy_amount); | 257 | rasterizer.FlushRegion(src_addr, copy_amount); |
| 250 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 258 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 251 | break; | 259 | break; |
| 252 | } | 260 | } |
| 253 | default: | 261 | default: |
| @@ -267,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, | |||
| 267 | std::size_t page_index{src_addr >> page_bits}; | 275 | std::size_t page_index{src_addr >> page_bits}; |
| 268 | std::size_t page_offset{src_addr & page_mask}; | 276 | std::size_t page_offset{src_addr & page_mask}; |
| 269 | 277 | ||
| 278 | auto& memory = system.Memory(); | ||
| 279 | |||
| 270 | while (remaining_size > 0) { | 280 | while (remaining_size > 0) { |
| 271 | const std::size_t copy_amount{ | 281 | const std::size_t copy_amount{ |
| 272 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 282 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 273 | const u8* page_pointer = page_table.pointers[page_index]; | 283 | const u8* page_pointer = page_table.pointers[page_index]; |
| 274 | if (page_pointer) { | 284 | if (page_pointer) { |
| 275 | const u8* src_ptr{page_pointer + page_offset}; | 285 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; |
| 276 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 286 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 277 | } else { | 287 | } else { |
| 278 | std::memset(dest_buffer, 0, copy_amount); | 288 | std::memset(dest_buffer, 0, copy_amount); |
| 279 | } | 289 | } |
| @@ -289,18 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const | |||
| 289 | std::size_t page_index{dest_addr >> page_bits}; | 299 | std::size_t page_index{dest_addr >> page_bits}; |
| 290 | std::size_t page_offset{dest_addr & page_mask}; | 300 | std::size_t page_offset{dest_addr & page_mask}; |
| 291 | 301 | ||
| 302 | auto& memory = system.Memory(); | ||
| 303 | |||
| 292 | while (remaining_size > 0) { | 304 | while (remaining_size > 0) { |
| 293 | const std::size_t copy_amount{ | 305 | const std::size_t copy_amount{ |
| 294 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 306 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 295 | 307 | ||
| 296 | switch (page_table.attributes[page_index]) { | 308 | switch (page_table.attributes[page_index]) { |
| 297 | case Common::PageType::Memory: { | 309 | case Common::PageType::Memory: { |
| 298 | u8* dest_ptr{page_table.pointers[page_index] + page_offset}; | 310 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; |
| 299 | // Invalidate must happen on the rasterizer interface, such that memory is always | 311 | // Invalidate must happen on the rasterizer interface, such that memory is always |
| 300 | // synchronous when it is written (even when in asynchronous GPU mode). | 312 | // synchronous when it is written (even when in asynchronous GPU mode). |
| 301 | rasterizer.InvalidateRegion(page_table.backing_addr[page_index] + page_offset, | 313 | rasterizer.InvalidateRegion(dest_addr, copy_amount); |
| 302 | copy_amount); | 314 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 303 | std::memcpy(dest_ptr, src_buffer, copy_amount); | ||
| 304 | break; | 315 | break; |
| 305 | } | 316 | } |
| 306 | default: | 317 | default: |
| @@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| 320 | std::size_t page_index{dest_addr >> page_bits}; | 331 | std::size_t page_index{dest_addr >> page_bits}; |
| 321 | std::size_t page_offset{dest_addr & page_mask}; | 332 | std::size_t page_offset{dest_addr & page_mask}; |
| 322 | 333 | ||
| 334 | auto& memory = system.Memory(); | ||
| 335 | |||
| 323 | while (remaining_size > 0) { | 336 | while (remaining_size > 0) { |
| 324 | const std::size_t copy_amount{ | 337 | const std::size_t copy_amount{ |
| 325 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 338 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 326 | u8* page_pointer = page_table.pointers[page_index]; | 339 | u8* page_pointer = page_table.pointers[page_index]; |
| 327 | if (page_pointer) { | 340 | if (page_pointer) { |
| 328 | u8* dest_ptr{page_pointer + page_offset}; | 341 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; |
| 329 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 342 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 330 | } | 343 | } |
| 331 | page_index++; | 344 | page_index++; |
| 332 | page_offset = 0; | 345 | page_offset = 0; |
| @@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| 336 | } | 349 | } |
| 337 | 350 | ||
| 338 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | 351 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { |
| 339 | std::size_t remaining_size{size}; | 352 | std::vector<u8> tmp_buffer(size); |
| 340 | std::size_t page_index{src_addr >> page_bits}; | 353 | ReadBlock(src_addr, tmp_buffer.data(), size); |
| 341 | std::size_t page_offset{src_addr & page_mask}; | 354 | WriteBlock(dest_addr, tmp_buffer.data(), size); |
| 342 | |||
| 343 | while (remaining_size > 0) { | ||
| 344 | const std::size_t copy_amount{ | ||
| 345 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 346 | |||
| 347 | switch (page_table.attributes[page_index]) { | ||
| 348 | case Common::PageType::Memory: { | ||
| 349 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | ||
| 350 | // when it is copied (even when in asynchronous GPU mode). | ||
| 351 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||
| 352 | rasterizer.FlushRegion(page_table.backing_addr[page_index] + page_offset, copy_amount); | ||
| 353 | WriteBlock(dest_addr, src_ptr, copy_amount); | ||
| 354 | break; | ||
| 355 | } | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | } | ||
| 359 | |||
| 360 | page_index++; | ||
| 361 | page_offset = 0; | ||
| 362 | dest_addr += static_cast<VAddr>(copy_amount); | ||
| 363 | src_addr += static_cast<VAddr>(copy_amount); | ||
| 364 | remaining_size -= copy_amount; | ||
| 365 | } | ||
| 366 | } | 355 | } |
| 367 | 356 | ||
| 368 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | 357 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { |
| @@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const | |||
| 371 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); | 360 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); |
| 372 | } | 361 | } |
| 373 | 362 | ||
| 363 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { | ||
| 364 | const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; | ||
| 365 | const std::size_t page = (addr & Memory::PAGE_MASK) + size; | ||
| 366 | return page <= Memory::PAGE_SIZE; | ||
| 367 | } | ||
| 368 | |||
| 374 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | 369 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| 375 | VAddr backing_addr) { | 370 | VAddr backing_addr) { |
| 376 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, | 371 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index f4ec77a3d..987400fdd 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -97,10 +97,7 @@ public: | |||
| 97 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | 97 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); |
| 98 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); | 98 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); |
| 99 | 99 | ||
| 100 | static bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { | 100 | bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); |
| 101 | const std::size_t page = (gpu_addr & page_mask) + size; | ||
| 102 | return page <= page_size; | ||
| 103 | } | ||
| 104 | 101 | ||
| 105 | private: | 102 | private: |
| 106 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; | 103 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; |