 32 files changed, 573 insertions(+), 430 deletions(-)
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index f0888327f..6061d37ae 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -242,7 +242,52 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
+                std::memcpy(dest_buffer, host_ptr, copy_amount);
+                break;
+            }
+            default:
+                UNREACHABLE();
+            }
+
+            page_index++;
+            page_offset = 0;
+            dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
+                         const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+
+        std::size_t remaining_size = size;
+        std::size_t page_index = src_addr >> PAGE_BITS;
+        std::size_t page_offset = src_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, src_addr, size);
+                std::memset(dest_buffer, 0, copy_amount);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                const u8* const src_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_buffer, src_ptr, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
                 std::memcpy(dest_buffer, host_ptr, copy_amount);
                 break;
             }
@@ -261,6 +306,10 @@ struct Memory::Impl {
         ReadBlock(*system.CurrentProcess(), src_addr, dest_buffer, size);
     }
 
+    void ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+        ReadBlockUnsafe(*system.CurrentProcess(), src_addr, dest_buffer, size);
+    }
+
     void WriteBlock(const Kernel::Process& process, const VAddr dest_addr, const void* src_buffer,
                     const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
@@ -290,7 +339,50 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
+                std::memcpy(host_ptr, src_buffer, copy_amount);
+                break;
+            }
+            default:
+                UNREACHABLE();
+            }
+
+            page_index++;
+            page_offset = 0;
+            src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+            remaining_size -= copy_amount;
+        }
+    }
+
+    void WriteBlockUnsafe(const Kernel::Process& process, const VAddr dest_addr,
+                          const void* src_buffer, const std::size_t size) {
+        const auto& page_table = process.VMManager().page_table;
+        std::size_t remaining_size = size;
+        std::size_t page_index = dest_addr >> PAGE_BITS;
+        std::size_t page_offset = dest_addr & PAGE_MASK;
+
+        while (remaining_size > 0) {
+            const std::size_t copy_amount =
+                std::min(static_cast<std::size_t>(PAGE_SIZE) - page_offset, remaining_size);
+            const auto current_vaddr = static_cast<VAddr>((page_index << PAGE_BITS) + page_offset);
+
+            switch (page_table.attributes[page_index]) {
+            case Common::PageType::Unmapped: {
+                LOG_ERROR(HW_Memory,
+                          "Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
+                          current_vaddr, dest_addr, size);
+                break;
+            }
+            case Common::PageType::Memory: {
+                DEBUG_ASSERT(page_table.pointers[page_index]);
+
+                u8* const dest_ptr =
+                    page_table.pointers[page_index] + page_offset + (page_index << PAGE_BITS);
+                std::memcpy(dest_ptr, src_buffer, copy_amount);
+                break;
+            }
+            case Common::PageType::RasterizerCachedMemory: {
+                u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
                 std::memcpy(host_ptr, src_buffer, copy_amount);
                 break;
             }
@@ -309,6 +401,10 @@ struct Memory::Impl {
         WriteBlock(*system.CurrentProcess(), dest_addr, src_buffer, size);
     }
 
+    void WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer, const std::size_t size) {
+        WriteBlockUnsafe(*system.CurrentProcess(), dest_addr, src_buffer, size);
+    }
+
     void ZeroBlock(const Kernel::Process& process, const VAddr dest_addr, const std::size_t size) {
         const auto& page_table = process.VMManager().page_table;
         std::size_t remaining_size = size;
@@ -337,7 +433,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().InvalidateRegion(current_vaddr, copy_amount);
                 std::memset(host_ptr, 0, copy_amount);
                 break;
             }
@@ -384,7 +480,7 @@ struct Memory::Impl {
             }
             case Common::PageType::RasterizerCachedMemory: {
                 const u8* const host_ptr = GetPointerFromVMA(process, current_vaddr);
-                system.GPU().FlushRegion(ToCacheAddr(host_ptr), copy_amount);
+                system.GPU().FlushRegion(current_vaddr, copy_amount);
                 WriteBlock(process, dest_addr, host_ptr, copy_amount);
                 break;
             }
@@ -545,7 +641,7 @@ struct Memory::Impl {
            break;
        case Common::PageType::RasterizerCachedMemory: {
            const u8* const host_ptr = GetPointerFromVMA(vaddr);
-           system.GPU().FlushRegion(ToCacheAddr(host_ptr), sizeof(T));
+           system.GPU().FlushRegion(vaddr, sizeof(T));
            T value;
            std::memcpy(&value, host_ptr, sizeof(T));
            return value;
@@ -587,7 +683,7 @@ struct Memory::Impl {
            break;
        case Common::PageType::RasterizerCachedMemory: {
            u8* const host_ptr{GetPointerFromVMA(vaddr)};
-           system.GPU().InvalidateRegion(ToCacheAddr(host_ptr), sizeof(T));
+           system.GPU().InvalidateRegion(vaddr, sizeof(T));
            std::memcpy(host_ptr, &data, sizeof(T));
            break;
        }
@@ -696,6 +792,15 @@ void Memory::ReadBlock(const VAddr src_addr, void* dest_buffer, const std::size_
     impl->ReadBlock(src_addr, dest_buffer, size);
 }
 
+void Memory::ReadBlockUnsafe(const Kernel::Process& process, const VAddr src_addr,
+                             void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(process, src_addr, dest_buffer, size);
+}
+
+void Memory::ReadBlockUnsafe(const VAddr src_addr, void* dest_buffer, const std::size_t size) {
+    impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
+}
+
 void Memory::WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
                         std::size_t size) {
     impl->WriteBlock(process, dest_addr, src_buffer, size);
@@ -705,6 +810,16 @@ void Memory::WriteBlock(const VAddr dest_addr, const void* src_buffer, const std
     impl->WriteBlock(dest_addr, src_buffer, size);
 }
 
+void Memory::WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr,
+                              const void* src_buffer, std::size_t size) {
+    impl->WriteBlockUnsafe(process, dest_addr, src_buffer, size);
+}
+
+void Memory::WriteBlockUnsafe(const VAddr dest_addr, const void* src_buffer,
+                              const std::size_t size) {
+    impl->WriteBlockUnsafe(dest_addr, src_buffer, size);
+}
+
 void Memory::ZeroBlock(const Kernel::Process& process, VAddr dest_addr, std::size_t size) {
     impl->ZeroBlock(process, dest_addr, size);
 }
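The new ReadBlockUnsafe/WriteBlockUnsafe overloads duplicate the ReadBlock/WriteBlock page walk but skip the system.GPU().FlushRegion()/InvalidateRegion() calls on RasterizerCachedMemory pages. A minimal sketch of the trade-off, assuming a hypothetical caller that already knows whether the GPU caches are coherent for the region (the helper name is invented for illustration; the Memory API is from this diff):

    #include <cstddef>
    #include <vector>

    #include "core/core.h"
    #include "core/memory.h"

    // Hypothetical helper: copy a guest-memory region into a host staging
    // buffer. ReadBlockUnsafe performs the same page walk as ReadBlock but
    // never flushes the GPU caches, so it may observe stale bytes on pages
    // the rasterizer still owns; use it only when that is acceptable.
    std::vector<u8> StageGuestMemory(Core::System& system, VAddr src_addr, std::size_t size,
                                     bool gpu_caches_known_clean) {
        std::vector<u8> staging(size);
        if (gpu_caches_known_clean) {
            system.Memory().ReadBlockUnsafe(src_addr, staging.data(), size);
        } else {
            system.Memory().ReadBlock(src_addr, staging.data(), size);
        }
        return staging;
    }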
diff --git a/src/core/memory.h b/src/core/memory.h
index 8913a9da4..b92d678a4 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -295,6 +295,27 @@ public:
                    std::size_t size);
 
     /**
+     * Reads a contiguous block of bytes from a specified process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param process     The process to read the data from.
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       process' address space.
+     */
+    void ReadBlockUnsafe(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
+                         std::size_t size);
+
+    /**
      * Reads a contiguous block of bytes from the current process' address space.
      *
      * @param src_addr    The virtual address to begin reading from.
@@ -313,6 +334,25 @@ public:
     void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
 
     /**
+     * Reads a contiguous block of bytes from the current process' address space.
+     * This unsafe version does not trigger GPU flushing.
+     *
+     * @param src_addr    The virtual address to begin reading from.
+     * @param dest_buffer The buffer to place the read bytes into.
+     * @param size        The amount of data to read, in bytes.
+     *
+     * @note If a size of 0 is specified, then this function reads nothing and
+     *       no attempts to access memory are made at all.
+     *
+     * @pre dest_buffer must be at least size bytes in length, otherwise a
+     *      buffer overrun will occur.
+     *
+     * @post The range [dest_buffer, size) contains the read bytes from the
+     *       current process' address space.
+     */
+    void ReadBlockUnsafe(VAddr src_addr, void* dest_buffer, std::size_t size);
+
+    /**
      * Writes a range of bytes into a given process' address space at the specified
      * virtual address.
      *
@@ -336,6 +376,26 @@ public:
                     std::size_t size);
 
     /**
+     * Writes a range of bytes into a given process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU memory.
+     *
+     * @param process    The process to write data into the address space of.
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     */
+    void WriteBlockUnsafe(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
+                          std::size_t size);
+
+    /**
      * Writes a range of bytes into the current process' address space at the specified
      * virtual address.
      *
@@ -357,6 +417,24 @@ public:
     void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
 
     /**
+     * Writes a range of bytes into the current process' address space at the specified
+     * virtual address.
+     * This unsafe version does not invalidate GPU memory.
+     *
+     * @param dest_addr  The destination virtual address to begin writing the data at.
+     * @param src_buffer The data to write into the current process' address space.
+     * @param size       The size of the data to write, in bytes.
+     *
+     * @post The address range [dest_addr, size) in the current process' address space
+     *       contains the data that was within src_buffer.
+     *
+     * @post If an attempt is made to write into an unmapped region of memory, the writes
+     *       will be ignored and an error will be logged.
+     */
+    void WriteBlockUnsafe(VAddr dest_addr, const void* src_buffer, std::size_t size);
+
+    /**
      * Fills the specified address range within a process' address space with zeroes.
      *
      * @param process   The process that will have a portion of its memory zeroed out.
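As the @post notes say, the unsafe writers still skip unmapped pages (with an error log) but never call system.GPU().InvalidateRegion(). A hedged sketch of the write-back pattern they exist for, mirroring the FlushMap change in the buffer cache below (the helper name is invented for illustration):

    #include <vector>

    #include "core/core.h"
    #include "core/memory.h"

    // Illustrative flush path: data just downloaded *from* the GPU is written
    // back into guest memory. Invalidating the GPU caches for this region
    // again would be redundant work, so the unsafe variant is the right tool.
    void WriteBackToGuest(Core::System& system, VAddr dest_addr, const std::vector<u8>& staging) {
        system.Memory().WriteBlockUnsafe(dest_addr, staging.data(), staging.size());
    }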
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
index 4b9193182..e35ee0b67 100644
--- a/src/video_core/buffer_cache/buffer_block.h
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -15,37 +15,29 @@ namespace VideoCommon {
 
 class BufferBlock {
 public:
-    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
-        return (cache_addr < end) && (cache_addr_end > start);
+    bool Overlaps(const VAddr start, const VAddr end) const {
+        return (cpu_addr < end) && (cpu_addr_end > start);
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
-        return cache_addr <= other_start && other_end <= cache_addr_end;
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
+        return cpu_addr <= other_start && other_end <= cpu_addr_end;
     }
 
-    u8* GetWritableHostPtr() const {
-        return FromCacheAddr(cache_addr);
-    }
-
-    u8* GetWritableHostPtr(std::size_t offset) const {
-        return FromCacheAddr(cache_addr + offset);
-    }
-
-    std::size_t GetOffset(const CacheAddr in_addr) {
-        return static_cast<std::size_t>(in_addr - cache_addr);
-    }
-
-    CacheAddr GetCacheAddr() const {
-        return cache_addr;
-    }
-
-    CacheAddr GetCacheAddrEnd() const {
-        return cache_addr_end;
-    }
-
-    void SetCacheAddr(const CacheAddr new_addr) {
-        cache_addr = new_addr;
-        cache_addr_end = new_addr + size;
+    std::size_t GetOffset(const VAddr in_addr) {
+        return static_cast<std::size_t>(in_addr - cpu_addr);
+    }
+
+    VAddr GetCpuAddr() const {
+        return cpu_addr;
+    }
+
+    VAddr GetCpuAddrEnd() const {
+        return cpu_addr_end;
+    }
+
+    void SetCpuAddr(const VAddr new_addr) {
+        cpu_addr = new_addr;
+        cpu_addr_end = new_addr + size;
     }
 
     std::size_t GetSize() const {
@@ -61,14 +53,14 @@ public:
     }
 
 protected:
-    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
-        SetCacheAddr(cache_addr);
+    explicit BufferBlock(VAddr cpu_addr, const std::size_t size) : size{size} {
+        SetCpuAddr(cpu_addr);
     }
     ~BufferBlock() = default;
 
 private:
-    CacheAddr cache_addr{};
-    CacheAddr cache_addr_end{};
+    VAddr cpu_addr{};
+    VAddr cpu_addr_end{};
     std::size_t size{};
    u64 epoch{};
 };
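Overlaps() and IsInside() treat a block as the half-open range [cpu_addr, cpu_addr_end). A compile-time restatement of that semantics, standalone and with illustrative addresses:

    #include <cstdint>

    using VAddr = std::uint64_t; // stand-in for the common_types.h alias

    // BufferBlock::Overlaps, restated for a block covering [base, base_end).
    constexpr bool Overlaps(VAddr base, VAddr base_end, VAddr start, VAddr end) {
        return (base < end) && (base_end > start);
    }

    static_assert(Overlaps(0x1000, 0x2000, 0x1FFF, 0x3000));  // shares the last byte
    static_assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x3000)); // adjacent, not overlapping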
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 186aca61d..b57c0d4d4 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -19,6 +19,7 @@
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
+#include "core/memory.h"
 #include "video_core/buffer_cache/buffer_block.h"
 #include "video_core/buffer_cache/map_interval.h"
 #include "video_core/memory_manager.h"
@@ -37,28 +38,45 @@ public:
                                              bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
-        auto& memory_manager = system.GPU().MemoryManager();
-        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
-        if (!host_ptr) {
+        const std::optional<VAddr> cpu_addr_opt =
+            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+
+        if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        VAddr cpu_addr = *cpu_addr_opt;
 
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
         if (use_fast_cbuf || size < max_stream_size) {
-            if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
+                auto& memory_manager = system.GPU().MemoryManager();
                 if (use_fast_cbuf) {
-                    return ConstBufferUpload(host_ptr, size);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return ConstBufferUpload(host_ptr, size);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return ConstBufferUpload(staging_buffer.data(), size);
+                    }
                 } else {
-                    return StreamBufferUpload(host_ptr, size, alignment);
+                    if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                        const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+                        return StreamBufferUpload(host_ptr, size, alignment);
+                    } else {
+                        staging_buffer.resize(size);
+                        memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                        return StreamBufferUpload(staging_buffer.data(), size, alignment);
+                    }
                 }
             }
         }
 
-        auto block = GetBlock(cache_addr, size);
-        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        auto block = GetBlock(cpu_addr, size);
+        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (!map->IsWritten()) {
@@ -71,7 +89,7 @@ public:
             }
         }
 
-        const u64 offset = static_cast<u64>(block->GetOffset(cache_addr));
+        const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr));
 
         return {ToHandle(block), offset};
     }
@@ -112,7 +130,7 @@ public:
     }
 
     /// Write any cached resources overlapping the specified region back to memory
-    void FlushRegion(CacheAddr addr, std::size_t size) {
+    void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -127,7 +145,7 @@ public:
     }
 
     /// Mark the specified region as being invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size) {
+    void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
 
         std::vector<MapInterval> objects = GetMapsInRange(addr, size);
@@ -152,7 +170,7 @@ protected:
 
     virtual void WriteBarrier() = 0;
 
-    virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+    virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
 
     virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
                                  const u8* data) = 0;
@@ -169,20 +187,17 @@ protected:
 
     /// Register an object into the cache
     void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const CacheAddr cache_ptr = new_map->GetStart();
-        const std::optional<VAddr> cpu_addr =
-            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
-        if (!cache_ptr || !cpu_addr) {
+        const VAddr cpu_addr = new_map->GetStart();
+        if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
                          new_map->GetGpuAddress());
             return;
         }
         const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->SetCpuAddress(*cpu_addr);
         new_map->MarkAsRegistered(true);
         const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
         mapped_addresses.insert({interval, new_map});
-        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+        rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
         if (inherit_written) {
             MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
             new_map->MarkAsWritten(true);
@@ -192,7 +207,7 @@ protected:
     /// Unregisters an object from the cache
     void Unregister(MapInterval& map) {
         const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
         map->MarkAsRegistered(false);
         if (map->IsWritten()) {
             UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
@@ -202,32 +217,39 @@ protected:
     }
 
 private:
-    MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
         return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
     }
 
-    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
-                           const CacheAddr cache_addr, const std::size_t size) {
-
-        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
+                           const std::size_t size) {
+
+        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
-            const CacheAddr cache_addr_end = cache_addr + size;
-            MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
-            u8* host_ptr = FromCacheAddr(cache_addr);
-            UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr);
+            auto& memory_manager = system.GPU().MemoryManager();
+            const VAddr cpu_addr_end = cpu_addr + size;
+            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
+            if (memory_manager.IsGranularRange(gpu_addr, size)) {
+                u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
+            } else {
+                staging_buffer.resize(size);
+                memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
+            }
             Register(new_map);
             return new_map;
         }
 
-        const CacheAddr cache_addr_end = cache_addr + size;
+        const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
             MapInterval& current_map = overlaps[0];
-            if (current_map->IsInside(cache_addr, cache_addr_end)) {
+            if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
         }
-        CacheAddr new_start = cache_addr;
-        CacheAddr new_end = cache_addr_end;
+        VAddr new_start = cpu_addr;
+        VAddr new_end = cpu_addr_end;
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
@@ -237,7 +259,7 @@ private:
             write_inheritance |= overlap->IsWritten();
             modified_inheritance |= overlap->IsModified();
         }
-        GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr;
+        GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
@@ -250,7 +272,7 @@ private:
         return new_map;
     }
 
-    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+    void UpdateBlock(const TBuffer& block, VAddr start, VAddr end,
                      std::vector<MapInterval>& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
@@ -262,13 +284,15 @@ private:
         for (auto& interval : interval_set) {
             std::size_t size = interval.upper() - interval.lower();
             if (size > 0) {
-                u8* host_ptr = FromCacheAddr(interval.lower());
-                UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr);
+                staging_buffer.resize(size);
+                system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+                UploadBlockData(block, block->GetOffset(interval.lower()), size,
+                                staging_buffer.data());
             }
         }
     }
 
-    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
         if (size == 0) {
             return {};
         }
@@ -290,8 +314,9 @@ private:
     void FlushMap(MapInterval map) {
         std::size_t size = map->GetEnd() - map->GetStart();
        TBuffer block = blocks[map->GetStart() >> block_page_bits];
-        u8* host_ptr = FromCacheAddr(map->GetStart());
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr);
+        staging_buffer.resize(size);
+        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }
 
@@ -316,14 +341,14 @@ private:
     TBuffer EnlargeBlock(TBuffer buffer) {
         const std::size_t old_size = buffer->GetSize();
         const std::size_t new_size = old_size + block_page_size;
-        const CacheAddr cache_addr = buffer->GetCacheAddr();
-        TBuffer new_buffer = CreateBlock(cache_addr, new_size);
+        const VAddr cpu_addr = buffer->GetCpuAddr();
+        TBuffer new_buffer = CreateBlock(cpu_addr, new_size);
         CopyBlock(buffer, new_buffer, 0, 0, old_size);
         buffer->SetEpoch(epoch);
         pending_destruction.push_back(buffer);
-        const CacheAddr cache_addr_end = cache_addr + new_size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + new_size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -334,9 +359,9 @@ private:
     TBuffer MergeBlocks(TBuffer first, TBuffer second) {
         const std::size_t size_1 = first->GetSize();
         const std::size_t size_2 = second->GetSize();
-        const CacheAddr first_addr = first->GetCacheAddr();
-        const CacheAddr second_addr = second->GetCacheAddr();
-        const CacheAddr new_addr = std::min(first_addr, second_addr);
+        const VAddr first_addr = first->GetCpuAddr();
+        const VAddr second_addr = second->GetCpuAddr();
+        const VAddr new_addr = std::min(first_addr, second_addr);
         const std::size_t new_size = size_1 + size_2;
         TBuffer new_buffer = CreateBlock(new_addr, new_size);
         CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
@@ -345,9 +370,9 @@ private:
         second->SetEpoch(epoch);
         pending_destruction.push_back(first);
         pending_destruction.push_back(second);
-        const CacheAddr cache_addr_end = new_addr + new_size - 1;
+        const VAddr cpu_addr_end = new_addr + new_size - 1;
         u64 page_start = new_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             blocks[page_start] = new_buffer;
             ++page_start;
@@ -355,18 +380,18 @@ private:
         return new_buffer;
     }
 
-    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+    TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
         TBuffer found{};
-        const CacheAddr cache_addr_end = cache_addr + size - 1;
-        u64 page_start = cache_addr >> block_page_bits;
-        const u64 page_end = cache_addr_end >> block_page_bits;
+        const VAddr cpu_addr_end = cpu_addr + size - 1;
+        u64 page_start = cpu_addr >> block_page_bits;
+        const u64 page_end = cpu_addr_end >> block_page_bits;
         while (page_start <= page_end) {
             auto it = blocks.find(page_start);
             if (it == blocks.end()) {
                 if (found) {
                     found = EnlargeBlock(found);
                 } else {
-                    const CacheAddr start_addr = (page_start << block_page_bits);
+                    const VAddr start_addr = (page_start << block_page_bits);
                     found = CreateBlock(start_addr, block_page_size);
                     blocks[page_start] = found;
                 }
@@ -386,7 +411,7 @@ private:
         return found;
     }
 
-    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void MarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -400,7 +425,7 @@ private:
         }
     }
 
-    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+    void UnmarkRegionAsWritten(const VAddr start, const VAddr end) {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -416,7 +441,7 @@ private:
         }
     }
 
-    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+    bool IsRegionWritten(const VAddr start, const VAddr end) const {
         u64 page_start = start >> write_page_bit;
         const u64 page_end = end >> write_page_bit;
         while (page_start <= page_end) {
@@ -440,8 +465,8 @@ private:
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;
 
-    using IntervalSet = boost::icl::interval_set<CacheAddr>;
-    using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
     using IntervalType = typename IntervalCache::interval_type;
     IntervalCache mapped_addresses;
 
@@ -456,6 +481,8 @@ private:
     u64 epoch = 0;
     u64 modified_ticks = 0;
 
+    std::vector<u8> staging_buffer;
+
     std::recursive_mutex mutex;
 };
 
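The recurring pattern in this file: since a GPU range no longer maps to one flat host pointer, every upload first asks IsGranularRange() (added to Tegra::MemoryManager elsewhere in this changeset, and assumed here to report whether the whole range is backed by a single contiguous host mapping) and otherwise bounces through the class-level staging_buffer. Condensed into one hedged helper (the function itself is illustrative; the calls are from this diff):

    #include <cstddef>
    #include <vector>

    #include "common/common_types.h"
    #include "video_core/memory_manager.h"

    // Illustrative condensation of the upload decision used above.
    // IsGranularRange(gpu_addr, size) is assumed to mean: every byte of
    // [gpu_addr, gpu_addr + size) resolves through one contiguous host range,
    // so GetPointer(gpu_addr) is valid for the whole span.
    template <typename UploadFn>
    void UploadGuestRange(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
                          std::size_t size, std::vector<u8>& staging_buffer, UploadFn&& upload) {
        if (memory_manager.IsGranularRange(gpu_addr, size)) {
            upload(memory_manager.GetPointer(gpu_addr)); // zero-copy fast path
        } else {
            staging_buffer.resize(size);
            memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
            upload(staging_buffer.data()); // staged slow path
        }
    }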
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 3a104d5cd..b0956029d 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -11,7 +11,7 @@ namespace VideoCommon {
 
 class MapIntervalBase {
 public:
-    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
         : start{start}, end{end}, gpu_addr{gpu_addr} {}
 
     void SetCpuAddress(VAddr new_cpu_addr) {
@@ -26,7 +26,7 @@ public:
         return gpu_addr;
     }
 
-    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+    bool IsInside(const VAddr other_start, const VAddr other_end) const {
         return (start <= other_start && other_end <= end);
     }
 
@@ -46,11 +46,11 @@ public:
         return is_registered;
     }
 
-    CacheAddr GetStart() const {
+    VAddr GetStart() const {
         return start;
     }
 
-    CacheAddr GetEnd() const {
+    VAddr GetEnd() const {
         return end;
     }
 
@@ -76,8 +76,8 @@ public:
     }
 
 private:
-    CacheAddr start;
-    CacheAddr end;
+    VAddr start;
+    VAddr end;
     GPUVAddr gpu_addr;
     VAddr cpu_addr{};
     bool is_written{};
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ced9d7e28..1a2d747be 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -270,13 +270,13 @@ public:
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
+    virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
 
 protected:
     virtual void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 925be8d7b..cc434faf7 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -30,15 +30,15 @@ void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     gpu_thread.SwapBuffers(framebuffer);
 }
 
-void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushRegion(VAddr addr, u64 size) {
     gpu_thread.FlushRegion(addr, size);
 }
 
-void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.InvalidateRegion(addr, size);
 }
 
-void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     gpu_thread.FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 265c62758..03fd0eef0 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -27,9 +27,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override;
 
 protected:
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index bd5278a5c..6f38a672a 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -26,15 +26,15 @@ void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     renderer->SwapBuffers(framebuffer);
 }
 
-void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushRegion(addr, size);
 }
 
-void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::InvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().InvalidateRegion(addr, size);
 }
 
-void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     renderer->Rasterizer().FlushAndInvalidateRegion(addr, size);
 }
 
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 866a94c8c..4a6e9a01d 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -26,9 +26,9 @@ public:
     void Start() override;
     void PushGPUEntries(Tegra::CommandList&& entries) override;
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-    void FlushRegion(CacheAddr addr, u64 size) override;
-    void InvalidateRegion(CacheAddr addr, u64 size) override;
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void FlushRegion(VAddr addr, u64 size) override;
+    void InvalidateRegion(VAddr addr, u64 size) override;
+    void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
     void WaitIdle() const override {}
 
 protected:
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 270c7ae0d..10cda686b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -77,15 +77,15 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
     PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt));
 }
 
-void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     PushCommand(FlushRegionCommand(addr, size));
 }
 
-void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
     system.Renderer().Rasterizer().InvalidateRegion(addr, size);
 }
 
-void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
+void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
     InvalidateRegion(addr, size);
 }
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index be36c580e..cd74ad330 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -47,26 +47,26 @@ struct SwapBuffersCommand final {
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    CacheAddr addr;
+    VAddr addr;
     u64 size;
 };
 
@@ -111,13 +111,13 @@ public:
     void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(CacheAddr addr, u64 size);
+    void FlushRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(CacheAddr addr, u64 size);
+    void InvalidateRegion(VAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+    void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
     // Wait until the gpu thread is idle.
     void WaitIdle() const;
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index f5d33f27a..a3389d0d2 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -81,12 +81,11 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) {
     ASSERT((gpu_addr & page_mask) == 0);
 
     const u64 aligned_size{Common::AlignUp(size, page_size)};
-    const CacheAddr cache_addr{ToCacheAddr(GetPointer(gpu_addr))};
     const auto cpu_addr = GpuToCpuAddress(gpu_addr);
     ASSERT(cpu_addr);
 
     // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    system.GPU().FlushAndInvalidateRegion(cache_addr, aligned_size);
+    system.GPU().FlushAndInvalidateRegion(*cpu_addr, aligned_size);
 
     UnmapRange(gpu_addr, aligned_size);
     ASSERT(system.CurrentProcess()
@@ -140,11 +139,11 @@ T MemoryManager::Read(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    const u8* page_pointer{GetPointer(addr)};
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
         T value;
-        std::memcpy(&value, &page_pointer[addr & page_mask], sizeof(T));
+        std::memcpy(&value, page_pointer, sizeof(T));
         return value;
     }
 
@@ -167,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) {
         return;
     }
 
-    u8* page_pointer{page_table.pointers[addr >> page_bits]};
+    u8* page_pointer{GetPointer(addr)};
     if (page_pointer) {
         // NOTE: Avoid adding any extra logic to this fast-path block
-        std::memcpy(&page_pointer[addr & page_mask], &data, sizeof(T));
+        std::memcpy(page_pointer, &data, sizeof(T));
         return;
     }
 
@@ -201,9 +200,12 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) {
         return {};
     }
 
-    u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
-        return page_pointer + (addr & page_mask);
+    auto& memory = system.Memory();
+
+    const VAddr page_addr{page_table.backing_addr[addr >> page_bits]};
+
+    if (page_addr != 0) {
+        return memory.GetPointer(page_addr + (addr & page_mask));
     }
 
     LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr);
@@ -215,9 +217,12 @@ const u8* MemoryManager::GetPointer(GPUVAddr addr) const {
         return {};
     }
 
-    const u8* const page_pointer{page_table.pointers[addr >> page_bits]};
-    if (page_pointer != nullptr) {
+    const auto& memory = system.Memory();
| 220 | return page_pointer + (addr & page_mask); | 222 | const VAddr page_addr{page_table.backing_addr[addr >> page_bits]}; |
| 223 | |||
| 224 | if (page_addr != 0) { | ||
| 225 | return memory.GetPointer(page_addr + (addr & page_mask)); | ||
| 221 | } | 226 | } |
| 222 | 227 | ||
| 223 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); | 228 | LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); |
| @@ -238,17 +243,19 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s | |||
| 238 | std::size_t page_index{src_addr >> page_bits}; | 243 | std::size_t page_index{src_addr >> page_bits}; |
| 239 | std::size_t page_offset{src_addr & page_mask}; | 244 | std::size_t page_offset{src_addr & page_mask}; |
| 240 | 245 | ||
| 246 | auto& memory = system.Memory(); | ||
| 247 | |||
| 241 | while (remaining_size > 0) { | 248 | while (remaining_size > 0) { |
| 242 | const std::size_t copy_amount{ | 249 | const std::size_t copy_amount{ |
| 243 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 250 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 244 | 251 | ||
| 245 | switch (page_table.attributes[page_index]) { | 252 | switch (page_table.attributes[page_index]) { |
| 246 | case Common::PageType::Memory: { | 253 | case Common::PageType::Memory: { |
| 247 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | 254 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; |
| 248 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | 255 | // Flush must happen on the rasterizer interface, such that memory is always synchronous |
| 249 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. | 256 | // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. |
| 250 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | 257 | rasterizer.FlushRegion(src_addr, copy_amount); |
| 251 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 258 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 252 | break; | 259 | break; |
| 253 | } | 260 | } |
| 254 | default: | 261 | default: |
| @@ -268,13 +275,15 @@ void MemoryManager::ReadBlockUnsafe(GPUVAddr src_addr, void* dest_buffer, | |||
| 268 | std::size_t page_index{src_addr >> page_bits}; | 275 | std::size_t page_index{src_addr >> page_bits}; |
| 269 | std::size_t page_offset{src_addr & page_mask}; | 276 | std::size_t page_offset{src_addr & page_mask}; |
| 270 | 277 | ||
| 278 | auto& memory = system.Memory(); | ||
| 279 | |||
| 271 | while (remaining_size > 0) { | 280 | while (remaining_size > 0) { |
| 272 | const std::size_t copy_amount{ | 281 | const std::size_t copy_amount{ |
| 273 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 282 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 274 | const u8* page_pointer = page_table.pointers[page_index]; | 283 | const u8* page_pointer = page_table.pointers[page_index]; |
| 275 | if (page_pointer) { | 284 | if (page_pointer) { |
| 276 | const u8* src_ptr{page_pointer + page_offset}; | 285 | const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; |
| 277 | std::memcpy(dest_buffer, src_ptr, copy_amount); | 286 | memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); |
| 278 | } else { | 287 | } else { |
| 279 | std::memset(dest_buffer, 0, copy_amount); | 288 | std::memset(dest_buffer, 0, copy_amount); |
| 280 | } | 289 | } |
| @@ -290,17 +299,19 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const | |||
| 290 | std::size_t page_index{dest_addr >> page_bits}; | 299 | std::size_t page_index{dest_addr >> page_bits}; |
| 291 | std::size_t page_offset{dest_addr & page_mask}; | 300 | std::size_t page_offset{dest_addr & page_mask}; |
| 292 | 301 | ||
| 302 | auto& memory = system.Memory(); | ||
| 303 | |||
| 293 | while (remaining_size > 0) { | 304 | while (remaining_size > 0) { |
| 294 | const std::size_t copy_amount{ | 305 | const std::size_t copy_amount{ |
| 295 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 306 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 296 | 307 | ||
| 297 | switch (page_table.attributes[page_index]) { | 308 | switch (page_table.attributes[page_index]) { |
| 298 | case Common::PageType::Memory: { | 309 | case Common::PageType::Memory: { |
| 299 | u8* dest_ptr{page_table.pointers[page_index] + page_offset}; | 310 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; |
| 300 | // Invalidate must happen on the rasterizer interface, such that memory is always | 311 | // Invalidate must happen on the rasterizer interface, such that memory is always |
| 301 | // synchronous when it is written (even when in asynchronous GPU mode). | 312 | // synchronous when it is written (even when in asynchronous GPU mode). |
| 302 | rasterizer.InvalidateRegion(ToCacheAddr(dest_ptr), copy_amount); | 313 | rasterizer.InvalidateRegion(dest_addr, copy_amount); |
| 303 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 314 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 304 | break; | 315 | break; |
| 305 | } | 316 | } |
| 306 | default: | 317 | default: |
| @@ -320,13 +331,15 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| 320 | std::size_t page_index{dest_addr >> page_bits}; | 331 | std::size_t page_index{dest_addr >> page_bits}; |
| 321 | std::size_t page_offset{dest_addr & page_mask}; | 332 | std::size_t page_offset{dest_addr & page_mask}; |
| 322 | 333 | ||
| 334 | auto& memory = system.Memory(); | ||
| 335 | |||
| 323 | while (remaining_size > 0) { | 336 | while (remaining_size > 0) { |
| 324 | const std::size_t copy_amount{ | 337 | const std::size_t copy_amount{ |
| 325 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | 338 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; |
| 326 | u8* page_pointer = page_table.pointers[page_index]; | 339 | u8* page_pointer = page_table.pointers[page_index]; |
| 327 | if (page_pointer) { | 340 | if (page_pointer) { |
| 328 | u8* dest_ptr{page_pointer + page_offset}; | 341 | const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; |
| 329 | std::memcpy(dest_ptr, src_buffer, copy_amount); | 342 | memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); |
| 330 | } | 343 | } |
| 331 | page_index++; | 344 | page_index++; |
| 332 | page_offset = 0; | 345 | page_offset = 0; |
| @@ -336,33 +349,9 @@ void MemoryManager::WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, | |||
| 336 | } | 349 | } |
| 337 | 350 | ||
| 338 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | 351 | void MemoryManager::CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { |
| 339 | std::size_t remaining_size{size}; | 352 | std::vector<u8> tmp_buffer(size); |
| 340 | std::size_t page_index{src_addr >> page_bits}; | 353 | ReadBlock(src_addr, tmp_buffer.data(), size); |
| 341 | std::size_t page_offset{src_addr & page_mask}; | 354 | WriteBlock(dest_addr, tmp_buffer.data(), size); |
| 342 | |||
| 343 | while (remaining_size > 0) { | ||
| 344 | const std::size_t copy_amount{ | ||
| 345 | std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; | ||
| 346 | |||
| 347 | switch (page_table.attributes[page_index]) { | ||
| 348 | case Common::PageType::Memory: { | ||
| 349 | // Flush must happen on the rasterizer interface, such that memory is always synchronous | ||
| 350 | // when it is copied (even when in asynchronous GPU mode). | ||
| 351 | const u8* src_ptr{page_table.pointers[page_index] + page_offset}; | ||
| 352 | rasterizer.FlushRegion(ToCacheAddr(src_ptr), copy_amount); | ||
| 353 | WriteBlock(dest_addr, src_ptr, copy_amount); | ||
| 354 | break; | ||
| 355 | } | ||
| 356 | default: | ||
| 357 | UNREACHABLE(); | ||
| 358 | } | ||
| 359 | |||
| 360 | page_index++; | ||
| 361 | page_offset = 0; | ||
| 362 | dest_addr += static_cast<VAddr>(copy_amount); | ||
| 363 | src_addr += static_cast<VAddr>(copy_amount); | ||
| 364 | remaining_size -= copy_amount; | ||
| 365 | } | ||
| 366 | } | 355 | } |
| 367 | 356 | ||
| 368 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { | 357 | void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const std::size_t size) { |
| @@ -371,6 +360,12 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const | |||
| 371 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); | 360 | WriteBlockUnsafe(dest_addr, tmp_buffer.data(), size); |
| 372 | } | 361 | } |
| 373 | 362 | ||
| 363 | bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { | ||
| 364 | const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; | ||
| 365 | const std::size_t page = (addr & Memory::PAGE_MASK) + size; | ||
| 366 | return page <= Memory::PAGE_SIZE; | ||
| 367 | } | ||
| 368 | |||
| 374 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, | 369 | void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, |
| 375 | VAddr backing_addr) { | 370 | VAddr backing_addr) { |
| 376 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, | 371 | LOG_DEBUG(HW_GPU, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * page_size, |
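The memory_manager.cpp changes all follow one idea: instead of holding raw host pointers, the GPU page table keeps the guest CPU address backing each GPU page (page_table.backing_addr), and host pointers are resolved at the last moment through the CPU memory subsystem. CopyBlock likewise now stages through a temporary buffer instead of walking pages itself. A sketch of the resulting two-step translation (GPUVAddr → guest VAddr → host pointer), with a hash map standing in for the real flat page table and an assumed page size:

    #include <cstdint>
    #include <unordered_map>

    using u8 = std::uint8_t;
    using VAddr = std::uint64_t;    // guest CPU virtual address
    using GPUVAddr = std::uint64_t; // GPU virtual address

    constexpr unsigned page_bits = 16; // assumption; the real constant lives in the page table
    constexpr GPUVAddr page_mask = (GPUVAddr{1} << page_bits) - 1;

    // Stand-ins for page_table.backing_addr and Core::Memory's GetPointer.
    std::unordered_map<GPUVAddr, VAddr> backing_addr;
    u8* CpuGetPointer(VAddr addr);

    u8* GpuGetPointer(GPUVAddr addr) {
        const auto it = backing_addr.find(addr >> page_bits);
        if (it == backing_addr.end() || it->second == 0) {
            return nullptr; // unmapped GPU page
        }
        // Only here does a guest address become a host pointer.
        return CpuGetPointer(it->second + (addr & page_mask));
    }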
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 073bdb491..0d9468535 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -97,6 +97,11 @@ public: | |||
| 97 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); | 97 | void WriteBlockUnsafe(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); |
| 98 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); | 98 | void CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); |
| 99 | 99 | ||
| 100 | /** | ||
| 101 | * IsGranularRange checks whether a GPU region fits in a single page, so it can be read directly through one pointer | ||
| 102 | */ | ||
| 103 | bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); | ||
| 104 | |||
| 100 | private: | 105 | private: |
| 101 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; | 106 | using VMAMap = std::map<GPUVAddr, VirtualMemoryArea>; |
| 102 | using VMAHandle = VMAMap::const_iterator; | 107 | using VMAHandle = VMAMap::const_iterator; |
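IsGranularRange returns true when the range does not cross a page boundary from its backing CPU address, i.e. a single GetPointer result covers all size bytes. A hypothetical caller pattern (ReadGuest is illustrative only; the member functions are the ones declared above):

    #include <cstddef>
    #include <cstring>

    void ReadGuest(Tegra::MemoryManager& mm, GPUVAddr gpu_addr, u8* out, std::size_t size) {
        if (mm.IsGranularRange(gpu_addr, size)) {
            std::memcpy(out, mm.GetPointer(gpu_addr), size); // single-page fast path
        } else {
            mm.ReadBlockUnsafe(gpu_addr, out, size);         // range crosses a page boundary
        }
    }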
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index e66054ed0..5ea2b01f2 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h | |||
| @@ -98,12 +98,12 @@ public: | |||
| 98 | static_cast<QueryCache&>(*this), | 98 | static_cast<QueryCache&>(*this), |
| 99 | VideoCore::QueryType::SamplesPassed}}} {} | 99 | VideoCore::QueryType::SamplesPassed}}} {} |
| 100 | 100 | ||
| 101 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | 101 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 102 | std::unique_lock lock{mutex}; | 102 | std::unique_lock lock{mutex}; |
| 103 | FlushAndRemoveRegion(addr, size); | 103 | FlushAndRemoveRegion(addr, size); |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | void FlushRegion(CacheAddr addr, std::size_t size) { | 106 | void FlushRegion(VAddr addr, std::size_t size) { |
| 107 | std::unique_lock lock{mutex}; | 107 | std::unique_lock lock{mutex}; |
| 108 | FlushAndRemoveRegion(addr, size); | 108 | FlushAndRemoveRegion(addr, size); |
| 109 | } | 109 | } |
| @@ -117,14 +117,16 @@ public: | |||
| 117 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { | 117 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { |
| 118 | std::unique_lock lock{mutex}; | 118 | std::unique_lock lock{mutex}; |
| 119 | auto& memory_manager = system.GPU().MemoryManager(); | 119 | auto& memory_manager = system.GPU().MemoryManager(); |
| 120 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 120 | const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); |
| 121 | ASSERT(cpu_addr_opt); | ||
| 122 | VAddr cpu_addr = *cpu_addr_opt; | ||
| 121 | 123 | ||
| 122 | CachedQuery* query = TryGet(ToCacheAddr(host_ptr)); | 124 | CachedQuery* query = TryGet(cpu_addr); |
| 123 | if (!query) { | 125 | if (!query) { |
| 124 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); | 126 | ASSERT_OR_EXECUTE(cpu_addr_opt, return;); |
| 125 | ASSERT_OR_EXECUTE(cpu_addr, return;); | 127 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); |
| 126 | 128 | ||
| 127 | query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); | 129 | query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); |
| 128 | } | 130 | } |
| 129 | 131 | ||
| 130 | query->BindCounter(Stream(type).Current(), timestamp); | 132 | query->BindCounter(Stream(type).Current(), timestamp); |
| @@ -173,11 +175,11 @@ protected: | |||
| 173 | 175 | ||
| 174 | private: | 176 | private: |
| 175 | /// Flushes a memory range to guest memory and removes it from the cache. | 177 | /// Flushes a memory range to guest memory and removes it from the cache. |
| 176 | void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | 178 | void FlushAndRemoveRegion(VAddr addr, std::size_t size) { |
| 177 | const u64 addr_begin = static_cast<u64>(addr); | 179 | const u64 addr_begin = static_cast<u64>(addr); |
| 178 | const u64 addr_end = addr_begin + static_cast<u64>(size); | 180 | const u64 addr_end = addr_begin + static_cast<u64>(size); |
| 179 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { | 181 | const auto in_range = [addr_begin, addr_end](CachedQuery& query) { |
| 180 | const u64 cache_begin = query.GetCacheAddr(); | 182 | const u64 cache_begin = query.GetCpuAddr(); |
| 181 | const u64 cache_end = cache_begin + query.SizeInBytes(); | 183 | const u64 cache_end = cache_begin + query.SizeInBytes(); |
| 182 | return cache_begin < addr_end && addr_begin < cache_end; | 184 | return cache_begin < addr_end && addr_begin < cache_end; |
| 183 | }; | 185 | }; |
| @@ -193,7 +195,7 @@ private: | |||
| 193 | if (!in_range(query)) { | 195 | if (!in_range(query)) { |
| 194 | continue; | 196 | continue; |
| 195 | } | 197 | } |
| 196 | rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1); | 198 | rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1); |
| 197 | query.Flush(); | 199 | query.Flush(); |
| 198 | } | 200 | } |
| 199 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), | 201 | contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range), |
| @@ -204,22 +206,21 @@ private: | |||
| 204 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. | 206 | /// Registers the passed parameters as cached and returns a pointer to the stored cached query. |
| 205 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { | 207 | CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) { |
| 206 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); | 208 | rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1); |
| 207 | const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT; | 209 | const u64 page = static_cast<u64>(cpu_addr) >> PAGE_SHIFT; |
| 208 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, | 210 | return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr, |
| 209 | host_ptr); | 211 | host_ptr); |
| 210 | } | 212 | } |
| 211 | 213 | ||
| 212 | /// Tries to get a cached query. Returns nullptr on failure. | 214 | /// Tries to get a cached query. Returns nullptr on failure. |
| 213 | CachedQuery* TryGet(CacheAddr addr) { | 215 | CachedQuery* TryGet(VAddr addr) { |
| 214 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; | 216 | const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT; |
| 215 | const auto it = cached_queries.find(page); | 217 | const auto it = cached_queries.find(page); |
| 216 | if (it == std::end(cached_queries)) { | 218 | if (it == std::end(cached_queries)) { |
| 217 | return nullptr; | 219 | return nullptr; |
| 218 | } | 220 | } |
| 219 | auto& contents = it->second; | 221 | auto& contents = it->second; |
| 220 | const auto found = | 222 | const auto found = std::find_if(std::begin(contents), std::end(contents), |
| 221 | std::find_if(std::begin(contents), std::end(contents), | 223 | [addr](auto& query) { return query.GetCpuAddr() == addr; }); |
| 222 | [addr](auto& query) { return query.GetCacheAddr() == addr; }); | ||
| 223 | return found != std::end(contents) ? &*found : nullptr; | 224 | return found != std::end(contents) ? &*found : nullptr; |
| 224 | } | 225 | } |
| 225 | 226 | ||
| @@ -323,14 +324,10 @@ public: | |||
| 323 | timestamp = timestamp_; | 324 | timestamp = timestamp_; |
| 324 | } | 325 | } |
| 325 | 326 | ||
| 326 | VAddr CpuAddr() const noexcept { | 327 | VAddr GetCpuAddr() const noexcept { |
| 327 | return cpu_addr; | 328 | return cpu_addr; |
| 328 | } | 329 | } |
| 329 | 330 | ||
| 330 | CacheAddr GetCacheAddr() const noexcept { | ||
| 331 | return ToCacheAddr(host_ptr); | ||
| 332 | } | ||
| 333 | |||
| 334 | u64 SizeInBytes() const noexcept { | 331 | u64 SizeInBytes() const noexcept { |
| 335 | return SizeInBytes(timestamp.has_value()); | 332 | return SizeInBytes(timestamp.has_value()); |
| 336 | } | 333 | } |
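The query cache buckets CachedQuery objects by guest page (cpu_addr >> PAGE_SHIFT) and matches them by exact CPU address, which is what TryGet and Register above implement. A reduced sketch of that page-bucketed lookup, assuming 4 KiB pages (the real PAGE_SHIFT comes from the core memory headers):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using VAddr = std::uint64_t;
    constexpr unsigned PAGE_SHIFT = 12; // assumption: 4 KiB guest pages

    struct CachedQuery {
        VAddr cpu_addr; // counters, timestamps, etc. omitted
    };

    std::unordered_map<std::uint64_t, std::vector<CachedQuery>> cached_queries;

    CachedQuery* TryGet(VAddr addr) {
        const auto it = cached_queries.find(addr >> PAGE_SHIFT);
        if (it == cached_queries.end()) {
            return nullptr;
        }
        for (CachedQuery& query : it->second) {
            if (query.cpu_addr == addr) {
                return &query;
            }
        }
        return nullptr;
    }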
diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 6de1597a2..22987751e 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h | |||
| @@ -18,22 +18,14 @@ | |||
| 18 | 18 | ||
| 19 | class RasterizerCacheObject { | 19 | class RasterizerCacheObject { |
| 20 | public: | 20 | public: |
| 21 | explicit RasterizerCacheObject(const u8* host_ptr) | 21 | explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {} |
| 22 | : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} | ||
| 23 | 22 | ||
| 24 | virtual ~RasterizerCacheObject(); | 23 | virtual ~RasterizerCacheObject(); |
| 25 | 24 | ||
| 26 | CacheAddr GetCacheAddr() const { | 25 | VAddr GetCpuAddr() const { |
| 27 | return cache_addr; | 26 | return cpu_addr; |
| 28 | } | 27 | } |
| 29 | 28 | ||
| 30 | const u8* GetHostPtr() const { | ||
| 31 | return host_ptr; | ||
| 32 | } | ||
| 33 | |||
| 34 | /// Gets the address of the shader in guest memory, required for cache management | ||
| 35 | virtual VAddr GetCpuAddr() const = 0; | ||
| 36 | |||
| 37 | /// Gets the size of the object in guest memory, required for cache management | 29 | /// Gets the size of the object in guest memory, required for cache management |
| 38 | virtual std::size_t GetSizeInBytes() const = 0; | 30 | virtual std::size_t GetSizeInBytes() const = 0; |
| 39 | 31 | ||
| @@ -68,8 +60,7 @@ private: | |||
| 68 | bool is_registered{}; ///< Whether the object is currently registered with the cache | 60 | bool is_registered{}; ///< Whether the object is currently registered with the cache |
| 69 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) | 61 | bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) |
| 70 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing | 62 | u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing |
| 71 | const u8* host_ptr{}; ///< Pointer to the memory backing this cached region | 63 | VAddr cpu_addr{}; ///< Guest CPU address of the memory backing this cached region |
| 72 | CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space | ||
| 73 | }; | 64 | }; |
| 74 | 65 | ||
| 75 | template <class T> | 66 | template <class T> |
| @@ -80,7 +71,7 @@ public: | |||
| 80 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | 71 | explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |
| 81 | 72 | ||
| 82 | /// Write any cached resources overlapping the specified region back to memory | 73 | /// Write any cached resources overlapping the specified region back to memory |
| 83 | void FlushRegion(CacheAddr addr, std::size_t size) { | 74 | void FlushRegion(VAddr addr, std::size_t size) { |
| 84 | std::lock_guard lock{mutex}; | 75 | std::lock_guard lock{mutex}; |
| 85 | 76 | ||
| 86 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | 77 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
| @@ -90,7 +81,7 @@ public: | |||
| 90 | } | 81 | } |
| 91 | 82 | ||
| 92 | /// Mark the specified region as being invalidated | 83 | /// Mark the specified region as being invalidated |
| 93 | void InvalidateRegion(CacheAddr addr, u64 size) { | 84 | void InvalidateRegion(VAddr addr, u64 size) { |
| 94 | std::lock_guard lock{mutex}; | 85 | std::lock_guard lock{mutex}; |
| 95 | 86 | ||
| 96 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | 87 | const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |
| @@ -114,27 +105,20 @@ public: | |||
| 114 | 105 | ||
| 115 | protected: | 106 | protected: |
| 116 | /// Tries to get an object from the cache with the specified cache address | 107 | /// Tries to get an object from the cache with the specified CPU address |
| 117 | T TryGet(CacheAddr addr) const { | 108 | T TryGet(VAddr addr) const { |
| 118 | const auto iter = map_cache.find(addr); | 109 | const auto iter = map_cache.find(addr); |
| 119 | if (iter != map_cache.end()) | 110 | if (iter != map_cache.end()) |
| 120 | return iter->second; | 111 | return iter->second; |
| 121 | return nullptr; | 112 | return nullptr; |
| 122 | } | 113 | } |
| 123 | 114 | ||
| 124 | T TryGet(const void* addr) const { | ||
| 125 | const auto iter = map_cache.find(ToCacheAddr(addr)); | ||
| 126 | if (iter != map_cache.end()) | ||
| 127 | return iter->second; | ||
| 128 | return nullptr; | ||
| 129 | } | ||
| 130 | |||
| 131 | /// Register an object into the cache | 115 | /// Register an object into the cache |
| 132 | virtual void Register(const T& object) { | 116 | virtual void Register(const T& object) { |
| 133 | std::lock_guard lock{mutex}; | 117 | std::lock_guard lock{mutex}; |
| 134 | 118 | ||
| 135 | object->SetIsRegistered(true); | 119 | object->SetIsRegistered(true); |
| 136 | interval_cache.add({GetInterval(object), ObjectSet{object}}); | 120 | interval_cache.add({GetInterval(object), ObjectSet{object}}); |
| 137 | map_cache.insert({object->GetCacheAddr(), object}); | 121 | map_cache.insert({object->GetCpuAddr(), object}); |
| 138 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | 122 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); |
| 139 | } | 123 | } |
| 140 | 124 | ||
| @@ -144,7 +128,7 @@ protected: | |||
| 144 | 128 | ||
| 145 | object->SetIsRegistered(false); | 129 | object->SetIsRegistered(false); |
| 146 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | 130 | rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); |
| 147 | const CacheAddr addr = object->GetCacheAddr(); | 131 | const VAddr addr = object->GetCpuAddr(); |
| 148 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | 132 | interval_cache.subtract({GetInterval(object), ObjectSet{object}}); |
| 149 | map_cache.erase(addr); | 133 | map_cache.erase(addr); |
| 150 | } | 134 | } |
| @@ -173,7 +157,7 @@ protected: | |||
| 173 | 157 | ||
| 174 | private: | 158 | private: |
| 175 | /// Returns a list of cached objects from the specified memory region, ordered by access time | 159 | /// Returns a list of cached objects from the specified memory region, ordered by access time |
| 176 | std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { | 160 | std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { |
| 177 | if (size == 0) { | 161 | if (size == 0) { |
| 178 | return {}; | 162 | return {}; |
| 179 | } | 163 | } |
| @@ -197,13 +181,13 @@ private: | |||
| 197 | } | 181 | } |
| 198 | 182 | ||
| 199 | using ObjectSet = std::set<T>; | 183 | using ObjectSet = std::set<T>; |
| 200 | using ObjectCache = std::unordered_map<CacheAddr, T>; | 184 | using ObjectCache = std::unordered_map<VAddr, T>; |
| 201 | using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; | 185 | using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; |
| 202 | using ObjectInterval = typename IntervalCache::interval_type; | 186 | using ObjectInterval = typename IntervalCache::interval_type; |
| 203 | 187 | ||
| 204 | static auto GetInterval(const T& object) { | 188 | static auto GetInterval(const T& object) { |
| 205 | return ObjectInterval::right_open(object->GetCacheAddr(), | 189 | return ObjectInterval::right_open(object->GetCpuAddr(), |
| 206 | object->GetCacheAddr() + object->GetSizeInBytes()); | 190 | object->GetCpuAddr() + object->GetSizeInBytes()); |
| 207 | } | 191 | } |
| 208 | 192 | ||
| 209 | ObjectCache map_cache; | 193 | ObjectCache map_cache; |
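RasterizerCache keeps two views of the same objects: an exact-address map (map_cache) for TryGet, and a Boost.ICL interval_map (interval_cache) for range queries, both now keyed by guest VAddr. A compact sketch of the interval side, with an int standing in for the cached-object handle:

    #include <cstdint>
    #include <set>
    #include <boost/icl/interval_map.hpp>

    using VAddr = std::uint64_t;
    using ObjectSet = std::set<int>; // stand-in for std::set<T>
    using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
    using ObjectInterval = IntervalCache::interval_type;

    int main() {
        IntervalCache interval_cache;
        // Register an object covering [0x1000, 0x3000).
        interval_cache.add({ObjectInterval::right_open(0x1000, 0x3000), ObjectSet{1}});

        // Visit everything overlapping an invalidated region [0x2000, 0x2100).
        const auto range = interval_cache.equal_range(ObjectInterval::right_open(0x2000, 0x2100));
        for (auto it = range.first; it != range.second; ++it) {
            // it->second holds the overlapping objects to flush/unregister.
        }
    }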
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 1a68e3caa..8ae5b9c4e 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h | |||
| @@ -53,14 +53,14 @@ public: | |||
| 53 | virtual void FlushAll() = 0; | 53 | virtual void FlushAll() = 0; |
| 54 | 54 | ||
| 55 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 55 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 56 | virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | 56 | virtual void FlushRegion(VAddr addr, u64 size) = 0; |
| 57 | 57 | ||
| 58 | /// Notify rasterizer that any caches of the specified region should be invalidated | 58 | /// Notify rasterizer that any caches of the specified region should be invalidated |
| 59 | virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; | 59 | virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |
| 60 | 60 | ||
| 61 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory | 61 | /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory |
| 62 | /// and invalidated | 62 | /// and invalidated |
| 63 | virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | 63 | virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |
| 64 | 64 | ||
| 65 | /// Notify the rasterizer to send all written commands to the host GPU. | 65 | /// Notify the rasterizer to send all written commands to the host GPU. |
| 66 | virtual void FlushCommands() = 0; | 66 | virtual void FlushCommands() = 0; |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 0375fca17..4eb37a96c 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp | |||
| @@ -21,8 +21,8 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||
| 21 | 21 | ||
| 22 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); | 22 | MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); |
| 23 | 23 | ||
| 24 | CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) | 24 | CachedBufferBlock::CachedBufferBlock(VAddr cpu_addr, const std::size_t size) |
| 25 | : VideoCommon::BufferBlock{cache_addr, size} { | 25 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 26 | gl_buffer.Create(); | 26 | gl_buffer.Create(); |
| 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); | 27 | glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW); |
| 28 | } | 28 | } |
| @@ -47,8 +47,8 @@ OGLBufferCache::~OGLBufferCache() { | |||
| 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); | 47 | glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs)); |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | 50 | Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 51 | return std::make_shared<CachedBufferBlock>(cache_addr, size); | 51 | return std::make_shared<CachedBufferBlock>(cpu_addr, size); |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | void OGLBufferCache::WriteBarrier() { | 54 | void OGLBufferCache::WriteBarrier() { |
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c7145443..d94a11252 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h | |||
| @@ -31,7 +31,7 @@ using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuf | |||
| 31 | 31 | ||
| 32 | class CachedBufferBlock : public VideoCommon::BufferBlock { | 32 | class CachedBufferBlock : public VideoCommon::BufferBlock { |
| 33 | public: | 33 | public: |
| 34 | explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); | 34 | explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); |
| 35 | ~CachedBufferBlock(); | 35 | ~CachedBufferBlock(); |
| 36 | 36 | ||
| 37 | const GLuint* GetHandle() const { | 37 | const GLuint* GetHandle() const { |
| @@ -55,7 +55,7 @@ public: | |||
| 55 | } | 55 | } |
| 56 | 56 | ||
| 57 | protected: | 57 | protected: |
| 58 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | 58 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 59 | 59 | ||
| 60 | void WriteBarrier() override; | 60 | void WriteBarrier() override; |
| 61 | 61 | ||
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 346feeb2f..368f399df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -656,9 +656,9 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 656 | 656 | ||
| 657 | void RasterizerOpenGL::FlushAll() {} | 657 | void RasterizerOpenGL::FlushAll() {} |
| 658 | 658 | ||
| 659 | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | 659 | void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { |
| 660 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 660 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 661 | if (!addr || !size) { | 661 | if (addr == 0 || size == 0) { |
| 662 | return; | 662 | return; |
| 663 | } | 663 | } |
| 664 | texture_cache.FlushRegion(addr, size); | 664 | texture_cache.FlushRegion(addr, size); |
| @@ -666,9 +666,9 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | |||
| 666 | query_cache.FlushRegion(addr, size); | 666 | query_cache.FlushRegion(addr, size); |
| 667 | } | 667 | } |
| 668 | 668 | ||
| 669 | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | 669 | void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { |
| 670 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 670 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 671 | if (!addr || !size) { | 671 | if (addr == 0 || size == 0) { |
| 672 | return; | 672 | return; |
| 673 | } | 673 | } |
| 674 | texture_cache.InvalidateRegion(addr, size); | 674 | texture_cache.InvalidateRegion(addr, size); |
| @@ -677,7 +677,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | |||
| 677 | query_cache.InvalidateRegion(addr, size); | 677 | query_cache.InvalidateRegion(addr, size); |
| 678 | } | 678 | } |
| 679 | 679 | ||
| 680 | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 680 | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 681 | if (Settings::values.use_accurate_gpu_emulation) { | 681 | if (Settings::values.use_accurate_gpu_emulation) { |
| 682 | FlushRegion(addr, size); | 682 | FlushRegion(addr, size); |
| 683 | } | 683 | } |
| @@ -716,8 +716,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 716 | 716 | ||
| 717 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); | 717 | MICROPROFILE_SCOPE(OpenGL_CacheManagement); |
| 718 | 718 | ||
| 719 | const auto surface{ | 719 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; |
| 720 | texture_cache.TryFindFramebufferSurface(system.Memory().GetPointer(framebuffer_addr))}; | ||
| 721 | if (!surface) { | 720 | if (!surface) { |
| 722 | return {}; | 721 | return {}; |
| 723 | } | 722 | } |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2d3be2437..212dad852 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h | |||
| @@ -65,9 +65,9 @@ public: | |||
| 65 | void ResetCounter(VideoCore::QueryType type) override; | 65 | void ResetCounter(VideoCore::QueryType type) override; |
| 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 66 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 67 | void FlushAll() override; | 67 | void FlushAll() override; |
| 68 | void FlushRegion(CacheAddr addr, u64 size) override; | 68 | void FlushRegion(VAddr addr, u64 size) override; |
| 69 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 69 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 70 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 70 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 71 | void FlushCommands() override; | 71 | void FlushCommands() override; |
| 72 | void TickFrame() override; | 72 | void TickFrame() override; |
| 73 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 73 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 046ee55a5..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -214,11 +214,11 @@ std::unordered_set<GLenum> GetSupportedFormats() { | |||
| 214 | 214 | ||
| 215 | } // Anonymous namespace | 215 | } // Anonymous namespace |
| 216 | 216 | ||
| 217 | CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, | 217 | CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 218 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 218 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 219 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) | 219 | ShaderEntries entries, std::shared_ptr<OGLProgram> program) |
| 220 | : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, | 220 | : RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)}, |
| 221 | cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} | 221 | size_in_bytes{size_in_bytes}, program{std::move(program)} {} |
| 222 | 222 | ||
| 223 | CachedShader::~CachedShader() = default; | 223 | CachedShader::~CachedShader() = default; |
| 224 | 224 | ||
| @@ -254,9 +254,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
| 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 254 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 255 | params.disk_cache.SaveEntry(std::move(entry)); | 255 | params.disk_cache.SaveEntry(std::move(entry)); |
| 256 | 256 | ||
| 257 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | 257 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 258 | size_in_bytes, std::move(registry), | 258 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); |
| 259 | MakeEntries(ir), std::move(program))); | ||
| 260 | } | 259 | } |
| 261 | 260 | ||
| 262 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | 261 | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { |
| @@ -279,17 +278,16 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog | |||
| 279 | entry.bindless_samplers = registry->GetBindlessSamplers(); | 278 | entry.bindless_samplers = registry->GetBindlessSamplers(); |
| 280 | params.disk_cache.SaveEntry(std::move(entry)); | 279 | params.disk_cache.SaveEntry(std::move(entry)); |
| 281 | 280 | ||
| 282 | return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, | 281 | return std::shared_ptr<CachedShader>(new CachedShader( |
| 283 | size_in_bytes, std::move(registry), | 282 | params.cpu_addr, size_in_bytes, std::move(registry), MakeEntries(ir), std::move(program))); |
| 284 | MakeEntries(ir), std::move(program))); | ||
| 285 | } | 283 | } |
| 286 | 284 | ||
| 287 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | 285 | Shader CachedShader::CreateFromCache(const ShaderParameters& params, |
| 288 | const PrecompiledShader& precompiled_shader, | 286 | const PrecompiledShader& precompiled_shader, |
| 289 | std::size_t size_in_bytes) { | 287 | std::size_t size_in_bytes) { |
| 290 | return std::shared_ptr<CachedShader>(new CachedShader( | 288 | return std::shared_ptr<CachedShader>( |
| 291 | params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, | 289 | new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry, |
| 292 | precompiled_shader.entries, precompiled_shader.program)); | 290 | precompiled_shader.entries, precompiled_shader.program)); |
| 293 | } | 291 | } |
| 294 | 292 | ||
| 295 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, | 293 | ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, |
| @@ -449,12 +447,14 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 449 | const GPUVAddr address{GetShaderAddress(system, program)}; | 447 | const GPUVAddr address{GetShaderAddress(system, program)}; |
| 450 | 448 | ||
| 451 | // Look up shader in the cache based on address | 449 | // Look up shader in the cache based on address |
| 452 | const auto host_ptr{memory_manager.GetPointer(address)}; | 450 | const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; |
| 453 | Shader shader{TryGet(host_ptr)}; | 451 | Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr}; |
| 454 | if (shader) { | 452 | if (shader) { |
| 455 | return last_shaders[static_cast<std::size_t>(program)] = shader; | 453 | return last_shaders[static_cast<std::size_t>(program)] = shader; |
| 456 | } | 454 | } |
| 457 | 455 | ||
| 456 | const auto host_ptr{memory_manager.GetPointer(address)}; | ||
| 457 | |||
| 458 | // No shader found - create a new one | 458 | // No shader found - create a new one |
| 459 | ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; | 459 | ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; |
| 460 | ProgramCode code_b; | 460 | ProgramCode code_b; |
| @@ -465,9 +465,9 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 465 | 465 | ||
| 466 | const auto unique_identifier = GetUniqueIdentifier( | 466 | const auto unique_identifier = GetUniqueIdentifier( |
| 467 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); | 467 | GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); |
| 468 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | 468 | |
| 469 | const ShaderParameters params{system, disk_cache, device, | 469 | const ShaderParameters params{system, disk_cache, device, |
| 470 | cpu_addr, host_ptr, unique_identifier}; | 470 | *cpu_addr, host_ptr, unique_identifier}; |
| 471 | 471 | ||
| 472 | const auto found = runtime_cache.find(unique_identifier); | 472 | const auto found = runtime_cache.find(unique_identifier); |
| 473 | if (found == runtime_cache.end()) { | 473 | if (found == runtime_cache.end()) { |
| @@ -484,18 +484,20 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
| 484 | 484 | ||
| 485 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | 485 | Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { |
| 486 | auto& memory_manager{system.GPU().MemoryManager()}; | 486 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 487 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | 487 | const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; |
| 488 | auto kernel = TryGet(host_ptr); | 488 | |
| 489 | auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||
| 489 | if (kernel) { | 490 | if (kernel) { |
| 490 | return kernel; | 491 | return kernel; |
| 491 | } | 492 | } |
| 492 | 493 | ||
| 494 | const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||
| 493 | // No kernel found, create a new one | 495 | // No kernel found, create a new one |
| 494 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | 496 | auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |
| 495 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; | 497 | const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; |
| 496 | const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | 498 | |
| 497 | const ShaderParameters params{system, disk_cache, device, | 499 | const ShaderParameters params{system, disk_cache, device, |
| 498 | cpu_addr, host_ptr, unique_identifier}; | 500 | *cpu_addr, host_ptr, unique_identifier}; |
| 499 | 501 | ||
| 500 | const auto found = runtime_cache.find(unique_identifier); | 502 | const auto found = runtime_cache.find(unique_identifier); |
| 501 | if (found == runtime_cache.end()) { | 503 | if (found == runtime_cache.end()) { |
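Both shader lookups above now follow the same order: translate the GPU address once, probe the cache by guest CPU address, and resolve a host pointer only on a miss, when the program code actually has to be read. A self-contained sketch of that pattern; GpuToCpuAddress and GetPointer are stand-in declarations for the memory-manager members used in the diff:

    #include <cstdint>
    #include <memory>
    #include <optional>
    #include <unordered_map>

    using u8 = std::uint8_t;
    using VAddr = std::uint64_t;
    using GPUVAddr = std::uint64_t;

    struct CachedShader {};
    using Shader = std::shared_ptr<CachedShader>;

    std::unordered_map<VAddr, Shader> shader_cache;
    std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr); // stand-in declarations
    const u8* GetPointer(GPUVAddr addr);

    Shader GetShader(GPUVAddr address) {
        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(address);
        if (cpu_addr) {
            const auto it = shader_cache.find(*cpu_addr);
            if (it != shader_cache.end()) {
                return it->second; // hit: keyed purely on the guest CPU address
            }
        }
        // Miss: the host pointer is needed only now, to read the program code.
        const u8* host_ptr = GetPointer(address);
        static_cast<void>(host_ptr); // ... decode code at host_ptr, build the shader ...
        auto shader = std::make_shared<CachedShader>();
        if (cpu_addr) {
            shader_cache.emplace(*cpu_addr, shader);
        }
        return shader;
    }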
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 4935019fc..c836df5bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h | |||
| @@ -65,11 +65,6 @@ public: | |||
| 65 | /// Gets the GL program handle for the shader | 65 | /// Gets the GL program handle for the shader |
| 66 | GLuint GetHandle() const; | 66 | GLuint GetHandle() const; |
| 67 | 67 | ||
| 68 | /// Returns the guest CPU address of the shader | ||
| 69 | VAddr GetCpuAddr() const override { | ||
| 70 | return cpu_addr; | ||
| 71 | } | ||
| 72 | |||
| 73 | /// Returns the size in bytes of the shader | 68 | /// Returns the size in bytes of the shader |
| 74 | std::size_t GetSizeInBytes() const override { | 69 | std::size_t GetSizeInBytes() const override { |
| 75 | return size_in_bytes; | 70 | return size_in_bytes; |
| @@ -90,13 +85,12 @@ public: | |||
| 90 | std::size_t size_in_bytes); | 85 | std::size_t size_in_bytes); |
| 91 | 86 | ||
| 92 | private: | 87 | private: |
| 93 | explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, | 88 | explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes, |
| 94 | std::shared_ptr<VideoCommon::Shader::Registry> registry, | 89 | std::shared_ptr<VideoCommon::Shader::Registry> registry, |
| 95 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); | 90 | ShaderEntries entries, std::shared_ptr<OGLProgram> program); |
| 96 | 91 | ||
| 97 | std::shared_ptr<VideoCommon::Shader::Registry> registry; | 92 | std::shared_ptr<VideoCommon::Shader::Registry> registry; |
| 98 | ShaderEntries entries; | 93 | ShaderEntries entries; |
| 99 | VAddr cpu_addr = 0; | ||
| 100 | std::size_t size_in_bytes = 0; | 94 | std::size_t size_in_bytes = 0; |
| 101 | std::shared_ptr<OGLProgram> program; | 95 | std::shared_ptr<OGLProgram> program; |
| 102 | }; | 96 | }; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1ba544943..326d74f29 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -42,8 +42,8 @@ auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { | |||
| 42 | } // Anonymous namespace | 42 | } // Anonymous namespace |
| 43 | 43 | ||
| 44 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 44 | CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, |
| 45 | CacheAddr cache_addr, std::size_t size) | 45 | VAddr cpu_addr, std::size_t size) |
| 46 | : VideoCommon::BufferBlock{cache_addr, size} { | 46 | : VideoCommon::BufferBlock{cpu_addr, size} { |
| 47 | const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), | 47 | const vk::BufferCreateInfo buffer_ci({}, static_cast<vk::DeviceSize>(size), |
| 48 | BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | | 48 | BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | |
| 49 | vk::BufferUsageFlagBits::eTransferDst, | 49 | vk::BufferUsageFlagBits::eTransferDst, |
| @@ -68,8 +68,8 @@ VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::S | |||
| 68 | 68 | ||
| 69 | VKBufferCache::~VKBufferCache() = default; | 69 | VKBufferCache::~VKBufferCache() = default; |
| 70 | 70 | ||
| 71 | Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { | 71 | Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { |
| 72 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cache_addr, size); | 72 | return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { | 75 | const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 3f38eed0c..508214618 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -30,7 +30,7 @@ class VKScheduler; | |||
| 30 | class CachedBufferBlock final : public VideoCommon::BufferBlock { | 30 | class CachedBufferBlock final : public VideoCommon::BufferBlock { |
| 31 | public: | 31 | public: |
| 32 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, | 32 | explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, |
| 33 | CacheAddr cache_addr, std::size_t size); | 33 | VAddr cpu_addr, std::size_t size); |
| 34 | ~CachedBufferBlock(); | 34 | ~CachedBufferBlock(); |
| 35 | 35 | ||
| 36 | const vk::Buffer* GetHandle() const { | 36 | const vk::Buffer* GetHandle() const { |
| @@ -55,7 +55,7 @@ public: | |||
| 55 | protected: | 55 | protected: |
| 56 | void WriteBarrier() override {} | 56 | void WriteBarrier() override {} |
| 57 | 57 | ||
| 58 | Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; | 58 | Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; |
| 59 | 59 | ||
| 60 | const vk::Buffer* ToHandle(const Buffer& buffer) override; | 60 | const vk::Buffer* ToHandle(const Buffer& buffer) override; |
| 61 | 61 | ||
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 557b9d662..c2a426aeb 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -158,11 +158,11 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, | |||
| 158 | } // Anonymous namespace | 158 | } // Anonymous namespace |
| 159 | 159 | ||
| 160 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, | 160 | CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, |
| 161 | GPUVAddr gpu_addr, VAddr cpu_addr, u8* host_ptr, | 161 | GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, |
| 162 | ProgramCode program_code, u32 main_offset) | 162 | u32 main_offset) |
| 163 | : RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr}, | 163 | : RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)}, |
| 164 | program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)}, | 164 | registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, |
| 165 | shader_ir{this->program_code, main_offset, compiler_settings, registry}, | 165 | compiler_settings, registry}, |
| 166 | entries{GenerateShaderEntries(shader_ir)} {} | 166 | entries{GenerateShaderEntries(shader_ir)} {} |
| 167 | 167 | ||
| 168 | CachedShader::~CachedShader() = default; | 168 | CachedShader::~CachedShader() = default; |
| @@ -201,19 +201,19 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { | |||
| 201 | 201 | ||
| 202 | auto& memory_manager{system.GPU().MemoryManager()}; | 202 | auto& memory_manager{system.GPU().MemoryManager()}; |
| 203 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; | 203 | const GPUVAddr program_addr{GetShaderAddress(system, program)}; |
| 204 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | 204 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 205 | auto shader = TryGet(host_ptr); | 205 | ASSERT(cpu_addr); |
| 206 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||
| 206 | if (!shader) { | 207 | if (!shader) { |
| 208 | const auto host_ptr{memory_manager.GetPointer(program_addr)}; | ||
| 209 | |||
| 207 | // No shader found - create a new one | 210 | // No shader found - create a new one |
| 208 | constexpr u32 stage_offset = 10; | 211 | constexpr u32 stage_offset = 10; |
| 209 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); | 212 | const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1); |
| 210 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); | 213 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false); |
| 211 | 214 | ||
| 212 | const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | ||
| 213 | ASSERT(cpu_addr); | ||
| 214 | |||
| 215 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, | 215 | shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr, |
| 216 | host_ptr, std::move(code), stage_offset); | 216 | std::move(code), stage_offset); |
| 217 | Register(shader); | 217 | Register(shader); |
| 218 | } | 218 | } |
| 219 | shaders[index] = std::move(shader); | 219 | shaders[index] = std::move(shader); |
| @@ -253,18 +253,19 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach | |||
| 253 | 253 | ||
| 254 | auto& memory_manager = system.GPU().MemoryManager(); | 254 | auto& memory_manager = system.GPU().MemoryManager(); |
| 255 | const auto program_addr = key.shader; | 255 | const auto program_addr = key.shader; |
| 256 | const auto host_ptr = memory_manager.GetPointer(program_addr); | ||
| 257 | 256 | ||
| 258 | auto shader = TryGet(host_ptr); | 257 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); |
| 258 | ASSERT(cpu_addr); | ||
| 259 | |||
| 260 | auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr; | ||
| 259 | if (!shader) { | 261 | if (!shader) { |
| 260 | // No shader found - create a new one | 262 | // No shader found - create a new one |
| 261 | const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); | 263 | const auto host_ptr = memory_manager.GetPointer(program_addr); |
| 262 | ASSERT(cpu_addr); | ||
| 263 | 264 | ||
| 264 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); | 265 | auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true); |
| 265 | constexpr u32 kernel_main_offset = 0; | 266 | constexpr u32 kernel_main_offset = 0; |
| 266 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, | 267 | shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute, |
| 267 | program_addr, *cpu_addr, host_ptr, std::move(code), | 268 | program_addr, *cpu_addr, std::move(code), |
| 268 | kernel_main_offset); | 269 | kernel_main_offset); |
| 269 | Register(shader); | 270 | Register(shader); |
| 270 | } | 271 | } |
| @@ -345,8 +346,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { | |||
| 345 | } | 346 | } |
| 346 | 347 | ||
| 347 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); | 348 | const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); |
| 348 | const auto host_ptr = memory_manager.GetPointer(gpu_addr); | 349 | const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); |
| 349 | const auto shader = TryGet(host_ptr); | 350 | ASSERT(cpu_addr); |
| 351 | const auto shader = TryGet(*cpu_addr); | ||
| 350 | ASSERT(shader); | 352 | ASSERT(shader); |
| 351 | 353 | ||
| 352 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 | 354 | const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 |
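DecompileShaders takes the stricter path: by the time a pipeline is being decompiled its shaders must already have been registered, so the address translation is ASSERTed and TryGet(*cpu_addr) is expected to succeed. The second ASSERT(shader) documents that a miss here is a logic error rather than a recoverable condition.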
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c4c112290..27c01732f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h | |||
| @@ -113,17 +113,13 @@ namespace Vulkan { | |||
| 113 | class CachedShader final : public RasterizerCacheObject { | 113 | class CachedShader final : public RasterizerCacheObject { |
| 114 | public: | 114 | public: |
| 115 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, | 115 | explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, |
| 116 | VAddr cpu_addr, u8* host_ptr, ProgramCode program_code, u32 main_offset); | 116 | VAddr cpu_addr, ProgramCode program_code, u32 main_offset); |
| 117 | ~CachedShader(); | 117 | ~CachedShader(); |
| 118 | 118 | ||
| 119 | GPUVAddr GetGpuAddr() const { | 119 | GPUVAddr GetGpuAddr() const { |
| 120 | return gpu_addr; | 120 | return gpu_addr; |
| 121 | } | 121 | } |
| 122 | 122 | ||
| 123 | VAddr GetCpuAddr() const override { | ||
| 124 | return cpu_addr; | ||
| 125 | } | ||
| 126 | |||
| 127 | std::size_t GetSizeInBytes() const override { | 123 | std::size_t GetSizeInBytes() const override { |
| 128 | return program_code.size() * sizeof(u64); | 124 | return program_code.size() * sizeof(u64); |
| 129 | } | 125 | } |
| @@ -149,7 +145,6 @@ private: | |||
| 149 | Tegra::Engines::ShaderType stage); | 145 | Tegra::Engines::ShaderType stage); |
| 150 | 146 | ||
| 151 | GPUVAddr gpu_addr{}; | 147 | GPUVAddr gpu_addr{}; |
| 152 | VAddr cpu_addr{}; | ||
| 153 | ProgramCode program_code; | 148 | ProgramCode program_code; |
| 154 | VideoCommon::Shader::Registry registry; | 149 | VideoCommon::Shader::Registry registry; |
| 155 | VideoCommon::Shader::ShaderIR shader_ir; | 150 | VideoCommon::Shader::ShaderIR shader_ir; |
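With the host_ptr parameter and the cpu_addr member removed, CachedShader presumably inherits the CPU address from RasterizerCacheObject; note that GetCpuAddr is no longer an override. A sketch of the assumed base-class shape after this change (the base class is not shown in this diff, so treat the exact signature as an assumption):

    // Assumed: the base class now owns the CPU address that derived cache
    // objects previously duplicated; keyed lookups go through GetCpuAddr().
    class RasterizerCacheObject {
    public:
        explicit RasterizerCacheObject(VAddr cpu_addr) : cpu_addr{cpu_addr} {}
        virtual ~RasterizerCacheObject() = default;

        VAddr GetCpuAddr() const {
            return cpu_addr;
        }
        virtual std::size_t GetSizeInBytes() const = 0;

    private:
        VAddr cpu_addr{};
    };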
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 58c69b786..0a2ea4fd4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -495,20 +495,26 @@ void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | |||
| 495 | 495 | ||
| 496 | void RasterizerVulkan::FlushAll() {} | 496 | void RasterizerVulkan::FlushAll() {} |
| 497 | 497 | ||
| 498 | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | 498 | void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) { |
| 499 | if (addr == 0 || size == 0) { | ||
| 500 | return; | ||
| 501 | } | ||
| 499 | texture_cache.FlushRegion(addr, size); | 502 | texture_cache.FlushRegion(addr, size); |
| 500 | buffer_cache.FlushRegion(addr, size); | 503 | buffer_cache.FlushRegion(addr, size); |
| 501 | query_cache.FlushRegion(addr, size); | 504 | query_cache.FlushRegion(addr, size); |
| 502 | } | 505 | } |
| 503 | 506 | ||
| 504 | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | 507 | void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) { |
| 508 | if (addr == 0 || size == 0) { | ||
| 509 | return; | ||
| 510 | } | ||
| 505 | texture_cache.InvalidateRegion(addr, size); | 511 | texture_cache.InvalidateRegion(addr, size); |
| 506 | pipeline_cache.InvalidateRegion(addr, size); | 512 | pipeline_cache.InvalidateRegion(addr, size); |
| 507 | buffer_cache.InvalidateRegion(addr, size); | 513 | buffer_cache.InvalidateRegion(addr, size); |
| 508 | query_cache.InvalidateRegion(addr, size); | 514 | query_cache.InvalidateRegion(addr, size); |
| 509 | } | 515 | } |
| 510 | 516 | ||
| 511 | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | 517 | void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) { |
| 512 | FlushRegion(addr, size); | 518 | FlushRegion(addr, size); |
| 513 | InvalidateRegion(addr, size); | 519 | InvalidateRegion(addr, size); |
| 514 | } | 520 | } |
| @@ -540,8 +546,7 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| 540 | return false; | 546 | return false; |
| 541 | } | 547 | } |
| 542 | 548 | ||
| 543 | const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; | 549 | const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)}; |
| 544 | const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; | ||
| 545 | if (!surface) { | 550 | if (!surface) { |
| 546 | return false; | 551 | return false; |
| 547 | } | 552 | } |
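The new guards make FlushRegion and InvalidateRegion tolerant of addresses that failed GPU-to-CPU translation: callers that used to pass a null host pointer now pass a VAddr of 0, and a zero-sized region is a no-op either way, so both cases are rejected before any cache is walked.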
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3185868e9..f642dde76 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h | |||
| @@ -118,9 +118,9 @@ public: | |||
| 118 | void ResetCounter(VideoCore::QueryType type) override; | 118 | void ResetCounter(VideoCore::QueryType type) override; |
| 119 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | 119 | void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; |
| 120 | void FlushAll() override; | 120 | void FlushAll() override; |
| 121 | void FlushRegion(CacheAddr addr, u64 size) override; | 121 | void FlushRegion(VAddr addr, u64 size) override; |
| 122 | void InvalidateRegion(CacheAddr addr, u64 size) override; | 122 | void InvalidateRegion(VAddr addr, u64 size) override; |
| 123 | void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | 123 | void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |
| 124 | void FlushCommands() override; | 124 | void FlushCommands() override; |
| 125 | void TickFrame() override; | 125 | void TickFrame() override; |
| 126 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | 126 | bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 6fe815135..7af0e792c 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -190,22 +190,11 @@ void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager, | |||
| 190 | MICROPROFILE_SCOPE(GPU_Load_Texture); | 190 | MICROPROFILE_SCOPE(GPU_Load_Texture); |
| 191 | auto& staging_buffer = staging_cache.GetBuffer(0); | 191 | auto& staging_buffer = staging_cache.GetBuffer(0); |
| 192 | u8* host_ptr; | 192 | u8* host_ptr; |
| 193 | is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size); | 193 | // Use an extra temporal buffer |
| 194 | 194 | auto& tmp_buffer = staging_cache.GetBuffer(1); | |
| 195 | // Handle continuouty | 195 | tmp_buffer.resize(guest_memory_size); |
| 196 | if (is_continuous) { | 196 | host_ptr = tmp_buffer.data(); |
| 197 | // Use physical memory directly | 197 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); |
| 198 | host_ptr = memory_manager.GetPointer(gpu_addr); | ||
| 199 | if (!host_ptr) { | ||
| 200 | return; | ||
| 201 | } | ||
| 202 | } else { | ||
| 203 | // Use an extra temporal buffer | ||
| 204 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 205 | tmp_buffer.resize(guest_memory_size); | ||
| 206 | host_ptr = tmp_buffer.data(); | ||
| 207 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 208 | } | ||
| 209 | 198 | ||
| 210 | if (params.is_tiled) { | 199 | if (params.is_tiled) { |
| 211 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", | 200 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}", |
| @@ -257,19 +246,10 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
| 257 | auto& staging_buffer = staging_cache.GetBuffer(0); | 246 | auto& staging_buffer = staging_cache.GetBuffer(0); |
| 258 | u8* host_ptr; | 247 | u8* host_ptr; |
| 259 | 248 | ||
| 260 | // Handle continuouty | 249 | // Use an extra temporal buffer |
| 261 | if (is_continuous) { | 250 | auto& tmp_buffer = staging_cache.GetBuffer(1); |
| 262 | // Use physical memory directly | 251 | tmp_buffer.resize(guest_memory_size); |
| 263 | host_ptr = memory_manager.GetPointer(gpu_addr); | 252 | host_ptr = tmp_buffer.data(); |
| 264 | if (!host_ptr) { | ||
| 265 | return; | ||
| 266 | } | ||
| 267 | } else { | ||
| 268 | // Use an extra temporal buffer | ||
| 269 | auto& tmp_buffer = staging_cache.GetBuffer(1); | ||
| 270 | tmp_buffer.resize(guest_memory_size); | ||
| 271 | host_ptr = tmp_buffer.data(); | ||
| 272 | } | ||
| 273 | 253 | ||
| 274 | if (params.is_tiled) { | 254 | if (params.is_tiled) { |
| 275 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); | 255 | ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); |
| @@ -300,9 +280,7 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
| 300 | } | 280 | } |
| 301 | } | 281 | } |
| 302 | } | 282 | } |
| 303 | if (!is_continuous) { | 283 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); |
| 304 | memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | ||
| 305 | } | ||
| 306 | } | 284 | } |
| 307 | 285 | ||
| 308 | } // namespace VideoCommon | 286 | } // namespace VideoCommon |
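LoadBuffer and FlushBuffer drop the is_continuous fast path: instead of handing out a direct host pointer for contiguous guest regions, every surface now goes through a temporary staging buffer filled with ReadBlockUnsafe and written back with WriteBlockUnsafe. That costs one extra copy for regions that happen to be contiguous, but it removes the dependency on stable host pointers and makes the code path uniform regardless of how the guest memory is laid out.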
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index d7882a031..a39a8661b 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -68,8 +68,8 @@ public: | |||
| 68 | return gpu_addr; | 68 | return gpu_addr; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | bool Overlaps(const CacheAddr start, const CacheAddr end) const { | 71 | bool Overlaps(const VAddr start, const VAddr end) const { |
| 72 | return (cache_addr < end) && (cache_addr_end > start); | 72 | return (cpu_addr < end) && (cpu_addr_end > start); |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { | 75 | bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { |
| @@ -86,21 +86,13 @@ public: | |||
| 86 | return cpu_addr; | 86 | return cpu_addr; |
| 87 | } | 87 | } |
| 88 | 88 | ||
| 89 | void SetCpuAddr(const VAddr new_addr) { | 89 | VAddr GetCpuAddrEnd() const { |
| 90 | cpu_addr = new_addr; | 90 | return cpu_addr_end; |
| 91 | } | ||
| 92 | |||
| 93 | CacheAddr GetCacheAddr() const { | ||
| 94 | return cache_addr; | ||
| 95 | } | ||
| 96 | |||
| 97 | CacheAddr GetCacheAddrEnd() const { | ||
| 98 | return cache_addr_end; | ||
| 99 | } | 91 | } |
| 100 | 92 | ||
| 101 | void SetCacheAddr(const CacheAddr new_addr) { | 93 | void SetCpuAddr(const VAddr new_addr) { |
| 102 | cache_addr = new_addr; | 94 | cpu_addr = new_addr; |
| 103 | cache_addr_end = new_addr + guest_memory_size; | 95 | cpu_addr_end = new_addr + guest_memory_size; |
| 104 | } | 96 | } |
| 105 | 97 | ||
| 106 | const SurfaceParams& GetSurfaceParams() const { | 98 | const SurfaceParams& GetSurfaceParams() const { |
| @@ -119,14 +111,6 @@ public: | |||
| 119 | return mipmap_sizes[level]; | 111 | return mipmap_sizes[level]; |
| 120 | } | 112 | } |
| 121 | 113 | ||
| 122 | void MarkAsContinuous(const bool is_continuous) { | ||
| 123 | this->is_continuous = is_continuous; | ||
| 124 | } | ||
| 125 | |||
| 126 | bool IsContinuous() const { | ||
| 127 | return is_continuous; | ||
| 128 | } | ||
| 129 | |||
| 130 | bool IsLinear() const { | 114 | bool IsLinear() const { |
| 131 | return !params.is_tiled; | 115 | return !params.is_tiled; |
| 132 | } | 116 | } |
| @@ -175,10 +159,8 @@ protected: | |||
| 175 | std::size_t guest_memory_size; | 159 | std::size_t guest_memory_size; |
| 176 | std::size_t host_memory_size; | 160 | std::size_t host_memory_size; |
| 177 | GPUVAddr gpu_addr{}; | 161 | GPUVAddr gpu_addr{}; |
| 178 | CacheAddr cache_addr{}; | ||
| 179 | CacheAddr cache_addr_end{}; | ||
| 180 | VAddr cpu_addr{}; | 162 | VAddr cpu_addr{}; |
| 181 | bool is_continuous{}; | 163 | VAddr cpu_addr_end{}; |
| 182 | bool is_converted{}; | 164 | bool is_converted{}; |
| 183 | 165 | ||
| 184 | std::vector<std::size_t> mipmap_sizes; | 166 | std::vector<std::size_t> mipmap_sizes; |
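Overlaps is now a standard half-open interval intersection on CPU addresses, with SetCpuAddr maintaining the invariant cpu_addr_end = cpu_addr + guest_memory_size. A standalone sketch of the test (Range is illustrative, not a type in this diff):

    // Two half-open ranges [begin, end) intersect exactly when each one
    // starts before the other one ends.
    struct Range {
        VAddr begin;
        VAddr end; // one past the last byte, like cpu_addr_end above
        bool Overlaps(VAddr start, VAddr stop) const {
            return begin < stop && end > start;
        }
    };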
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index c8f8d659d..88fe3e25f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -52,11 +52,9 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 52 | 52 | ||
| 53 | template <typename TSurface, typename TView> | 53 | template <typename TSurface, typename TView> |
| 54 | class TextureCache { | 54 | class TextureCache { |
| 55 | using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; | ||
| 56 | using IntervalType = typename IntervalMap::interval_type; | ||
| 57 | 55 | ||
| 58 | public: | 56 | public: |
| 59 | void InvalidateRegion(CacheAddr addr, std::size_t size) { | 57 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| 60 | std::lock_guard lock{mutex}; | 58 | std::lock_guard lock{mutex}; |
| 61 | 59 | ||
| 62 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { | 60 | for (const auto& surface : GetSurfacesInRegion(addr, size)) { |
| @@ -76,7 +74,7 @@ public: | |||
| 76 | guard_samplers = new_guard; | 74 | guard_samplers = new_guard; |
| 77 | } | 75 | } |
| 78 | 76 | ||
| 79 | void FlushRegion(CacheAddr addr, std::size_t size) { | 77 | void FlushRegion(VAddr addr, std::size_t size) { |
| 80 | std::lock_guard lock{mutex}; | 78 | std::lock_guard lock{mutex}; |
| 81 | 79 | ||
| 82 | auto surfaces = GetSurfacesInRegion(addr, size); | 80 | auto surfaces = GetSurfacesInRegion(addr, size); |
| @@ -99,9 +97,9 @@ public: | |||
| 99 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 97 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 100 | } | 98 | } |
| 101 | 99 | ||
| 102 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 100 | const std::optional<VAddr> cpu_addr = |
| 103 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 101 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 104 | if (!cache_addr) { | 102 | if (!cpu_addr) { |
| 105 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 103 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 106 | } | 104 | } |
| 107 | 105 | ||
| @@ -110,7 +108,7 @@ public: | |||
| 110 | } | 108 | } |
| 111 | 109 | ||
| 112 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; | 110 | const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)}; |
| 113 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 111 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); |
| 114 | if (guard_samplers) { | 112 | if (guard_samplers) { |
| 115 | sampled_textures.push_back(surface); | 113 | sampled_textures.push_back(surface); |
| 116 | } | 114 | } |
| @@ -124,13 +122,13 @@ public: | |||
| 124 | if (!gpu_addr) { | 122 | if (!gpu_addr) { |
| 125 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 123 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 126 | } | 124 | } |
| 127 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 125 | const std::optional<VAddr> cpu_addr = |
| 128 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 126 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 129 | if (!cache_addr) { | 127 | if (!cpu_addr) { |
| 130 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); | 128 | return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); |
| 131 | } | 129 | } |
| 132 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; | 130 | const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)}; |
| 133 | const auto [surface, view] = GetSurface(gpu_addr, cache_addr, params, true, false); | 131 | const auto [surface, view] = GetSurface(gpu_addr, *cpu_addr, params, true, false); |
| 134 | if (guard_samplers) { | 132 | if (guard_samplers) { |
| 135 | sampled_textures.push_back(surface); | 133 | sampled_textures.push_back(surface); |
| 136 | } | 134 | } |
| @@ -159,14 +157,14 @@ public: | |||
| 159 | SetEmptyDepthBuffer(); | 157 | SetEmptyDepthBuffer(); |
| 160 | return {}; | 158 | return {}; |
| 161 | } | 159 | } |
| 162 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 160 | const std::optional<VAddr> cpu_addr = |
| 163 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 161 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 164 | if (!cache_addr) { | 162 | if (!cpu_addr) { |
| 165 | SetEmptyDepthBuffer(); | 163 | SetEmptyDepthBuffer(); |
| 166 | return {}; | 164 | return {}; |
| 167 | } | 165 | } |
| 168 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; | 166 | const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; |
| 169 | auto surface_view = GetSurface(gpu_addr, cache_addr, depth_params, preserve_contents, true); | 167 | auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); |
| 170 | if (depth_buffer.target) | 168 | if (depth_buffer.target) |
| 171 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | 169 | depth_buffer.target->MarkAsRenderTarget(false, NO_RT); |
| 172 | depth_buffer.target = surface_view.first; | 170 | depth_buffer.target = surface_view.first; |
| @@ -199,15 +197,15 @@ public: | |||
| 199 | return {}; | 197 | return {}; |
| 200 | } | 198 | } |
| 201 | 199 | ||
| 202 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 200 | const std::optional<VAddr> cpu_addr = |
| 203 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 201 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 204 | if (!cache_addr) { | 202 | if (!cpu_addr) { |
| 205 | SetEmptyColorBuffer(index); | 203 | SetEmptyColorBuffer(index); |
| 206 | return {}; | 204 | return {}; |
| 207 | } | 205 | } |
| 208 | 206 | ||
| 209 | auto surface_view = | 207 | auto surface_view = |
| 210 | GetSurface(gpu_addr, cache_addr, SurfaceParams::CreateForFramebuffer(system, index), | 208 | GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |
| 211 | preserve_contents, true); | 209 | preserve_contents, true); |
| 212 | if (render_targets[index].target) | 210 | if (render_targets[index].target) |
| 213 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | 211 | render_targets[index].target->MarkAsRenderTarget(false, NO_RT); |
| @@ -257,27 +255,26 @@ public: | |||
| 257 | const GPUVAddr src_gpu_addr = src_config.Address(); | 255 | const GPUVAddr src_gpu_addr = src_config.Address(); |
| 258 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | 256 | const GPUVAddr dst_gpu_addr = dst_config.Address(); |
| 259 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | 257 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); |
| 260 | const auto dst_host_ptr{system.GPU().MemoryManager().GetPointer(dst_gpu_addr)}; | 258 | const std::optional<VAddr> dst_cpu_addr = |
| 261 | const auto dst_cache_addr{ToCacheAddr(dst_host_ptr)}; | 259 | system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); |
| 262 | const auto src_host_ptr{system.GPU().MemoryManager().GetPointer(src_gpu_addr)}; | 260 | const std::optional<VAddr> src_cpu_addr = |
| 263 | const auto src_cache_addr{ToCacheAddr(src_host_ptr)}; | 261 | system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); |
| 264 | std::pair<TSurface, TView> dst_surface = | 262 | std::pair<TSurface, TView> dst_surface = |
| 265 | GetSurface(dst_gpu_addr, dst_cache_addr, dst_params, true, false); | 263 | GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); |
| 266 | std::pair<TSurface, TView> src_surface = | 264 | std::pair<TSurface, TView> src_surface = |
| 267 | GetSurface(src_gpu_addr, src_cache_addr, src_params, true, false); | 265 | GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); |
| 268 | ImageBlit(src_surface.second, dst_surface.second, copy_config); | 266 | ImageBlit(src_surface.second, dst_surface.second, copy_config); |
| 269 | dst_surface.first->MarkAsModified(true, Tick()); | 267 | dst_surface.first->MarkAsModified(true, Tick()); |
| 270 | } | 268 | } |
| 271 | 269 | ||
| 272 | TSurface TryFindFramebufferSurface(const u8* host_ptr) { | 270 | TSurface TryFindFramebufferSurface(VAddr addr) { |
| 273 | const CacheAddr cache_addr = ToCacheAddr(host_ptr); | 271 | if (!addr) { |
| 274 | if (!cache_addr) { | ||
| 275 | return nullptr; | 272 | return nullptr; |
| 276 | } | 273 | } |
| 277 | const CacheAddr page = cache_addr >> registry_page_bits; | 274 | const VAddr page = addr >> registry_page_bits; |
| 278 | std::vector<TSurface>& list = registry[page]; | 275 | std::vector<TSurface>& list = registry[page]; |
| 279 | for (auto& surface : list) { | 276 | for (auto& surface : list) { |
| 280 | if (surface->GetCacheAddr() == cache_addr) { | 277 | if (surface->GetCpuAddr() == addr) { |
| 281 | return surface; | 278 | return surface; |
| 282 | } | 279 | } |
| 283 | } | 280 | } |
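TryFindFramebufferSurface now hashes the framebuffer's CPU address into the same bucket registry used by the rest of the cache. With registry_page_bits = 20 (declared further down), each bucket spans 1 MiB of CPU address space, so the lookup only scans surfaces whose ranges touch that window. A worked sketch of the bucketing (PageOf is a hypothetical helper; the real code shifts inline):

    constexpr u64 registry_page_bits = 20; // matches the cache constant below
    constexpr VAddr PageOf(VAddr addr) {
        return addr >> registry_page_bits;
    }
    static_assert(PageOf(0x0FFFFF) == 0); // last byte of bucket 0
    static_assert(PageOf(0x100000) == 1); // the 1 MiB boundary opens bucket 1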
| @@ -338,18 +335,14 @@ protected: | |||
| 338 | 335 | ||
| 339 | void Register(TSurface surface) { | 336 | void Register(TSurface surface) { |
| 340 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); | 337 | const GPUVAddr gpu_addr = surface->GetGpuAddr(); |
| 341 | const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | ||
| 342 | const std::size_t size = surface->GetSizeInBytes(); | 338 | const std::size_t size = surface->GetSizeInBytes(); |
| 343 | const std::optional<VAddr> cpu_addr = | 339 | const std::optional<VAddr> cpu_addr = |
| 344 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); | 340 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 345 | if (!cache_ptr || !cpu_addr) { | 341 | if (!cpu_addr) { |
| 346 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", | 342 | LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", |
| 347 | gpu_addr); | 343 | gpu_addr); |
| 348 | return; | 344 | return; |
| 349 | } | 345 | } |
| 350 | const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size); | ||
| 351 | surface->MarkAsContinuous(continuous); | ||
| 352 | surface->SetCacheAddr(cache_ptr); | ||
| 353 | surface->SetCpuAddr(*cpu_addr); | 346 | surface->SetCpuAddr(*cpu_addr); |
| 354 | RegisterInnerCache(surface); | 347 | RegisterInnerCache(surface); |
| 355 | surface->MarkAsRegistered(true); | 348 | surface->MarkAsRegistered(true); |
| @@ -634,7 +627,7 @@ private: | |||
| 634 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 627 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, |
| 635 | const SurfaceParams& params, | 628 | const SurfaceParams& params, |
| 636 | const GPUVAddr gpu_addr, | 629 | const GPUVAddr gpu_addr, |
| 637 | const CacheAddr cache_addr, | 630 | const VAddr cpu_addr, |
| 638 | bool preserve_contents) { | 631 | bool preserve_contents) { |
| 639 | if (params.target == SurfaceTarget::Texture3D) { | 632 | if (params.target == SurfaceTarget::Texture3D) { |
| 640 | bool failed = false; | 633 | bool failed = false; |
| @@ -659,7 +652,7 @@ private: | |||
| 659 | failed = true; | 652 | failed = true; |
| 660 | break; | 653 | break; |
| 661 | } | 654 | } |
| 662 | const u32 offset = static_cast<u32>(surface->GetCacheAddr() - cache_addr); | 655 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); |
| 663 | const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); | 656 | const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); |
| 664 | modified |= surface->IsModified(); | 657 | modified |= surface->IsModified(); |
| 665 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | 658 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, |
| @@ -679,7 +672,7 @@ private: | |||
| 679 | } else { | 672 | } else { |
| 680 | for (const auto& surface : overlaps) { | 673 | for (const auto& surface : overlaps) { |
| 681 | if (!surface->MatchTarget(params.target)) { | 674 | if (!surface->MatchTarget(params.target)) { |
| 682 | if (overlaps.size() == 1 && surface->GetCacheAddr() == cache_addr) { | 675 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| 683 | if (Settings::values.use_accurate_gpu_emulation) { | 676 | if (Settings::values.use_accurate_gpu_emulation) { |
| 684 | return std::nullopt; | 677 | return std::nullopt; |
| 685 | } | 678 | } |
| @@ -688,7 +681,7 @@ private: | |||
| 688 | } | 681 | } |
| 689 | return std::nullopt; | 682 | return std::nullopt; |
| 690 | } | 683 | } |
| 691 | if (surface->GetCacheAddr() != cache_addr) { | 684 | if (surface->GetCpuAddr() != cpu_addr) { |
| 692 | continue; | 685 | continue; |
| 693 | } | 686 | } |
| 694 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | 687 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { |
| @@ -722,13 +715,13 @@ private: | |||
| 722 | * left blank. | 715 | * left blank. |
| 723 | * @param is_render Whether or not the surface is a render target. | 716 | * @param is_render Whether or not the surface is a render target. |
| 724 | **/ | 717 | **/ |
| 725 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const CacheAddr cache_addr, | 718 | std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const VAddr cpu_addr, |
| 726 | const SurfaceParams& params, bool preserve_contents, | 719 | const SurfaceParams& params, bool preserve_contents, |
| 727 | bool is_render) { | 720 | bool is_render) { |
| 728 | // Step 1 | 721 | // Step 1 |
| 729 | // Check Level 1 Cache for a fast structural match. If candidate surface | 722 | // Check Level 1 Cache for a fast structural match. If candidate surface |
| 730 | // matches at certain level we are pretty much done. | 723 | // matches at certain level we are pretty much done. |
| 731 | if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | 724 | if (const auto iter = l1_cache.find(cpu_addr); iter != l1_cache.end()) { |
| 732 | TSurface& current_surface = iter->second; | 725 | TSurface& current_surface = iter->second; |
| 733 | const auto topological_result = current_surface->MatchesTopology(params); | 726 | const auto topological_result = current_surface->MatchesTopology(params); |
| 734 | if (topological_result != MatchTopologyResult::FullMatch) { | 727 | if (topological_result != MatchTopologyResult::FullMatch) { |
| @@ -755,7 +748,7 @@ private: | |||
| 755 | // Step 2 | 748 | // Step 2 |
| 756 | // Obtain all possible overlaps in the memory region | 749 | // Obtain all possible overlaps in the memory region |
| 757 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 750 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); |
| 758 | auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | 751 | auto overlaps{GetSurfacesInRegion(cpu_addr, candidate_size)}; |
| 759 | 752 | ||
| 760 | // If none are found, we are done. we just load the surface and create it. | 753 | // If none are found, we are done. we just load the surface and create it. |
| 761 | if (overlaps.empty()) { | 754 | if (overlaps.empty()) { |
| @@ -777,7 +770,7 @@ private: | |||
| 777 | // Check if it's a 3D texture | 770 | // Check if it's a 3D texture |
| 778 | if (params.block_depth > 0) { | 771 | if (params.block_depth > 0) { |
| 779 | auto surface = | 772 | auto surface = |
| 780 | Manage3DSurfaces(overlaps, params, gpu_addr, cache_addr, preserve_contents); | 773 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); |
| 781 | if (surface) { | 774 | if (surface) { |
| 782 | return *surface; | 775 | return *surface; |
| 783 | } | 776 | } |
| @@ -852,16 +845,16 @@ private: | |||
| 852 | * @param params The parameters on the candidate surface. | 845 | * @param params The parameters on the candidate surface. |
| 853 | **/ | 846 | **/ |
| 854 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { | 847 | Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { |
| 855 | const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)}; | 848 | const std::optional<VAddr> cpu_addr = |
| 856 | const auto cache_addr{ToCacheAddr(host_ptr)}; | 849 | system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); |
| 857 | 850 | ||
| 858 | if (!cache_addr) { | 851 | if (!cpu_addr) { |
| 859 | Deduction result{}; | 852 | Deduction result{}; |
| 860 | result.type = DeductionType::DeductionFailed; | 853 | result.type = DeductionType::DeductionFailed; |
| 861 | return result; | 854 | return result; |
| 862 | } | 855 | } |
| 863 | 856 | ||
| 864 | if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) { | 857 | if (const auto iter = l1_cache.find(*cpu_addr); iter != l1_cache.end()) { |
| 865 | TSurface& current_surface = iter->second; | 858 | TSurface& current_surface = iter->second; |
| 866 | const auto topological_result = current_surface->MatchesTopology(params); | 859 | const auto topological_result = current_surface->MatchesTopology(params); |
| 867 | if (topological_result != MatchTopologyResult::FullMatch) { | 860 | if (topological_result != MatchTopologyResult::FullMatch) { |
| @@ -880,7 +873,7 @@ private: | |||
| 880 | } | 873 | } |
| 881 | 874 | ||
| 882 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); | 875 | const std::size_t candidate_size = params.GetGuestSizeInBytes(); |
| 883 | auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)}; | 876 | auto overlaps{GetSurfacesInRegion(*cpu_addr, candidate_size)}; |
| 884 | 877 | ||
| 885 | if (overlaps.empty()) { | 878 | if (overlaps.empty()) { |
| 886 | Deduction result{}; | 879 | Deduction result{}; |
| @@ -1024,10 +1017,10 @@ private: | |||
| 1024 | } | 1017 | } |
| 1025 | 1018 | ||
| 1026 | void RegisterInnerCache(TSurface& surface) { | 1019 | void RegisterInnerCache(TSurface& surface) { |
| 1027 | const CacheAddr cache_addr = surface->GetCacheAddr(); | 1020 | const VAddr cpu_addr = surface->GetCpuAddr(); |
| 1028 | CacheAddr start = cache_addr >> registry_page_bits; | 1021 | VAddr start = cpu_addr >> registry_page_bits; |
| 1029 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | 1022 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; |
| 1030 | l1_cache[cache_addr] = surface; | 1023 | l1_cache[cpu_addr] = surface; |
| 1031 | while (start <= end) { | 1024 | while (start <= end) { |
| 1032 | registry[start].push_back(surface); | 1025 | registry[start].push_back(surface); |
| 1033 | start++; | 1026 | start++; |
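RegisterInnerCache walks every 1 MiB bucket between the surface's first and last byte, so a surface that straddles a bucket boundary is reachable from all buckets it overlaps; the exact start address additionally keys the l1_cache map for the fast-path lookup in GetSurface.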
| @@ -1035,10 +1028,10 @@ private: | |||
| 1035 | } | 1028 | } |
| 1036 | 1029 | ||
| 1037 | void UnregisterInnerCache(TSurface& surface) { | 1030 | void UnregisterInnerCache(TSurface& surface) { |
| 1038 | const CacheAddr cache_addr = surface->GetCacheAddr(); | 1031 | const VAddr cpu_addr = surface->GetCpuAddr(); |
| 1039 | CacheAddr start = cache_addr >> registry_page_bits; | 1032 | VAddr start = cpu_addr >> registry_page_bits; |
| 1040 | const CacheAddr end = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits; | 1033 | const VAddr end = (surface->GetCpuAddrEnd() - 1) >> registry_page_bits; |
| 1041 | l1_cache.erase(cache_addr); | 1034 | l1_cache.erase(cpu_addr); |
| 1042 | while (start <= end) { | 1035 | while (start <= end) { |
| 1043 | auto& reg{registry[start]}; | 1036 | auto& reg{registry[start]}; |
| 1044 | reg.erase(std::find(reg.begin(), reg.end(), surface)); | 1037 | reg.erase(std::find(reg.begin(), reg.end(), surface)); |
| @@ -1046,18 +1039,18 @@ private: | |||
| 1046 | } | 1039 | } |
| 1047 | } | 1040 | } |
| 1048 | 1041 | ||
| 1049 | std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) { | 1042 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1050 | if (size == 0) { | 1043 | if (size == 0) { |
| 1051 | return {}; | 1044 | return {}; |
| 1052 | } | 1045 | } |
| 1053 | const CacheAddr cache_addr_end = cache_addr + size; | 1046 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1054 | CacheAddr start = cache_addr >> registry_page_bits; | 1047 | VAddr start = cpu_addr >> registry_page_bits; |
| 1055 | const CacheAddr end = (cache_addr_end - 1) >> registry_page_bits; | 1048 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1056 | std::vector<TSurface> surfaces; | 1049 | std::vector<TSurface> surfaces; |
| 1057 | while (start <= end) { | 1050 | while (start <= end) { |
| 1058 | std::vector<TSurface>& list = registry[start]; | 1051 | std::vector<TSurface>& list = registry[start]; |
| 1059 | for (auto& surface : list) { | 1052 | for (auto& surface : list) { |
| 1060 | if (!surface->IsPicked() && surface->Overlaps(cache_addr, cache_addr_end)) { | 1053 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { |
| 1061 | surface->MarkAsPicked(true); | 1054 | surface->MarkAsPicked(true); |
| 1062 | surfaces.push_back(surface); | 1055 | surfaces.push_back(surface); |
| 1063 | } | 1056 | } |
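Because a surface can be registered in several buckets, GetSurfacesInRegion uses the picked flag to deduplicate: the first bucket that reaches a surface marks it picked and collects it, and later buckets skip it. The flag is presumably cleared on each collected surface before the list is returned; that tail of the function falls outside this hunk.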
| @@ -1146,14 +1139,14 @@ private: | |||
| 1146 | // large in size. | 1139 | // large in size. |
| 1147 | static constexpr u64 registry_page_bits{20}; | 1140 | static constexpr u64 registry_page_bits{20}; |
| 1148 | static constexpr u64 registry_page_size{1 << registry_page_bits}; | 1141 | static constexpr u64 registry_page_size{1 << registry_page_bits}; |
| 1149 | std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | 1142 | std::unordered_map<VAddr, std::vector<TSurface>> registry; |
| 1150 | 1143 | ||
| 1151 | static constexpr u32 DEPTH_RT = 8; | 1144 | static constexpr u32 DEPTH_RT = 8; |
| 1152 | static constexpr u32 NO_RT = 0xFFFFFFFF; | 1145 | static constexpr u32 NO_RT = 0xFFFFFFFF; |
| 1153 | 1146 | ||
| 1154 | // The L1 Cache is used for fast texture lookup before checking the overlaps | 1147 | // The L1 Cache is used for fast texture lookup before checking the overlaps |
| 1155 | // This avoids calculating size and other stuffs. | 1148 | // This avoids calculating size and other stuffs. |
| 1156 | std::unordered_map<CacheAddr, TSurface> l1_cache; | 1149 | std::unordered_map<VAddr, TSurface> l1_cache; |
| 1157 | 1150 | ||
| 1158 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have | 1151 | /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have |
| 1159 | /// previously been used. This is to prevent surfaces from being constantly created and | 1152 | /// previously been used. This is to prevent surfaces from being constantly created and |