Diffstat (limited to 'src')
21 files changed, 632 insertions, 80 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 65a4922ea..f8ec8fea8 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -82,6 +82,7 @@ else()
         -Werror=missing-declarations
         -Werror=missing-field-initializers
         -Werror=reorder
+        -Werror=sign-compare
         -Werror=switch
         -Werror=uninitialized
         -Werror=unused-function
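For reference, a minimal illustration of the class of bug the newly promoted -Werror=sign-compare turns into a build error (an assumed example, not code from this change):

    // sign_compare_example.cpp -- illustrative only; not part of this change.
    // Comparing a signed index against an unsigned size() is flagged by
    // -Wsign-compare and now fails the build under -Werror=sign-compare,
    // instead of silently converting `i` to an unsigned type.
    #include <vector>

    int FirstNegative(const std::vector<int>& values) {
        for (int i = 0; i < values.size(); ++i) { // error: signed/unsigned comparison
            if (values[i] < 0) {
                return i;
            }
        }
        return -1;
    }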
diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index d25a1a645..090dd19b1 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -51,9 +51,6 @@ if (NOT MSVC)
     target_compile_options(audio_core PRIVATE
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=reorder
-        -Werror=sign-compare
         -Werror=shadow
         -Werror=unused-parameter
         -Werror=unused-variable
diff --git a/src/common/fs/file.cpp b/src/common/fs/file.cpp
index 077f34995..274f57659 100644
--- a/src/common/fs/file.cpp
+++ b/src/common/fs/file.cpp
@@ -306,9 +306,9 @@ bool IOFile::Flush() const {
     errno = 0;
 
 #ifdef _WIN32
-    const auto flush_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+    const auto flush_result = std::fflush(file) == 0;
 #else
-    const auto flush_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+    const auto flush_result = std::fflush(file) == 0;
 #endif
 
     if (!flush_result) {
@@ -320,6 +320,28 @@ bool IOFile::Flush() const {
     return flush_result;
 }
 
+bool IOFile::Commit() const {
+    if (!IsOpen()) {
+        return false;
+    }
+
+    errno = 0;
+
+#ifdef _WIN32
+    const auto commit_result = std::fflush(file) == 0 && _commit(fileno(file)) == 0;
+#else
+    const auto commit_result = std::fflush(file) == 0 && fsync(fileno(file)) == 0;
+#endif
+
+    if (!commit_result) {
+        const auto ec = std::error_code{errno, std::generic_category()};
+        LOG_ERROR(Common_Filesystem, "Failed to commit the file at path={}, ec_message={}",
+                  PathToUTF8String(file_path), ec.message());
+    }
+
+    return commit_result;
+}
+
 bool IOFile::SetSize(u64 size) const {
     if (!IsOpen()) {
         return false;
@@ -347,6 +369,9 @@ u64 IOFile::GetSize() const {
         return 0;
     }
 
+    // Flush any unwritten buffered data into the file prior to retrieving the file size.
+    std::fflush(file);
+
     std::error_code ec;
 
     const auto file_size = fs::file_size(file_path, ec);
diff --git a/src/common/fs/file.h b/src/common/fs/file.h
index 588fe619d..2c4ab4332 100644
--- a/src/common/fs/file.h
+++ b/src/common/fs/file.h
@@ -396,13 +396,22 @@ public:
     [[nodiscard]] size_t WriteString(std::span<const char> string) const;
 
     /**
-     * Attempts to flush any unwritten buffered data into the file and flush the file into the disk.
+     * Attempts to flush any unwritten buffered data into the file.
      *
      * @returns True if the flush was successful, false otherwise.
      */
     bool Flush() const;
 
     /**
+     * Attempts to commit the file into the disk.
+     * Note that this is an expensive operation as this forces the operating system to write
+     * the contents of the file associated with the file descriptor into the disk.
+     *
+     * @returns True if the commit was successful, false otherwise.
+     */
+    bool Commit() const;
+
+    /**
      * Resizes the file to a given size.
     * If the file is resized to a smaller size, the remainder of the file is discarded.
     * If the file is resized to a larger size, the new area appears as if zero-filled.
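A sketch of the layering the new Flush()/Commit() split documents (a stdio-level restatement for illustration, not the IOFile code itself): fflush only drains the user-space buffer into the operating system, while _commit/fsync additionally forces the OS to write the file out to the device.

    // Illustrative sketch of the two durability levels wrapped by IOFile.
    #include <cstdio>
    #ifdef _WIN32
    #include <io.h> // _commit, _fileno
    #else
    #include <unistd.h> // fsync, fileno
    #endif

    // Cheap: moves stdio-buffered bytes into the operating system.
    bool FlushOnly(std::FILE* file) {
        return std::fflush(file) == 0;
    }

    // Expensive: additionally forces the OS to write the file to the device.
    bool CommitToDisk(std::FILE* file) {
    #ifdef _WIN32
        return std::fflush(file) == 0 && _commit(_fileno(file)) == 0;
    #else
        return std::fflush(file) == 0 && fsync(fileno(file)) == 0;
    #endif
    }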
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index b6fa4affb..61dddab3f 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -171,19 +171,22 @@ FileBackend::FileBackend(const std::filesystem::path& filename) {
 FileBackend::~FileBackend() = default;
 
 void FileBackend::Write(const Entry& entry) {
+    if (!file->IsOpen()) {
+        return;
+    }
+
     using namespace Common::Literals;
-    // prevent logs from going over the maximum size (in case its spamming and the user doesn't
-    // know)
+    // Prevent logs from exceeding a set maximum size in the event that log entries are spammed.
     constexpr std::size_t MAX_BYTES_WRITTEN = 100_MiB;
     constexpr std::size_t MAX_BYTES_WRITTEN_EXTENDED = 1_GiB;
 
-    if (!file->IsOpen()) {
-        return;
-    }
+    const bool write_limit_exceeded =
+        bytes_written > MAX_BYTES_WRITTEN_EXTENDED ||
+        (bytes_written > MAX_BYTES_WRITTEN && !Settings::values.extended_logging);
 
-    if (Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN_EXTENDED) {
-        return;
-    } else if (!Settings::values.extended_logging && bytes_written > MAX_BYTES_WRITTEN) {
-        return;
-    }
+    // Close the file after the write limit is exceeded.
+    if (write_limit_exceeded) {
+        file->Close();
+        return;
+    }
 
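The folded predicate is equivalent to the two early-return branches it replaces; the behavioral change is that the log file is now closed once the cap trips instead of being left open while writes are silently skipped. A standalone restatement with the equivalence spelled out (illustrative, assumed constants):

    #include <cassert>
    #include <cstddef>

    // Mirrors the new write_limit_exceeded expression: the 1 GiB hard cap
    // always applies; the 100 MiB cap applies only without extended logging.
    constexpr bool WriteLimitExceeded(std::size_t bytes_written, bool extended_logging) {
        constexpr std::size_t max_bytes = 100ULL << 20;        // 100 MiB
        constexpr std::size_t max_bytes_extended = 1ULL << 30; // 1 GiB
        return bytes_written > max_bytes_extended ||
               (bytes_written > max_bytes && !extended_logging);
    }

    int main() {
        assert(!WriteLimitExceeded(50ULL << 20, false)); // under both caps
        assert(WriteLimitExceeded(200ULL << 20, false)); // over 100 MiB, not extended
        assert(!WriteLimitExceeded(200ULL << 20, true)); // extended logging allows it
        assert(WriteLimitExceeded(2ULL << 30, true));    // over the hard 1 GiB cap
    }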
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 19b970981..b2b0dbe05 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -667,8 +667,6 @@ else()
     target_compile_options(core PRIVATE
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=sign-compare
         -Werror=shadow
 
         $<$<CXX_COMPILER_ID:GNU>:-Werror=class-memaccess>
diff --git a/src/input_common/CMakeLists.txt b/src/input_common/CMakeLists.txt
index c3423c815..c4283a952 100644
--- a/src/input_common/CMakeLists.txt
+++ b/src/input_common/CMakeLists.txt
@@ -44,10 +44,7 @@ else()
         -Werror
         -Werror=conversion
         -Werror=ignored-qualifiers
-        -Werror=implicit-fallthrough
-        -Werror=reorder
         -Werror=shadow
-        -Werror=sign-compare
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-parameter>
         $<$<CXX_COMPILER_ID:GNU>:-Werror=unused-but-set-variable>
         -Werror=unused-variable
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7124c755c..d2b9d5f2b 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     } else {
         UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
     }
-    // Flush and invalidate through the GPU interface, to be asynchronous if possible.
-    const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
-    ASSERT(cpu_addr);
 
-    rasterizer->UnmapMemory(*cpu_addr, size);
+    const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
+
+    for (const auto& map : submapped_ranges) {
+        // Flush and invalidate through the GPU interface, to be asynchronous if possible.
+        const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
+        ASSERT(cpu_addr);
+
+        rasterizer->UnmapMemory(*cpu_addr, map.second);
+    }
 
     UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
 }
@@ -127,8 +132,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
 
     //// Lock the new page
     // TryLockPage(page_entry, size);
+    auto& current_page = page_table[PageEntryIndex(gpu_addr)];
 
-    page_table[PageEntryIndex(gpu_addr)] = page_entry;
+    if ((!current_page.IsValid() && page_entry.IsValid()) ||
+        current_page.ToAddress() != page_entry.ToAddress()) {
+        rasterizer->ModifyGPUMemory(gpu_addr, size);
+    }
+
+    current_page = page_entry;
 }
 
 std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
@@ -174,6 +185,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
     return page_entry.ToAddress() + (gpu_addr & page_mask);
 }
 
+std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
+    size_t page_index{addr >> page_bits};
+    const size_t page_last{(addr + size + page_size - 1) >> page_bits};
+    while (page_index < page_last) {
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (page_addr && *page_addr != 0) {
+            return page_addr;
+        }
+        ++page_index;
+    }
+    return std::nullopt;
+}
+
 template <typename T>
 T MemoryManager::Read(GPUVAddr addr) const {
     if (auto page_pointer{GetPointer(addr)}; page_pointer) {
@@ -370,4 +394,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
     return page <= Core::Memory::PAGE_SIZE;
 }
 
+bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
+    size_t page_index{gpu_addr >> page_bits};
+    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+    std::optional<VAddr> old_page_addr{};
+    while (page_index != page_last) {
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr || *page_addr == 0) {
+            return false;
+        }
+        if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                return false;
+            }
+        }
+        old_page_addr = page_addr;
+        ++page_index;
+    }
+    return true;
+}
+
+bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+    size_t page_index{gpu_addr >> page_bits};
+    const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
+    while (page_index < page_last) {
+        if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
+            return false;
+        }
+        ++page_index;
+    }
+    return true;
+}
+
+std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
+    GPUVAddr gpu_addr, std::size_t size) const {
+    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+    size_t page_index{gpu_addr >> page_bits};
+    size_t remaining_size{size};
+    size_t page_offset{gpu_addr & page_mask};
+    std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
+    std::optional<VAddr> old_page_addr{};
+    const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
+        if (!last_segment) {
+            GPUVAddr new_base_addr = page_index << page_bits;
+            last_segment = {new_base_addr, bytes};
+        } else {
+            last_segment->second += bytes;
+        }
+    };
+    const auto split = [this, &last_segment, &result] {
+        if (last_segment) {
+            result.push_back(*last_segment);
+            last_segment = std::nullopt;
+        }
+    };
+    while (remaining_size > 0) {
+        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
+        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
+        if (!page_addr) {
+            split();
+        } else if (old_page_addr) {
+            if (*old_page_addr + page_size != *page_addr) {
+                split();
+            }
+            extend_size(num_bytes);
+        } else {
+            extend_size(num_bytes);
+        }
+        ++page_index;
+        page_offset = 0;
+        remaining_size -= num_bytes;
+    }
+    split();
+    return result;
+}
+
 } // namespace Tegra
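To make GetSubmappedRange's contract concrete — one (gpu_addr, size) pair per maximal run of pages that is both mapped and contiguously backed — here is a self-contained toy model (assumed 4 KiB pages and a flat page map; a sketch of the splitting idea, not the MemoryManager code):

    #include <cstdint>
    #include <cstdio>
    #include <optional>
    #include <utility>
    #include <vector>

    // Toy model: page_map[i] is the CPU address backing GPU page i, or nullopt.
    // A segment ends when a page is unmapped or when the CPU backing stops
    // being contiguous.
    using Segment = std::pair<std::uint64_t, std::size_t>; // (gpu_addr, size)

    std::vector<Segment> SubmappedRanges(
        const std::vector<std::optional<std::uint64_t>>& page_map, std::size_t page_size) {
        std::vector<Segment> result;
        std::optional<Segment> current;
        std::optional<std::uint64_t> prev_cpu;
        for (std::size_t page = 0; page < page_map.size(); ++page) {
            const auto cpu = page_map[page];
            const bool contiguous = cpu && (!prev_cpu || *prev_cpu + page_size == *cpu);
            if (cpu && contiguous && current) {
                current->second += page_size; // extend the running segment
            } else if (cpu) {
                if (current) {
                    result.push_back(*current); // split: non-contiguous backing
                }
                current = Segment{page * page_size, page_size};
            } else if (current) {
                result.push_back(*current); // split: hole in the mapping
                current.reset();
            }
            prev_cpu = cpu;
        }
        if (current) {
            result.push_back(*current);
        }
        return result;
    }

    int main() {
        constexpr std::size_t page = 0x1000;
        // Pages 0-1 contiguous, page 2 unmapped, pages 3-4 contiguous elsewhere.
        std::vector<std::optional<std::uint64_t>> map{
            0x10000, 0x11000, std::nullopt, 0x40000, 0x41000};
        for (const auto& [addr, size] : SubmappedRanges(map, page)) {
            std::printf("gpu=0x%llx size=0x%zx\n",
                        static_cast<unsigned long long>(addr), size);
        }
        // Prints two segments: gpu=0x0 size=0x2000 and gpu=0x3000 size=0x2000.
    }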
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index b3538d503..99d13e7f6 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -76,6 +76,8 @@ public:
 
     [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
 
+    [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
+
     template <typename T>
     [[nodiscard]] T Read(GPUVAddr addr) const;
 
@@ -112,10 +114,28 @@ public:
     void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
 
     /**
-     * IsGranularRange checks if a gpu region can be simply read with a pointer.
+     * Checks if a gpu region can be simply read with a pointer.
      */
     [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
 
+    /**
+     * Checks if a gpu region is mapped by a single range of cpu addresses.
+     */
+    [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Checks if a gpu region is mapped entirely.
+     */
+    [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
+
+    /**
+     * Returns a vector with all the subranges of cpu addresses mapped beneath.
+     * If the region is continuous, a single pair will be returned. If it's unmapped, an empty
+     * vector will be returned.
+     */
+    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
+                                                                    std::size_t size) const;
+
     [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
     [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
     [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 07939432f..0cec4225b 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -87,6 +87,9 @@ public:
     /// Unmap memory range
     virtual void UnmapMemory(VAddr addr, u64 size) = 0;
 
+    /// Remap GPU memory range. This means the backing memory underneath has changed
+    virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index eb8bdaa85..07ad0e205 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -611,6 +611,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
 }
 
+void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    {
+        std::scoped_lock lock{texture_cache.mutex};
+        texture_cache.UnmapGPUMemory(addr, size);
+    }
+}
+
 void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9995a563b..482efed7a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -80,6 +80,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
+    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp
index abaf1ee6a..8fb5be393 100644
--- a/src/video_core/renderer_opengl/util_shaders.cpp
+++ b/src/video_core/renderer_opengl/util_shaders.cpp
@@ -261,9 +261,9 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
         glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
         glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
         glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
-                           copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
+                           copy.src_subresource.base_level, GL_TRUE, 0, GL_READ_ONLY, GL_RG32UI);
         glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
-                           copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
+                           copy.dst_subresource.base_level, GL_TRUE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
         glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
     }
     program_manager.RestoreGuestCompute();
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 1c9120170..bd4d649cc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -557,6 +557,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
+void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+    {
+        std::scoped_lock lock{texture_cache.mutex};
+        texture_cache.UnmapGPUMemory(addr, size);
+    }
+}
+
 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
     if (!gpu.IsAsync()) {
         gpu_memory.Write<u32>(addr, value);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index cb8c5c279..41459c5c5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -72,6 +72,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
+    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void ReleaseFences() override;
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index f22358c90..6052d148a 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
     }
 }
 
+ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
+    : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
+
 std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
     if (other_addr < gpu_addr) {
         // Subresource address can't be lower than the base
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index e326cab71..ff1feda9b 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -25,12 +25,14 @@ enum class ImageFlagBits : u32 {
     Strong = 1 << 5,     ///< Exists in the image table, the dimensions can be trusted
     Registered = 1 << 6, ///< True when the image is registered
     Picked = 1 << 7,     ///< Temporary flag to mark the image as picked
+    Remapped = 1 << 8,   ///< Image has been remapped.
+    Sparse = 1 << 9,     ///< Image has non-continuous submemory.
 
     // Garbage Collection Flags
-    BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
-                         ///< garbage collection priority
-    Alias = 1 << 9,      ///< This image has aliases and has priority on garbage
-                         ///< collection
+    BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
+                          ///< garbage collection priority
+    Alias = 1 << 11,      ///< This image has aliases and has priority on garbage
+                          ///< collection
 };
 DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
 
@@ -57,6 +59,12 @@ struct ImageBase {
         return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
     }
 
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+        const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
+        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+    }
+
     void CheckBadOverlapState();
     void CheckAliasState();
 
@@ -84,6 +92,29 @@ struct ImageBase {
 
     std::vector<AliasedImage> aliased_images;
     std::vector<ImageId> overlapping_images;
+    ImageMapId map_view_id{};
+};
+
+struct ImageMapView {
+    explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
+
+    [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
+        const VAddr overlap_end = overlap_cpu_addr + overlap_size;
+        const VAddr cpu_addr_end = cpu_addr + size;
+        return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
+    }
+
+    [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
+        const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
+        const GPUVAddr gpu_addr_end = gpu_addr + size;
+        return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
+    }
+
+    GPUVAddr gpu_addr;
+    VAddr cpu_addr;
+    size_t size;
+    ImageId image_id;
+    bool picked{};
 };
 
 struct ImageAllocBase {
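Overlaps and the new OverlapsGPU are the standard half-open interval intersection test; a minimal standalone check (illustrative, plain integer types rather than the cache's address typedefs):

    #include <cassert>
    #include <cstdint>

    // Half-open interval intersection, as used by Overlaps/OverlapsGPU:
    // [base, base + size) intersects [other, other + other_size) iff each start
    // is below the other end. Touching ranges (end == start) do not overlap.
    constexpr bool RangesOverlap(std::uint64_t base, std::uint64_t size,
                                 std::uint64_t other, std::uint64_t other_size) {
        return base < other + other_size && other < base + size;
    }

    int main() {
        static_assert(RangesOverlap(0x1000, 0x1000, 0x1800, 0x1000));  // partial overlap
        static_assert(!RangesOverlap(0x1000, 0x1000, 0x2000, 0x1000)); // adjacent only
        static_assert(RangesOverlap(0x1000, 0x4000, 0x2000, 0x100));   // containment
        assert(!RangesOverlap(0x0, 0x1000, 0x5000, 0x1000));           // disjoint
    }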
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8dbd3824..e3542301e 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -13,6 +13,7 @@
 #include <span>
 #include <type_traits>
 #include <unordered_map>
+#include <unordered_set>
 #include <utility>
 #include <vector>
 
@@ -152,6 +153,9 @@
     /// Remove images in a region
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
+    /// Remove images in a GPU region
+    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                    const Tegra::Engines::Fermi2D::Surface& src,
@@ -190,7 +194,22 @@
 private:
     /// Iterate over all page indices in a range
     template <typename Func>
-    static void ForEachPage(VAddr addr, size_t size, Func&& func) {
+    static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
+        static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
+        const u64 page_end = (addr + size - 1) >> PAGE_BITS;
+        for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
+            if constexpr (RETURNS_BOOL) {
+                if (func(page)) {
+                    break;
+                }
+            } else {
+                func(page);
+            }
+        }
+    }
+
+    template <typename Func>
+    static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
         static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
         const u64 page_end = (addr + size - 1) >> PAGE_BITS;
         for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
@@ -220,7 +239,7 @@
     FramebufferId GetFramebufferId(const RenderTargets& key);
 
     /// Refresh the contents (pixel data) of an image
-    void RefreshContents(Image& image);
+    void RefreshContents(Image& image, ImageId image_id);
 
     /// Upload data from guest to an image
     template <typename StagingBuffer>
@@ -269,6 +288,16 @@
     template <typename Func>
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
 
+    template <typename Func>
+    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    template <typename Func>
+    void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
+
+    /// Iterates over all the sparse segments of an image, calling func on each
+    template <typename Func>
+    void ForEachSparseSegment(ImageBase& image, Func&& func);
+
     /// Find or create an image view in the given image with the passed parameters
     [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
 
@@ -279,10 +308,10 @@
     void UnregisterImage(ImageId image);
 
     /// Track CPU reads and writes for image
-    void TrackImage(ImageBase& image);
+    void TrackImage(ImageBase& image, ImageId image_id);
 
     /// Stop tracking CPU reads and writes for image
-    void UntrackImage(ImageBase& image);
+    void UntrackImage(ImageBase& image, ImageId image_id);
 
     /// Delete image from the cache
     void DeleteImage(ImageId image);
@@ -340,7 +369,13 @@
     std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
+
+    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
+
+    VAddr virtual_invalid_space{};
 
     bool has_deleted_images = false;
     u64 total_used_memory = 0;
@@ -349,6 +384,7 @@
     u64 critical_memory;
 
     SlotVector<Image> slot_images;
+    SlotVector<ImageMapView> slot_map_views;
     SlotVector<ImageView> slot_image_views;
     SlotVector<ImageAlloc> slot_image_allocs;
     SlotVector<Sampler> slot_samplers;
@@ -459,7 +495,7 @@
         }
     }
     if (True(image->flags & ImageFlagBits::Tracked)) {
-        UntrackImage(*image);
+        UntrackImage(*image, image_id);
     }
     UnregisterImage(image_id);
     DeleteImage(image_id);
@@ -658,7 +694,9 @@
             return;
         }
         image.flags |= ImageFlagBits::CpuModified;
-        UntrackImage(image);
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, image_id);
+        }
     });
 }
 
@@ -695,7 +733,7 @@
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
         if (True(image.flags & ImageFlagBits::Tracked)) {
-            UntrackImage(image);
+            UntrackImage(image, id);
         }
         UnregisterImage(id);
         DeleteImage(id);
@@ -703,6 +741,23 @@
 }
 
 template <class P>
+void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+    std::vector<ImageId> deleted_images;
+    ForEachImageInRegionGPU(gpu_addr, size,
+                            [&](ImageId id, Image&) { deleted_images.push_back(id); });
+    for (const ImageId id : deleted_images) {
+        Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::Remapped)) {
+            continue;
+        }
+        image.flags |= ImageFlagBits::Remapped;
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, id);
+        }
+    }
+}
+
+template <class P>
 void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
                                 const Tegra::Engines::Fermi2D::Surface& src,
                                 const Tegra::Engines::Fermi2D::Config& copy,
@@ -833,9 +888,10 @@
     if (it == page_table.end()) {
         return nullptr;
     }
-    const auto& image_ids = it->second;
-    for (const ImageId image_id : image_ids) {
-        const ImageBase& image = slot_images[image_id];
+    const auto& image_map_ids = it->second;
+    for (const ImageMapId map_id : image_map_ids) {
+        const ImageMapView& map = slot_map_views[map_id];
+        const ImageBase& image = slot_images[map.image_id];
         if (image.cpu_addr != cpu_addr) {
             continue;
         }
@@ -915,13 +971,13 @@
 }
 
 template <class P>
-void TextureCache<P>::RefreshContents(Image& image) {
+void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
     if (False(image.flags & ImageFlagBits::CpuModified)) {
         // Only upload modified images
         return;
     }
     image.flags &= ~ImageFlagBits::CpuModified;
-    TrackImage(image);
+    TrackImage(image, image_id);
 
     if (image.info.num_samples > 1) {
         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
@@ -958,7 +1014,7 @@
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidAddress(gpu_memory, config)) {
+    if (!IsValidEntry(gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
     const auto [pair, is_new] = image_views.try_emplace(config);
@@ -1000,14 +1056,20 @@
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        return ImageId{};
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        if (!cpu_addr) {
+            return ImageId{};
+        }
     }
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     ImageId image_id;
     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
+        if (True(existing_image.flags & ImageFlagBits::Remapped)) {
+            return false;
+        }
         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
             const bool strict_size = False(options & RelaxedOptions::Size) &&
                                      True(existing_image.flags & ImageFlagBits::Strong);
@@ -1033,7 +1095,16 @@
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    if (!cpu_addr) {
+        const auto size = CalculateGuestSizeInBytes(info);
+        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        if (!cpu_addr) {
+            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
+            virtual_invalid_space += Common::AlignUp(size, 32);
+            cpu_addr = std::optional<VAddr>(fake_addr);
+        }
+    }
     ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
     const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
     const Image& image = slot_images[image_id];
@@ -1053,10 +1124,16 @@
     const bool broken_views = runtime.HasBrokenTextureViewFormats();
     const bool native_bgr = runtime.HasNativeBgr();
     std::vector<ImageId> overlap_ids;
+    std::unordered_set<ImageId> overlaps_found;
     std::vector<ImageId> left_aliased_ids;
     std::vector<ImageId> right_aliased_ids;
+    std::unordered_set<ImageId> ignore_textures;
     std::vector<ImageId> bad_overlap_ids;
-    ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
+    const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (True(overlap.flags & ImageFlagBits::Remapped)) {
+            ignore_textures.insert(overlap_id);
+            return;
+        }
         if (info.type == ImageType::Linear) {
             if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
                 // Alias linear images with the same pitch
@@ -1064,6 +1141,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             }
             return;
         }
+        overlaps_found.insert(overlap_id);
         static constexpr bool strict_size = true;
         const std::optional<OverlapResult> solution = ResolveOverlap(
             new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
@@ -1087,12 +1165,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
             bad_overlap_ids.push_back(overlap_id);
             overlap.flags |= ImageFlagBits::BadOverlap;
         }
-    });
+    };
+    ForEachImageInRegion(cpu_addr, size_bytes, region_check);
+    const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
+        if (!overlaps_found.contains(overlap_id)) {
+            if (True(overlap.flags & ImageFlagBits::Remapped)) {
+                ignore_textures.insert(overlap_id);
+            }
+            if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
+                ignore_textures.insert(overlap_id);
+            }
+        }
+    };
+    ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
+    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+        new_image.flags |= ImageFlagBits::Sparse;
+    }
+
+    for (const ImageId overlap_id : ignore_textures) {
+        Image& overlap = slot_images[overlap_id];
+        if (True(overlap.flags & ImageFlagBits::GpuModified)) {
+            UNIMPLEMENTED();
+        }
+        if (True(overlap.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(overlap, overlap_id);
+        }
+        UnregisterImage(overlap_id);
+        DeleteImage(overlap_id);
+    }
+
     // TODO: Only upload what we need
-    RefreshContents(new_image);
+    RefreshContents(new_image, new_image_id);
 
     for (const ImageId overlap_id : overlap_ids) {
         Image& overlap = slot_images[overlap_id];
@@ -1104,7 +1210,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
         runtime.CopyImage(new_image, overlap, copies);
     }
     if (True(overlap.flags & ImageFlagBits::Tracked)) {
-        UntrackImage(overlap);
+        UntrackImage(overlap, overlap_id);
     }
     UnregisterImage(overlap_id);
     DeleteImage(overlap_id);
@@ -1239,7 +1345,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 32> images;
-    ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
+    boost::container::small_vector<ImageMapId, 32> maps;
+    ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
         const auto it = page_table.find(page);
         if (it == page_table.end()) {
             if constexpr (BOOL_BREAK) {
@@ -1248,12 +1355,105 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
                 return;
             }
         }
+        for (const ImageMapId map_id : it->second) {
+            ImageMapView& map = slot_map_views[map_id];
+            if (map.picked) {
+                continue;
+            }
+            if (!map.Overlaps(cpu_addr, size)) {
+                continue;
+            }
+            map.picked = true;
+            maps.push_back(map_id);
+            Image& image = slot_images[map.image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(map.image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(map.image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(map.image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+    for (const ImageMapId map_id : maps) {
+        slot_map_views[map_id].picked = false;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 8> images;
+    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+        const auto it = gpu_page_table.find(page);
+        if (it == gpu_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
+        for (const ImageId image_id : it->second) {
+            Image& image = slot_images[image_id];
+            if (True(image.flags & ImageFlagBits::Picked)) {
+                continue;
+            }
+            if (!image.OverlapsGPU(gpu_addr, size)) {
+                continue;
+            }
+            image.flags |= ImageFlagBits::Picked;
+            images.push_back(image_id);
+            if constexpr (BOOL_BREAK) {
+                if (func(image_id, image)) {
+                    return true;
+                }
+            } else {
+                func(image_id, image);
+            }
+        }
+        if constexpr (BOOL_BREAK) {
+            return false;
+        }
+    });
+    for (const ImageId image_id : images) {
+        slot_images[image_id].flags &= ~ImageFlagBits::Picked;
+    }
+}
+
+template <class P>
+template <typename Func>
+void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
+    using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
+    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
+    boost::container::small_vector<ImageId, 8> images;
+    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
+        const auto it = sparse_page_table.find(page);
+        if (it == sparse_page_table.end()) {
+            if constexpr (BOOL_BREAK) {
+                return false;
+            } else {
+                return;
+            }
+        }
         for (const ImageId image_id : it->second) {
             Image& image = slot_images[image_id];
             if (True(image.flags & ImageFlagBits::Picked)) {
                 continue;
             }
-            if (!image.Overlaps(cpu_addr, size)) {
+            if (!image.OverlapsGPU(gpu_addr, size)) {
                 continue;
             }
             image.flags |= ImageFlagBits::Picked;
| @@ -1276,6 +1476,27 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f | |||
| 1276 | } | 1476 | } |
| 1277 | 1477 | ||
| 1278 | template <class P> | 1478 | template <class P> |
| 1479 | template <typename Func> | ||
| 1480 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||
| 1481 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||
| 1482 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||
| 1483 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||
| 1484 | for (auto& segment : segments) { | ||
| 1485 | const auto gpu_addr = segment.first; | ||
| 1486 | const auto size = segment.second; | ||
| 1487 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1488 | ASSERT(cpu_addr); | ||
| 1489 | if constexpr (RETURNS_BOOL) { | ||
| 1490 | if (func(gpu_addr, *cpu_addr, size)) { | ||
| 1491 | break; | ||
| 1492 | } | ||
| 1493 | } else { | ||
| 1494 | func(gpu_addr, *cpu_addr, size); | ||
| 1495 | } | ||
| 1496 | } | ||
| 1497 | } | ||
| 1498 | |||
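ForEachSparseSegment above leans on the memory manager to enumerate only the mapped pieces of a sparse GPU range, then translates each piece to its CPU address. Roughly, under a toy memory manager (ToyMemoryManager, ForEachMappedSegment, and their internals are invented for this sketch; the real Tegra::MemoryManager API is richer):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <utility>
    #include <vector>

    using GPUVAddr = std::uint64_t;
    using VAddr = std::uint64_t;

    struct ToyMemoryManager {
        struct Mapping {
            GPUVAddr gpu;
            VAddr cpu;
            std::size_t size;
        };
        std::vector<Mapping> mappings; // assumed sorted and non-overlapping

        // Sub-ranges of [base, base + size) that are actually backed by memory.
        std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr base,
                                                                        std::size_t size) const {
            std::vector<std::pair<GPUVAddr, std::size_t>> out;
            const GPUVAddr end = base + size;
            for (const Mapping& m : mappings) {
                const GPUVAddr lo = std::max(base, m.gpu);
                const GPUVAddr hi = std::min(end, m.gpu + m.size);
                if (lo < hi) {
                    out.emplace_back(lo, static_cast<std::size_t>(hi - lo));
                }
            }
            return out;
        }

        std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const {
            for (const Mapping& m : mappings) {
                if (addr >= m.gpu && addr < m.gpu + m.size) {
                    return m.cpu + (addr - m.gpu);
                }
            }
            return std::nullopt;
        }
    };

    template <typename Func>
    void ForEachMappedSegment(const ToyMemoryManager& mm, GPUVAddr base, std::size_t size,
                              Func&& func) {
        for (const auto& [gpu_addr, seg_size] : mm.GetSubmappedRange(base, size)) {
            const std::optional<VAddr> cpu_addr = mm.GpuToCpuAddress(gpu_addr);
            assert(cpu_addr && "a submapped segment must have a CPU translation");
            func(gpu_addr, *cpu_addr, seg_size); // unmapped holes are never visited
        }
    }

The bool-returning early-exit variant from the hunk is dropped here for brevity.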
| 1499 | template <class P> | ||
| 1279 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { | 1500 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { |
| 1280 | Image& image = slot_images[image_id]; | 1501 | Image& image = slot_images[image_id]; |
| 1281 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | 1502 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { |
| @@ -1292,8 +1513,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1292 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | 1513 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), |
| 1293 | "Trying to register an already registered image"); | 1514 | "Trying to register an already registered image"); |
| 1294 | image.flags |= ImageFlagBits::Registered; | 1515 | image.flags |= ImageFlagBits::Registered; |
| 1295 | ForEachPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1296 | [this, image_id](u64 page) { page_table[page].push_back(image_id); }); | ||
| 1297 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | 1516 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); |
| 1298 | if ((IsPixelFormatASTC(image.info.format) && | 1517 | if ((IsPixelFormatASTC(image.info.format) && |
| 1299 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | 1518 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || |
| @@ -1301,6 +1520,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1301 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1520 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1302 | } | 1521 | } |
| 1303 | total_used_memory += Common::AlignUp(tentative_size, 1024); | 1522 | total_used_memory += Common::AlignUp(tentative_size, 1024); |
| 1523 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1524 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||
| 1525 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1526 | auto map_id = | ||
| 1527 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | ||
| 1528 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1529 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1530 | image.map_view_id = map_id; | ||
| 1531 | return; | ||
| 1532 | } | ||
| 1533 | std::vector<ImageViewId> sparse_maps{}; | ||
| 1534 | ForEachSparseSegment( | ||
| 1535 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1536 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||
| 1537 | ForEachCPUPage(cpu_addr, size, | ||
| 1538 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1539 | sparse_maps.push_back(map_id); | ||
| 1540 | }); | ||
| 1541 | sparse_views.emplace(image_id, std::move(sparse_maps)); | ||
| 1542 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1543 | [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||
| 1304 | } | 1544 | } |
| 1305 | 1545 | ||
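RegisterImage now splits its CPU-side bookkeeping: a contiguous image gets exactly one map view covering its whole guest range, while a sparse image gets one map view per mapped segment, with the set remembered in sparse_views for later teardown. A compressed sketch of the containers involved (all names here are simplified stand-ins, not the cache's real types):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using ImageId = std::uint32_t;
    using ImageMapId = std::uint32_t;
    constexpr std::uint64_t PAGE_BITS = 12; // assumed 4 KiB CPU pages

    struct MapView {
        std::uint64_t gpu_addr;
        std::uint64_t cpu_addr;
        std::size_t size;
        ImageId image_id;
    };

    struct CacheTables {
        std::vector<MapView> map_views; // stand-in for slot_map_views
        std::unordered_map<std::uint64_t, std::vector<ImageMapId>> cpu_page_table;
        std::unordered_map<std::uint64_t, std::vector<ImageId>> gpu_page_table;
        std::unordered_map<ImageId, std::vector<ImageMapId>> sparse_views;

        ImageMapId AddMapView(std::uint64_t gpu, std::uint64_t cpu, std::size_t size,
                              ImageId image) {
            const auto map_id = static_cast<ImageMapId>(map_views.size());
            map_views.push_back({gpu, cpu, size, image});
            const std::uint64_t last = (cpu + size - 1) >> PAGE_BITS;
            for (std::uint64_t page = cpu >> PAGE_BITS; page <= last; ++page) {
                // CPU pages now resolve to map views rather than images, so a
                // sparse image can contribute several disjoint CPU mappings.
                cpu_page_table[page].push_back(map_id);
            }
            return map_id;
        }
    };

GPU pages, by contrast, always index whole images, which is why the hunk registers gpu_page_table (and sparse_page_table for sparse images) unconditionally.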
| 1306 | template <class P> | 1546 | template <class P> |
| @@ -1317,34 +1557,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | |||
| 1317 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | 1557 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); |
| 1318 | } | 1558 | } |
| 1319 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | 1559 | total_used_memory -= Common::AlignUp(tentative_size, 1024); |
| 1320 | ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | 1560 | const auto& clear_page_table = |
| 1321 | const auto page_it = page_table.find(page); | 1561 | [this, image_id]( |
| 1322 | if (page_it == page_table.end()) { | 1562 | u64 page, |
| 1323 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | 1563 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { |
| 1324 | return; | 1564 | const auto page_it = selected_page_table.find(page); |
| 1325 | } | 1565 | if (page_it == selected_page_table.end()) { |
| 1326 | std::vector<ImageId>& image_ids = page_it->second; | 1566 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); |
| 1327 | const auto vector_it = std::ranges::find(image_ids, image_id); | 1567 | return; |
| 1328 | if (vector_it == image_ids.end()) { | 1568 | } |
| 1329 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); | 1569 | std::vector<ImageId>& image_ids = page_it->second; |
| 1330 | return; | 1570 | const auto vector_it = std::ranges::find(image_ids, image_id); |
| 1331 | } | 1571 | if (vector_it == image_ids.end()) { |
| 1332 | image_ids.erase(vector_it); | 1572 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", |
| 1573 | page << PAGE_BITS); | ||
| 1574 | return; | ||
| 1575 | } | ||
| 1576 | image_ids.erase(vector_it); | ||
| 1577 | }; | ||
| 1578 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1579 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | ||
| 1580 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1581 | const auto map_id = image.map_view_id; | ||
| 1582 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | ||
| 1583 | const auto page_it = page_table.find(page); | ||
| 1584 | if (page_it == page_table.end()) { | ||
| 1585 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1586 | return; | ||
| 1587 | } | ||
| 1588 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1589 | const auto vector_it = std::ranges::find(image_map_ids, map_id); | ||
| 1590 | if (vector_it == image_map_ids.end()) { | ||
| 1591 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1592 | page << PAGE_BITS); | ||
| 1593 | return; | ||
| 1594 | } | ||
| 1595 | image_map_ids.erase(vector_it); | ||
| 1596 | }); | ||
| 1597 | slot_map_views.erase(map_id); | ||
| 1598 | return; | ||
| 1599 | } | ||
| 1600 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { | ||
| 1601 | clear_page_table(page, sparse_page_table); | ||
| 1333 | }); | 1602 | }); |
| 1603 | auto it = sparse_views.find(image_id); | ||
| 1604 | ASSERT(it != sparse_views.end()); | ||
| 1605 | auto& sparse_maps = it->second; | ||
| 1606 | for (auto& map_view_id : sparse_maps) { | ||
| 1607 | const auto& map_range = slot_map_views[map_view_id]; | ||
| 1608 | const VAddr cpu_addr = map_range.cpu_addr; | ||
| 1609 | const std::size_t size = map_range.size; | ||
| 1610 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||
| 1611 | const auto page_it = page_table.find(page); | ||
| 1612 | if (page_it == page_table.end()) { | ||
| 1613 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1614 | return; | ||
| 1615 | } | ||
| 1616 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1617 | auto vector_it = image_map_ids.begin(); | ||
| 1618 | while (vector_it != image_map_ids.end()) { | ||
| 1619 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1620 | if (map.image_id != image_id) { | ||
| 1621 | vector_it++; | ||
| 1622 | continue; | ||
| 1623 | } | ||
| 1624 | if (!map.picked) { | ||
| 1625 | map.picked = true; | ||
| 1626 | } | ||
| 1627 | vector_it = image_map_ids.erase(vector_it); | ||
| 1628 | } | ||
| 1629 | }); | ||
| 1630 | slot_map_views.erase(map_view_id); | ||
| 1631 | } | ||
| 1632 | sparse_views.erase(it); | ||
| 1334 | } | 1633 | } |
| 1335 | 1634 | ||
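The clear_page_table lambda above factors the same validated erase over either image-keyed table. Its core is "find the page, find the id, erase it, and treat absence as a logic error"; a standalone version (the helper name is hypothetical, with assert standing in for UNREACHABLE_MSG):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    template <typename Id>
    void ErasePageEntry(std::unordered_map<std::uint64_t, std::vector<Id>>& table,
                        std::uint64_t page, Id id) {
        const auto page_it = table.find(page);
        if (page_it == table.end()) {
            assert(false && "unregistering an unregistered page");
            return;
        }
        std::vector<Id>& ids = page_it->second;
        const auto vector_it = std::find(ids.begin(), ids.end(), id);
        if (vector_it == ids.end()) {
            assert(false && "unregistering an id missing from its page");
            return;
        }
        ids.erase(vector_it);
    }

The sparse path additionally erases every map view the image contributed; the picked flag there guards against freeing the same slot twice when one view spans many pages.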
| 1336 | template <class P> | 1635 | template <class P> |
| 1337 | void TextureCache<P>::TrackImage(ImageBase& image) { | 1636 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { |
| 1338 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | 1637 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 1339 | image.flags |= ImageFlagBits::Tracked; | 1638 | image.flags |= ImageFlagBits::Tracked; |
| 1340 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | 1639 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 1640 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||
| 1641 | return; | ||
| 1642 | } | ||
| 1643 | if (True(image.flags & ImageFlagBits::Registered)) { | ||
| 1644 | auto it = sparse_views.find(image_id); | ||
| 1645 | ASSERT(it != sparse_views.end()); | ||
| 1646 | auto& sparse_maps = it->second; | ||
| 1647 | for (auto& map_view_id : sparse_maps) { | ||
| 1648 | const auto& map = slot_map_views[map_view_id]; | ||
| 1649 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1650 | const std::size_t size = map.size; | ||
| 1651 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1652 | } | ||
| 1653 | return; | ||
| 1654 | } | ||
| 1655 | ForEachSparseSegment(image, | ||
| 1656 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1657 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1658 | }); | ||
| 1341 | } | 1659 | } |
| 1342 | 1660 | ||
| 1343 | template <class P> | 1661 | template <class P> |
| 1344 | void TextureCache<P>::UntrackImage(ImageBase& image) { | 1662 | void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { |
| 1345 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | 1663 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); |
| 1346 | image.flags &= ~ImageFlagBits::Tracked; | 1664 | image.flags &= ~ImageFlagBits::Tracked; |
| 1347 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | 1665 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 1666 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1667 | return; | ||
| 1668 | } | ||
| 1669 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | ||
| 1670 | auto it = sparse_views.find(image_id); | ||
| 1671 | ASSERT(it != sparse_views.end()); | ||
| 1672 | auto& sparse_maps = it->second; | ||
| 1673 | for (auto& map_view_id : sparse_maps) { | ||
| 1674 | const auto& map = slot_map_views[map_view_id]; | ||
| 1675 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1676 | const std::size_t size = map.size; | ||
| 1677 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 1678 | } | ||
| 1348 | } | 1679 | } |
| 1349 | 1680 | ||
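TrackImage and UntrackImage feed symmetric +1/-1 deltas into rasterizer.UpdatePagesCachedCount, now per segment for sparse images. The usual shape of such a counter looks like this (PageCounter and the Protect/Unprotect hooks are hypothetical, not yuzu's rasterizer interface):

    #include <cstdint>
    #include <unordered_map>

    class PageCounter {
    public:
        void Update(std::uint64_t addr, std::uint64_t size, int delta) {
            const std::uint64_t last = (addr + size - 1) >> PAGE_BITS;
            for (std::uint64_t page = addr >> PAGE_BITS; page <= last; ++page) {
                const int old_count = counts[page];
                const int new_count = old_count + delta;
                counts[page] = new_count;
                if (old_count == 0 && new_count == 1) {
                    // Protect(page): start watching this page for CPU writes.
                } else if (old_count == 1 && new_count == 0) {
                    // Unprotect(page): no cached resource covers it any more.
                }
            }
        }

    private:
        static constexpr std::uint64_t PAGE_BITS = 12; // assumed 4 KiB pages
        std::unordered_map<std::uint64_t, int> counts;
    };

Because the deltas must balance, TrackImage of an already registered sparse image walks the stored map views, so UntrackImage can later subtract exactly the same ranges.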
| 1350 | template <class P> | 1681 | template <class P> |
| @@ -1486,10 +1817,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool | |||
| 1486 | if (invalidate) { | 1817 | if (invalidate) { |
| 1487 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | 1818 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); |
| 1488 | if (False(image.flags & ImageFlagBits::Tracked)) { | 1819 | if (False(image.flags & ImageFlagBits::Tracked)) { |
| 1489 | TrackImage(image); | 1820 | TrackImage(image, image_id); |
| 1490 | } | 1821 | } |
| 1491 | } else { | 1822 | } else { |
| 1492 | RefreshContents(image); | 1823 | RefreshContents(image, image_id); |
| 1493 | SynchronizeAliases(image_id); | 1824 | SynchronizeAliases(image_id); |
| 1494 | } | 1825 | } |
| 1495 | if (is_modification) { | 1826 | if (is_modification) { |
diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index c9571f7e4..9fbdc1ac6 100644 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h | |||
| @@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14; | |||
| 16 | constexpr SlotId CORRUPT_ID{0xfffffffe}; | 16 | constexpr SlotId CORRUPT_ID{0xfffffffe}; |
| 17 | 17 | ||
| 18 | using ImageId = SlotId; | 18 | using ImageId = SlotId; |
| 19 | using ImageMapId = SlotId; | ||
| 19 | using ImageViewId = SlotId; | 20 | using ImageViewId = SlotId; |
| 20 | using ImageAllocId = SlotId; | 21 | using ImageAllocId = SlotId; |
| 21 | using SamplerId = SlotId; | 22 | using SamplerId = SlotId; |
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 20794fa32..c872517b8 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { | |||
| 664 | return offsets; | 664 | return offsets; |
| 665 | } | 665 | } |
| 666 | 666 | ||
| 667 | LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { | ||
| 668 | const u32 num_levels = info.resources.levels; | ||
| 669 | const LevelInfo level_info = MakeLevelInfo(info); | ||
| 670 | LevelArray sizes{}; | ||
| 671 | for (u32 level = 0; level < num_levels; ++level) { | ||
| 672 | sizes[level] = CalculateLevelSize(level_info, level); | ||
| 673 | } | ||
| 674 | return sizes; | ||
| 675 | } | ||
| 676 | |||
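CalculateMipLevelSizes mirrors CalculateMipLevelOffsets but records each level's size instead of its start. Ignoring block-linear alignment (which the real CalculateLevelSize does account for), a linear-layout approximation would be (LinearMipLevelSizes and its parameters are invented for this sketch):

    #include <algorithm>
    #include <array>
    #include <cstdint>

    constexpr std::size_t MAX_MIP_LEVELS = 14;
    using LevelArray = std::array<std::uint32_t, MAX_MIP_LEVELS>;

    LevelArray LinearMipLevelSizes(std::uint32_t width, std::uint32_t height, std::uint32_t depth,
                                   std::uint32_t bytes_per_pixel, std::uint32_t num_levels) {
        LevelArray sizes{};
        for (std::uint32_t level = 0; level < num_levels; ++level) {
            // Each mip halves every dimension, clamped so it never reaches zero.
            const std::uint32_t w = std::max<std::uint32_t>(width >> level, 1);
            const std::uint32_t h = std::max<std::uint32_t>(height >> level, 1);
            const std::uint32_t d = std::max<std::uint32_t>(depth >> level, 1);
            sizes[level] = w * h * d * bytes_per_pixel;
        }
        return sizes;
    }

Having sizes as well as offsets lets callers reason about which levels a partially mapped guest range actually covers.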
| 667 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | 677 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { |
| 668 | ASSERT(info.type == ImageType::e3D); | 678 | ASSERT(info.type == ImageType::e3D); |
| 669 | std::vector<u32> offsets; | 679 | std::vector<u32> offsets; |
| @@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 776 | return copies; | 786 | return copies; |
| 777 | } | 787 | } |
| 778 | 788 | ||
| 779 | bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | 789 | bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { |
| 780 | if (config.Address() == 0) { | 790 | const GPUVAddr address = config.Address(); |
| 791 | if (address == 0) { | ||
| 781 | return false; | 792 | return false; |
| 782 | } | 793 | } |
| 783 | if (config.Address() > (u64(1) << 48)) { | 794 | if (address > (1ULL << 48)) { |
| 784 | return false; | 795 | return false; |
| 785 | } | 796 | } |
| 786 | return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); | 797 | if (gpu_memory.GpuToCpuAddress(address).has_value()) { |
| 798 | return true; | ||
| 799 | } | ||
| 800 | const ImageInfo info{config}; | ||
| 801 | const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); | ||
| 802 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); | ||
| 787 | } | 803 | } |
| 788 | 804 | ||
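The IsValidAddress to IsValidEntry rename reflects a real behavioural change: a descriptor whose base address does not translate can still be valid if some page inside its computed guest size is mapped, which is exactly the sparse case. In outline (TranslatorLike, Translate, and TranslateWithin are stand-ins for the two GpuToCpuAddress overloads, stubbed so the sketch is self-contained):

    #include <cstdint>
    #include <optional>

    using GPUVAddr = std::uint64_t;
    using VAddr = std::uint64_t;

    struct TranslatorLike {
        // Stubbed for the sketch; real implementations consult page tables.
        std::optional<VAddr> Translate(GPUVAddr) const { return std::nullopt; }
        std::optional<VAddr> TranslateWithin(GPUVAddr, std::size_t) const { return std::nullopt; }
    };

    bool IsPlausibleEntry(const TranslatorLike& mm, GPUVAddr address, std::size_t guest_size) {
        if (address == 0) {
            return false; // null descriptor
        }
        if (address > (1ULL << 48)) {
            return false; // beyond the 48-bit GPU address space
        }
        if (mm.Translate(address)) {
            return true; // mapped base page: the common case, no size needed
        }
        // Sparse fallback: accept the entry if any part of its guest range maps.
        return mm.TranslateWithin(address, guest_size).has_value();
    }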
| 789 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 805 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index cdc5cbc75..766502908 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -40,6 +40,8 @@ struct OverlapResult { | |||
| 40 | 40 | ||
| 41 | [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; | 41 | [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; |
| 42 | 42 | ||
| 43 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; | ||
| 44 | |||
| 43 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | 45 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); |
| 44 | 46 | ||
| 45 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | 47 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); |
| @@ -55,7 +57,7 @@ struct OverlapResult { | |||
| 55 | const ImageInfo& src, | 57 | const ImageInfo& src, |
| 56 | SubresourceBase base); | 58 | SubresourceBase base); |
| 57 | 59 | ||
| 58 | [[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 60 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 59 | 61 | ||
| 60 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 62 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |
| 61 | GPUVAddr gpu_addr, const ImageInfo& info, | 63 | GPUVAddr gpu_addr, const ImageInfo& info, |