diff options
Diffstat (limited to '')
| -rw-r--r-- | src/video_core/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache_templates.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp | 10 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 1528 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_templates.h | 1507 |
7 files changed, 1533 insertions, 1532 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1eb67c051..1250cca6f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -97,6 +97,7 @@ add_library(video_core STATIC | |||
| 97 | renderer_opengl/gl_stream_buffer.h | 97 | renderer_opengl/gl_stream_buffer.h |
| 98 | renderer_opengl/gl_texture_cache.cpp | 98 | renderer_opengl/gl_texture_cache.cpp |
| 99 | renderer_opengl/gl_texture_cache.h | 99 | renderer_opengl/gl_texture_cache.h |
| 100 | renderer_opengl/gl_texture_cache_templates.cpp | ||
| 100 | renderer_opengl/gl_query_cache.cpp | 101 | renderer_opengl/gl_query_cache.cpp |
| 101 | renderer_opengl/gl_query_cache.h | 102 | renderer_opengl/gl_query_cache.h |
| 102 | renderer_opengl/maxwell_to_gl.h | 103 | renderer_opengl/maxwell_to_gl.h |
| @@ -155,6 +156,7 @@ add_library(video_core STATIC | |||
| 155 | renderer_vulkan/vk_swapchain.h | 156 | renderer_vulkan/vk_swapchain.h |
| 156 | renderer_vulkan/vk_texture_cache.cpp | 157 | renderer_vulkan/vk_texture_cache.cpp |
| 157 | renderer_vulkan/vk_texture_cache.h | 158 | renderer_vulkan/vk_texture_cache.h |
| 159 | renderer_vulkan/vk_texture_cache_templates.cpp | ||
| 158 | renderer_vulkan/vk_update_descriptor.cpp | 160 | renderer_vulkan/vk_update_descriptor.cpp |
| 159 | renderer_vulkan/vk_update_descriptor.h | 161 | renderer_vulkan/vk_update_descriptor.h |
| 160 | shader_cache.cpp | 162 | shader_cache.cpp |
| @@ -186,6 +188,7 @@ add_library(video_core STATIC | |||
| 186 | texture_cache/samples_helper.h | 188 | texture_cache/samples_helper.h |
| 187 | texture_cache/slot_vector.h | 189 | texture_cache/slot_vector.h |
| 188 | texture_cache/texture_cache.h | 190 | texture_cache/texture_cache.h |
| 191 | texture_cache/texture_cache_templates.h | ||
| 189 | texture_cache/types.h | 192 | texture_cache/types.h |
| 190 | texture_cache/util.cpp | 193 | texture_cache/util.cpp |
| 191 | texture_cache/util.h | 194 | texture_cache/util.h |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index c373c9cb4..26b423f5e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| @@ -18,10 +18,7 @@ | |||
| 18 | #include "video_core/renderer_opengl/maxwell_to_gl.h" | 18 | #include "video_core/renderer_opengl/maxwell_to_gl.h" |
| 19 | #include "video_core/renderer_opengl/util_shaders.h" | 19 | #include "video_core/renderer_opengl/util_shaders.h" |
| 20 | #include "video_core/surface.h" | 20 | #include "video_core/surface.h" |
| 21 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 22 | #include "video_core/texture_cache/samples_helper.h" | 21 | #include "video_core/texture_cache/samples_helper.h" |
| 23 | #include "video_core/texture_cache/texture_cache.h" | ||
| 24 | #include "video_core/textures/decoders.h" | ||
| 25 | 22 | ||
| 26 | namespace OpenGL { | 23 | namespace OpenGL { |
| 27 | namespace { | 24 | namespace { |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp new file mode 100644 index 000000000..00ed06447 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/renderer_opengl/gl_texture_cache.h" | ||
| 6 | #include "video_core/texture_cache/texture_cache_templates.h" | ||
| 7 | |||
| 8 | namespace VideoCommon { | ||
| 9 | template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>; | ||
| 10 | } | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8e029bcb3..b0496556d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp new file mode 100644 index 000000000..fd8978954 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp | |||
| @@ -0,0 +1,10 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #include "video_core/renderer_vulkan/vk_texture_cache.h" | ||
| 6 | #include "video_core/texture_cache/texture_cache_templates.h" | ||
| 7 | |||
| 8 | namespace VideoCommon { | ||
| 9 | template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>; | ||
| 10 | } | ||
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index f34c9d9ca..a4f6e9422 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // Copyright 2019 yuzu Emulator Project | 1 | // Copyright 2021 yuzu Emulator Project |
| 2 | // Licensed under GPLv2 or any later version | 2 | // Licensed under GPLv2 or any later version |
| 3 | // Refer to the license.txt file included. | 3 | // Refer to the license.txt file included. |
| 4 | 4 | ||
| @@ -164,14 +164,6 @@ public: | |||
| 164 | const Tegra::Engines::Fermi2D::Surface& src, | 164 | const Tegra::Engines::Fermi2D::Surface& src, |
| 165 | const Tegra::Engines::Fermi2D::Config& copy); | 165 | const Tegra::Engines::Fermi2D::Config& copy); |
| 166 | 166 | ||
| 167 | /// Invalidate the contents of the color buffer index | ||
| 168 | /// These contents become unspecified, the cache can assume aggressive optimizations. | ||
| 169 | void InvalidateColorBuffer(size_t index); | ||
| 170 | |||
| 171 | /// Invalidate the contents of the depth buffer | ||
| 172 | /// These contents become unspecified, the cache can assume aggressive optimizations. | ||
| 173 | void InvalidateDepthBuffer(); | ||
| 174 | |||
| 175 | /// Try to find a cached image view in the given CPU address | 167 | /// Try to find a cached image view in the given CPU address |
| 176 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); | 168 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); |
| 177 | 169 | ||
| @@ -407,1522 +399,4 @@ private: | |||
| 407 | typename SlotVector<Image>::Iterator deletion_iterator; | 399 | typename SlotVector<Image>::Iterator deletion_iterator; |
| 408 | }; | 400 | }; |
| 409 | 401 | ||
| 410 | template <class P> | ||
| 411 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 412 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 413 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 414 | Tegra::MemoryManager& gpu_memory_) | ||
| 415 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 416 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 417 | // Configure null sampler | ||
| 418 | TSCEntry sampler_descriptor{}; | ||
| 419 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 420 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 421 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||
| 422 | sampler_descriptor.cubemap_anisotropy.Assign(1); | ||
| 423 | |||
| 424 | // Make sure the first index is reserved for the null resources | ||
| 425 | // This way the null resource becomes a compile time constant | ||
| 426 | void(slot_image_views.insert(runtime, NullImageParams{})); | ||
| 427 | void(slot_samplers.insert(runtime, sampler_descriptor)); | ||
| 428 | |||
| 429 | deletion_iterator = slot_images.begin(); | ||
| 430 | |||
| 431 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||
| 432 | const auto device_memory = runtime.GetDeviceLocalMemory(); | ||
| 433 | const u64 possible_expected_memory = (device_memory * 3) / 10; | ||
| 434 | const u64 possible_critical_memory = (device_memory * 6) / 10; | ||
| 435 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | ||
| 436 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | ||
| 437 | minimum_memory = 0; | ||
| 438 | } else { | ||
| 439 | // on OGL we can be more conservatives as the driver takes care. | ||
| 440 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | ||
| 441 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | ||
| 442 | minimum_memory = expected_memory; | ||
| 443 | } | ||
| 444 | } | ||
| 445 | |||
| 446 | template <class P> | ||
| 447 | void TextureCache<P>::RunGarbageCollector() { | ||
| 448 | const bool high_priority_mode = total_used_memory >= expected_memory; | ||
| 449 | const bool aggressive_mode = total_used_memory >= critical_memory; | ||
| 450 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 451 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 452 | for (; num_iterations > 0; --num_iterations) { | ||
| 453 | if (deletion_iterator == slot_images.end()) { | ||
| 454 | deletion_iterator = slot_images.begin(); | ||
| 455 | if (deletion_iterator == slot_images.end()) { | ||
| 456 | break; | ||
| 457 | } | ||
| 458 | } | ||
| 459 | auto [image_id, image_tmp] = *deletion_iterator; | ||
| 460 | Image* image = image_tmp; // fix clang error. | ||
| 461 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | ||
| 462 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | ||
| 463 | const bool must_download = image->IsSafeDownload(); | ||
| 464 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 465 | const u64 ticks_needed = | ||
| 466 | is_bad_overlap | ||
| 467 | ? ticks_to_destroy >> 4 | ||
| 468 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 469 | should_care |= aggressive_mode; | ||
| 470 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 471 | if (is_bad_overlap) { | ||
| 472 | const bool overlap_check = std::ranges::all_of( | ||
| 473 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 474 | auto& overlap = slot_images[overlap_id]; | ||
| 475 | return overlap.frame_tick >= image->frame_tick; | ||
| 476 | }); | ||
| 477 | if (!overlap_check) { | ||
| 478 | ++deletion_iterator; | ||
| 479 | continue; | ||
| 480 | } | ||
| 481 | } | ||
| 482 | if (!is_bad_overlap && must_download) { | ||
| 483 | const bool alias_check = std::ranges::none_of( | ||
| 484 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 485 | auto& alias_image = slot_images[alias.id]; | ||
| 486 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 487 | (alias_image.modification_tick < image->modification_tick); | ||
| 488 | }); | ||
| 489 | |||
| 490 | if (alias_check) { | ||
| 491 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 492 | const auto copies = FullDownloadCopies(image->info); | ||
| 493 | image->DownloadMemory(map, copies); | ||
| 494 | runtime.Finish(); | ||
| 495 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 496 | } | ||
| 497 | } | ||
| 498 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 499 | UntrackImage(*image, image_id); | ||
| 500 | } | ||
| 501 | UnregisterImage(image_id); | ||
| 502 | DeleteImage(image_id); | ||
| 503 | if (is_bad_overlap) { | ||
| 504 | ++num_iterations; | ||
| 505 | } | ||
| 506 | } | ||
| 507 | ++deletion_iterator; | ||
| 508 | } | ||
| 509 | } | ||
| 510 | |||
| 511 | template <class P> | ||
| 512 | void TextureCache<P>::TickFrame() { | ||
| 513 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { | ||
| 514 | RunGarbageCollector(); | ||
| 515 | } | ||
| 516 | sentenced_images.Tick(); | ||
| 517 | sentenced_framebuffers.Tick(); | ||
| 518 | sentenced_image_view.Tick(); | ||
| 519 | ++frame_tick; | ||
| 520 | } | ||
| 521 | |||
| 522 | template <class P> | ||
| 523 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||
| 524 | return slot_image_views[id]; | ||
| 525 | } | ||
| 526 | |||
| 527 | template <class P> | ||
| 528 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | ||
| 529 | return slot_image_views[id]; | ||
| 530 | } | ||
| 531 | |||
| 532 | template <class P> | ||
| 533 | void TextureCache<P>::MarkModification(ImageId id) noexcept { | ||
| 534 | MarkModification(slot_images[id]); | ||
| 535 | } | ||
| 536 | |||
| 537 | template <class P> | ||
| 538 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | ||
| 539 | std::span<ImageViewId> image_view_ids) { | ||
| 540 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 541 | } | ||
| 542 | |||
| 543 | template <class P> | ||
| 544 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | ||
| 545 | std::span<ImageViewId> image_view_ids) { | ||
| 546 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 547 | } | ||
| 548 | |||
| 549 | template <class P> | ||
| 550 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | ||
| 551 | if (index > graphics_sampler_table.Limit()) { | ||
| 552 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 553 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 554 | } | ||
| 555 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 556 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 557 | if (is_new) { | ||
| 558 | id = FindSampler(descriptor); | ||
| 559 | } | ||
| 560 | return &slot_samplers[id]; | ||
| 561 | } | ||
| 562 | |||
| 563 | template <class P> | ||
| 564 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { | ||
| 565 | if (index > compute_sampler_table.Limit()) { | ||
| 566 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 567 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 568 | } | ||
| 569 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); | ||
| 570 | SamplerId& id = compute_sampler_ids[index]; | ||
| 571 | if (is_new) { | ||
| 572 | id = FindSampler(descriptor); | ||
| 573 | } | ||
| 574 | return &slot_samplers[id]; | ||
| 575 | } | ||
| 576 | |||
| 577 | template <class P> | ||
| 578 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { | ||
| 579 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; | ||
| 580 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; | ||
| 581 | const u32 tic_limit = maxwell3d.regs.tic.limit; | ||
| 582 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; | ||
| 583 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { | ||
| 584 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 585 | } | ||
| 586 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 587 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 591 | template <class P> | ||
| 592 | void TextureCache<P>::SynchronizeComputeDescriptors() { | ||
| 593 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; | ||
| 594 | const u32 tic_limit = kepler_compute.regs.tic.limit; | ||
| 595 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; | ||
| 596 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); | ||
| 597 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { | ||
| 598 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 599 | } | ||
| 600 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 601 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 602 | } | ||
| 603 | } | ||
| 604 | |||
| 605 | template <class P> | ||
| 606 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | ||
| 607 | using namespace VideoCommon::Dirty; | ||
| 608 | auto& flags = maxwell3d.dirty.flags; | ||
| 609 | if (!flags[Dirty::RenderTargets]) { | ||
| 610 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 611 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 612 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 613 | } | ||
| 614 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 615 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 616 | return; | ||
| 617 | } | ||
| 618 | flags[Dirty::RenderTargets] = false; | ||
| 619 | |||
| 620 | // Render target control is used on all render targets, so force look ups when this one is up | ||
| 621 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 622 | flags[Dirty::RenderTargetControl] = false; | ||
| 623 | |||
| 624 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 625 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 626 | if (flags[Dirty::ColorBuffer0 + index] || force) { | ||
| 627 | flags[Dirty::ColorBuffer0 + index] = false; | ||
| 628 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | ||
| 629 | } | ||
| 630 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 631 | } | ||
| 632 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 633 | flags[Dirty::ZetaBuffer] = false; | ||
| 634 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 635 | } | ||
| 636 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 637 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 638 | |||
| 639 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 640 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); | ||
| 641 | } | ||
| 642 | render_targets.size = Extent2D{ | ||
| 643 | maxwell3d.regs.render_area.width, | ||
| 644 | maxwell3d.regs.render_area.height, | ||
| 645 | }; | ||
| 646 | } | ||
| 647 | |||
| 648 | template <class P> | ||
| 649 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { | ||
| 650 | return &slot_framebuffers[GetFramebufferId(render_targets)]; | ||
| 651 | } | ||
| 652 | |||
| 653 | template <class P> | ||
| 654 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, | ||
| 655 | std::span<ImageViewId> cached_image_view_ids, | ||
| 656 | std::span<const u32> indices, | ||
| 657 | std::span<ImageViewId> image_view_ids) { | ||
| 658 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 659 | do { | ||
| 660 | has_deleted_images = false; | ||
| 661 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 662 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 663 | }); | ||
| 664 | } while (has_deleted_images); | ||
| 665 | } | ||
| 666 | |||
| 667 | template <class P> | ||
| 668 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, | ||
| 669 | std::span<ImageViewId> cached_image_view_ids, | ||
| 670 | u32 index) { | ||
| 671 | if (index > table.Limit()) { | ||
| 672 | LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); | ||
| 673 | return NULL_IMAGE_VIEW_ID; | ||
| 674 | } | ||
| 675 | const auto [descriptor, is_new] = table.Read(index); | ||
| 676 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 677 | if (is_new) { | ||
| 678 | image_view_id = FindImageView(descriptor); | ||
| 679 | } | ||
| 680 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 681 | PrepareImageView(image_view_id, false, false); | ||
| 682 | } | ||
| 683 | return image_view_id; | ||
| 684 | } | ||
| 685 | |||
| 686 | template <class P> | ||
| 687 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | ||
| 688 | const auto [pair, is_new] = framebuffers.try_emplace(key); | ||
| 689 | FramebufferId& framebuffer_id = pair->second; | ||
| 690 | if (!is_new) { | ||
| 691 | return framebuffer_id; | ||
| 692 | } | ||
| 693 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 694 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 695 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 696 | ImageView* const depth_buffer = | ||
| 697 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 698 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 699 | return framebuffer_id; | ||
| 700 | } | ||
| 701 | |||
| 702 | template <class P> | ||
| 703 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | ||
| 704 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | ||
| 705 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 706 | return; | ||
| 707 | } | ||
| 708 | image.flags |= ImageFlagBits::CpuModified; | ||
| 709 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 710 | UntrackImage(image, image_id); | ||
| 711 | } | ||
| 712 | }); | ||
| 713 | } | ||
| 714 | |||
| 715 | template <class P> | ||
| 716 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||
| 717 | std::vector<ImageId> images; | ||
| 718 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | ||
| 719 | if (!image.IsSafeDownload()) { | ||
| 720 | return; | ||
| 721 | } | ||
| 722 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 723 | images.push_back(image_id); | ||
| 724 | }); | ||
| 725 | if (images.empty()) { | ||
| 726 | return; | ||
| 727 | } | ||
| 728 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 729 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 730 | }); | ||
| 731 | for (const ImageId image_id : images) { | ||
| 732 | Image& image = slot_images[image_id]; | ||
| 733 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | ||
| 734 | const auto copies = FullDownloadCopies(image.info); | ||
| 735 | image.DownloadMemory(map, copies); | ||
| 736 | runtime.Finish(); | ||
| 737 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||
| 738 | } | ||
| 739 | } | ||
| 740 | |||
| 741 | template <class P> | ||
| 742 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | ||
| 743 | std::vector<ImageId> deleted_images; | ||
| 744 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||
| 745 | for (const ImageId id : deleted_images) { | ||
| 746 | Image& image = slot_images[id]; | ||
| 747 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 748 | UntrackImage(image, id); | ||
| 749 | } | ||
| 750 | UnregisterImage(id); | ||
| 751 | DeleteImage(id); | ||
| 752 | } | ||
| 753 | } | ||
| 754 | |||
| 755 | template <class P> | ||
| 756 | void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { | ||
| 757 | std::vector<ImageId> deleted_images; | ||
| 758 | ForEachImageInRegionGPU(gpu_addr, size, | ||
| 759 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||
| 760 | for (const ImageId id : deleted_images) { | ||
| 761 | Image& image = slot_images[id]; | ||
| 762 | if (True(image.flags & ImageFlagBits::Remapped)) { | ||
| 763 | continue; | ||
| 764 | } | ||
| 765 | image.flags |= ImageFlagBits::Remapped; | ||
| 766 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 767 | UntrackImage(image, id); | ||
| 768 | } | ||
| 769 | } | ||
| 770 | } | ||
| 771 | |||
| 772 | template <class P> | ||
| 773 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||
| 774 | const Tegra::Engines::Fermi2D::Surface& src, | ||
| 775 | const Tegra::Engines::Fermi2D::Config& copy) { | ||
| 776 | const BlitImages images = GetBlitImages(dst, src); | ||
| 777 | const ImageId dst_id = images.dst_id; | ||
| 778 | const ImageId src_id = images.src_id; | ||
| 779 | PrepareImage(src_id, false, false); | ||
| 780 | PrepareImage(dst_id, true, false); | ||
| 781 | |||
| 782 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 783 | const ImageBase& src_image = slot_images[src_id]; | ||
| 784 | |||
| 785 | // TODO: Deduplicate | ||
| 786 | const std::optional src_base = src_image.TryFindBase(src.Address()); | ||
| 787 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; | ||
| 788 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); | ||
| 789 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); | ||
| 790 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 791 | const Region2D src_region{ | ||
| 792 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 793 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 794 | }; | ||
| 795 | |||
| 796 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | ||
| 797 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | ||
| 798 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | ||
| 799 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 800 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); | ||
| 801 | const Region2D dst_region{ | ||
| 802 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, | ||
| 803 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, | ||
| 804 | }; | ||
| 805 | |||
| 806 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. | ||
| 807 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 808 | if constexpr (FRAMEBUFFER_BLITS) { | ||
| 809 | // OpenGL blits from framebuffers, not images | ||
| 810 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 811 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 812 | copy.filter, copy.operation); | ||
| 813 | } else { | ||
| 814 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 815 | // Provide a framebuffer in case it's necessary | ||
| 816 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 817 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 818 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 819 | copy.operation); | ||
| 820 | } | ||
| 821 | } | ||
| 822 | |||
| 823 | template <class P> | ||
| 824 | void TextureCache<P>::InvalidateColorBuffer(size_t index) { | ||
| 825 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 826 | color_buffer_id = FindColorBuffer(index, false); | ||
| 827 | if (!color_buffer_id) { | ||
| 828 | LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index); | ||
| 829 | return; | ||
| 830 | } | ||
| 831 | // When invalidating a color buffer, the old contents are no longer relevant | ||
| 832 | ImageView& color_buffer = slot_image_views[color_buffer_id]; | ||
| 833 | Image& image = slot_images[color_buffer.image_id]; | ||
| 834 | image.flags &= ~ImageFlagBits::CpuModified; | ||
| 835 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 836 | |||
| 837 | runtime.InvalidateColorBuffer(color_buffer, index); | ||
| 838 | } | ||
| 839 | |||
| 840 | template <class P> | ||
| 841 | void TextureCache<P>::InvalidateDepthBuffer() { | ||
| 842 | ImageViewId& depth_buffer_id = render_targets.depth_buffer_id; | ||
| 843 | depth_buffer_id = FindDepthBuffer(false); | ||
| 844 | if (!depth_buffer_id) { | ||
| 845 | LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); | ||
| 846 | return; | ||
| 847 | } | ||
| 848 | // When invalidating the depth buffer, the old contents are no longer relevant | ||
| 849 | ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id]; | ||
| 850 | image.flags &= ~ImageFlagBits::CpuModified; | ||
| 851 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 852 | |||
| 853 | ImageView& depth_buffer = slot_image_views[depth_buffer_id]; | ||
| 854 | runtime.InvalidateDepthBuffer(depth_buffer); | ||
| 855 | } | ||
| 856 | |||
| 857 | template <class P> | ||
| 858 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { | ||
| 859 | // TODO: Properly implement this | ||
| 860 | const auto it = page_table.find(cpu_addr >> PAGE_BITS); | ||
| 861 | if (it == page_table.end()) { | ||
| 862 | return nullptr; | ||
| 863 | } | ||
| 864 | const auto& image_map_ids = it->second; | ||
| 865 | for (const ImageMapId map_id : image_map_ids) { | ||
| 866 | const ImageMapView& map = slot_map_views[map_id]; | ||
| 867 | const ImageBase& image = slot_images[map.image_id]; | ||
| 868 | if (image.cpu_addr != cpu_addr) { | ||
| 869 | continue; | ||
| 870 | } | ||
| 871 | if (image.image_view_ids.empty()) { | ||
| 872 | continue; | ||
| 873 | } | ||
| 874 | return &slot_image_views[image.image_view_ids.at(0)]; | ||
| 875 | } | ||
| 876 | return nullptr; | ||
| 877 | } | ||
| 878 | |||
| 879 | template <class P> | ||
| 880 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { | ||
| 881 | return !uncommitted_downloads.empty(); | ||
| 882 | } | ||
| 883 | |||
| 884 | template <class P> | ||
| 885 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { | ||
| 886 | return !committed_downloads.empty() && !committed_downloads.front().empty(); | ||
| 887 | } | ||
| 888 | |||
| 889 | template <class P> | ||
| 890 | void TextureCache<P>::CommitAsyncFlushes() { | ||
| 891 | // This is intentionally passing the value by copy | ||
| 892 | committed_downloads.push(uncommitted_downloads); | ||
| 893 | uncommitted_downloads.clear(); | ||
| 894 | } | ||
| 895 | |||
| 896 | template <class P> | ||
| 897 | void TextureCache<P>::PopAsyncFlushes() { | ||
| 898 | if (committed_downloads.empty()) { | ||
| 899 | return; | ||
| 900 | } | ||
| 901 | const std::span<const ImageId> download_ids = committed_downloads.front(); | ||
| 902 | if (download_ids.empty()) { | ||
| 903 | committed_downloads.pop(); | ||
| 904 | return; | ||
| 905 | } | ||
| 906 | size_t total_size_bytes = 0; | ||
| 907 | for (const ImageId image_id : download_ids) { | ||
| 908 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 909 | } | ||
| 910 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 911 | const size_t original_offset = download_map.offset; | ||
| 912 | for (const ImageId image_id : download_ids) { | ||
| 913 | Image& image = slot_images[image_id]; | ||
| 914 | const auto copies = FullDownloadCopies(image.info); | ||
| 915 | image.DownloadMemory(download_map, copies); | ||
| 916 | download_map.offset += image.unswizzled_size_bytes; | ||
| 917 | } | ||
| 918 | // Wait for downloads to finish | ||
| 919 | runtime.Finish(); | ||
| 920 | |||
| 921 | download_map.offset = original_offset; | ||
| 922 | std::span<u8> download_span = download_map.mapped_span; | ||
| 923 | for (const ImageId image_id : download_ids) { | ||
| 924 | const ImageBase& image = slot_images[image_id]; | ||
| 925 | const auto copies = FullDownloadCopies(image.info); | ||
| 926 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); | ||
| 927 | download_map.offset += image.unswizzled_size_bytes; | ||
| 928 | download_span = download_span.subspan(image.unswizzled_size_bytes); | ||
| 929 | } | ||
| 930 | committed_downloads.pop(); | ||
| 931 | } | ||
| 932 | |||
| 933 | template <class P> | ||
| 934 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 935 | bool is_modified = false; | ||
| 936 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | ||
| 937 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 938 | return false; | ||
| 939 | } | ||
| 940 | is_modified = true; | ||
| 941 | return true; | ||
| 942 | }); | ||
| 943 | return is_modified; | ||
| 944 | } | ||
| 945 | |||
| 946 | template <class P> | ||
| 947 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | ||
| 948 | if (False(image.flags & ImageFlagBits::CpuModified)) { | ||
| 949 | // Only upload modified images | ||
| 950 | return; | ||
| 951 | } | ||
| 952 | image.flags &= ~ImageFlagBits::CpuModified; | ||
| 953 | TrackImage(image, image_id); | ||
| 954 | |||
| 955 | if (image.info.num_samples > 1) { | ||
| 956 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | ||
| 957 | return; | ||
| 958 | } | ||
| 959 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); | ||
| 960 | UploadImageContents(image, staging); | ||
| 961 | runtime.InsertUploadMemoryBarrier(); | ||
| 962 | } | ||
| 963 | |||
| 964 | template <class P> | ||
| 965 | template <typename StagingBuffer> | ||
| 966 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { | ||
| 967 | const std::span<u8> mapped_span = staging.mapped_span; | ||
| 968 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 969 | |||
| 970 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 971 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 972 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 973 | runtime.AccelerateImageUpload(image, staging, uploads); | ||
| 974 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 975 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 976 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 977 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 978 | image.UploadMemory(staging, copies); | ||
| 979 | } else { | ||
| 980 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 981 | image.UploadMemory(staging, copies); | ||
| 982 | } | ||
| 983 | } | ||
| 984 | |||
| 985 | template <class P> | ||
| 986 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { | ||
| 987 | if (!IsValidEntry(gpu_memory, config)) { | ||
| 988 | return NULL_IMAGE_VIEW_ID; | ||
| 989 | } | ||
| 990 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 991 | ImageViewId& image_view_id = pair->second; | ||
| 992 | if (is_new) { | ||
| 993 | image_view_id = CreateImageView(config); | ||
| 994 | } | ||
| 995 | return image_view_id; | ||
| 996 | } | ||
| 997 | |||
| 998 | template <class P> | ||
| 999 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { | ||
| 1000 | const ImageInfo info(config); | ||
| 1001 | if (info.type == ImageType::Buffer) { | ||
| 1002 | const ImageViewInfo view_info(config, 0); | ||
| 1003 | return slot_image_views.insert(runtime, info, view_info, config.Address()); | ||
| 1004 | } | ||
| 1005 | const u32 layer_offset = config.BaseLayer() * info.layer_stride; | ||
| 1006 | const GPUVAddr image_gpu_addr = config.Address() - layer_offset; | ||
| 1007 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); | ||
| 1008 | if (!image_id) { | ||
| 1009 | return NULL_IMAGE_VIEW_ID; | ||
| 1010 | } | ||
| 1011 | ImageBase& image = slot_images[image_id]; | ||
| 1012 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 1013 | ASSERT(base.level == 0); | ||
| 1014 | const ImageViewInfo view_info(config, base.layer); | ||
| 1015 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1016 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 1017 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 1018 | image.flags |= ImageFlagBits::Strong; | ||
| 1019 | return image_view_id; | ||
| 1020 | } | ||
| 1021 | |||
| 1022 | template <class P> | ||
| 1023 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 1024 | RelaxedOptions options) { | ||
| 1025 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { | ||
| 1026 | return image_id; | ||
| 1027 | } | ||
| 1028 | return InsertImage(info, gpu_addr, options); | ||
| 1029 | } | ||
| 1030 | |||
| 1031 | template <class P> | ||
| 1032 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 1033 | RelaxedOptions options) { | ||
| 1034 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1035 | if (!cpu_addr) { | ||
| 1036 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||
| 1037 | if (!cpu_addr) { | ||
| 1038 | return ImageId{}; | ||
| 1039 | } | ||
| 1040 | } | ||
| 1041 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 1042 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 1043 | ImageId image_id; | ||
| 1044 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | ||
| 1045 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | ||
| 1046 | return false; | ||
| 1047 | } | ||
| 1048 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | ||
| 1049 | const bool strict_size = False(options & RelaxedOptions::Size) && | ||
| 1050 | True(existing_image.flags & ImageFlagBits::Strong); | ||
| 1051 | const ImageInfo& existing = existing_image.info; | ||
| 1052 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && | ||
| 1053 | existing.pitch == info.pitch && | ||
| 1054 | IsPitchLinearSameSize(existing, info, strict_size) && | ||
| 1055 | IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { | ||
| 1056 | image_id = existing_image_id; | ||
| 1057 | return true; | ||
| 1058 | } | ||
| 1059 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, | ||
| 1060 | native_bgr)) { | ||
| 1061 | image_id = existing_image_id; | ||
| 1062 | return true; | ||
| 1063 | } | ||
| 1064 | return false; | ||
| 1065 | }; | ||
| 1066 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 1067 | return image_id; | ||
| 1068 | } | ||
| 1069 | |||
| 1070 | template <class P> | ||
| 1071 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 1072 | RelaxedOptions options) { | ||
| 1073 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1074 | if (!cpu_addr) { | ||
| 1075 | const auto size = CalculateGuestSizeInBytes(info); | ||
| 1076 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); | ||
| 1077 | if (!cpu_addr) { | ||
| 1078 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||
| 1079 | virtual_invalid_space += Common::AlignUp(size, 32); | ||
| 1080 | cpu_addr = std::optional<VAddr>(fake_addr); | ||
| 1081 | } | ||
| 1082 | } | ||
| 1083 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 1084 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 1085 | const Image& image = slot_images[image_id]; | ||
| 1086 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 1087 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 1088 | if (is_new) { | ||
| 1089 | it->second = slot_image_allocs.insert(); | ||
| 1090 | } | ||
| 1091 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 1092 | return image_id; | ||
| 1093 | } | ||
| 1094 | |||
| 1095 | template <class P> | ||
| 1096 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||
| 1097 | ImageInfo new_info = info; | ||
| 1098 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 1099 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 1100 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 1101 | std::vector<ImageId> overlap_ids; | ||
| 1102 | std::unordered_set<ImageId> overlaps_found; | ||
| 1103 | std::vector<ImageId> left_aliased_ids; | ||
| 1104 | std::vector<ImageId> right_aliased_ids; | ||
| 1105 | std::unordered_set<ImageId> ignore_textures; | ||
| 1106 | std::vector<ImageId> bad_overlap_ids; | ||
| 1107 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 1108 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 1109 | ignore_textures.insert(overlap_id); | ||
| 1110 | return; | ||
| 1111 | } | ||
| 1112 | if (info.type == ImageType::Linear) { | ||
| 1113 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | ||
| 1114 | // Alias linear images with the same pitch | ||
| 1115 | left_aliased_ids.push_back(overlap_id); | ||
| 1116 | } | ||
| 1117 | return; | ||
| 1118 | } | ||
| 1119 | overlaps_found.insert(overlap_id); | ||
| 1120 | static constexpr bool strict_size = true; | ||
| 1121 | const std::optional<OverlapResult> solution = ResolveOverlap( | ||
| 1122 | new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); | ||
| 1123 | if (solution) { | ||
| 1124 | gpu_addr = solution->gpu_addr; | ||
| 1125 | cpu_addr = solution->cpu_addr; | ||
| 1126 | new_info.resources = solution->resources; | ||
| 1127 | overlap_ids.push_back(overlap_id); | ||
| 1128 | return; | ||
| 1129 | } | ||
| 1130 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 1131 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | ||
| 1132 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | ||
| 1133 | left_aliased_ids.push_back(overlap_id); | ||
| 1134 | overlap.flags |= ImageFlagBits::Alias; | ||
| 1135 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||
| 1136 | broken_views, native_bgr)) { | ||
| 1137 | right_aliased_ids.push_back(overlap_id); | ||
| 1138 | overlap.flags |= ImageFlagBits::Alias; | ||
| 1139 | } else { | ||
| 1140 | bad_overlap_ids.push_back(overlap_id); | ||
| 1141 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 1142 | } | ||
| 1143 | }; | ||
| 1144 | ForEachImageInRegion(cpu_addr, size_bytes, region_check); | ||
| 1145 | const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 1146 | if (!overlaps_found.contains(overlap_id)) { | ||
| 1147 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 1148 | ignore_textures.insert(overlap_id); | ||
| 1149 | } | ||
| 1150 | if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { | ||
| 1151 | ignore_textures.insert(overlap_id); | ||
| 1152 | } | ||
| 1153 | } | ||
| 1154 | }; | ||
| 1155 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | ||
| 1156 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 1157 | Image& new_image = slot_images[new_image_id]; | ||
| 1158 | |||
| 1159 | if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { | ||
| 1160 | new_image.flags |= ImageFlagBits::Sparse; | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | for (const ImageId overlap_id : ignore_textures) { | ||
| 1164 | Image& overlap = slot_images[overlap_id]; | ||
| 1165 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 1166 | UNIMPLEMENTED(); | ||
| 1167 | } | ||
| 1168 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 1169 | UntrackImage(overlap, overlap_id); | ||
| 1170 | } | ||
| 1171 | UnregisterImage(overlap_id); | ||
| 1172 | DeleteImage(overlap_id); | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | // TODO: Only upload what we need | ||
| 1176 | RefreshContents(new_image, new_image_id); | ||
| 1177 | |||
| 1178 | for (const ImageId overlap_id : overlap_ids) { | ||
| 1179 | Image& overlap = slot_images[overlap_id]; | ||
| 1180 | if (overlap.info.num_samples != new_image.info.num_samples) { | ||
| 1181 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||
| 1182 | } else { | ||
| 1183 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | ||
| 1184 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 1185 | runtime.CopyImage(new_image, overlap, copies); | ||
| 1186 | } | ||
| 1187 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 1188 | UntrackImage(overlap, overlap_id); | ||
| 1189 | } | ||
| 1190 | UnregisterImage(overlap_id); | ||
| 1191 | DeleteImage(overlap_id); | ||
| 1192 | } | ||
| 1193 | ImageBase& new_image_base = new_image; | ||
| 1194 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 1195 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 1196 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 1197 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1198 | } | ||
| 1199 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 1200 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 1201 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 1202 | new_image.flags |= ImageFlagBits::Alias; | ||
| 1203 | } | ||
| 1204 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 1205 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 1206 | aliased.overlapping_images.push_back(new_image_id); | ||
| 1207 | new_image.overlapping_images.push_back(aliased_id); | ||
| 1208 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 1209 | } | ||
| 1210 | RegisterImage(new_image_id); | ||
| 1211 | return new_image_id; | ||
| 1212 | } | ||
| 1213 | |||
| 1214 | template <class P> | ||
| 1215 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | ||
| 1216 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { | ||
| 1217 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; | ||
| 1218 | const GPUVAddr dst_addr = dst.Address(); | ||
| 1219 | const GPUVAddr src_addr = src.Address(); | ||
| 1220 | ImageInfo dst_info(dst); | ||
| 1221 | ImageInfo src_info(src); | ||
| 1222 | ImageId dst_id; | ||
| 1223 | ImageId src_id; | ||
| 1224 | do { | ||
| 1225 | has_deleted_images = false; | ||
| 1226 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 1227 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 1228 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 1229 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 1230 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 1231 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 1232 | continue; | ||
| 1233 | } | ||
| 1234 | if (!dst_id) { | ||
| 1235 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 1236 | } | ||
| 1237 | if (!src_id) { | ||
| 1238 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 1239 | } | ||
| 1240 | } while (has_deleted_images); | ||
| 1241 | return BlitImages{ | ||
| 1242 | .dst_id = dst_id, | ||
| 1243 | .src_id = src_id, | ||
| 1244 | .dst_format = dst_info.format, | ||
| 1245 | .src_format = src_info.format, | ||
| 1246 | }; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | template <class P> | ||
| 1250 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||
| 1251 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 1252 | return NULL_SAMPLER_ID; | ||
| 1253 | } | ||
| 1254 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 1255 | if (is_new) { | ||
| 1256 | pair->second = slot_samplers.insert(runtime, config); | ||
| 1257 | } | ||
| 1258 | return pair->second; | ||
| 1259 | } | ||
| 1260 | |||
| 1261 | template <class P> | ||
| 1262 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | ||
| 1263 | const auto& regs = maxwell3d.regs; | ||
| 1264 | if (index >= regs.rt_control.count) { | ||
| 1265 | return ImageViewId{}; | ||
| 1266 | } | ||
| 1267 | const auto& rt = regs.rt[index]; | ||
| 1268 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 1269 | if (gpu_addr == 0) { | ||
| 1270 | return ImageViewId{}; | ||
| 1271 | } | ||
| 1272 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 1273 | return ImageViewId{}; | ||
| 1274 | } | ||
| 1275 | const ImageInfo info(regs, index); | ||
| 1276 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1277 | } | ||
| 1278 | |||
| 1279 | template <class P> | ||
| 1280 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | ||
| 1281 | const auto& regs = maxwell3d.regs; | ||
| 1282 | if (!regs.zeta_enable) { | ||
| 1283 | return ImageViewId{}; | ||
| 1284 | } | ||
| 1285 | const GPUVAddr gpu_addr = regs.zeta.Address(); | ||
| 1286 | if (gpu_addr == 0) { | ||
| 1287 | return ImageViewId{}; | ||
| 1288 | } | ||
| 1289 | const ImageInfo info(regs); | ||
| 1290 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | template <class P> | ||
| 1294 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 1295 | bool is_clear) { | ||
| 1296 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | ||
| 1297 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); | ||
| 1298 | if (!image_id) { | ||
| 1299 | return NULL_IMAGE_VIEW_ID; | ||
| 1300 | } | ||
| 1301 | Image& image = slot_images[image_id]; | ||
| 1302 | const ImageViewType view_type = RenderTargetImageViewType(info); | ||
| 1303 | SubresourceBase base; | ||
| 1304 | if (image.info.type == ImageType::Linear) { | ||
| 1305 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 1306 | } else { | ||
| 1307 | base = image.TryFindBase(gpu_addr).value(); | ||
| 1308 | } | ||
| 1309 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 1310 | const SubresourceRange range{ | ||
| 1311 | .base = base, | ||
| 1312 | .extent = {.levels = 1, .layers = layers}, | ||
| 1313 | }; | ||
| 1314 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 1315 | } | ||
| 1316 | |||
| 1317 | template <class P> | ||
| 1318 | template <typename Func> | ||
| 1319 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | ||
| 1320 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 1321 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1322 | boost::container::small_vector<ImageId, 32> images; | ||
| 1323 | boost::container::small_vector<ImageMapId, 32> maps; | ||
| 1324 | ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { | ||
| 1325 | const auto it = page_table.find(page); | ||
| 1326 | if (it == page_table.end()) { | ||
| 1327 | if constexpr (BOOL_BREAK) { | ||
| 1328 | return false; | ||
| 1329 | } else { | ||
| 1330 | return; | ||
| 1331 | } | ||
| 1332 | } | ||
| 1333 | for (const ImageMapId map_id : it->second) { | ||
| 1334 | ImageMapView& map = slot_map_views[map_id]; | ||
| 1335 | if (map.picked) { | ||
| 1336 | continue; | ||
| 1337 | } | ||
| 1338 | if (!map.Overlaps(cpu_addr, size)) { | ||
| 1339 | continue; | ||
| 1340 | } | ||
| 1341 | map.picked = true; | ||
| 1342 | maps.push_back(map_id); | ||
| 1343 | Image& image = slot_images[map.image_id]; | ||
| 1344 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1345 | continue; | ||
| 1346 | } | ||
| 1347 | image.flags |= ImageFlagBits::Picked; | ||
| 1348 | images.push_back(map.image_id); | ||
| 1349 | if constexpr (BOOL_BREAK) { | ||
| 1350 | if (func(map.image_id, image)) { | ||
| 1351 | return true; | ||
| 1352 | } | ||
| 1353 | } else { | ||
| 1354 | func(map.image_id, image); | ||
| 1355 | } | ||
| 1356 | } | ||
| 1357 | if constexpr (BOOL_BREAK) { | ||
| 1358 | return false; | ||
| 1359 | } | ||
| 1360 | }); | ||
| 1361 | for (const ImageId image_id : images) { | ||
| 1362 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1363 | } | ||
| 1364 | for (const ImageMapId map_id : maps) { | ||
| 1365 | slot_map_views[map_id].picked = false; | ||
| 1366 | } | ||
| 1367 | } | ||
| 1368 | |||
| 1369 | template <class P> | ||
| 1370 | template <typename Func> | ||
| 1371 | void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 1372 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 1373 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1374 | boost::container::small_vector<ImageId, 8> images; | ||
| 1375 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 1376 | const auto it = gpu_page_table.find(page); | ||
| 1377 | if (it == gpu_page_table.end()) { | ||
| 1378 | if constexpr (BOOL_BREAK) { | ||
| 1379 | return false; | ||
| 1380 | } else { | ||
| 1381 | return; | ||
| 1382 | } | ||
| 1383 | } | ||
| 1384 | for (const ImageId image_id : it->second) { | ||
| 1385 | Image& image = slot_images[image_id]; | ||
| 1386 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1387 | continue; | ||
| 1388 | } | ||
| 1389 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 1390 | continue; | ||
| 1391 | } | ||
| 1392 | image.flags |= ImageFlagBits::Picked; | ||
| 1393 | images.push_back(image_id); | ||
| 1394 | if constexpr (BOOL_BREAK) { | ||
| 1395 | if (func(image_id, image)) { | ||
| 1396 | return true; | ||
| 1397 | } | ||
| 1398 | } else { | ||
| 1399 | func(image_id, image); | ||
| 1400 | } | ||
| 1401 | } | ||
| 1402 | if constexpr (BOOL_BREAK) { | ||
| 1403 | return false; | ||
| 1404 | } | ||
| 1405 | }); | ||
| 1406 | for (const ImageId image_id : images) { | ||
| 1407 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1408 | } | ||
| 1409 | } | ||
| 1410 | |||
| 1411 | template <class P> | ||
| 1412 | template <typename Func> | ||
| 1413 | void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 1414 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 1415 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 1416 | boost::container::small_vector<ImageId, 8> images; | ||
| 1417 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 1418 | const auto it = sparse_page_table.find(page); | ||
| 1419 | if (it == sparse_page_table.end()) { | ||
| 1420 | if constexpr (BOOL_BREAK) { | ||
| 1421 | return false; | ||
| 1422 | } else { | ||
| 1423 | return; | ||
| 1424 | } | ||
| 1425 | } | ||
| 1426 | for (const ImageId image_id : it->second) { | ||
| 1427 | Image& image = slot_images[image_id]; | ||
| 1428 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1429 | continue; | ||
| 1430 | } | ||
| 1431 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 1432 | continue; | ||
| 1433 | } | ||
| 1434 | image.flags |= ImageFlagBits::Picked; | ||
| 1435 | images.push_back(image_id); | ||
| 1436 | if constexpr (BOOL_BREAK) { | ||
| 1437 | if (func(image_id, image)) { | ||
| 1438 | return true; | ||
| 1439 | } | ||
| 1440 | } else { | ||
| 1441 | func(image_id, image); | ||
| 1442 | } | ||
| 1443 | } | ||
| 1444 | if constexpr (BOOL_BREAK) { | ||
| 1445 | return false; | ||
| 1446 | } | ||
| 1447 | }); | ||
| 1448 | for (const ImageId image_id : images) { | ||
| 1449 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1450 | } | ||
| 1451 | } | ||
| 1452 | |||
| 1453 | template <class P> | ||
| 1454 | template <typename Func> | ||
| 1455 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||
| 1456 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||
| 1457 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||
| 1458 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||
| 1459 | for (auto& segment : segments) { | ||
| 1460 | const auto gpu_addr = segment.first; | ||
| 1461 | const auto size = segment.second; | ||
| 1462 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1463 | ASSERT(cpu_addr); | ||
| 1464 | if constexpr (RETURNS_BOOL) { | ||
| 1465 | if (func(gpu_addr, *cpu_addr, size)) { | ||
| 1466 | break; | ||
| 1467 | } | ||
| 1468 | } else { | ||
| 1469 | func(gpu_addr, *cpu_addr, size); | ||
| 1470 | } | ||
| 1471 | } | ||
| 1472 | } | ||
| 1473 | |||
| 1474 | template <class P> | ||
| 1475 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { | ||
| 1476 | Image& image = slot_images[image_id]; | ||
| 1477 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1478 | return image_view_id; | ||
| 1479 | } | ||
| 1480 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1481 | image.InsertView(info, image_view_id); | ||
| 1482 | return image_view_id; | ||
| 1483 | } | ||
| 1484 | |||
| 1485 | template <class P> | ||
| 1486 | void TextureCache<P>::RegisterImage(ImageId image_id) { | ||
| 1487 | ImageBase& image = slot_images[image_id]; | ||
| 1488 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1489 | "Trying to register an already registered image"); | ||
| 1490 | image.flags |= ImageFlagBits::Registered; | ||
| 1491 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1492 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1493 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1494 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1495 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1496 | } | ||
| 1497 | total_used_memory += Common::AlignUp(tentative_size, 1024); | ||
| 1498 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1499 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||
| 1500 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1501 | auto map_id = | ||
| 1502 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | ||
| 1503 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1504 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1505 | image.map_view_id = map_id; | ||
| 1506 | return; | ||
| 1507 | } | ||
| 1508 | std::vector<ImageViewId> sparse_maps{}; | ||
| 1509 | ForEachSparseSegment( | ||
| 1510 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1511 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||
| 1512 | ForEachCPUPage(cpu_addr, size, | ||
| 1513 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1514 | sparse_maps.push_back(map_id); | ||
| 1515 | }); | ||
| 1516 | sparse_views.emplace(image_id, std::move(sparse_maps)); | ||
| 1517 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1518 | [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||
| 1519 | } | ||
| 1520 | |||
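[Editorial note] RegisterImage fans an image's address range out into per-page buckets through ForEachGPUPage/ForEachCPUPage. A hedged sketch of that page walk, assuming pages are indexed by addr >> PAGE_BITS; the page size and helper name here are illustrative, not the cache's exact constants:

#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr std::uint64_t PAGE_BITS = 20; // assumed 1 MiB pages for the example

// Calls func once per page index the range [addr, addr + size) touches.
// Assumes size > 0.
template <typename Func>
void ForEachPage(std::uint64_t addr, std::size_t size, Func&& func) {
    const std::uint64_t first_page = addr >> PAGE_BITS;
    const std::uint64_t last_page = (addr + size - 1) >> PAGE_BITS;
    for (std::uint64_t page = first_page; page <= last_page; ++page) {
        func(page);
    }
}

int main() {
    // An 8 MiB image that starts half a page in touches 9 page buckets.
    ForEachPage(0x8'0000, 8 * 1024 * 1024, [](std::uint64_t page) {
        std::printf("register in page bucket 0x%llx\n", static_cast<unsigned long long>(page));
    });
}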
| 1521 | template <class P> | ||
| 1522 | void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||
| 1523 | Image& image = slot_images[image_id]; | ||
| 1524 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | ||
| 1525 | "Trying to unregister an already registered image"); | ||
| 1526 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1527 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1528 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1529 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1530 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1531 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1532 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1533 | } | ||
| 1534 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1535 | const auto& clear_page_table = | ||
| 1536 | [this, image_id]( | ||
| 1537 | u64 page, | ||
| 1538 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { | ||
| 1539 | const auto page_it = selected_page_table.find(page); | ||
| 1540 | if (page_it == selected_page_table.end()) { | ||
| 1541 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1542 | return; | ||
| 1543 | } | ||
| 1544 | std::vector<ImageId>& image_ids = page_it->second; | ||
| 1545 | const auto vector_it = std::ranges::find(image_ids, image_id); | ||
| 1546 | if (vector_it == image_ids.end()) { | ||
| 1547 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1548 | page << PAGE_BITS); | ||
| 1549 | return; | ||
| 1550 | } | ||
| 1551 | image_ids.erase(vector_it); | ||
| 1552 | }; | ||
| 1553 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1554 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | ||
| 1555 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1556 | const auto map_id = image.map_view_id; | ||
| 1557 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | ||
| 1558 | const auto page_it = page_table.find(page); | ||
| 1559 | if (page_it == page_table.end()) { | ||
| 1560 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1561 | return; | ||
| 1562 | } | ||
| 1563 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1564 | const auto vector_it = std::ranges::find(image_map_ids, map_id); | ||
| 1565 | if (vector_it == image_map_ids.end()) { | ||
| 1566 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1567 | page << PAGE_BITS); | ||
| 1568 | return; | ||
| 1569 | } | ||
| 1570 | image_map_ids.erase(vector_it); | ||
| 1571 | }); | ||
| 1572 | slot_map_views.erase(map_id); | ||
| 1573 | return; | ||
| 1574 | } | ||
| 1575 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { | ||
| 1576 | clear_page_table(page, sparse_page_table); | ||
| 1577 | }); | ||
| 1578 | auto it = sparse_views.find(image_id); | ||
| 1579 | ASSERT(it != sparse_views.end()); | ||
| 1580 | auto& sparse_maps = it->second; | ||
| 1581 | for (auto& map_view_id : sparse_maps) { | ||
| 1582 | const auto& map_range = slot_map_views[map_view_id]; | ||
| 1583 | const VAddr cpu_addr = map_range.cpu_addr; | ||
| 1584 | const std::size_t size = map_range.size; | ||
| 1585 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||
| 1586 | const auto page_it = page_table.find(page); | ||
| 1587 | if (page_it == page_table.end()) { | ||
| 1588 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1589 | return; | ||
| 1590 | } | ||
| 1591 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1592 | auto vector_it = image_map_ids.begin(); | ||
| 1593 | while (vector_it != image_map_ids.end()) { | ||
| 1594 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1595 | if (map.image_id != image_id) { | ||
| 1596 | vector_it++; | ||
| 1597 | continue; | ||
| 1598 | } | ||
| 1599 | if (!map.picked) { | ||
| 1600 | map.picked = true; | ||
| 1601 | } | ||
| 1602 | vector_it = image_map_ids.erase(vector_it); | ||
| 1603 | } | ||
| 1604 | }); | ||
| 1605 | slot_map_views.erase(map_view_id); | ||
| 1606 | } | ||
| 1607 | sparse_views.erase(it); | ||
| 1608 | } | ||
| 1609 | |||
| 1610 | template <class P> | ||
| 1611 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | ||
| 1612 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | ||
| 1613 | image.flags |= ImageFlagBits::Tracked; | ||
| 1614 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1615 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||
| 1616 | return; | ||
| 1617 | } | ||
| 1618 | if (True(image.flags & ImageFlagBits::Registered)) { | ||
| 1619 | auto it = sparse_views.find(image_id); | ||
| 1620 | ASSERT(it != sparse_views.end()); | ||
| 1621 | auto& sparse_maps = it->second; | ||
| 1622 | for (auto& map_view_id : sparse_maps) { | ||
| 1623 | const auto& map = slot_map_views[map_view_id]; | ||
| 1624 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1625 | const std::size_t size = map.size; | ||
| 1626 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1627 | } | ||
| 1628 | return; | ||
| 1629 | } | ||
| 1630 | ForEachSparseSegment(image, | ||
| 1631 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1632 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1633 | }); | ||
| 1634 | } | ||
| 1635 | |||
| 1636 | template <class P> | ||
| 1637 | void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | ||
| 1638 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||
| 1639 | image.flags &= ~ImageFlagBits::Tracked; | ||
| 1640 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1641 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1642 | return; | ||
| 1643 | } | ||
| 1644 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | ||
| 1645 | auto it = sparse_views.find(image_id); | ||
| 1646 | ASSERT(it != sparse_views.end()); | ||
| 1647 | auto& sparse_maps = it->second; | ||
| 1648 | for (auto& map_view_id : sparse_maps) { | ||
| 1649 | const auto& map = slot_map_views[map_view_id]; | ||
| 1650 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1651 | const std::size_t size = map.size; | ||
| 1652 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 1653 | } | ||
| 1654 | } | ||
| 1655 | |||
| 1656 | template <class P> | ||
| 1657 | void TextureCache<P>::DeleteImage(ImageId image_id) { | ||
| 1658 | ImageBase& image = slot_images[image_id]; | ||
| 1659 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 1660 | const auto alloc_it = image_allocs_table.find(gpu_addr); | ||
| 1661 | if (alloc_it == image_allocs_table.end()) { | ||
| 1662 | UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", | ||
| 1663 | gpu_addr); | ||
| 1664 | return; | ||
| 1665 | } | ||
| 1666 | const ImageAllocId alloc_id = alloc_it->second; | ||
| 1667 | std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; | ||
| 1668 | const auto alloc_image_it = std::ranges::find(alloc_images, image_id); | ||
| 1669 | if (alloc_image_it == alloc_images.end()) { | ||
| 1670 | UNREACHABLE_MSG("Trying to delete an image that does not exist"); | ||
| 1671 | return; | ||
| 1672 | } | ||
| 1673 | ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); | ||
| 1674 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | ||
| 1675 | |||
| 1676 | // Mark render targets as dirty | ||
| 1677 | auto& dirty = maxwell3d.dirty.flags; | ||
| 1678 | dirty[Dirty::RenderTargets] = true; | ||
| 1679 | dirty[Dirty::ZetaBuffer] = true; | ||
| 1680 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 1681 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 1682 | } | ||
| 1683 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 1684 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1685 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 1686 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 1687 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 1688 | } | ||
| 1689 | } | ||
| 1690 | RemoveImageViewReferences(image_view_ids); | ||
| 1691 | RemoveFramebuffers(image_view_ids); | ||
| 1692 | |||
| 1693 | for (const AliasedImage& alias : image.aliased_images) { | ||
| 1694 | ImageBase& other_image = slot_images[alias.id]; | ||
| 1695 | [[maybe_unused]] const size_t num_removed_aliases = | ||
| 1696 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | ||
| 1697 | return other_alias.id == image_id; | ||
| 1698 | }); | ||
| 1699 | other_image.CheckAliasState(); | ||
| 1700 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | ||
| 1701 | num_removed_aliases); | ||
| 1702 | } | ||
| 1703 | for (const ImageId overlap_id : image.overlapping_images) { | ||
| 1704 | ImageBase& other_image = slot_images[overlap_id]; | ||
| 1705 | [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( | ||
| 1706 | other_image.overlapping_images, | ||
| 1707 | [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); | ||
| 1708 | other_image.CheckBadOverlapState(); | ||
| 1709 | ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}", | ||
| 1710 | num_removed_overlaps); | ||
| 1711 | } | ||
| 1712 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1713 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1714 | slot_image_views.erase(image_view_id); | ||
| 1715 | } | ||
| 1716 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1717 | slot_images.erase(image_id); | ||
| 1718 | |||
| 1719 | alloc_images.erase(alloc_image_it); | ||
| 1720 | if (alloc_images.empty()) { | ||
| 1721 | image_allocs_table.erase(alloc_it); | ||
| 1722 | } | ||
| 1723 | if constexpr (ENABLE_VALIDATION) { | ||
| 1724 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 1725 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 1726 | } | ||
| 1727 | graphics_image_table.Invalidate(); | ||
| 1728 | compute_image_table.Invalidate(); | ||
| 1729 | has_deleted_images = true; | ||
| 1730 | } | ||
| 1731 | |||
| 1732 | template <class P> | ||
| 1733 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { | ||
| 1734 | auto it = image_views.begin(); | ||
| 1735 | while (it != image_views.end()) { | ||
| 1736 | const auto found = std::ranges::find(removed_views, it->second); | ||
| 1737 | if (found != removed_views.end()) { | ||
| 1738 | it = image_views.erase(it); | ||
| 1739 | } else { | ||
| 1740 | ++it; | ||
| 1741 | } | ||
| 1742 | } | ||
| 1743 | } | ||
| 1744 | |||
| 1745 | template <class P> | ||
| 1746 | void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { | ||
| 1747 | auto it = framebuffers.begin(); | ||
| 1748 | while (it != framebuffers.end()) { | ||
| 1749 | if (it->first.Contains(removed_views)) { | ||
| 1750 | it = framebuffers.erase(it); | ||
| 1751 | } else { | ||
| 1752 | ++it; | ||
| 1753 | } | ||
| 1754 | } | ||
| 1755 | } | ||
| 1756 | |||
| 1757 | template <class P> | ||
| 1758 | void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | ||
| 1759 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1760 | image.modification_tick = ++modification_tick; | ||
| 1761 | } | ||
| 1762 | |||
| 1763 | template <class P> | ||
| 1764 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | ||
| 1765 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | ||
| 1766 | ImageBase& image = slot_images[image_id]; | ||
| 1767 | u64 most_recent_tick = image.modification_tick; | ||
| 1768 | for (const AliasedImage& aliased : image.aliased_images) { | ||
| 1769 | ImageBase& aliased_image = slot_images[aliased.id]; | ||
| 1770 | if (image.modification_tick < aliased_image.modification_tick) { | ||
| 1771 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | ||
| 1772 | aliased_images.push_back(&aliased); | ||
| 1773 | } | ||
| 1774 | } | ||
| 1775 | if (aliased_images.empty()) { | ||
| 1776 | return; | ||
| 1777 | } | ||
| 1778 | image.modification_tick = most_recent_tick; | ||
| 1779 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | ||
| 1780 | const ImageBase& lhs_image = slot_images[lhs->id]; | ||
| 1781 | const ImageBase& rhs_image = slot_images[rhs->id]; | ||
| 1782 | return lhs_image.modification_tick < rhs_image.modification_tick; | ||
| 1783 | }); | ||
| 1784 | for (const AliasedImage* const aliased : aliased_images) { | ||
| 1785 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1786 | } | ||
| 1787 | } | ||
| 1788 | |||
| 1789 | template <class P> | ||
| 1790 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { | ||
| 1791 | Image& image = slot_images[image_id]; | ||
| 1792 | if (invalidate) { | ||
| 1793 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | ||
| 1794 | if (False(image.flags & ImageFlagBits::Tracked)) { | ||
| 1795 | TrackImage(image, image_id); | ||
| 1796 | } | ||
| 1797 | } else { | ||
| 1798 | RefreshContents(image, image_id); | ||
| 1799 | SynchronizeAliases(image_id); | ||
| 1800 | } | ||
| 1801 | if (is_modification) { | ||
| 1802 | MarkModification(image); | ||
| 1803 | } | ||
| 1804 | image.frame_tick = frame_tick; | ||
| 1805 | } | ||
| 1806 | |||
| 1807 | template <class P> | ||
| 1808 | void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, | ||
| 1809 | bool invalidate) { | ||
| 1810 | if (!image_view_id) { | ||
| 1811 | return; | ||
| 1812 | } | ||
| 1813 | const ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 1814 | if (image_view.IsBuffer()) { | ||
| 1815 | return; | ||
| 1816 | } | ||
| 1817 | PrepareImage(image_view.image_id, is_modification, invalidate); | ||
| 1818 | } | ||
| 1819 | |||
| 1820 | template <class P> | ||
| 1821 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { | ||
| 1822 | Image& dst = slot_images[dst_id]; | ||
| 1823 | Image& src = slot_images[src_id]; | ||
| 1824 | const auto dst_format_type = GetFormatType(dst.info.format); | ||
| 1825 | const auto src_format_type = GetFormatType(src.info.format); | ||
| 1826 | if (src_format_type == dst_format_type) { | ||
| 1827 | if constexpr (HAS_EMULATED_COPIES) { | ||
| 1828 | if (!runtime.CanImageBeCopied(dst, src)) { | ||
| 1829 | return runtime.EmulateCopyImage(dst, src, copies); | ||
| 1830 | } | ||
| 1831 | } | ||
| 1832 | return runtime.CopyImage(dst, src, copies); | ||
| 1833 | } | ||
| 1834 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||
| 1835 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||
| 1836 | for (const ImageCopy& copy : copies) { | ||
| 1837 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||
| 1838 | UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); | ||
| 1839 | UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); | ||
| 1840 | UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); | ||
| 1841 | |||
| 1842 | const SubresourceBase dst_base{ | ||
| 1843 | .level = copy.dst_subresource.base_level, | ||
| 1844 | .layer = copy.dst_subresource.base_layer, | ||
| 1845 | }; | ||
| 1846 | const SubresourceBase src_base{ | ||
| 1847 | .level = copy.src_subresource.base_level, | ||
| 1848 | .layer = copy.src_subresource.base_layer, | ||
| 1849 | }; | ||
| 1850 | const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; | ||
| 1851 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | ||
| 1852 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | ||
| 1853 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | ||
| 1854 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | ||
| 1855 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | ||
| 1856 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 1857 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 1858 | const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); | ||
| 1859 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 1860 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 1861 | [[maybe_unused]] const Extent3D expected_size{ | ||
| 1862 | .width = std::min(dst_view.size.width, src_view.size.width), | ||
| 1863 | .height = std::min(dst_view.size.height, src_view.size.height), | ||
| 1864 | .depth = std::min(dst_view.size.depth, src_view.size.depth), | ||
| 1865 | }; | ||
| 1866 | UNIMPLEMENTED_IF(copy.extent != expected_size); | ||
| 1867 | |||
| 1868 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); | ||
| 1869 | } | ||
| 1870 | } | ||
| 1871 | |||
| 1872 | template <class P> | ||
| 1873 | void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { | ||
| 1874 | if (*old_id == new_id) { | ||
| 1875 | return; | ||
| 1876 | } | ||
| 1877 | if (*old_id) { | ||
| 1878 | const ImageViewBase& old_view = slot_image_views[*old_id]; | ||
| 1879 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | ||
| 1880 | uncommitted_downloads.push_back(old_view.image_id); | ||
| 1881 | } | ||
| 1882 | } | ||
| 1883 | *old_id = new_id; | ||
| 1884 | } | ||
| 1885 | |||
| 1886 | template <class P> | ||
| 1887 | std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( | ||
| 1888 | ImageId image_id, const ImageViewInfo& view_info) { | ||
| 1889 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1890 | const ImageBase& image = slot_images[image_id]; | ||
| 1891 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | ||
| 1892 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | ||
| 1893 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | ||
| 1894 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | ||
| 1895 | const u32 num_samples = image.info.num_samples; | ||
| 1896 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 1897 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | ||
| 1898 | .color_buffer_ids = {color_view_id}, | ||
| 1899 | .depth_buffer_id = depth_view_id, | ||
| 1900 | .size = {extent.width >> samples_x, extent.height >> samples_y}, | ||
| 1901 | }); | ||
| 1902 | return {framebuffer_id, view_id}; | ||
| 1903 | } | ||
| 1904 | |||
| 1905 | template <class P> | ||
| 1906 | bool TextureCache<P>::IsFullClear(ImageViewId id) { | ||
| 1907 | if (!id) { | ||
| 1908 | return true; | ||
| 1909 | } | ||
| 1910 | const ImageViewBase& image_view = slot_image_views[id]; | ||
| 1911 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 1912 | const Extent3D size = image_view.size; | ||
| 1913 | const auto& regs = maxwell3d.regs; | ||
| 1914 | const auto& scissor = regs.scissor_test[0]; | ||
| 1915 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | ||
| 1916 | // Images with multiple resources can't be cleared in a single call | ||
| 1917 | return false; | ||
| 1918 | } | ||
| 1919 | if (regs.clear_flags.scissor == 0) { | ||
| 1920 | // If scissor testing is disabled, the clear is always full | ||
| 1921 | return true; | ||
| 1922 | } | ||
| 1923 | // Make sure the clear covers all texels in the subresource | ||
| 1924 | return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && | ||
| 1925 | scissor.max_y >= size.height; | ||
| 1926 | } | ||
| 1927 | |||
| 1928 | } // namespace VideoCommon | 402 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h new file mode 100644 index 000000000..8440d23d1 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_templates.h | |||
| @@ -0,0 +1,1507 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/texture_cache/texture_cache.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | using Tegra::Texture::SwizzleSource; | ||
| 12 | using Tegra::Texture::TextureType; | ||
| 13 | using Tegra::Texture::TICEntry; | ||
| 14 | using Tegra::Texture::TSCEntry; | ||
| 15 | using VideoCore::Surface::GetFormatType; | ||
| 16 | using VideoCore::Surface::IsCopyCompatible; | ||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 19 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 20 | using VideoCore::Surface::SurfaceType; | ||
| 21 | using namespace Common::Literals; | ||
| 22 | |||
| 23 | template <class P> | ||
| 24 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 25 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 26 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 27 | Tegra::MemoryManager& gpu_memory_) | ||
| 28 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 29 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 30 | // Configure null sampler | ||
| 31 | TSCEntry sampler_descriptor{}; | ||
| 32 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 33 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 34 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||
| 35 | sampler_descriptor.cubemap_anisotropy.Assign(1); | ||
| 36 | |||
| 37 | // Make sure the first index is reserved for the null resources | ||
| 38 | // This way the null resource becomes a compile time constant | ||
| 39 | void(slot_image_views.insert(runtime, NullImageParams{})); | ||
| 40 | void(slot_samplers.insert(runtime, sampler_descriptor)); | ||
| 41 | |||
| 42 | deletion_iterator = slot_images.begin(); | ||
| 43 | |||
| 44 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||
| 45 | const auto device_memory = runtime.GetDeviceLocalMemory(); | ||
| 46 | const u64 possible_expected_memory = (device_memory * 3) / 10; | ||
| 47 | const u64 possible_critical_memory = (device_memory * 6) / 10; | ||
| 48 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | ||
| 49 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | ||
| 50 | minimum_memory = 0; | ||
| 51 | } else { | ||
| 52 | // On OpenGL we can be more conservative, as the driver takes care of memory. | ||
| 53 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | ||
| 54 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | ||
| 55 | minimum_memory = expected_memory; | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | template <class P> | ||
| 60 | void TextureCache<P>::RunGarbageCollector() { | ||
| 61 | const bool high_priority_mode = total_used_memory >= expected_memory; | ||
| 62 | const bool aggressive_mode = total_used_memory >= critical_memory; | ||
| 63 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 64 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 65 | for (; num_iterations > 0; --num_iterations) { | ||
| 66 | if (deletion_iterator == slot_images.end()) { | ||
| 67 | deletion_iterator = slot_images.begin(); | ||
| 68 | if (deletion_iterator == slot_images.end()) { | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | } | ||
| 72 | auto [image_id, image_tmp] = *deletion_iterator; | ||
| 73 | Image* image = image_tmp; // Named copy so the lambdas below can capture it (avoids a clang error). | ||
| 74 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | ||
| 75 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | ||
| 76 | const bool must_download = image->IsSafeDownload(); | ||
| 77 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 78 | const u64 ticks_needed = | ||
| 79 | is_bad_overlap | ||
| 80 | ? ticks_to_destroy >> 4 | ||
| 81 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 82 | should_care |= aggressive_mode; | ||
| 83 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 84 | if (is_bad_overlap) { | ||
| 85 | const bool overlap_check = std::ranges::all_of( | ||
| 86 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 87 | auto& overlap = slot_images[overlap_id]; | ||
| 88 | return overlap.frame_tick >= image->frame_tick; | ||
| 89 | }); | ||
| 90 | if (!overlap_check) { | ||
| 91 | ++deletion_iterator; | ||
| 92 | continue; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | if (!is_bad_overlap && must_download) { | ||
| 96 | const bool alias_check = std::ranges::none_of( | ||
| 97 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 98 | auto& alias_image = slot_images[alias.id]; | ||
| 99 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 100 | (alias_image.modification_tick < image->modification_tick); | ||
| 101 | }); | ||
| 102 | |||
| 103 | if (alias_check) { | ||
| 104 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 105 | const auto copies = FullDownloadCopies(image->info); | ||
| 106 | image->DownloadMemory(map, copies); | ||
| 107 | runtime.Finish(); | ||
| 108 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 112 | UntrackImage(*image, image_id); | ||
| 113 | } | ||
| 114 | UnregisterImage(image_id); | ||
| 115 | DeleteImage(image_id); | ||
| 116 | if (is_bad_overlap) { | ||
| 117 | ++num_iterations; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | ++deletion_iterator; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
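[Editorial note] The collector's eligibility test above is purely tick based: an image becomes a candidate once enough frames have passed since it was last used, and the threshold shrinks for bad overlaps and in aggressive mode. A small sketch of that predicate; the thresholds mirror the values used above, while the struct and function are illustrative:

#include <cstdint>
#include <cstdio>

struct CandidateImage {
    std::uint64_t frame_tick; // last frame the image was used
    bool is_bad_overlap;
    bool should_care;
};

// Mirrors the ticks_needed selection in RunGarbageCollector.
bool IsEvictionCandidate(const CandidateImage& image, std::uint64_t frame_tick,
                         bool high_priority_mode, bool aggressive_mode) {
    const std::uint64_t ticks_to_destroy = high_priority_mode ? 60 : 100;
    const std::uint64_t ticks_needed =
        image.is_bad_overlap
            ? ticks_to_destroy >> 4
            : ((image.should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
    return image.frame_tick + ticks_needed < frame_tick;
}

int main() {
    const CandidateImage image{.frame_tick = 100, .is_bad_overlap = false, .should_care = true};
    std::printf("%d\n", IsEvictionCandidate(image, 205, false, false)); // 1: idle for more than 100 ticks
    std::printf("%d\n", IsEvictionCandidate(image, 150, false, false)); // 0: still recently used
}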
| 124 | template <class P> | ||
| 125 | void TextureCache<P>::TickFrame() { | ||
| 126 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { | ||
| 127 | RunGarbageCollector(); | ||
| 128 | } | ||
| 129 | sentenced_images.Tick(); | ||
| 130 | sentenced_framebuffers.Tick(); | ||
| 131 | sentenced_image_view.Tick(); | ||
| 132 | ++frame_tick; | ||
| 133 | } | ||
| 134 | |||
| 135 | template <class P> | ||
| 136 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||
| 137 | return slot_image_views[id]; | ||
| 138 | } | ||
| 139 | |||
| 140 | template <class P> | ||
| 141 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | ||
| 142 | return slot_image_views[id]; | ||
| 143 | } | ||
| 144 | |||
| 145 | template <class P> | ||
| 146 | void TextureCache<P>::MarkModification(ImageId id) noexcept { | ||
| 147 | MarkModification(slot_images[id]); | ||
| 148 | } | ||
| 149 | |||
| 150 | template <class P> | ||
| 151 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | ||
| 152 | std::span<ImageViewId> image_view_ids) { | ||
| 153 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 154 | } | ||
| 155 | |||
| 156 | template <class P> | ||
| 157 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | ||
| 158 | std::span<ImageViewId> image_view_ids) { | ||
| 159 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 160 | } | ||
| 161 | |||
| 162 | template <class P> | ||
| 163 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | ||
| 164 | if (index > graphics_sampler_table.Limit()) { | ||
| 165 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 166 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 167 | } | ||
| 168 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 169 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 170 | if (is_new) { | ||
| 171 | id = FindSampler(descriptor); | ||
| 172 | } | ||
| 173 | return &slot_samplers[id]; | ||
| 174 | } | ||
| 175 | |||
| 176 | template <class P> | ||
| 177 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { | ||
| 178 | if (index > compute_sampler_table.Limit()) { | ||
| 179 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 180 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 181 | } | ||
| 182 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); | ||
| 183 | SamplerId& id = compute_sampler_ids[index]; | ||
| 184 | if (is_new) { | ||
| 185 | id = FindSampler(descriptor); | ||
| 186 | } | ||
| 187 | return &slot_samplers[id]; | ||
| 188 | } | ||
| 189 | |||
| 190 | template <class P> | ||
| 191 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { | ||
| 192 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; | ||
| 193 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; | ||
| 194 | const u32 tic_limit = maxwell3d.regs.tic.limit; | ||
| 195 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; | ||
| 196 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { | ||
| 197 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 198 | } | ||
| 199 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 200 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | template <class P> | ||
| 205 | void TextureCache<P>::SynchronizeComputeDescriptors() { | ||
| 206 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; | ||
| 207 | const u32 tic_limit = kepler_compute.regs.tic.limit; | ||
| 208 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; | ||
| 209 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); | ||
| 210 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { | ||
| 211 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 212 | } | ||
| 213 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 214 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | template <class P> | ||
| 219 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | ||
| 220 | using namespace VideoCommon::Dirty; | ||
| 221 | auto& flags = maxwell3d.dirty.flags; | ||
| 222 | if (!flags[Dirty::RenderTargets]) { | ||
| 223 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 224 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 225 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 226 | } | ||
| 227 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 228 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | flags[Dirty::RenderTargets] = false; | ||
| 232 | |||
| 233 | // Render target control affects all render targets, so force lookups when it is dirty | ||
| 234 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 235 | flags[Dirty::RenderTargetControl] = false; | ||
| 236 | |||
| 237 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 238 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 239 | if (flags[Dirty::ColorBuffer0 + index] || force) { | ||
| 240 | flags[Dirty::ColorBuffer0 + index] = false; | ||
| 241 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | ||
| 242 | } | ||
| 243 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 244 | } | ||
| 245 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 246 | flags[Dirty::ZetaBuffer] = false; | ||
| 247 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 248 | } | ||
| 249 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 250 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 251 | |||
| 252 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 253 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); | ||
| 254 | } | ||
| 255 | render_targets.size = Extent2D{ | ||
| 256 | maxwell3d.regs.render_area.width, | ||
| 257 | maxwell3d.regs.render_area.height, | ||
| 258 | }; | ||
| 259 | } | ||
| 260 | |||
| 261 | template <class P> | ||
| 262 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { | ||
| 263 | return &slot_framebuffers[GetFramebufferId(render_targets)]; | ||
| 264 | } | ||
| 265 | |||
| 266 | template <class P> | ||
| 267 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, | ||
| 268 | std::span<ImageViewId> cached_image_view_ids, | ||
| 269 | std::span<const u32> indices, | ||
| 270 | std::span<ImageViewId> image_view_ids) { | ||
| 271 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 272 | do { | ||
| 273 | has_deleted_images = false; | ||
| 274 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 275 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 276 | }); | ||
| 277 | } while (has_deleted_images); | ||
| 278 | } | ||
| 279 | |||
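[Editorial note] FillImageViews re-runs the whole lookup pass because resolving one descriptor can delete images and thereby invalidate view ids already written earlier in the same pass; the do/while only exits once a full pass completes without deletions. A minimal sketch of that retry shape, with an illustrative flag and resolver:

#include <algorithm>
#include <cstdio>
#include <vector>

bool has_deleted_images = false;
int passes = 0;

// Stand-in resolver: pretend the lookup of index 2 on the first pass
// invalidates earlier results, forcing exactly one retry.
int Resolve(int index) {
    if (passes == 1 && index == 2) {
        has_deleted_images = true;
    }
    return index * 10;
}

int main() {
    const std::vector<int> indices{0, 1, 2, 3};
    std::vector<int> ids(indices.size());
    do {
        ++passes;
        has_deleted_images = false;
        std::transform(indices.begin(), indices.end(), ids.begin(), Resolve);
    } while (has_deleted_images);
    std::printf("finished after %d passes\n", passes); // prints 2
}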
| 280 | template <class P> | ||
| 281 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, | ||
| 282 | std::span<ImageViewId> cached_image_view_ids, | ||
| 283 | u32 index) { | ||
| 284 | if (index > table.Limit()) { | ||
| 285 | LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); | ||
| 286 | return NULL_IMAGE_VIEW_ID; | ||
| 287 | } | ||
| 288 | const auto [descriptor, is_new] = table.Read(index); | ||
| 289 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 290 | if (is_new) { | ||
| 291 | image_view_id = FindImageView(descriptor); | ||
| 292 | } | ||
| 293 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 294 | PrepareImageView(image_view_id, false, false); | ||
| 295 | } | ||
| 296 | return image_view_id; | ||
| 297 | } | ||
| 298 | |||
| 299 | template <class P> | ||
| 300 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | ||
| 301 | const auto [pair, is_new] = framebuffers.try_emplace(key); | ||
| 302 | FramebufferId& framebuffer_id = pair->second; | ||
| 303 | if (!is_new) { | ||
| 304 | return framebuffer_id; | ||
| 305 | } | ||
| 306 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 307 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 308 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 309 | ImageView* const depth_buffer = | ||
| 310 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 311 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 312 | return framebuffer_id; | ||
| 313 | } | ||
| 314 | |||
| 315 | template <class P> | ||
| 316 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | ||
| 317 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | ||
| 318 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | image.flags |= ImageFlagBits::CpuModified; | ||
| 322 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 323 | UntrackImage(image, image_id); | ||
| 324 | } | ||
| 325 | }); | ||
| 326 | } | ||
| 327 | |||
| 328 | template <class P> | ||
| 329 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||
| 330 | std::vector<ImageId> images; | ||
| 331 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | ||
| 332 | if (!image.IsSafeDownload()) { | ||
| 333 | return; | ||
| 334 | } | ||
| 335 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 336 | images.push_back(image_id); | ||
| 337 | }); | ||
| 338 | if (images.empty()) { | ||
| 339 | return; | ||
| 340 | } | ||
| 341 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 342 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 343 | }); | ||
| 344 | for (const ImageId image_id : images) { | ||
| 345 | Image& image = slot_images[image_id]; | ||
| 346 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | ||
| 347 | const auto copies = FullDownloadCopies(image.info); | ||
| 348 | image.DownloadMemory(map, copies); | ||
| 349 | runtime.Finish(); | ||
| 350 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | template <class P> | ||
| 355 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | ||
| 356 | std::vector<ImageId> deleted_images; | ||
| 357 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||
| 358 | for (const ImageId id : deleted_images) { | ||
| 359 | Image& image = slot_images[id]; | ||
| 360 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 361 | UntrackImage(image, id); | ||
| 362 | } | ||
| 363 | UnregisterImage(id); | ||
| 364 | DeleteImage(id); | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | template <class P> | ||
| 369 | void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { | ||
| 370 | std::vector<ImageId> deleted_images; | ||
| 371 | ForEachImageInRegionGPU(gpu_addr, size, | ||
| 372 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||
| 373 | for (const ImageId id : deleted_images) { | ||
| 374 | Image& image = slot_images[id]; | ||
| 375 | if (True(image.flags & ImageFlagBits::Remapped)) { | ||
| 376 | continue; | ||
| 377 | } | ||
| 378 | image.flags |= ImageFlagBits::Remapped; | ||
| 379 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 380 | UntrackImage(image, id); | ||
| 381 | } | ||
| 382 | } | ||
| 383 | } | ||
| 384 | |||
| 385 | template <class P> | ||
| 386 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||
| 387 | const Tegra::Engines::Fermi2D::Surface& src, | ||
| 388 | const Tegra::Engines::Fermi2D::Config& copy) { | ||
| 389 | const BlitImages images = GetBlitImages(dst, src); | ||
| 390 | const ImageId dst_id = images.dst_id; | ||
| 391 | const ImageId src_id = images.src_id; | ||
| 392 | PrepareImage(src_id, false, false); | ||
| 393 | PrepareImage(dst_id, true, false); | ||
| 394 | |||
| 395 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 396 | const ImageBase& src_image = slot_images[src_id]; | ||
| 397 | |||
| 398 | // TODO: Deduplicate | ||
| 399 | const std::optional src_base = src_image.TryFindBase(src.Address()); | ||
| 400 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; | ||
| 401 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); | ||
| 402 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); | ||
| 403 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 404 | const Region2D src_region{ | ||
| 405 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 406 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 407 | }; | ||
| 408 | |||
| 409 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | ||
| 410 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | ||
| 411 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | ||
| 412 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 413 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); | ||
| 414 | const Region2D dst_region{ | ||
| 415 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, | ||
| 416 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, | ||
| 417 | }; | ||
| 418 | |||
| 419 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. | ||
| 420 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 421 | if constexpr (FRAMEBUFFER_BLITS) { | ||
| 422 | // OpenGL blits from framebuffers, not images | ||
| 423 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 424 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 425 | copy.filter, copy.operation); | ||
| 426 | } else { | ||
| 427 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 428 | // Provide a framebuffer in case it's necessary | ||
| 429 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 430 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 431 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 432 | copy.operation); | ||
| 433 | } | ||
| 434 | } | ||
| 435 | |||
| 436 | template <class P> | ||
| 437 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { | ||
| 438 | // TODO: Properly implement this | ||
| 439 | const auto it = page_table.find(cpu_addr >> PAGE_BITS); | ||
| 440 | if (it == page_table.end()) { | ||
| 441 | return nullptr; | ||
| 442 | } | ||
| 443 | const auto& image_map_ids = it->second; | ||
| 444 | for (const ImageMapId map_id : image_map_ids) { | ||
| 445 | const ImageMapView& map = slot_map_views[map_id]; | ||
| 446 | const ImageBase& image = slot_images[map.image_id]; | ||
| 447 | if (image.cpu_addr != cpu_addr) { | ||
| 448 | continue; | ||
| 449 | } | ||
| 450 | if (image.image_view_ids.empty()) { | ||
| 451 | continue; | ||
| 452 | } | ||
| 453 | return &slot_image_views[image.image_view_ids.at(0)]; | ||
| 454 | } | ||
| 455 | return nullptr; | ||
| 456 | } | ||
| 457 | |||
| 458 | template <class P> | ||
| 459 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { | ||
| 460 | return !uncommitted_downloads.empty(); | ||
| 461 | } | ||
| 462 | |||
| 463 | template <class P> | ||
| 464 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { | ||
| 465 | return !committed_downloads.empty() && !committed_downloads.front().empty(); | ||
| 466 | } | ||
| 467 | |||
| 468 | template <class P> | ||
| 469 | void TextureCache<P>::CommitAsyncFlushes() { | ||
| 470 | // This is intentionally passing the value by copy | ||
| 471 | committed_downloads.push(uncommitted_downloads); | ||
| 472 | uncommitted_downloads.clear(); | ||
| 473 | } | ||
| 474 | |||
| 475 | template <class P> | ||
| 476 | void TextureCache<P>::PopAsyncFlushes() { | ||
| 477 | if (committed_downloads.empty()) { | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | const std::span<const ImageId> download_ids = committed_downloads.front(); | ||
| 481 | if (download_ids.empty()) { | ||
| 482 | committed_downloads.pop(); | ||
| 483 | return; | ||
| 484 | } | ||
| 485 | size_t total_size_bytes = 0; | ||
| 486 | for (const ImageId image_id : download_ids) { | ||
| 487 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 488 | } | ||
| 489 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 490 | const size_t original_offset = download_map.offset; | ||
| 491 | for (const ImageId image_id : download_ids) { | ||
| 492 | Image& image = slot_images[image_id]; | ||
| 493 | const auto copies = FullDownloadCopies(image.info); | ||
| 494 | image.DownloadMemory(download_map, copies); | ||
| 495 | download_map.offset += image.unswizzled_size_bytes; | ||
| 496 | } | ||
| 497 | // Wait for downloads to finish | ||
| 498 | runtime.Finish(); | ||
| 499 | |||
| 500 | download_map.offset = original_offset; | ||
| 501 | std::span<u8> download_span = download_map.mapped_span; | ||
| 502 | for (const ImageId image_id : download_ids) { | ||
| 503 | const ImageBase& image = slot_images[image_id]; | ||
| 504 | const auto copies = FullDownloadCopies(image.info); | ||
| 505 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); | ||
| 506 | download_map.offset += image.unswizzled_size_bytes; | ||
| 507 | download_span = download_span.subspan(image.unswizzled_size_bytes); | ||
| 508 | } | ||
| 509 | committed_downloads.pop(); | ||
| 510 | } | ||
| 511 | |||
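[Editorial note] PopAsyncFlushes packs every pending download into a single staging allocation and walks the images in the same order twice: once to record the downloads at increasing offsets, and once after the GPU finishes to swizzle each image back out of its slice. A sketch of that slicing with stand-in types, not the cache's real API:

#include <cstddef>
#include <cstdio>
#include <span>
#include <vector>

struct Pending {
    std::size_t size_bytes;
};

int main() {
    const std::vector<Pending> downloads{{256}, {512}, {128}};

    // First pass: size one staging buffer for all pending downloads.
    std::size_t total = 0;
    for (const Pending& p : downloads) {
        total += p.size_bytes;
    }
    std::vector<unsigned char> staging(total);

    // Second pass: hand each image a contiguous slice, advancing the cursor
    // exactly as the offset bookkeeping above does.
    std::span<unsigned char> cursor{staging};
    for (const Pending& p : downloads) {
        std::span<unsigned char> slice = cursor.first(p.size_bytes);
        std::printf("slice of %zu bytes at offset %zu\n", slice.size(),
                    static_cast<std::size_t>(slice.data() - staging.data()));
        cursor = cursor.subspan(p.size_bytes);
    }
}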
| 512 | template <class P> | ||
| 513 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 514 | bool is_modified = false; | ||
| 515 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | ||
| 516 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 517 | return false; | ||
| 518 | } | ||
| 519 | is_modified = true; | ||
| 520 | return true; | ||
| 521 | }); | ||
| 522 | return is_modified; | ||
| 523 | } | ||
| 524 | |||
| 525 | template <class P> | ||
| 526 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | ||
| 527 | if (False(image.flags & ImageFlagBits::CpuModified)) { | ||
| 528 | // Only upload modified images | ||
| 529 | return; | ||
| 530 | } | ||
| 531 | image.flags &= ~ImageFlagBits::CpuModified; | ||
| 532 | TrackImage(image, image_id); | ||
| 533 | |||
| 534 | if (image.info.num_samples > 1) { | ||
| 535 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | ||
| 536 | return; | ||
| 537 | } | ||
| 538 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); | ||
| 539 | UploadImageContents(image, staging); | ||
| 540 | runtime.InsertUploadMemoryBarrier(); | ||
| 541 | } | ||
| 542 | |||
| 543 | template <class P> | ||
| 544 | template <typename StagingBuffer> | ||
| 545 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { | ||
| 546 | const std::span<u8> mapped_span = staging.mapped_span; | ||
| 547 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 548 | |||
| 549 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 550 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 551 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 552 | runtime.AccelerateImageUpload(image, staging, uploads); | ||
| 553 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 554 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 555 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 556 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 557 | image.UploadMemory(staging, copies); | ||
| 558 | } else { | ||
| 559 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 560 | image.UploadMemory(staging, copies); | ||
| 561 | } | ||
| 562 | } | ||
| 563 | |||
| 564 | template <class P> | ||
| 565 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { | ||
| 566 | if (!IsValidEntry(gpu_memory, config)) { | ||
| 567 | return NULL_IMAGE_VIEW_ID; | ||
| 568 | } | ||
| 569 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 570 | ImageViewId& image_view_id = pair->second; | ||
| 571 | if (is_new) { | ||
| 572 | image_view_id = CreateImageView(config); | ||
| 573 | } | ||
| 574 | return image_view_id; | ||
| 575 | } | ||
| 576 | |||
| 577 | template <class P> | ||
| 578 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { | ||
| 579 | const ImageInfo info(config); | ||
| 580 | if (info.type == ImageType::Buffer) { | ||
| 581 | const ImageViewInfo view_info(config, 0); | ||
| 582 | return slot_image_views.insert(runtime, info, view_info, config.Address()); | ||
| 583 | } | ||
| 584 | const u32 layer_offset = config.BaseLayer() * info.layer_stride; | ||
| 585 | const GPUVAddr image_gpu_addr = config.Address() - layer_offset; | ||
| 586 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); | ||
| 587 | if (!image_id) { | ||
| 588 | return NULL_IMAGE_VIEW_ID; | ||
| 589 | } | ||
| 590 | ImageBase& image = slot_images[image_id]; | ||
| 591 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 592 | ASSERT(base.level == 0); | ||
| 593 | const ImageViewInfo view_info(config, base.layer); | ||
| 594 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 595 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 596 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 597 | image.flags |= ImageFlagBits::Strong; | ||
| 598 | return image_view_id; | ||
| 599 | } | ||
| 600 | |||
| 601 | template <class P> | ||
| 602 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 603 | RelaxedOptions options) { | ||
| 604 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { | ||
| 605 | return image_id; | ||
| 606 | } | ||
| 607 | return InsertImage(info, gpu_addr, options); | ||
| 608 | } | ||
| 609 | |||
| 610 | template <class P> | ||
| 611 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 612 | RelaxedOptions options) { | ||
| 613 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 614 | if (!cpu_addr) { | ||
| 615 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||
| 616 | if (!cpu_addr) { | ||
| 617 | return ImageId{}; | ||
| 618 | } | ||
| 619 | } | ||
| 620 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 621 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 622 | ImageId image_id; | ||
| 623 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | ||
| 624 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | ||
| 625 | return false; | ||
| 626 | } | ||
| 627 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | ||
| 628 | const bool strict_size = False(options & RelaxedOptions::Size) && | ||
| 629 | True(existing_image.flags & ImageFlagBits::Strong); | ||
| 630 | const ImageInfo& existing = existing_image.info; | ||
| 631 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && | ||
| 632 | existing.pitch == info.pitch && | ||
| 633 | IsPitchLinearSameSize(existing, info, strict_size) && | ||
| 634 | IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { | ||
| 635 | image_id = existing_image_id; | ||
| 636 | return true; | ||
| 637 | } | ||
| 638 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, | ||
| 639 | native_bgr)) { | ||
| 640 | image_id = existing_image_id; | ||
| 641 | return true; | ||
| 642 | } | ||
| 643 | return false; | ||
| 644 | }; | ||
| 645 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 646 | return image_id; | ||
| 647 | } | ||
| 648 | |||
| 649 | template <class P> | ||
| 650 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 651 | RelaxedOptions options) { | ||
| 652 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 653 | if (!cpu_addr) { | ||
| 654 | const auto size = CalculateGuestSizeInBytes(info); | ||
| 655 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); | ||
| 656 | if (!cpu_addr) { | ||
| 657 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||
| 658 | virtual_invalid_space += Common::AlignUp(size, 32); | ||
| 659 | cpu_addr = std::optional<VAddr>(fake_addr); | ||
| 660 | } | ||
| 661 | } | ||
| 662 | ASSERT_MSG(cpu_addr, "Tried to insert an image at an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 663 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 664 | const Image& image = slot_images[image_id]; | ||
| 665 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 666 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 667 | if (is_new) { | ||
| 668 | it->second = slot_image_allocs.insert(); | ||
| 669 | } | ||
| 670 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 671 | return image_id; | ||
| 672 | } | ||
| 673 | |||
| 674 | template <class P> | ||
| 675 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||
| 676 | ImageInfo new_info = info; | ||
| 677 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 678 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 679 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 680 | std::vector<ImageId> overlap_ids; | ||
| 681 | std::unordered_set<ImageId> overlaps_found; | ||
| 682 | std::vector<ImageId> left_aliased_ids; | ||
| 683 | std::vector<ImageId> right_aliased_ids; | ||
| 684 | std::unordered_set<ImageId> ignore_textures; | ||
| 685 | std::vector<ImageId> bad_overlap_ids; | ||
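| | // Classify every overlap: overlaps that ResolveOverlap can merge are copied into the | ||
| | // new image and deleted, subresource matches become aliases in either direction, and | ||
| | // everything else is recorded as a bad overlap. | ||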
| 686 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 687 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 688 | ignore_textures.insert(overlap_id); | ||
| 689 | return; | ||
| 690 | } | ||
| 691 | if (info.type == ImageType::Linear) { | ||
| 692 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | ||
| 693 | // Alias linear images with the same pitch and base address | ||
| 694 | left_aliased_ids.push_back(overlap_id); | ||
| 695 | } | ||
| 696 | return; | ||
| 697 | } | ||
| 698 | overlaps_found.insert(overlap_id); | ||
| 699 | static constexpr bool strict_size = true; | ||
| 700 | const std::optional<OverlapResult> solution = ResolveOverlap( | ||
| 701 | new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); | ||
| 702 | if (solution) { | ||
| 703 | gpu_addr = solution->gpu_addr; | ||
| 704 | cpu_addr = solution->cpu_addr; | ||
| 705 | new_info.resources = solution->resources; | ||
| 706 | overlap_ids.push_back(overlap_id); | ||
| 707 | return; | ||
| 708 | } | ||
| 709 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 710 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | ||
| 711 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | ||
| 712 | left_aliased_ids.push_back(overlap_id); | ||
| 713 | overlap.flags |= ImageFlagBits::Alias; | ||
| 714 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||
| 715 | broken_views, native_bgr)) { | ||
| 716 | right_aliased_ids.push_back(overlap_id); | ||
| 717 | overlap.flags |= ImageFlagBits::Alias; | ||
| 718 | } else { | ||
| 719 | bad_overlap_ids.push_back(overlap_id); | ||
| 720 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 721 | } | ||
| 722 | }; | ||
| 723 | ForEachImageInRegion(cpu_addr, size_bytes, region_check); | ||
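| | // Second pass over the sparse GPU page table: sparse images the CPU-address walk | ||
| | // missed that were remapped, or that cover exactly this range, are dropped below. | ||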
| 724 | const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 725 | if (!overlaps_found.contains(overlap_id)) { | ||
| 726 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 727 | ignore_textures.insert(overlap_id); | ||
| 728 | } | ||
| 729 | if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { | ||
| 730 | ignore_textures.insert(overlap_id); | ||
| 731 | } | ||
| 732 | } | ||
| 733 | }; | ||
| 734 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | ||
| 735 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 736 | Image& new_image = slot_images[new_image_id]; | ||
| 737 | |||
| 738 | if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { | ||
| 739 | new_image.flags |= ImageFlagBits::Sparse; | ||
| 740 | } | ||
| 741 | |||
| 742 | for (const ImageId overlap_id : ignore_textures) { | ||
| 743 | Image& overlap = slot_images[overlap_id]; | ||
| 744 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 745 | UNIMPLEMENTED(); | ||
| 746 | } | ||
| 747 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 748 | UntrackImage(overlap, overlap_id); | ||
| 749 | } | ||
| 750 | UnregisterImage(overlap_id); | ||
| 751 | DeleteImage(overlap_id); | ||
| 752 | } | ||
| 753 | |||
| 754 | // TODO: Only upload what we need | ||
| 755 | RefreshContents(new_image, new_image_id); | ||
| 756 | |||
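| | // Copy the contents of every resolved overlap into the new image and retire the | ||
| | // old entries. | ||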
| 757 | for (const ImageId overlap_id : overlap_ids) { | ||
| 758 | Image& overlap = slot_images[overlap_id]; | ||
| 759 | if (overlap.info.num_samples != new_image.info.num_samples) { | ||
| 760 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||
| 761 | } else { | ||
| 762 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | ||
| 763 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 764 | runtime.CopyImage(new_image, overlap, copies); | ||
| 765 | } | ||
| 766 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 767 | UntrackImage(overlap, overlap_id); | ||
| 768 | } | ||
| 769 | UnregisterImage(overlap_id); | ||
| 770 | DeleteImage(overlap_id); | ||
| 771 | } | ||
| 772 | ImageBase& new_image_base = new_image; | ||
| 773 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 774 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 775 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 776 | new_image.flags |= ImageFlagBits::Alias; | ||
| 777 | } | ||
| 778 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 779 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 780 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 781 | new_image.flags |= ImageFlagBits::Alias; | ||
| 782 | } | ||
| 783 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 784 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 785 | aliased.overlapping_images.push_back(new_image_id); | ||
| 786 | new_image.overlapping_images.push_back(aliased_id); | ||
| 787 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 788 | } | ||
| 789 | RegisterImage(new_image_id); | ||
| 790 | return new_image_id; | ||
| 791 | } | ||
| 792 | |||
| 793 | template <class P> | ||
| 794 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | ||
| 795 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { | ||
| 796 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; | ||
| 797 | const GPUVAddr dst_addr = dst.Address(); | ||
| 798 | const GPUVAddr src_addr = src.Address(); | ||
| 799 | ImageInfo dst_info(dst); | ||
| 800 | ImageInfo src_info(src); | ||
| 801 | ImageId dst_id; | ||
| 802 | ImageId src_id; | ||
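| | // Inserting an image can delete overlapping entries and invalidate IDs found in a | ||
| | // previous iteration, so retry until a pass completes without deletions. | ||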
| 803 | do { | ||
| 804 | has_deleted_images = false; | ||
| 805 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 806 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 807 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 808 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 809 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 810 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 811 | continue; | ||
| 812 | } | ||
| 813 | if (!dst_id) { | ||
| 814 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 815 | } | ||
| 816 | if (!src_id) { | ||
| 817 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 818 | } | ||
| 819 | } while (has_deleted_images); | ||
| 820 | return BlitImages{ | ||
| 821 | .dst_id = dst_id, | ||
| 822 | .src_id = src_id, | ||
| 823 | .dst_format = dst_info.format, | ||
| 824 | .src_format = src_info.format, | ||
| 825 | }; | ||
| 826 | } | ||
| 827 | |||
| 828 | template <class P> | ||
| 829 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||
| 830 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 831 | return NULL_SAMPLER_ID; | ||
| 832 | } | ||
| 833 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 834 | if (is_new) { | ||
| 835 | pair->second = slot_samplers.insert(runtime, config); | ||
| 836 | } | ||
| 837 | return pair->second; | ||
| 838 | } | ||
| 839 | |||
| 840 | template <class P> | ||
| 841 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | ||
| 842 | const auto& regs = maxwell3d.regs; | ||
| 843 | if (index >= regs.rt_control.count) { | ||
| 844 | return ImageViewId{}; | ||
| 845 | } | ||
| 846 | const auto& rt = regs.rt[index]; | ||
| 847 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 848 | if (gpu_addr == 0) { | ||
| 849 | return ImageViewId{}; | ||
| 850 | } | ||
| 851 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 852 | return ImageViewId{}; | ||
| 853 | } | ||
| 854 | const ImageInfo info(regs, index); | ||
| 855 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 856 | } | ||
| 857 | |||
| 858 | template <class P> | ||
| 859 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | ||
| 860 | const auto& regs = maxwell3d.regs; | ||
| 861 | if (!regs.zeta_enable) { | ||
| 862 | return ImageViewId{}; | ||
| 863 | } | ||
| 864 | const GPUVAddr gpu_addr = regs.zeta.Address(); | ||
| 865 | if (gpu_addr == 0) { | ||
| 866 | return ImageViewId{}; | ||
| 867 | } | ||
| 868 | const ImageInfo info(regs); | ||
| 869 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 870 | } | ||
| 871 | |||
| 872 | template <class P> | ||
| 873 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 874 | bool is_clear) { | ||
| 875 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | ||
| 876 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); | ||
| 877 | if (!image_id) { | ||
| 878 | return NULL_IMAGE_VIEW_ID; | ||
| 879 | } | ||
| 880 | Image& image = slot_images[image_id]; | ||
| 881 | const ImageViewType view_type = RenderTargetImageViewType(info); | ||
| 882 | SubresourceBase base; | ||
| 883 | if (image.info.type == ImageType::Linear) { | ||
| 884 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 885 | } else { | ||
| 886 | base = image.TryFindBase(gpu_addr).value(); | ||
| 887 | } | ||
| 888 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 889 | const SubresourceRange range{ | ||
| 890 | .base = base, | ||
| 891 | .extent = {.levels = 1, .layers = layers}, | ||
| 892 | }; | ||
| 893 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 894 | } | ||
| 895 | |||
| 896 | template <class P> | ||
| 897 | template <typename Func> | ||
| 898 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | ||
| 899 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 900 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 901 | boost::container::small_vector<ImageId, 32> images; | ||
| 902 | boost::container::small_vector<ImageMapId, 32> maps; | ||
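| | // A map view or image can span several pages; the picked flags make sure the | ||
| | // callback runs at most once per object and are cleared again below. | ||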
| 903 | ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { | ||
| 904 | const auto it = page_table.find(page); | ||
| 905 | if (it == page_table.end()) { | ||
| 906 | if constexpr (BOOL_BREAK) { | ||
| 907 | return false; | ||
| 908 | } else { | ||
| 909 | return; | ||
| 910 | } | ||
| 911 | } | ||
| 912 | for (const ImageMapId map_id : it->second) { | ||
| 913 | ImageMapView& map = slot_map_views[map_id]; | ||
| 914 | if (map.picked) { | ||
| 915 | continue; | ||
| 916 | } | ||
| 917 | if (!map.Overlaps(cpu_addr, size)) { | ||
| 918 | continue; | ||
| 919 | } | ||
| 920 | map.picked = true; | ||
| 921 | maps.push_back(map_id); | ||
| 922 | Image& image = slot_images[map.image_id]; | ||
| 923 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 924 | continue; | ||
| 925 | } | ||
| 926 | image.flags |= ImageFlagBits::Picked; | ||
| 927 | images.push_back(map.image_id); | ||
| 928 | if constexpr (BOOL_BREAK) { | ||
| 929 | if (func(map.image_id, image)) { | ||
| 930 | return true; | ||
| 931 | } | ||
| 932 | } else { | ||
| 933 | func(map.image_id, image); | ||
| 934 | } | ||
| 935 | } | ||
| 936 | if constexpr (BOOL_BREAK) { | ||
| 937 | return false; | ||
| 938 | } | ||
| 939 | }); | ||
| 940 | for (const ImageId image_id : images) { | ||
| 941 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 942 | } | ||
| 943 | for (const ImageMapId map_id : maps) { | ||
| 944 | slot_map_views[map_id].picked = false; | ||
| 945 | } | ||
| 946 | } | ||
| 947 | |||
| 948 | template <class P> | ||
| 949 | template <typename Func> | ||
| 950 | void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 951 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 952 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 953 | boost::container::small_vector<ImageId, 8> images; | ||
| 954 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 955 | const auto it = gpu_page_table.find(page); | ||
| 956 | if (it == gpu_page_table.end()) { | ||
| 957 | if constexpr (BOOL_BREAK) { | ||
| 958 | return false; | ||
| 959 | } else { | ||
| 960 | return; | ||
| 961 | } | ||
| 962 | } | ||
| 963 | for (const ImageId image_id : it->second) { | ||
| 964 | Image& image = slot_images[image_id]; | ||
| 965 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 966 | continue; | ||
| 967 | } | ||
| 968 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 969 | continue; | ||
| 970 | } | ||
| 971 | image.flags |= ImageFlagBits::Picked; | ||
| 972 | images.push_back(image_id); | ||
| 973 | if constexpr (BOOL_BREAK) { | ||
| 974 | if (func(image_id, image)) { | ||
| 975 | return true; | ||
| 976 | } | ||
| 977 | } else { | ||
| 978 | func(image_id, image); | ||
| 979 | } | ||
| 980 | } | ||
| 981 | if constexpr (BOOL_BREAK) { | ||
| 982 | return false; | ||
| 983 | } | ||
| 984 | }); | ||
| 985 | for (const ImageId image_id : images) { | ||
| 986 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 987 | } | ||
| 988 | } | ||
| 989 | |||
| 990 | template <class P> | ||
| 991 | template <typename Func> | ||
| 992 | void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 993 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 994 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 995 | boost::container::small_vector<ImageId, 8> images; | ||
| 996 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 997 | const auto it = sparse_page_table.find(page); | ||
| 998 | if (it == sparse_page_table.end()) { | ||
| 999 | if constexpr (BOOL_BREAK) { | ||
| 1000 | return false; | ||
| 1001 | } else { | ||
| 1002 | return; | ||
| 1003 | } | ||
| 1004 | } | ||
| 1005 | for (const ImageId image_id : it->second) { | ||
| 1006 | Image& image = slot_images[image_id]; | ||
| 1007 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1008 | continue; | ||
| 1009 | } | ||
| 1010 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 1011 | continue; | ||
| 1012 | } | ||
| 1013 | image.flags |= ImageFlagBits::Picked; | ||
| 1014 | images.push_back(image_id); | ||
| 1015 | if constexpr (BOOL_BREAK) { | ||
| 1016 | if (func(image_id, image)) { | ||
| 1017 | return true; | ||
| 1018 | } | ||
| 1019 | } else { | ||
| 1020 | func(image_id, image); | ||
| 1021 | } | ||
| 1022 | } | ||
| 1023 | if constexpr (BOOL_BREAK) { | ||
| 1024 | return false; | ||
| 1025 | } | ||
| 1026 | }); | ||
| 1027 | for (const ImageId image_id : images) { | ||
| 1028 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1029 | } | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | template <class P> | ||
| 1033 | template <typename Func> | ||
| 1034 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||
| 1035 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||
| 1036 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||
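| | // Visit only the ranges of the sparse image that are actually mapped in guest memory. | ||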
| 1037 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||
| 1038 | for (auto& segment : segments) { | ||
| 1039 | const auto gpu_addr = segment.first; | ||
| 1040 | const auto size = segment.second; | ||
| 1041 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1042 | ASSERT(cpu_addr); | ||
| 1043 | if constexpr (RETURNS_BOOL) { | ||
| 1044 | if (func(gpu_addr, *cpu_addr, size)) { | ||
| 1045 | break; | ||
| 1046 | } | ||
| 1047 | } else { | ||
| 1048 | func(gpu_addr, *cpu_addr, size); | ||
| 1049 | } | ||
| 1050 | } | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | template <class P> | ||
| 1054 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { | ||
| 1055 | Image& image = slot_images[image_id]; | ||
| 1056 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1057 | return image_view_id; | ||
| 1058 | } | ||
| 1059 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1060 | image.InsertView(info, image_view_id); | ||
| 1061 | return image_view_id; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | template <class P> | ||
| 1065 | void TextureCache<P>::RegisterImage(ImageId image_id) { | ||
| 1066 | ImageBase& image = slot_images[image_id]; | ||
| 1067 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1068 | "Trying to register an already registered image"); | ||
| 1069 | image.flags |= ImageFlagBits::Registered; | ||
| 1070 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1071 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1072 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1073 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1074 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1075 | } | ||
| 1076 | total_used_memory += Common::AlignUp(tentative_size, 1024); | ||
| 1077 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1078 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||
| 1079 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1080 | auto map_id = | ||
| 1081 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | ||
| 1082 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1083 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1084 | image.map_view_id = map_id; | ||
| 1085 | return; | ||
| 1086 | } | ||
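| | // Sparse images get one map view per submapped segment and an entry in the sparse | ||
| | // GPU page table instead of a single contiguous map view. | ||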
| 1087 | std::vector<ImageViewId> sparse_maps{}; | ||
| 1088 | ForEachSparseSegment( | ||
| 1089 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1090 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||
| 1091 | ForEachCPUPage(cpu_addr, size, | ||
| 1092 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1093 | sparse_maps.push_back(map_id); | ||
| 1094 | }); | ||
| 1095 | sparse_views.emplace(image_id, std::move(sparse_maps)); | ||
| 1096 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1097 | [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | template <class P> | ||
| 1101 | void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||
| 1102 | Image& image = slot_images[image_id]; | ||
| 1103 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | ||
| 1104 | "Trying to unregister an already registered image"); | ||
| 1105 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1106 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1107 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1108 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1109 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1110 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1111 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1112 | } | ||
| 1113 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1114 | const auto& clear_page_table = | ||
| 1115 | [this, image_id]( | ||
| 1116 | u64 page, | ||
| 1117 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { | ||
| 1118 | const auto page_it = selected_page_table.find(page); | ||
| 1119 | if (page_it == selected_page_table.end()) { | ||
| 1120 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1121 | return; | ||
| 1122 | } | ||
| 1123 | std::vector<ImageId>& image_ids = page_it->second; | ||
| 1124 | const auto vector_it = std::ranges::find(image_ids, image_id); | ||
| 1125 | if (vector_it == image_ids.end()) { | ||
| 1126 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1127 | page << PAGE_BITS); | ||
| 1128 | return; | ||
| 1129 | } | ||
| 1130 | image_ids.erase(vector_it); | ||
| 1131 | }; | ||
| 1132 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1133 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | ||
| 1134 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1135 | const auto map_id = image.map_view_id; | ||
| 1136 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | ||
| 1137 | const auto page_it = page_table.find(page); | ||
| 1138 | if (page_it == page_table.end()) { | ||
| 1139 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1140 | return; | ||
| 1141 | } | ||
| 1142 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1143 | const auto vector_it = std::ranges::find(image_map_ids, map_id); | ||
| 1144 | if (vector_it == image_map_ids.end()) { | ||
| 1145 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1146 | page << PAGE_BITS); | ||
| 1147 | return; | ||
| 1148 | } | ||
| 1149 | image_map_ids.erase(vector_it); | ||
| 1150 | }); | ||
| 1151 | slot_map_views.erase(map_id); | ||
| 1152 | return; | ||
| 1153 | } | ||
| 1154 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { | ||
| 1155 | clear_page_table(page, sparse_page_table); | ||
| 1156 | }); | ||
| 1157 | auto it = sparse_views.find(image_id); | ||
| 1158 | ASSERT(it != sparse_views.end()); | ||
| 1159 | auto& sparse_maps = it->second; | ||
| 1160 | for (auto& map_view_id : sparse_maps) { | ||
| 1161 | const auto& map_range = slot_map_views[map_view_id]; | ||
| 1162 | const VAddr cpu_addr = map_range.cpu_addr; | ||
| 1163 | const std::size_t size = map_range.size; | ||
| 1164 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||
| 1165 | const auto page_it = page_table.find(page); | ||
| 1166 | if (page_it == page_table.end()) { | ||
| 1167 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1168 | return; | ||
| 1169 | } | ||
| 1170 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1171 | auto vector_it = image_map_ids.begin(); | ||
| 1172 | while (vector_it != image_map_ids.end()) { | ||
| 1173 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1174 | if (map.image_id != image_id) { | ||
| 1175 | vector_it++; | ||
| 1176 | continue; | ||
| 1177 | } | ||
| 1178 | if (!map.picked) { | ||
| 1179 | map.picked = true; | ||
| 1180 | } | ||
| 1181 | vector_it = image_map_ids.erase(vector_it); | ||
| 1182 | } | ||
| 1183 | }); | ||
| 1184 | slot_map_views.erase(map_view_id); | ||
| 1185 | } | ||
| 1186 | sparse_views.erase(it); | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | template <class P> | ||
| 1190 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { | ||
| 1191 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); | ||
| 1192 | image.flags |= ImageFlagBits::Tracked; | ||
| 1193 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1194 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||
| 1195 | return; | ||
| 1196 | } | ||
| 1197 | if (True(image.flags & ImageFlagBits::Registered)) { | ||
| 1198 | auto it = sparse_views.find(image_id); | ||
| 1199 | ASSERT(it != sparse_views.end()); | ||
| 1200 | auto& sparse_maps = it->second; | ||
| 1201 | for (auto& map_view_id : sparse_maps) { | ||
| 1202 | const auto& map = slot_map_views[map_view_id]; | ||
| 1203 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1204 | const std::size_t size = map.size; | ||
| 1205 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1206 | } | ||
| 1207 | return; | ||
| 1208 | } | ||
| 1209 | ForEachSparseSegment(image, | ||
| 1210 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1211 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1212 | }); | ||
| 1213 | } | ||
| 1214 | |||
| 1215 | template <class P> | ||
| 1216 | void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { | ||
| 1217 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); | ||
| 1218 | image.flags &= ~ImageFlagBits::Tracked; | ||
| 1219 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1220 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1221 | return; | ||
| 1222 | } | ||
| 1223 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | ||
| 1224 | auto it = sparse_views.find(image_id); | ||
| 1225 | ASSERT(it != sparse_views.end()); | ||
| 1226 | auto& sparse_maps = it->second; | ||
| 1227 | for (auto& map_view_id : sparse_maps) { | ||
| 1228 | const auto& map = slot_map_views[map_view_id]; | ||
| 1229 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1230 | const std::size_t size = map.size; | ||
| 1231 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 1232 | } | ||
| 1233 | } | ||
| 1234 | |||
| 1235 | template <class P> | ||
| 1236 | void TextureCache<P>::DeleteImage(ImageId image_id) { | ||
| 1237 | ImageBase& image = slot_images[image_id]; | ||
| 1238 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 1239 | const auto alloc_it = image_allocs_table.find(gpu_addr); | ||
| 1240 | if (alloc_it == image_allocs_table.end()) { | ||
| 1241 | UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", | ||
| 1242 | gpu_addr); | ||
| 1243 | return; | ||
| 1244 | } | ||
| 1245 | const ImageAllocId alloc_id = alloc_it->second; | ||
| 1246 | std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; | ||
| 1247 | const auto alloc_image_it = std::ranges::find(alloc_images, image_id); | ||
| 1248 | if (alloc_image_it == alloc_images.end()) { | ||
| 1249 | UNREACHABLE_MSG("Trying to delete an image that does not exist"); | ||
| 1250 | return; | ||
| 1251 | } | ||
| 1252 | ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); | ||
| 1253 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | ||
| 1254 | |||
| 1255 | // Mark render targets as dirty | ||
| 1256 | auto& dirty = maxwell3d.dirty.flags; | ||
| 1257 | dirty[Dirty::RenderTargets] = true; | ||
| 1258 | dirty[Dirty::ZetaBuffer] = true; | ||
| 1259 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 1260 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 1261 | } | ||
| 1262 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 1263 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1264 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 1265 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 1266 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 1267 | } | ||
| 1268 | } | ||
| 1269 | RemoveImageViewReferences(image_view_ids); | ||
| 1270 | RemoveFramebuffers(image_view_ids); | ||
| 1271 | |||
| 1272 | for (const AliasedImage& alias : image.aliased_images) { | ||
| 1273 | ImageBase& other_image = slot_images[alias.id]; | ||
| 1274 | [[maybe_unused]] const size_t num_removed_aliases = | ||
| 1275 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | ||
| 1276 | return other_alias.id == image_id; | ||
| 1277 | }); | ||
| 1278 | other_image.CheckAliasState(); | ||
| 1279 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | ||
| 1280 | num_removed_aliases); | ||
| 1281 | } | ||
| 1282 | for (const ImageId overlap_id : image.overlapping_images) { | ||
| 1283 | ImageBase& other_image = slot_images[overlap_id]; | ||
| 1284 | [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( | ||
| 1285 | other_image.overlapping_images, | ||
| 1286 | [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); | ||
| 1287 | other_image.CheckBadOverlapState(); | ||
| 1288 | ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}", | ||
| 1289 | num_removed_overlaps); | ||
| 1290 | } | ||
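| | // Move the deleted views and the image itself into the sentenced queues for | ||
| | // deferred destruction. | ||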
| 1291 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1292 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1293 | slot_image_views.erase(image_view_id); | ||
| 1294 | } | ||
| 1295 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1296 | slot_images.erase(image_id); | ||
| 1297 | |||
| 1298 | alloc_images.erase(alloc_image_it); | ||
| 1299 | if (alloc_images.empty()) { | ||
| 1300 | image_allocs_table.erase(alloc_it); | ||
| 1301 | } | ||
| 1302 | if constexpr (ENABLE_VALIDATION) { | ||
| 1303 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 1304 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 1305 | } | ||
| 1306 | graphics_image_table.Invalidate(); | ||
| 1307 | compute_image_table.Invalidate(); | ||
| 1308 | has_deleted_images = true; | ||
| 1309 | } | ||
| 1310 | |||
| 1311 | template <class P> | ||
| 1312 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { | ||
| 1313 | auto it = image_views.begin(); | ||
| 1314 | while (it != image_views.end()) { | ||
| 1315 | const auto found = std::ranges::find(removed_views, it->second); | ||
| 1316 | if (found != removed_views.end()) { | ||
| 1317 | it = image_views.erase(it); | ||
| 1318 | } else { | ||
| 1319 | ++it; | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | } | ||
| 1323 | |||
| 1324 | template <class P> | ||
| 1325 | void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { | ||
| 1326 | auto it = framebuffers.begin(); | ||
| 1327 | while (it != framebuffers.end()) { | ||
| 1328 | if (it->first.Contains(removed_views)) { | ||
| 1329 | it = framebuffers.erase(it); | ||
| 1330 | } else { | ||
| 1331 | ++it; | ||
| 1332 | } | ||
| 1333 | } | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | template <class P> | ||
| 1337 | void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | ||
| 1338 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1339 | image.modification_tick = ++modification_tick; | ||
| 1340 | } | ||
| 1341 | |||
| 1342 | template <class P> | ||
| 1343 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | ||
| 1344 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | ||
| 1345 | ImageBase& image = slot_images[image_id]; | ||
| 1346 | u64 most_recent_tick = image.modification_tick; | ||
| 1347 | for (const AliasedImage& aliased : image.aliased_images) { | ||
| 1348 | ImageBase& aliased_image = slot_images[aliased.id]; | ||
| 1349 | if (image.modification_tick < aliased_image.modification_tick) { | ||
| 1350 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | ||
| 1351 | aliased_images.push_back(&aliased); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | if (aliased_images.empty()) { | ||
| 1355 | return; | ||
| 1356 | } | ||
| 1357 | image.modification_tick = most_recent_tick; | ||
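| | // Apply copies in order of increasing modification tick so the most recently | ||
| | // modified alias is written last and wins. | ||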
| 1358 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | ||
| 1359 | const ImageBase& lhs_image = slot_images[lhs->id]; | ||
| 1360 | const ImageBase& rhs_image = slot_images[rhs->id]; | ||
| 1361 | return lhs_image.modification_tick < rhs_image.modification_tick; | ||
| 1362 | }); | ||
| 1363 | for (const AliasedImage* const aliased : aliased_images) { | ||
| 1364 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1365 | } | ||
| 1366 | } | ||
| 1367 | |||
| 1368 | template <class P> | ||
| 1369 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { | ||
| 1370 | Image& image = slot_images[image_id]; | ||
| 1371 | if (invalidate) { | ||
| 1372 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); | ||
| 1373 | if (False(image.flags & ImageFlagBits::Tracked)) { | ||
| 1374 | TrackImage(image, image_id); | ||
| 1375 | } | ||
| 1376 | } else { | ||
| 1377 | RefreshContents(image, image_id); | ||
| 1378 | SynchronizeAliases(image_id); | ||
| 1379 | } | ||
| 1380 | if (is_modification) { | ||
| 1381 | MarkModification(image); | ||
| 1382 | } | ||
| 1383 | image.frame_tick = frame_tick; | ||
| 1384 | } | ||
| 1385 | |||
| 1386 | template <class P> | ||
| 1387 | void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, | ||
| 1388 | bool invalidate) { | ||
| 1389 | if (!image_view_id) { | ||
| 1390 | return; | ||
| 1391 | } | ||
| 1392 | const ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 1393 | if (image_view.IsBuffer()) { | ||
| 1394 | return; | ||
| 1395 | } | ||
| 1396 | PrepareImage(image_view.image_id, is_modification, invalidate); | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | template <class P> | ||
| 1400 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { | ||
| 1401 | Image& dst = slot_images[dst_id]; | ||
| 1402 | Image& src = slot_images[src_id]; | ||
| 1403 | const auto dst_format_type = GetFormatType(dst.info.format); | ||
| 1404 | const auto src_format_type = GetFormatType(src.info.format); | ||
| 1405 | if (src_format_type == dst_format_type) { | ||
| 1406 | if constexpr (HAS_EMULATED_COPIES) { | ||
| 1407 | if (!runtime.CanImageBeCopied(dst, src)) { | ||
| 1408 | return runtime.EmulateCopyImage(dst, src, copies); | ||
| 1409 | } | ||
| 1410 | } | ||
| 1411 | return runtime.CopyImage(dst, src, copies); | ||
| 1412 | } | ||
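| | // The surface types differ (e.g. color vs. depth), so convert each 2D subresource | ||
| | // through a framebuffer draw instead of a plain image copy. | ||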
| 1413 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||
| 1414 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||
| 1415 | for (const ImageCopy& copy : copies) { | ||
| 1416 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||
| 1417 | UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); | ||
| 1418 | UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); | ||
| 1419 | UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); | ||
| 1420 | |||
| 1421 | const SubresourceBase dst_base{ | ||
| 1422 | .level = copy.dst_subresource.base_level, | ||
| 1423 | .layer = copy.dst_subresource.base_layer, | ||
| 1424 | }; | ||
| 1425 | const SubresourceBase src_base{ | ||
| 1426 | .level = copy.src_subresource.base_level, | ||
| 1427 | .layer = copy.src_subresource.base_layer, | ||
| 1428 | }; | ||
| 1429 | const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; | ||
| 1430 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | ||
| 1431 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | ||
| 1432 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | ||
| 1433 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | ||
| 1434 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | ||
| 1435 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 1436 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 1437 | const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); | ||
| 1438 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 1439 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 1440 | [[maybe_unused]] const Extent3D expected_size{ | ||
| 1441 | .width = std::min(dst_view.size.width, src_view.size.width), | ||
| 1442 | .height = std::min(dst_view.size.height, src_view.size.height), | ||
| 1443 | .depth = std::min(dst_view.size.depth, src_view.size.depth), | ||
| 1444 | }; | ||
| 1445 | UNIMPLEMENTED_IF(copy.extent != expected_size); | ||
| 1446 | |||
| 1447 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); | ||
| 1448 | } | ||
| 1449 | } | ||
| 1450 | |||
| 1451 | template <class P> | ||
| 1452 | void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { | ||
| 1453 | if (*old_id == new_id) { | ||
| 1454 | return; | ||
| 1455 | } | ||
| 1456 | if (*old_id) { | ||
| 1457 | const ImageViewBase& old_view = slot_image_views[*old_id]; | ||
| 1458 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | ||
| 1459 | uncommitted_downloads.push_back(old_view.image_id); | ||
| 1460 | } | ||
| 1461 | } | ||
| 1462 | *old_id = new_id; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | template <class P> | ||
| 1466 | std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( | ||
| 1467 | ImageId image_id, const ImageViewInfo& view_info) { | ||
| 1468 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1469 | const ImageBase& image = slot_images[image_id]; | ||
| 1470 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | ||
| 1471 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | ||
| 1472 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | ||
| 1473 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | ||
| 1474 | const u32 num_samples = image.info.num_samples; | ||
| 1475 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 1476 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | ||
| 1477 | .color_buffer_ids = {color_view_id}, | ||
| 1478 | .depth_buffer_id = depth_view_id, | ||
| 1479 | .size = {extent.width >> samples_x, extent.height >> samples_y}, | ||
| 1480 | }); | ||
| 1481 | return {framebuffer_id, view_id}; | ||
| 1482 | } | ||
| 1483 | |||
| 1484 | template <class P> | ||
| 1485 | bool TextureCache<P>::IsFullClear(ImageViewId id) { | ||
| 1486 | if (!id) { | ||
| 1487 | return true; | ||
| 1488 | } | ||
| 1489 | const ImageViewBase& image_view = slot_image_views[id]; | ||
| 1490 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 1491 | const Extent3D size = image_view.size; | ||
| 1492 | const auto& regs = maxwell3d.regs; | ||
| 1493 | const auto& scissor = regs.scissor_test[0]; | ||
| 1494 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | ||
| 1495 | // Images with multiple resources can't be cleared in a single call | ||
| 1496 | return false; | ||
| 1497 | } | ||
| 1498 | if (regs.clear_flags.scissor == 0) { | ||
| 1499 | // If scissor testing is disabled, the clear is always full | ||
| 1500 | return true; | ||
| 1501 | } | ||
| 1502 | // Make sure the clear covers all texels in the subresource | ||
| 1503 | return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && | ||
| 1504 | scissor.max_y >= size.height; | ||
| 1505 | } | ||
| 1506 | |||
| 1507 | } // namespace VideoCommon | ||