author    yzct12345                 2021-08-05 13:52:30 +0000
committer GitHub                    2021-08-05 13:52:30 +0000
commit    f9563c8f248677894b886373f18c016fb189e416
tree      9de4f4aca82bd56729695f7096aaffd2028e5f00
parent    Merge pull request #6819 from Morph1984/i-am-dumb
texture_cache: Split templates out
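
This commit moves the TextureCache template member definitions out of texture_cache.h into a new texture_cache_templates.h and adds a small *_templates.cpp per backend that explicitly instantiates the class for that backend's TextureCacheParams, so the heavy template code is compiled once per backend instead of in every file that includes the header. A minimal, self-contained sketch of the explicit-instantiation pattern (hypothetical names, not yuzu's actual types):

// cache.h -- declarations seen by every user of the cache
template <class P>
struct Cache {
    void Flush();
};

// cache_templates.h -- definitions, included only where instantiation happens
#include <cstdio>
template <class P>
void Cache<P>::Flush() {
    std::puts(P::name);
}

// gl_cache_templates.cpp -- one explicit instantiation per backend
struct OpenGLParams {
    static constexpr const char* name = "OpenGL";
};
template struct Cache<OpenGLParams>;

// Any other .cpp can call Cache<OpenGLParams>::Flush() through the
// declaration alone and link against the explicit instantiation above.
int main() {
    Cache<OpenGLParams>{}.Flush();
}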
Diffstat
-rw-r--r--  src/video_core/CMakeLists.txt                                      3
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache.cpp                5
-rw-r--r--  src/video_core/renderer_opengl/gl_texture_cache_templates.cpp     10
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache.cpp                2
-rw-r--r--  src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp     10
-rw-r--r--  src/video_core/texture_cache/texture_cache.h                    1528
-rw-r--r--  src/video_core/texture_cache/texture_cache_templates.h          1507
7 files changed, 1533 insertions, 1532 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 1eb67c051..1250cca6f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -97,6 +97,7 @@ add_library(video_core STATIC
     renderer_opengl/gl_stream_buffer.h
     renderer_opengl/gl_texture_cache.cpp
     renderer_opengl/gl_texture_cache.h
+    renderer_opengl/gl_texture_cache_templates.cpp
     renderer_opengl/gl_query_cache.cpp
     renderer_opengl/gl_query_cache.h
     renderer_opengl/maxwell_to_gl.h
@@ -155,6 +156,7 @@ add_library(video_core STATIC
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture_cache.cpp
     renderer_vulkan/vk_texture_cache.h
+    renderer_vulkan/vk_texture_cache_templates.cpp
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
     shader_cache.cpp
@@ -186,6 +188,7 @@ add_library(video_core STATIC
     texture_cache/samples_helper.h
     texture_cache/slot_vector.h
     texture_cache/texture_cache.h
+    texture_cache/texture_cache_templates.h
     texture_cache/types.h
     texture_cache/util.cpp
     texture_cache/util.h
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index c373c9cb4..26b423f5e 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -1,4 +1,4 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
@@ -18,10 +18,7 @@
18#include "video_core/renderer_opengl/maxwell_to_gl.h" 18#include "video_core/renderer_opengl/maxwell_to_gl.h"
19#include "video_core/renderer_opengl/util_shaders.h" 19#include "video_core/renderer_opengl/util_shaders.h"
20#include "video_core/surface.h" 20#include "video_core/surface.h"
21#include "video_core/texture_cache/format_lookup_table.h"
22#include "video_core/texture_cache/samples_helper.h" 21#include "video_core/texture_cache/samples_helper.h"
23#include "video_core/texture_cache/texture_cache.h"
24#include "video_core/textures/decoders.h"
25 22
26namespace OpenGL { 23namespace OpenGL {
27namespace { 24namespace {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp
new file mode 100644
index 000000000..00ed06447
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_opengl/gl_texture_cache.h"
+#include "video_core/texture_cache/texture_cache_templates.h"
+
+namespace VideoCommon {
+template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
+}
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 8e029bcb3..b0496556d 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1,4 +1,4 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp
new file mode 100644
index 000000000..fd8978954
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp
@@ -0,0 +1,10 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/renderer_vulkan/vk_texture_cache.h"
+#include "video_core/texture_cache/texture_cache_templates.h"
+
+namespace VideoCommon {
+template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
+}
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index f34c9d9ca..a4f6e9422 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,4 +1,4 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2021 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
@@ -164,14 +164,6 @@ public:
                            const Tegra::Engines::Fermi2D::Surface& src,
                            const Tegra::Engines::Fermi2D::Config& copy);
 
-    /// Invalidate the contents of the color buffer index
-    /// These contents become unspecified, the cache can assume aggressive optimizations.
-    void InvalidateColorBuffer(size_t index);
-
-    /// Invalidate the contents of the depth buffer
-    /// These contents become unspecified, the cache can assume aggressive optimizations.
-    void InvalidateDepthBuffer();
-
     /// Try to find a cached image view in the given CPU address
     [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
 
@@ -407,1522 +399,4 @@ private:
     typename SlotVector<Image>::Iterator deletion_iterator;
 };
 
410template <class P>
411TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
412 Tegra::Engines::Maxwell3D& maxwell3d_,
413 Tegra::Engines::KeplerCompute& kepler_compute_,
414 Tegra::MemoryManager& gpu_memory_)
415 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
416 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
417 // Configure null sampler
418 TSCEntry sampler_descriptor{};
419 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
420 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
421 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
422 sampler_descriptor.cubemap_anisotropy.Assign(1);
423
424 // Make sure the first index is reserved for the null resources
425 // This way the null resource becomes a compile time constant
426 void(slot_image_views.insert(runtime, NullImageParams{}));
427 void(slot_samplers.insert(runtime, sampler_descriptor));
428
429 deletion_iterator = slot_images.begin();
430
431 if constexpr (HAS_DEVICE_MEMORY_INFO) {
432 const auto device_memory = runtime.GetDeviceLocalMemory();
433 const u64 possible_expected_memory = (device_memory * 3) / 10;
434 const u64 possible_critical_memory = (device_memory * 6) / 10;
435 expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
436 critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
437 minimum_memory = 0;
438 } else {
439 // on OGL we can be more conservatives as the driver takes care.
440 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
441 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
442 minimum_memory = expected_memory;
443 }
444}
445
446template <class P>
447void TextureCache<P>::RunGarbageCollector() {
448 const bool high_priority_mode = total_used_memory >= expected_memory;
449 const bool aggressive_mode = total_used_memory >= critical_memory;
450 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
451 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
452 for (; num_iterations > 0; --num_iterations) {
453 if (deletion_iterator == slot_images.end()) {
454 deletion_iterator = slot_images.begin();
455 if (deletion_iterator == slot_images.end()) {
456 break;
457 }
458 }
459 auto [image_id, image_tmp] = *deletion_iterator;
460 Image* image = image_tmp; // fix clang error.
461 const bool is_alias = True(image->flags & ImageFlagBits::Alias);
462 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
463 const bool must_download = image->IsSafeDownload();
464 bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
465 const u64 ticks_needed =
466 is_bad_overlap
467 ? ticks_to_destroy >> 4
468 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
469 should_care |= aggressive_mode;
470 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
471 if (is_bad_overlap) {
472 const bool overlap_check = std::ranges::all_of(
473 image->overlapping_images, [&, image](const ImageId& overlap_id) {
474 auto& overlap = slot_images[overlap_id];
475 return overlap.frame_tick >= image->frame_tick;
476 });
477 if (!overlap_check) {
478 ++deletion_iterator;
479 continue;
480 }
481 }
482 if (!is_bad_overlap && must_download) {
483 const bool alias_check = std::ranges::none_of(
484 image->aliased_images, [&, image](const AliasedImage& alias) {
485 auto& alias_image = slot_images[alias.id];
486 return (alias_image.frame_tick < image->frame_tick) ||
487 (alias_image.modification_tick < image->modification_tick);
488 });
489
490 if (alias_check) {
491 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
492 const auto copies = FullDownloadCopies(image->info);
493 image->DownloadMemory(map, copies);
494 runtime.Finish();
495 SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
496 }
497 }
498 if (True(image->flags & ImageFlagBits::Tracked)) {
499 UntrackImage(*image, image_id);
500 }
501 UnregisterImage(image_id);
502 DeleteImage(image_id);
503 if (is_bad_overlap) {
504 ++num_iterations;
505 }
506 }
507 ++deletion_iterator;
508 }
509}
510
511template <class P>
512void TextureCache<P>::TickFrame() {
513 if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
514 RunGarbageCollector();
515 }
516 sentenced_images.Tick();
517 sentenced_framebuffers.Tick();
518 sentenced_image_view.Tick();
519 ++frame_tick;
520}
521
522template <class P>
523const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
524 return slot_image_views[id];
525}
526
527template <class P>
528typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
529 return slot_image_views[id];
530}
531
532template <class P>
533void TextureCache<P>::MarkModification(ImageId id) noexcept {
534 MarkModification(slot_images[id]);
535}
536
537template <class P>
538void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
539 std::span<ImageViewId> image_view_ids) {
540 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
541}
542
543template <class P>
544void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
545 std::span<ImageViewId> image_view_ids) {
546 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
547}
548
549template <class P>
550typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
551 if (index > graphics_sampler_table.Limit()) {
552 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
553 return &slot_samplers[NULL_SAMPLER_ID];
554 }
555 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
556 SamplerId& id = graphics_sampler_ids[index];
557 if (is_new) {
558 id = FindSampler(descriptor);
559 }
560 return &slot_samplers[id];
561}
562
563template <class P>
564typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
565 if (index > compute_sampler_table.Limit()) {
566 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
567 return &slot_samplers[NULL_SAMPLER_ID];
568 }
569 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
570 SamplerId& id = compute_sampler_ids[index];
571 if (is_new) {
572 id = FindSampler(descriptor);
573 }
574 return &slot_samplers[id];
575}
576
577template <class P>
578void TextureCache<P>::SynchronizeGraphicsDescriptors() {
579 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
580 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
581 const u32 tic_limit = maxwell3d.regs.tic.limit;
582 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
583 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
584 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
585 }
586 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
587 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
588 }
589}
590
591template <class P>
592void TextureCache<P>::SynchronizeComputeDescriptors() {
593 const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
594 const u32 tic_limit = kepler_compute.regs.tic.limit;
595 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
596 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
597 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
598 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
599 }
600 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
601 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
602 }
603}
604
605template <class P>
606void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
607 using namespace VideoCommon::Dirty;
608 auto& flags = maxwell3d.dirty.flags;
609 if (!flags[Dirty::RenderTargets]) {
610 for (size_t index = 0; index < NUM_RT; ++index) {
611 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
612 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
613 }
614 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
615 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
616 return;
617 }
618 flags[Dirty::RenderTargets] = false;
619
620 // Render target control is used on all render targets, so force look ups when this one is up
621 const bool force = flags[Dirty::RenderTargetControl];
622 flags[Dirty::RenderTargetControl] = false;
623
624 for (size_t index = 0; index < NUM_RT; ++index) {
625 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
626 if (flags[Dirty::ColorBuffer0 + index] || force) {
627 flags[Dirty::ColorBuffer0 + index] = false;
628 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
629 }
630 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
631 }
632 if (flags[Dirty::ZetaBuffer] || force) {
633 flags[Dirty::ZetaBuffer] = false;
634 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
635 }
636 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
637 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
638
639 for (size_t index = 0; index < NUM_RT; ++index) {
640 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
641 }
642 render_targets.size = Extent2D{
643 maxwell3d.regs.render_area.width,
644 maxwell3d.regs.render_area.height,
645 };
646}
647
648template <class P>
649typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
650 return &slot_framebuffers[GetFramebufferId(render_targets)];
651}
652
653template <class P>
654void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
655 std::span<ImageViewId> cached_image_view_ids,
656 std::span<const u32> indices,
657 std::span<ImageViewId> image_view_ids) {
658 ASSERT(indices.size() <= image_view_ids.size());
659 do {
660 has_deleted_images = false;
661 std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
662 return VisitImageView(table, cached_image_view_ids, index);
663 });
664 } while (has_deleted_images);
665}
666
667template <class P>
668ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
669 std::span<ImageViewId> cached_image_view_ids,
670 u32 index) {
671 if (index > table.Limit()) {
672 LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
673 return NULL_IMAGE_VIEW_ID;
674 }
675 const auto [descriptor, is_new] = table.Read(index);
676 ImageViewId& image_view_id = cached_image_view_ids[index];
677 if (is_new) {
678 image_view_id = FindImageView(descriptor);
679 }
680 if (image_view_id != NULL_IMAGE_VIEW_ID) {
681 PrepareImageView(image_view_id, false, false);
682 }
683 return image_view_id;
684}
685
686template <class P>
687FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
688 const auto [pair, is_new] = framebuffers.try_emplace(key);
689 FramebufferId& framebuffer_id = pair->second;
690 if (!is_new) {
691 return framebuffer_id;
692 }
693 std::array<ImageView*, NUM_RT> color_buffers;
694 std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
695 [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
696 ImageView* const depth_buffer =
697 key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
698 framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
699 return framebuffer_id;
700}
701
702template <class P>
703void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
704 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
705 if (True(image.flags & ImageFlagBits::CpuModified)) {
706 return;
707 }
708 image.flags |= ImageFlagBits::CpuModified;
709 if (True(image.flags & ImageFlagBits::Tracked)) {
710 UntrackImage(image, image_id);
711 }
712 });
713}
714
715template <class P>
716void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
717 std::vector<ImageId> images;
718 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
719 if (!image.IsSafeDownload()) {
720 return;
721 }
722 image.flags &= ~ImageFlagBits::GpuModified;
723 images.push_back(image_id);
724 });
725 if (images.empty()) {
726 return;
727 }
728 std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
729 return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
730 });
731 for (const ImageId image_id : images) {
732 Image& image = slot_images[image_id];
733 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
734 const auto copies = FullDownloadCopies(image.info);
735 image.DownloadMemory(map, copies);
736 runtime.Finish();
737 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
738 }
739}
740
741template <class P>
742void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
743 std::vector<ImageId> deleted_images;
744 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
745 for (const ImageId id : deleted_images) {
746 Image& image = slot_images[id];
747 if (True(image.flags & ImageFlagBits::Tracked)) {
748 UntrackImage(image, id);
749 }
750 UnregisterImage(id);
751 DeleteImage(id);
752 }
753}
754
755template <class P>
756void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
757 std::vector<ImageId> deleted_images;
758 ForEachImageInRegionGPU(gpu_addr, size,
759 [&](ImageId id, Image&) { deleted_images.push_back(id); });
760 for (const ImageId id : deleted_images) {
761 Image& image = slot_images[id];
762 if (True(image.flags & ImageFlagBits::Remapped)) {
763 continue;
764 }
765 image.flags |= ImageFlagBits::Remapped;
766 if (True(image.flags & ImageFlagBits::Tracked)) {
767 UntrackImage(image, id);
768 }
769 }
770}
771
772template <class P>
773void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
774 const Tegra::Engines::Fermi2D::Surface& src,
775 const Tegra::Engines::Fermi2D::Config& copy) {
776 const BlitImages images = GetBlitImages(dst, src);
777 const ImageId dst_id = images.dst_id;
778 const ImageId src_id = images.src_id;
779 PrepareImage(src_id, false, false);
780 PrepareImage(dst_id, true, false);
781
782 ImageBase& dst_image = slot_images[dst_id];
783 const ImageBase& src_image = slot_images[src_id];
784
785 // TODO: Deduplicate
786 const std::optional src_base = src_image.TryFindBase(src.Address());
787 const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
788 const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
789 const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
790 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
791 const Region2D src_region{
792 Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
793 Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
794 };
795
796 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
797 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
798 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
799 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
800 const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
801 const Region2D dst_region{
802 Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
803 Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
804 };
805
806 // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
807 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
808 if constexpr (FRAMEBUFFER_BLITS) {
809 // OpenGL blits from framebuffers, not images
810 Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
811 runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
812 copy.filter, copy.operation);
813 } else {
814 // Vulkan can blit images, but it lacks format reinterpretations
815 // Provide a framebuffer in case it's necessary
816 ImageView& dst_view = slot_image_views[dst_view_id];
817 ImageView& src_view = slot_image_views[src_view_id];
818 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
819 copy.operation);
820 }
821}
822
823template <class P>
824void TextureCache<P>::InvalidateColorBuffer(size_t index) {
825 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
826 color_buffer_id = FindColorBuffer(index, false);
827 if (!color_buffer_id) {
828 LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
829 return;
830 }
831 // When invalidating a color buffer, the old contents are no longer relevant
832 ImageView& color_buffer = slot_image_views[color_buffer_id];
833 Image& image = slot_images[color_buffer.image_id];
834 image.flags &= ~ImageFlagBits::CpuModified;
835 image.flags &= ~ImageFlagBits::GpuModified;
836
837 runtime.InvalidateColorBuffer(color_buffer, index);
838}
839
840template <class P>
841void TextureCache<P>::InvalidateDepthBuffer() {
842 ImageViewId& depth_buffer_id = render_targets.depth_buffer_id;
843 depth_buffer_id = FindDepthBuffer(false);
844 if (!depth_buffer_id) {
845 LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
846 return;
847 }
848 // When invalidating the depth buffer, the old contents are no longer relevant
849 ImageBase& image = slot_images[slot_image_views[depth_buffer_id].image_id];
850 image.flags &= ~ImageFlagBits::CpuModified;
851 image.flags &= ~ImageFlagBits::GpuModified;
852
853 ImageView& depth_buffer = slot_image_views[depth_buffer_id];
854 runtime.InvalidateDepthBuffer(depth_buffer);
855}
856
857template <class P>
858typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
859 // TODO: Properly implement this
860 const auto it = page_table.find(cpu_addr >> PAGE_BITS);
861 if (it == page_table.end()) {
862 return nullptr;
863 }
864 const auto& image_map_ids = it->second;
865 for (const ImageMapId map_id : image_map_ids) {
866 const ImageMapView& map = slot_map_views[map_id];
867 const ImageBase& image = slot_images[map.image_id];
868 if (image.cpu_addr != cpu_addr) {
869 continue;
870 }
871 if (image.image_view_ids.empty()) {
872 continue;
873 }
874 return &slot_image_views[image.image_view_ids.at(0)];
875 }
876 return nullptr;
877}
878
879template <class P>
880bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
881 return !uncommitted_downloads.empty();
882}
883
884template <class P>
885bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
886 return !committed_downloads.empty() && !committed_downloads.front().empty();
887}
888
889template <class P>
890void TextureCache<P>::CommitAsyncFlushes() {
891 // This is intentionally passing the value by copy
892 committed_downloads.push(uncommitted_downloads);
893 uncommitted_downloads.clear();
894}
895
896template <class P>
897void TextureCache<P>::PopAsyncFlushes() {
898 if (committed_downloads.empty()) {
899 return;
900 }
901 const std::span<const ImageId> download_ids = committed_downloads.front();
902 if (download_ids.empty()) {
903 committed_downloads.pop();
904 return;
905 }
906 size_t total_size_bytes = 0;
907 for (const ImageId image_id : download_ids) {
908 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
909 }
910 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
911 const size_t original_offset = download_map.offset;
912 for (const ImageId image_id : download_ids) {
913 Image& image = slot_images[image_id];
914 const auto copies = FullDownloadCopies(image.info);
915 image.DownloadMemory(download_map, copies);
916 download_map.offset += image.unswizzled_size_bytes;
917 }
918 // Wait for downloads to finish
919 runtime.Finish();
920
921 download_map.offset = original_offset;
922 std::span<u8> download_span = download_map.mapped_span;
923 for (const ImageId image_id : download_ids) {
924 const ImageBase& image = slot_images[image_id];
925 const auto copies = FullDownloadCopies(image.info);
926 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
927 download_map.offset += image.unswizzled_size_bytes;
928 download_span = download_span.subspan(image.unswizzled_size_bytes);
929 }
930 committed_downloads.pop();
931}
932
933template <class P>
934bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
935 bool is_modified = false;
936 ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
937 if (False(image.flags & ImageFlagBits::GpuModified)) {
938 return false;
939 }
940 is_modified = true;
941 return true;
942 });
943 return is_modified;
944}
945
946template <class P>
947void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
948 if (False(image.flags & ImageFlagBits::CpuModified)) {
949 // Only upload modified images
950 return;
951 }
952 image.flags &= ~ImageFlagBits::CpuModified;
953 TrackImage(image, image_id);
954
955 if (image.info.num_samples > 1) {
956 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
957 return;
958 }
959 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
960 UploadImageContents(image, staging);
961 runtime.InsertUploadMemoryBarrier();
962}
963
964template <class P>
965template <typename StagingBuffer>
966void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
967 const std::span<u8> mapped_span = staging.mapped_span;
968 const GPUVAddr gpu_addr = image.gpu_addr;
969
970 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
971 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
972 const auto uploads = FullUploadSwizzles(image.info);
973 runtime.AccelerateImageUpload(image, staging, uploads);
974 } else if (True(image.flags & ImageFlagBits::Converted)) {
975 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
976 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
977 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
978 image.UploadMemory(staging, copies);
979 } else {
980 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
981 image.UploadMemory(staging, copies);
982 }
983}
984
985template <class P>
986ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
987 if (!IsValidEntry(gpu_memory, config)) {
988 return NULL_IMAGE_VIEW_ID;
989 }
990 const auto [pair, is_new] = image_views.try_emplace(config);
991 ImageViewId& image_view_id = pair->second;
992 if (is_new) {
993 image_view_id = CreateImageView(config);
994 }
995 return image_view_id;
996}
997
998template <class P>
999ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
1000 const ImageInfo info(config);
1001 if (info.type == ImageType::Buffer) {
1002 const ImageViewInfo view_info(config, 0);
1003 return slot_image_views.insert(runtime, info, view_info, config.Address());
1004 }
1005 const u32 layer_offset = config.BaseLayer() * info.layer_stride;
1006 const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
1007 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
1008 if (!image_id) {
1009 return NULL_IMAGE_VIEW_ID;
1010 }
1011 ImageBase& image = slot_images[image_id];
1012 const SubresourceBase base = image.TryFindBase(config.Address()).value();
1013 ASSERT(base.level == 0);
1014 const ImageViewInfo view_info(config, base.layer);
1015 const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
1016 ImageViewBase& image_view = slot_image_views[image_view_id];
1017 image_view.flags |= ImageViewFlagBits::Strong;
1018 image.flags |= ImageFlagBits::Strong;
1019 return image_view_id;
1020}
1021
1022template <class P>
1023ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
1024 RelaxedOptions options) {
1025 if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
1026 return image_id;
1027 }
1028 return InsertImage(info, gpu_addr, options);
1029}
1030
1031template <class P>
1032ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1033 RelaxedOptions options) {
1034 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1035 if (!cpu_addr) {
1036 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
1037 if (!cpu_addr) {
1038 return ImageId{};
1039 }
1040 }
1041 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1042 const bool native_bgr = runtime.HasNativeBgr();
1043 ImageId image_id;
1044 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1045 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1046 return false;
1047 }
1048 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
1049 const bool strict_size = False(options & RelaxedOptions::Size) &&
1050 True(existing_image.flags & ImageFlagBits::Strong);
1051 const ImageInfo& existing = existing_image.info;
1052 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
1053 existing.pitch == info.pitch &&
1054 IsPitchLinearSameSize(existing, info, strict_size) &&
1055 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
1056 image_id = existing_image_id;
1057 return true;
1058 }
1059 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
1060 native_bgr)) {
1061 image_id = existing_image_id;
1062 return true;
1063 }
1064 return false;
1065 };
1066 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
1067 return image_id;
1068}
1069
1070template <class P>
1071ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
1072 RelaxedOptions options) {
1073 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1074 if (!cpu_addr) {
1075 const auto size = CalculateGuestSizeInBytes(info);
1076 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
1077 if (!cpu_addr) {
1078 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
1079 virtual_invalid_space += Common::AlignUp(size, 32);
1080 cpu_addr = std::optional<VAddr>(fake_addr);
1081 }
1082 }
1083 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
1084 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
1085 const Image& image = slot_images[image_id];
1086 // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
1087 const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
1088 if (is_new) {
1089 it->second = slot_image_allocs.insert();
1090 }
1091 slot_image_allocs[it->second].images.push_back(image_id);
1092 return image_id;
1093}
1094
1095template <class P>
1096ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
1097 ImageInfo new_info = info;
1098 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
1099 const bool broken_views = runtime.HasBrokenTextureViewFormats();
1100 const bool native_bgr = runtime.HasNativeBgr();
1101 std::vector<ImageId> overlap_ids;
1102 std::unordered_set<ImageId> overlaps_found;
1103 std::vector<ImageId> left_aliased_ids;
1104 std::vector<ImageId> right_aliased_ids;
1105 std::unordered_set<ImageId> ignore_textures;
1106 std::vector<ImageId> bad_overlap_ids;
1107 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
1108 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1109 ignore_textures.insert(overlap_id);
1110 return;
1111 }
1112 if (info.type == ImageType::Linear) {
1113 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
1114 // Alias linear images with the same pitch
1115 left_aliased_ids.push_back(overlap_id);
1116 }
1117 return;
1118 }
1119 overlaps_found.insert(overlap_id);
1120 static constexpr bool strict_size = true;
1121 const std::optional<OverlapResult> solution = ResolveOverlap(
1122 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
1123 if (solution) {
1124 gpu_addr = solution->gpu_addr;
1125 cpu_addr = solution->cpu_addr;
1126 new_info.resources = solution->resources;
1127 overlap_ids.push_back(overlap_id);
1128 return;
1129 }
1130 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
1131 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
1132 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
1133 left_aliased_ids.push_back(overlap_id);
1134 overlap.flags |= ImageFlagBits::Alias;
1135 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
1136 broken_views, native_bgr)) {
1137 right_aliased_ids.push_back(overlap_id);
1138 overlap.flags |= ImageFlagBits::Alias;
1139 } else {
1140 bad_overlap_ids.push_back(overlap_id);
1141 overlap.flags |= ImageFlagBits::BadOverlap;
1142 }
1143 };
1144 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
1145 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
1146 if (!overlaps_found.contains(overlap_id)) {
1147 if (True(overlap.flags & ImageFlagBits::Remapped)) {
1148 ignore_textures.insert(overlap_id);
1149 }
1150 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
1151 ignore_textures.insert(overlap_id);
1152 }
1153 }
1154 };
1155 ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
1156 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
1157 Image& new_image = slot_images[new_image_id];
1158
1159 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
1160 new_image.flags |= ImageFlagBits::Sparse;
1161 }
1162
1163 for (const ImageId overlap_id : ignore_textures) {
1164 Image& overlap = slot_images[overlap_id];
1165 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
1166 UNIMPLEMENTED();
1167 }
1168 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1169 UntrackImage(overlap, overlap_id);
1170 }
1171 UnregisterImage(overlap_id);
1172 DeleteImage(overlap_id);
1173 }
1174
1175 // TODO: Only upload what we need
1176 RefreshContents(new_image, new_image_id);
1177
1178 for (const ImageId overlap_id : overlap_ids) {
1179 Image& overlap = slot_images[overlap_id];
1180 if (overlap.info.num_samples != new_image.info.num_samples) {
1181 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
1182 } else {
1183 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
1184 const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
1185 runtime.CopyImage(new_image, overlap, copies);
1186 }
1187 if (True(overlap.flags & ImageFlagBits::Tracked)) {
1188 UntrackImage(overlap, overlap_id);
1189 }
1190 UnregisterImage(overlap_id);
1191 DeleteImage(overlap_id);
1192 }
1193 ImageBase& new_image_base = new_image;
1194 for (const ImageId aliased_id : right_aliased_ids) {
1195 ImageBase& aliased = slot_images[aliased_id];
1196 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
1197 new_image.flags |= ImageFlagBits::Alias;
1198 }
1199 for (const ImageId aliased_id : left_aliased_ids) {
1200 ImageBase& aliased = slot_images[aliased_id];
1201 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
1202 new_image.flags |= ImageFlagBits::Alias;
1203 }
1204 for (const ImageId aliased_id : bad_overlap_ids) {
1205 ImageBase& aliased = slot_images[aliased_id];
1206 aliased.overlapping_images.push_back(new_image_id);
1207 new_image.overlapping_images.push_back(aliased_id);
1208 new_image.flags |= ImageFlagBits::BadOverlap;
1209 }
1210 RegisterImage(new_image_id);
1211 return new_image_id;
1212}
1213
1214template <class P>
1215typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
1216 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
1217 static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
1218 const GPUVAddr dst_addr = dst.Address();
1219 const GPUVAddr src_addr = src.Address();
1220 ImageInfo dst_info(dst);
1221 ImageInfo src_info(src);
1222 ImageId dst_id;
1223 ImageId src_id;
1224 do {
1225 has_deleted_images = false;
1226 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
1227 src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
1228 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
1229 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
1230 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
1231 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
1232 continue;
1233 }
1234 if (!dst_id) {
1235 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
1236 }
1237 if (!src_id) {
1238 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
1239 }
1240 } while (has_deleted_images);
1241 return BlitImages{
1242 .dst_id = dst_id,
1243 .src_id = src_id,
1244 .dst_format = dst_info.format,
1245 .src_format = src_info.format,
1246 };
1247}
1248
1249template <class P>
1250SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1251 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
1252 return NULL_SAMPLER_ID;
1253 }
1254 const auto [pair, is_new] = samplers.try_emplace(config);
1255 if (is_new) {
1256 pair->second = slot_samplers.insert(runtime, config);
1257 }
1258 return pair->second;
1259}
1260
1261template <class P>
1262ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1263 const auto& regs = maxwell3d.regs;
1264 if (index >= regs.rt_control.count) {
1265 return ImageViewId{};
1266 }
1267 const auto& rt = regs.rt[index];
1268 const GPUVAddr gpu_addr = rt.Address();
1269 if (gpu_addr == 0) {
1270 return ImageViewId{};
1271 }
1272 if (rt.format == Tegra::RenderTargetFormat::NONE) {
1273 return ImageViewId{};
1274 }
1275 const ImageInfo info(regs, index);
1276 return FindRenderTargetView(info, gpu_addr, is_clear);
1277}
1278
1279template <class P>
1280ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1281 const auto& regs = maxwell3d.regs;
1282 if (!regs.zeta_enable) {
1283 return ImageViewId{};
1284 }
1285 const GPUVAddr gpu_addr = regs.zeta.Address();
1286 if (gpu_addr == 0) {
1287 return ImageViewId{};
1288 }
1289 const ImageInfo info(regs);
1290 return FindRenderTargetView(info, gpu_addr, is_clear);
1291}
1292
1293template <class P>
1294ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
1295 bool is_clear) {
1296 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
1297 const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
1298 if (!image_id) {
1299 return NULL_IMAGE_VIEW_ID;
1300 }
1301 Image& image = slot_images[image_id];
1302 const ImageViewType view_type = RenderTargetImageViewType(info);
1303 SubresourceBase base;
1304 if (image.info.type == ImageType::Linear) {
1305 base = SubresourceBase{.level = 0, .layer = 0};
1306 } else {
1307 base = image.TryFindBase(gpu_addr).value();
1308 }
1309 const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
1310 const SubresourceRange range{
1311 .base = base,
1312 .extent = {.levels = 1, .layers = layers},
1313 };
1314 return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
1315}
1316
1317template <class P>
1318template <typename Func>
1319void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
1320 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1321 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1322 boost::container::small_vector<ImageId, 32> images;
1323 boost::container::small_vector<ImageMapId, 32> maps;
1324 ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
1325 const auto it = page_table.find(page);
1326 if (it == page_table.end()) {
1327 if constexpr (BOOL_BREAK) {
1328 return false;
1329 } else {
1330 return;
1331 }
1332 }
1333 for (const ImageMapId map_id : it->second) {
1334 ImageMapView& map = slot_map_views[map_id];
1335 if (map.picked) {
1336 continue;
1337 }
1338 if (!map.Overlaps(cpu_addr, size)) {
1339 continue;
1340 }
1341 map.picked = true;
1342 maps.push_back(map_id);
1343 Image& image = slot_images[map.image_id];
1344 if (True(image.flags & ImageFlagBits::Picked)) {
1345 continue;
1346 }
1347 image.flags |= ImageFlagBits::Picked;
1348 images.push_back(map.image_id);
1349 if constexpr (BOOL_BREAK) {
1350 if (func(map.image_id, image)) {
1351 return true;
1352 }
1353 } else {
1354 func(map.image_id, image);
1355 }
1356 }
1357 if constexpr (BOOL_BREAK) {
1358 return false;
1359 }
1360 });
1361 for (const ImageId image_id : images) {
1362 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1363 }
1364 for (const ImageMapId map_id : maps) {
1365 slot_map_views[map_id].picked = false;
1366 }
1367}
1368
1369template <class P>
1370template <typename Func>
1371void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
1372 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1373 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1374 boost::container::small_vector<ImageId, 8> images;
1375 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
1376 const auto it = gpu_page_table.find(page);
1377 if (it == gpu_page_table.end()) {
1378 if constexpr (BOOL_BREAK) {
1379 return false;
1380 } else {
1381 return;
1382 }
1383 }
1384 for (const ImageId image_id : it->second) {
1385 Image& image = slot_images[image_id];
1386 if (True(image.flags & ImageFlagBits::Picked)) {
1387 continue;
1388 }
1389 if (!image.OverlapsGPU(gpu_addr, size)) {
1390 continue;
1391 }
1392 image.flags |= ImageFlagBits::Picked;
1393 images.push_back(image_id);
1394 if constexpr (BOOL_BREAK) {
1395 if (func(image_id, image)) {
1396 return true;
1397 }
1398 } else {
1399 func(image_id, image);
1400 }
1401 }
1402 if constexpr (BOOL_BREAK) {
1403 return false;
1404 }
1405 });
1406 for (const ImageId image_id : images) {
1407 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1408 }
1409}
1410
1411template <class P>
1412template <typename Func>
1413void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
1414 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1415 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1416 boost::container::small_vector<ImageId, 8> images;
1417 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
1418 const auto it = sparse_page_table.find(page);
1419 if (it == sparse_page_table.end()) {
1420 if constexpr (BOOL_BREAK) {
1421 return false;
1422 } else {
1423 return;
1424 }
1425 }
1426 for (const ImageId image_id : it->second) {
1427 Image& image = slot_images[image_id];
1428 if (True(image.flags & ImageFlagBits::Picked)) {
1429 continue;
1430 }
1431 if (!image.OverlapsGPU(gpu_addr, size)) {
1432 continue;
1433 }
1434 image.flags |= ImageFlagBits::Picked;
1435 images.push_back(image_id);
1436 if constexpr (BOOL_BREAK) {
1437 if (func(image_id, image)) {
1438 return true;
1439 }
1440 } else {
1441 func(image_id, image);
1442 }
1443 }
1444 if constexpr (BOOL_BREAK) {
1445 return false;
1446 }
1447 });
1448 for (const ImageId image_id : images) {
1449 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1450 }
1451}
1452
1453template <class P>
1454template <typename Func>
1455void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1456 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1457 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1458 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1459 for (auto& segment : segments) {
1460 const auto gpu_addr = segment.first;
1461 const auto size = segment.second;
1462 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1463 ASSERT(cpu_addr);
1464 if constexpr (RETURNS_BOOL) {
1465 if (func(gpu_addr, *cpu_addr, size)) {
1466 break;
1467 }
1468 } else {
1469 func(gpu_addr, *cpu_addr, size);
1470 }
1471 }
1472}
1473
1474template <class P>
1475ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1476 Image& image = slot_images[image_id];
1477 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1478 return image_view_id;
1479 }
1480 const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1481 image.InsertView(info, image_view_id);
1482 return image_view_id;
1483}
1484
1485template <class P>
1486void TextureCache<P>::RegisterImage(ImageId image_id) {
1487 ImageBase& image = slot_images[image_id];
1488 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1489 "Trying to register an already registered image");
1490 image.flags |= ImageFlagBits::Registered;
1491 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1492 if ((IsPixelFormatASTC(image.info.format) &&
1493 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1494 True(image.flags & ImageFlagBits::Converted)) {
1495 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1496 }
1497 total_used_memory += Common::AlignUp(tentative_size, 1024);
1498 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1499 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1500 if (False(image.flags & ImageFlagBits::Sparse)) {
1501 auto map_id =
1502 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
1503 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
1504 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1505 image.map_view_id = map_id;
1506 return;
1507 }
1508 std::vector<ImageViewId> sparse_maps{};
1509 ForEachSparseSegment(
1510 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1511 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1512 ForEachCPUPage(cpu_addr, size,
1513 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1514 sparse_maps.push_back(map_id);
1515 });
1516 sparse_views.emplace(image_id, std::move(sparse_maps));
1517 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1518 [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
1519}
1520
1521template <class P>
1522void TextureCache<P>::UnregisterImage(ImageId image_id) {
1523 Image& image = slot_images[image_id];
1524 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1525 "Trying to unregister an already registered image");
1526 image.flags &= ~ImageFlagBits::Registered;
1527 image.flags &= ~ImageFlagBits::BadOverlap;
1528 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1529 if ((IsPixelFormatASTC(image.info.format) &&
1530 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1531 True(image.flags & ImageFlagBits::Converted)) {
1532 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1533 }
1534 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1535 const auto& clear_page_table =
1536 [this, image_id](
1537 u64 page,
1538 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
1539 const auto page_it = selected_page_table.find(page);
1540 if (page_it == selected_page_table.end()) {
1541 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1542 return;
1543 }
1544 std::vector<ImageId>& image_ids = page_it->second;
1545 const auto vector_it = std::ranges::find(image_ids, image_id);
1546 if (vector_it == image_ids.end()) {
1547 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1548 page << PAGE_BITS);
1549 return;
1550 }
1551 image_ids.erase(vector_it);
1552 };
1553 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1554 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
1555 if (False(image.flags & ImageFlagBits::Sparse)) {
1556 const auto map_id = image.map_view_id;
1557 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
1558 const auto page_it = page_table.find(page);
1559 if (page_it == page_table.end()) {
1560 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1561 return;
1562 }
1563 std::vector<ImageMapId>& image_map_ids = page_it->second;
1564 const auto vector_it = std::ranges::find(image_map_ids, map_id);
1565 if (vector_it == image_map_ids.end()) {
1566 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1567 page << PAGE_BITS);
1568 return;
1569 }
1570 image_map_ids.erase(vector_it);
1571 });
1572 slot_map_views.erase(map_id);
1573 return;
1574 }
1575 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1576 clear_page_table(page, sparse_page_table);
1577 });
1578 auto it = sparse_views.find(image_id);
1579 ASSERT(it != sparse_views.end());
1580 auto& sparse_maps = it->second;
1581 for (auto& map_view_id : sparse_maps) {
1582 const auto& map_range = slot_map_views[map_view_id];
1583 const VAddr cpu_addr = map_range.cpu_addr;
1584 const std::size_t size = map_range.size;
1585 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
1586 const auto page_it = page_table.find(page);
1587 if (page_it == page_table.end()) {
1588 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1589 return;
1590 }
1591 std::vector<ImageMapId>& image_map_ids = page_it->second;
1592 auto vector_it = image_map_ids.begin();
1593 while (vector_it != image_map_ids.end()) {
1594 ImageMapView& map = slot_map_views[*vector_it];
1595 if (map.image_id != image_id) {
1596 vector_it++;
1597 continue;
1598 }
1599 if (!map.picked) {
1600 map.picked = true;
1601 }
1602 vector_it = image_map_ids.erase(vector_it);
1603 }
1604 });
1605 slot_map_views.erase(map_view_id);
1606 }
1607 sparse_views.erase(it);
1608}
1609
1610template <class P>
1611void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
1612 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1613 image.flags |= ImageFlagBits::Tracked;
1614 if (False(image.flags & ImageFlagBits::Sparse)) {
1615 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1616 return;
1617 }
1618 if (True(image.flags & ImageFlagBits::Registered)) {
1619 auto it = sparse_views.find(image_id);
1620 ASSERT(it != sparse_views.end());
1621 auto& sparse_maps = it->second;
1622 for (auto& map_view_id : sparse_maps) {
1623 const auto& map = slot_map_views[map_view_id];
1624 const VAddr cpu_addr = map.cpu_addr;
1625 const std::size_t size = map.size;
1626 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1627 }
1628 return;
1629 }
1630 ForEachSparseSegment(image,
1631 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1632 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1633 });
1634}
1635
1636template <class P>
1637void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
1638 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1639 image.flags &= ~ImageFlagBits::Tracked;
1640 if (False(image.flags & ImageFlagBits::Sparse)) {
1641 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1642 return;
1643 }
1644 ASSERT(True(image.flags & ImageFlagBits::Registered));
1645 auto it = sparse_views.find(image_id);
1646 ASSERT(it != sparse_views.end());
1647 auto& sparse_maps = it->second;
1648 for (auto& map_view_id : sparse_maps) {
1649 const auto& map = slot_map_views[map_view_id];
1650 const VAddr cpu_addr = map.cpu_addr;
1651 const std::size_t size = map.size;
1652 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
1653 }
1654}
1655
1656template <class P>
1657void TextureCache<P>::DeleteImage(ImageId image_id) {
1658 ImageBase& image = slot_images[image_id];
1659 const GPUVAddr gpu_addr = image.gpu_addr;
1660 const auto alloc_it = image_allocs_table.find(gpu_addr);
1661 if (alloc_it == image_allocs_table.end()) {
1662 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1663 gpu_addr);
1664 return;
1665 }
1666 const ImageAllocId alloc_id = alloc_it->second;
1667 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1668 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1669 if (alloc_image_it == alloc_images.end()) {
1670 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1671 return;
1672 }
1673 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1674 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1675
1676 // Mark render targets as dirty
1677 auto& dirty = maxwell3d.dirty.flags;
1678 dirty[Dirty::RenderTargets] = true;
1679 dirty[Dirty::ZetaBuffer] = true;
1680 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1681 dirty[Dirty::ColorBuffer0 + rt] = true;
1682 }
1683 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1684 for (const ImageViewId image_view_id : image_view_ids) {
1685 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1686 if (render_targets.depth_buffer_id == image_view_id) {
1687 render_targets.depth_buffer_id = ImageViewId{};
1688 }
1689 }
1690 RemoveImageViewReferences(image_view_ids);
1691 RemoveFramebuffers(image_view_ids);
1692
1693 for (const AliasedImage& alias : image.aliased_images) {
1694 ImageBase& other_image = slot_images[alias.id];
1695 [[maybe_unused]] const size_t num_removed_aliases =
1696 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1697 return other_alias.id == image_id;
1698 });
1699 other_image.CheckAliasState();
1700 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1701 num_removed_aliases);
1702 }
1703 for (const ImageId overlap_id : image.overlapping_images) {
1704 ImageBase& other_image = slot_images[overlap_id];
1705 [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
1706 other_image.overlapping_images,
1707 [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
1708 other_image.CheckBadOverlapState();
1709 ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}",
1710 num_removed_overlaps);
1711 }
1712 for (const ImageViewId image_view_id : image_view_ids) {
1713 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1714 slot_image_views.erase(image_view_id);
1715 }
1716 sentenced_images.Push(std::move(slot_images[image_id]));
1717 slot_images.erase(image_id);
1718
1719 alloc_images.erase(alloc_image_it);
1720 if (alloc_images.empty()) {
1721 image_allocs_table.erase(alloc_it);
1722 }
1723 if constexpr (ENABLE_VALIDATION) {
1724 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1725 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1726 }
1727 graphics_image_table.Invalidate();
1728 compute_image_table.Invalidate();
1729 has_deleted_images = true;
1730}
1731
1732template <class P>
1733void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1734 auto it = image_views.begin();
1735 while (it != image_views.end()) {
1736 const auto found = std::ranges::find(removed_views, it->second);
1737 if (found != removed_views.end()) {
1738 it = image_views.erase(it);
1739 } else {
1740 ++it;
1741 }
1742 }
1743}
1744
1745template <class P>
1746void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1747 auto it = framebuffers.begin();
1748 while (it != framebuffers.end()) {
1749 if (it->first.Contains(removed_views)) {
1750 it = framebuffers.erase(it);
1751 } else {
1752 ++it;
1753 }
1754 }
1755}
1756
1757template <class P>
1758void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1759 image.flags |= ImageFlagBits::GpuModified;
1760 image.modification_tick = ++modification_tick;
1761}
1762
1763template <class P>
1764void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1765 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1766 ImageBase& image = slot_images[image_id];
1767 u64 most_recent_tick = image.modification_tick;
1768 for (const AliasedImage& aliased : image.aliased_images) {
1769 ImageBase& aliased_image = slot_images[aliased.id];
1770 if (image.modification_tick < aliased_image.modification_tick) {
1771 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1772 aliased_images.push_back(&aliased);
1773 }
1774 }
1775 if (aliased_images.empty()) {
1776 return;
1777 }
1778 image.modification_tick = most_recent_tick;
1779 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1780 const ImageBase& lhs_image = slot_images[lhs->id];
1781 const ImageBase& rhs_image = slot_images[rhs->id];
1782 return lhs_image.modification_tick < rhs_image.modification_tick;
1783 });
1784 for (const AliasedImage* const aliased : aliased_images) {
1785 CopyImage(image_id, aliased->id, aliased->copies);
1786 }
1787}
1788
1789template <class P>
1790void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1791 Image& image = slot_images[image_id];
1792 if (invalidate) {
1793 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1794 if (False(image.flags & ImageFlagBits::Tracked)) {
1795 TrackImage(image, image_id);
1796 }
1797 } else {
1798 RefreshContents(image, image_id);
1799 SynchronizeAliases(image_id);
1800 }
1801 if (is_modification) {
1802 MarkModification(image);
1803 }
1804 image.frame_tick = frame_tick;
1805}
1806
1807template <class P>
1808void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1809 bool invalidate) {
1810 if (!image_view_id) {
1811 return;
1812 }
1813 const ImageViewBase& image_view = slot_image_views[image_view_id];
1814 if (image_view.IsBuffer()) {
1815 return;
1816 }
1817 PrepareImage(image_view.image_id, is_modification, invalidate);
1818}
1819
1820template <class P>
1821void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1822 Image& dst = slot_images[dst_id];
1823 Image& src = slot_images[src_id];
1824 const auto dst_format_type = GetFormatType(dst.info.format);
1825 const auto src_format_type = GetFormatType(src.info.format);
1826 if (src_format_type == dst_format_type) {
1827 if constexpr (HAS_EMULATED_COPIES) {
1828 if (!runtime.CanImageBeCopied(dst, src)) {
1829 return runtime.EmulateCopyImage(dst, src, copies);
1830 }
1831 }
1832 return runtime.CopyImage(dst, src, copies);
1833 }
1834 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1835 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1836 for (const ImageCopy& copy : copies) {
1837 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1838 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1839 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1840 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1841
1842 const SubresourceBase dst_base{
1843 .level = copy.dst_subresource.base_level,
1844 .layer = copy.dst_subresource.base_layer,
1845 };
1846 const SubresourceBase src_base{
1847 .level = copy.src_subresource.base_level,
1848 .layer = copy.src_subresource.base_layer,
1849 };
1850 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1851 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1852 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1853 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1854 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1855 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1856 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1857 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1858 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1859 ImageView& dst_view = slot_image_views[dst_view_id];
1860 ImageView& src_view = slot_image_views[src_view_id];
1861 [[maybe_unused]] const Extent3D expected_size{
1862 .width = std::min(dst_view.size.width, src_view.size.width),
1863 .height = std::min(dst_view.size.height, src_view.size.height),
1864 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1865 };
1866 UNIMPLEMENTED_IF(copy.extent != expected_size);
1867
1868 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1869 }
1870}
1871
1872template <class P>
1873void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1874 if (*old_id == new_id) {
1875 return;
1876 }
1877 if (*old_id) {
1878 const ImageViewBase& old_view = slot_image_views[*old_id];
1879 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1880 uncommitted_downloads.push_back(old_view.image_id);
1881 }
1882 }
1883 *old_id = new_id;
1884}
1885
1886template <class P>
1887std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1888 ImageId image_id, const ImageViewInfo& view_info) {
1889 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1890 const ImageBase& image = slot_images[image_id];
1891 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1892 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1893 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1894 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1895 const u32 num_samples = image.info.num_samples;
1896 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1897 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1898 .color_buffer_ids = {color_view_id},
1899 .depth_buffer_id = depth_view_id,
1900 .size = {extent.width >> samples_x, extent.height >> samples_y},
1901 });
1902 return {framebuffer_id, view_id};
1903}
1904
1905template <class P>
1906bool TextureCache<P>::IsFullClear(ImageViewId id) {
1907 if (!id) {
1908 return true;
1909 }
1910 const ImageViewBase& image_view = slot_image_views[id];
1911 const ImageBase& image = slot_images[image_view.image_id];
1912 const Extent3D size = image_view.size;
1913 const auto& regs = maxwell3d.regs;
1914 const auto& scissor = regs.scissor_test[0];
1915 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1916 // Images with multiple resources can't be cleared in a single call
1917 return false;
1918 }
1919 if (regs.clear_flags.scissor == 0) {
1920 // If scissor testing is disabled, the clear is always full
1921 return true;
1922 }
1923 // Make sure the clear covers all texels in the subresource
1924 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1925 scissor.max_y >= size.height;
1926}
1927
1928 } // namespace VideoCommon 402 } // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h
new file mode 100644
index 000000000..8440d23d1
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache_templates.h
@@ -0,0 +1,1507 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/texture_cache/texture_cache.h"
8
9namespace VideoCommon {
10
11using Tegra::Texture::SwizzleSource;
12using Tegra::Texture::TextureType;
13using Tegra::Texture::TICEntry;
14using Tegra::Texture::TSCEntry;
15using VideoCore::Surface::GetFormatType;
16using VideoCore::Surface::IsCopyCompatible;
17using VideoCore::Surface::PixelFormat;
18using VideoCore::Surface::PixelFormatFromDepthFormat;
19using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
20using VideoCore::Surface::SurfaceType;
21using namespace Common::Literals;
22
23template <class P>
24TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
25 Tegra::Engines::Maxwell3D& maxwell3d_,
26 Tegra::Engines::KeplerCompute& kepler_compute_,
27 Tegra::MemoryManager& gpu_memory_)
28 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
29 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
30 // Configure null sampler
31 TSCEntry sampler_descriptor{};
32 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
33 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
34 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
35 sampler_descriptor.cubemap_anisotropy.Assign(1);
36
37 // Make sure the first index is reserved for the null resources
38 // This way the null resource becomes a compile time constant
39 void(slot_image_views.insert(runtime, NullImageParams{}));
40 void(slot_samplers.insert(runtime, sampler_descriptor));
41
42 deletion_iterator = slot_images.begin();
43
44 if constexpr (HAS_DEVICE_MEMORY_INFO) {
45 const auto device_memory = runtime.GetDeviceLocalMemory();
46 const u64 possible_expected_memory = (device_memory * 3) / 10;
47 const u64 possible_critical_memory = (device_memory * 6) / 10;
48 expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
49 critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
50 minimum_memory = 0;
51 } else {
52 // On OpenGL we can be more conservative as the driver takes care of it.
53 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
54 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
55 minimum_memory = expected_memory;
56 }
57}
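// Illustrative arithmetic for the thresholds above, assuming a device that reports
// 8 GiB of local memory (the figure is an example, not taken from the code):
//   expected_memory = max((8 GiB * 3) / 10, DEFAULT_EXPECTED_MEMORY) ~= 2.4 GiB
//   critical_memory = max((8 GiB * 6) / 10, DEFAULT_CRITICAL_MEMORY) ~= 4.8 GiB
// assuming both computed values exceed the compile-time defaults.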
58
59template <class P>
60void TextureCache<P>::RunGarbageCollector() {
61 const bool high_priority_mode = total_used_memory >= expected_memory;
62 const bool aggressive_mode = total_used_memory >= critical_memory;
63 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
64 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
65 for (; num_iterations > 0; --num_iterations) {
66 if (deletion_iterator == slot_images.end()) {
67 deletion_iterator = slot_images.begin();
68 if (deletion_iterator == slot_images.end()) {
69 break;
70 }
71 }
72 auto [image_id, image_tmp] = *deletion_iterator;
73 Image* image = image_tmp; // Local copy to work around a clang error with the structured binding.
74 const bool is_alias = True(image->flags & ImageFlagBits::Alias);
75 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
76 const bool must_download = image->IsSafeDownload();
77 bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
78 const u64 ticks_needed =
79 is_bad_overlap
80 ? ticks_to_destroy >> 4
81 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
82 should_care |= aggressive_mode;
83 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
84 if (is_bad_overlap) {
85 const bool overlap_check = std::ranges::all_of(
86 image->overlapping_images, [&, image](const ImageId& overlap_id) {
87 auto& overlap = slot_images[overlap_id];
88 return overlap.frame_tick >= image->frame_tick;
89 });
90 if (!overlap_check) {
91 ++deletion_iterator;
92 continue;
93 }
94 }
95 if (!is_bad_overlap && must_download) {
96 const bool alias_check = std::ranges::none_of(
97 image->aliased_images, [&, image](const AliasedImage& alias) {
98 auto& alias_image = slot_images[alias.id];
99 return (alias_image.frame_tick < image->frame_tick) ||
100 (alias_image.modification_tick < image->modification_tick);
101 });
102
103 if (alias_check) {
104 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
105 const auto copies = FullDownloadCopies(image->info);
106 image->DownloadMemory(map, copies);
107 runtime.Finish();
108 SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
109 }
110 }
111 if (True(image->flags & ImageFlagBits::Tracked)) {
112 UntrackImage(*image, image_id);
113 }
114 UnregisterImage(image_id);
115 DeleteImage(image_id);
116 if (is_bad_overlap) {
117 ++num_iterations;
118 }
119 }
120 ++deletion_iterator;
121 }
122}
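// Timing sketch for the eviction loop above, using the constants in this function:
// an image becomes a candidate once it has been unused for ticks_needed frame ticks,
// which is 100 normally, 60 under high memory pressure, halved again for "should care"
// images in aggressive mode, and ticks_to_destroy >> 4 (6 or 3 ticks) for bad overlaps.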
123
124template <class P>
125void TextureCache<P>::TickFrame() {
126 if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
127 RunGarbageCollector();
128 }
129 sentenced_images.Tick();
130 sentenced_framebuffers.Tick();
131 sentenced_image_view.Tick();
132 ++frame_tick;
133}
134
135template <class P>
136const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
137 return slot_image_views[id];
138}
139
140template <class P>
141typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
142 return slot_image_views[id];
143}
144
145template <class P>
146void TextureCache<P>::MarkModification(ImageId id) noexcept {
147 MarkModification(slot_images[id]);
148}
149
150template <class P>
151void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
152 std::span<ImageViewId> image_view_ids) {
153 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
154}
155
156template <class P>
157void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
158 std::span<ImageViewId> image_view_ids) {
159 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
160}
161
162template <class P>
163typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
164 if (index > graphics_sampler_table.Limit()) {
165 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
166 return &slot_samplers[NULL_SAMPLER_ID];
167 }
168 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
169 SamplerId& id = graphics_sampler_ids[index];
170 if (is_new) {
171 id = FindSampler(descriptor);
172 }
173 return &slot_samplers[id];
174}
175
176template <class P>
177typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
178 if (index > compute_sampler_table.Limit()) {
179 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
180 return &slot_samplers[NULL_SAMPLER_ID];
181 }
182 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
183 SamplerId& id = compute_sampler_ids[index];
184 if (is_new) {
185 id = FindSampler(descriptor);
186 }
187 return &slot_samplers[id];
188}
189
190template <class P>
191void TextureCache<P>::SynchronizeGraphicsDescriptors() {
192 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
193 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
194 const u32 tic_limit = maxwell3d.regs.tic.limit;
195 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
196 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
197 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
198 }
199 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
200 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
201 }
202}
203
204template <class P>
205void TextureCache<P>::SynchronizeComputeDescriptors() {
206 const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
207 const u32 tic_limit = kepler_compute.regs.tic.limit;
208 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
209 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
210 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
211 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
212 }
213 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
214 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
215 }
216}
217
218template <class P>
219void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
220 using namespace VideoCommon::Dirty;
221 auto& flags = maxwell3d.dirty.flags;
222 if (!flags[Dirty::RenderTargets]) {
223 for (size_t index = 0; index < NUM_RT; ++index) {
224 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
225 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
226 }
227 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
228 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
229 return;
230 }
231 flags[Dirty::RenderTargets] = false;
232
233 // Render target control affects all render targets, so force lookups when it is dirty
234 const bool force = flags[Dirty::RenderTargetControl];
235 flags[Dirty::RenderTargetControl] = false;
236
237 for (size_t index = 0; index < NUM_RT; ++index) {
238 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
239 if (flags[Dirty::ColorBuffer0 + index] || force) {
240 flags[Dirty::ColorBuffer0 + index] = false;
241 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
242 }
243 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
244 }
245 if (flags[Dirty::ZetaBuffer] || force) {
246 flags[Dirty::ZetaBuffer] = false;
247 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
248 }
249 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
250 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
251
252 for (size_t index = 0; index < NUM_RT; ++index) {
253 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
254 }
255 render_targets.size = Extent2D{
256 maxwell3d.regs.render_area.width,
257 maxwell3d.regs.render_area.height,
258 };
259}
260
261template <class P>
262typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
263 return &slot_framebuffers[GetFramebufferId(render_targets)];
264}
265
266template <class P>
267void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
268 std::span<ImageViewId> cached_image_view_ids,
269 std::span<const u32> indices,
270 std::span<ImageViewId> image_view_ids) {
271 ASSERT(indices.size() <= image_view_ids.size());
272 do {
273 has_deleted_images = false;
274 std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
275 return VisitImageView(table, cached_image_view_ids, index);
276 });
277 } while (has_deleted_images);
278}
279
280template <class P>
281ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
282 std::span<ImageViewId> cached_image_view_ids,
283 u32 index) {
284 if (index > table.Limit()) {
285 LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
286 return NULL_IMAGE_VIEW_ID;
287 }
288 const auto [descriptor, is_new] = table.Read(index);
289 ImageViewId& image_view_id = cached_image_view_ids[index];
290 if (is_new) {
291 image_view_id = FindImageView(descriptor);
292 }
293 if (image_view_id != NULL_IMAGE_VIEW_ID) {
294 PrepareImageView(image_view_id, false, false);
295 }
296 return image_view_id;
297}
298
299template <class P>
300FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
301 const auto [pair, is_new] = framebuffers.try_emplace(key);
302 FramebufferId& framebuffer_id = pair->second;
303 if (!is_new) {
304 return framebuffer_id;
305 }
306 std::array<ImageView*, NUM_RT> color_buffers;
307 std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
308 [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
309 ImageView* const depth_buffer =
310 key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
311 framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
312 return framebuffer_id;
313}
314
315template <class P>
316void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
317 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
318 if (True(image.flags & ImageFlagBits::CpuModified)) {
319 return;
320 }
321 image.flags |= ImageFlagBits::CpuModified;
322 if (True(image.flags & ImageFlagBits::Tracked)) {
323 UntrackImage(image, image_id);
324 }
325 });
326}
327
328template <class P>
329void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
330 std::vector<ImageId> images;
331 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
332 if (!image.IsSafeDownload()) {
333 return;
334 }
335 image.flags &= ~ImageFlagBits::GpuModified;
336 images.push_back(image_id);
337 });
338 if (images.empty()) {
339 return;
340 }
341 std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
342 return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
343 });
344 for (const ImageId image_id : images) {
345 Image& image = slot_images[image_id];
346 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
347 const auto copies = FullDownloadCopies(image.info);
348 image.DownloadMemory(map, copies);
349 runtime.Finish();
350 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
351 }
352}
353
354template <class P>
355void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
356 std::vector<ImageId> deleted_images;
357 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
358 for (const ImageId id : deleted_images) {
359 Image& image = slot_images[id];
360 if (True(image.flags & ImageFlagBits::Tracked)) {
361 UntrackImage(image, id);
362 }
363 UnregisterImage(id);
364 DeleteImage(id);
365 }
366}
367
368template <class P>
369void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
370 std::vector<ImageId> deleted_images;
371 ForEachImageInRegionGPU(gpu_addr, size,
372 [&](ImageId id, Image&) { deleted_images.push_back(id); });
373 for (const ImageId id : deleted_images) {
374 Image& image = slot_images[id];
375 if (True(image.flags & ImageFlagBits::Remapped)) {
376 continue;
377 }
378 image.flags |= ImageFlagBits::Remapped;
379 if (True(image.flags & ImageFlagBits::Tracked)) {
380 UntrackImage(image, id);
381 }
382 }
383}
384
385template <class P>
386void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
387 const Tegra::Engines::Fermi2D::Surface& src,
388 const Tegra::Engines::Fermi2D::Config& copy) {
389 const BlitImages images = GetBlitImages(dst, src);
390 const ImageId dst_id = images.dst_id;
391 const ImageId src_id = images.src_id;
392 PrepareImage(src_id, false, false);
393 PrepareImage(dst_id, true, false);
394
395 ImageBase& dst_image = slot_images[dst_id];
396 const ImageBase& src_image = slot_images[src_id];
397
398 // TODO: Deduplicate
399 const std::optional src_base = src_image.TryFindBase(src.Address());
400 const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
401 const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
402 const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
403 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
404 const Region2D src_region{
405 Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
406 Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
407 };
408
409 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
410 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
411 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
412 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
413 const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
414 const Region2D dst_region{
415 Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
416 Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
417 };
418
419 // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
420 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
421 if constexpr (FRAMEBUFFER_BLITS) {
422 // OpenGL blits from framebuffers, not images
423 Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
424 runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
425 copy.filter, copy.operation);
426 } else {
427 // Vulkan can blit images, but it lacks format reinterpretations
428 // Provide a framebuffer in case it's necessary
429 ImageView& dst_view = slot_image_views[dst_view_id];
430 ImageView& src_view = slot_image_views[src_view_id];
431 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
432 copy.operation);
433 }
434}
435
436template <class P>
437typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
438 // TODO: Properly implement this
439 const auto it = page_table.find(cpu_addr >> PAGE_BITS);
440 if (it == page_table.end()) {
441 return nullptr;
442 }
443 const auto& image_map_ids = it->second;
444 for (const ImageMapId map_id : image_map_ids) {
445 const ImageMapView& map = slot_map_views[map_id];
446 const ImageBase& image = slot_images[map.image_id];
447 if (image.cpu_addr != cpu_addr) {
448 continue;
449 }
450 if (image.image_view_ids.empty()) {
451 continue;
452 }
453 return &slot_image_views[image.image_view_ids.at(0)];
454 }
455 return nullptr;
456}
457
458template <class P>
459bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
460 return !uncommitted_downloads.empty();
461}
462
463template <class P>
464bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
465 return !committed_downloads.empty() && !committed_downloads.front().empty();
466}
467
468template <class P>
469void TextureCache<P>::CommitAsyncFlushes() {
470 // Intentionally pass the value by copy
471 committed_downloads.push(uncommitted_downloads);
472 uncommitted_downloads.clear();
473}
474
475template <class P>
476void TextureCache<P>::PopAsyncFlushes() {
477 if (committed_downloads.empty()) {
478 return;
479 }
480 const std::span<const ImageId> download_ids = committed_downloads.front();
481 if (download_ids.empty()) {
482 committed_downloads.pop();
483 return;
484 }
485 size_t total_size_bytes = 0;
486 for (const ImageId image_id : download_ids) {
487 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
488 }
489 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
490 const size_t original_offset = download_map.offset;
491 for (const ImageId image_id : download_ids) {
492 Image& image = slot_images[image_id];
493 const auto copies = FullDownloadCopies(image.info);
494 image.DownloadMemory(download_map, copies);
495 download_map.offset += image.unswizzled_size_bytes;
496 }
497 // Wait for downloads to finish
498 runtime.Finish();
499
500 download_map.offset = original_offset;
501 std::span<u8> download_span = download_map.mapped_span;
502 for (const ImageId image_id : download_ids) {
503 const ImageBase& image = slot_images[image_id];
504 const auto copies = FullDownloadCopies(image.info);
505 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
506 download_map.offset += image.unswizzled_size_bytes;
507 download_span = download_span.subspan(image.unswizzled_size_bytes);
508 }
509 committed_downloads.pop();
510}
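// PopAsyncFlushes above is a two-pass walk over one staging buffer: the first loop
// records every committed download at increasing offsets, a single runtime.Finish()
// waits for all of them, and the second loop swizzles each image back into guest
// memory from successive sub-spans of mapped_span.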
511
512template <class P>
513bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
514 bool is_modified = false;
515 ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
516 if (False(image.flags & ImageFlagBits::GpuModified)) {
517 return false;
518 }
519 is_modified = true;
520 return true;
521 });
522 return is_modified;
523}
524
525template <class P>
526void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
527 if (False(image.flags & ImageFlagBits::CpuModified)) {
528 // Only upload modified images
529 return;
530 }
531 image.flags &= ~ImageFlagBits::CpuModified;
532 TrackImage(image, image_id);
533
534 if (image.info.num_samples > 1) {
535 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
536 return;
537 }
538 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
539 UploadImageContents(image, staging);
540 runtime.InsertUploadMemoryBarrier();
541}
542
543template <class P>
544template <typename StagingBuffer>
545void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
546 const std::span<u8> mapped_span = staging.mapped_span;
547 const GPUVAddr gpu_addr = image.gpu_addr;
548
549 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
550 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
551 const auto uploads = FullUploadSwizzles(image.info);
552 runtime.AccelerateImageUpload(image, staging, uploads);
553 } else if (True(image.flags & ImageFlagBits::Converted)) {
554 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
555 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
556 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
557 image.UploadMemory(staging, copies);
558 } else {
559 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
560 image.UploadMemory(staging, copies);
561 }
562}
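// The three upload paths above: AcceleratedUpload reads the raw guest data and lets
// the runtime swizzle it on the GPU, Converted images are unswizzled and format
// converted on the CPU before uploading, and everything else is unswizzled directly
// into the staging buffer.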
563
564template <class P>
565ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
566 if (!IsValidEntry(gpu_memory, config)) {
567 return NULL_IMAGE_VIEW_ID;
568 }
569 const auto [pair, is_new] = image_views.try_emplace(config);
570 ImageViewId& image_view_id = pair->second;
571 if (is_new) {
572 image_view_id = CreateImageView(config);
573 }
574 return image_view_id;
575}
576
577template <class P>
578ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
579 const ImageInfo info(config);
580 if (info.type == ImageType::Buffer) {
581 const ImageViewInfo view_info(config, 0);
582 return slot_image_views.insert(runtime, info, view_info, config.Address());
583 }
584 const u32 layer_offset = config.BaseLayer() * info.layer_stride;
585 const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
586 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
587 if (!image_id) {
588 return NULL_IMAGE_VIEW_ID;
589 }
590 ImageBase& image = slot_images[image_id];
591 const SubresourceBase base = image.TryFindBase(config.Address()).value();
592 ASSERT(base.level == 0);
593 const ImageViewInfo view_info(config, base.layer);
594 const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
595 ImageViewBase& image_view = slot_image_views[image_view_id];
596 image_view.flags |= ImageViewFlagBits::Strong;
597 image.flags |= ImageFlagBits::Strong;
598 return image_view_id;
599}
600
601template <class P>
602ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
603 RelaxedOptions options) {
604 if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
605 return image_id;
606 }
607 return InsertImage(info, gpu_addr, options);
608}
609
610template <class P>
611ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
612 RelaxedOptions options) {
613 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
614 if (!cpu_addr) {
615 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
616 if (!cpu_addr) {
617 return ImageId{};
618 }
619 }
620 const bool broken_views = runtime.HasBrokenTextureViewFormats();
621 const bool native_bgr = runtime.HasNativeBgr();
622 ImageId image_id;
623 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
624 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
625 return false;
626 }
627 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
628 const bool strict_size = False(options & RelaxedOptions::Size) &&
629 True(existing_image.flags & ImageFlagBits::Strong);
630 const ImageInfo& existing = existing_image.info;
631 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
632 existing.pitch == info.pitch &&
633 IsPitchLinearSameSize(existing, info, strict_size) &&
634 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
635 image_id = existing_image_id;
636 return true;
637 }
638 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
639 native_bgr)) {
640 image_id = existing_image_id;
641 return true;
642 }
643 return false;
644 };
645 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
646 return image_id;
647}
648
649template <class P>
650ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
651 RelaxedOptions options) {
652 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
653 if (!cpu_addr) {
654 const auto size = CalculateGuestSizeInBytes(info);
655 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
656 if (!cpu_addr) {
657 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
658 virtual_invalid_space += Common::AlignUp(size, 32);
659 cpu_addr = std::optional<VAddr>(fake_addr);
660 }
661 }
662 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
663 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
664 const Image& image = slot_images[image_id];
665 // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
666 const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
667 if (is_new) {
668 it->second = slot_image_allocs.insert();
669 }
670 slot_image_allocs[it->second].images.push_back(image_id);
671 return image_id;
672}
673
674template <class P>
675ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
676 ImageInfo new_info = info;
677 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
678 const bool broken_views = runtime.HasBrokenTextureViewFormats();
679 const bool native_bgr = runtime.HasNativeBgr();
680 std::vector<ImageId> overlap_ids;
681 std::unordered_set<ImageId> overlaps_found;
682 std::vector<ImageId> left_aliased_ids;
683 std::vector<ImageId> right_aliased_ids;
684 std::unordered_set<ImageId> ignore_textures;
685 std::vector<ImageId> bad_overlap_ids;
686 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
687 if (True(overlap.flags & ImageFlagBits::Remapped)) {
688 ignore_textures.insert(overlap_id);
689 return;
690 }
691 if (info.type == ImageType::Linear) {
692 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
693 // Alias linear images with the same pitch
694 left_aliased_ids.push_back(overlap_id);
695 }
696 return;
697 }
698 overlaps_found.insert(overlap_id);
699 static constexpr bool strict_size = true;
700 const std::optional<OverlapResult> solution = ResolveOverlap(
701 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
702 if (solution) {
703 gpu_addr = solution->gpu_addr;
704 cpu_addr = solution->cpu_addr;
705 new_info.resources = solution->resources;
706 overlap_ids.push_back(overlap_id);
707 return;
708 }
709 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
710 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
711 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
712 left_aliased_ids.push_back(overlap_id);
713 overlap.flags |= ImageFlagBits::Alias;
714 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
715 broken_views, native_bgr)) {
716 right_aliased_ids.push_back(overlap_id);
717 overlap.flags |= ImageFlagBits::Alias;
718 } else {
719 bad_overlap_ids.push_back(overlap_id);
720 overlap.flags |= ImageFlagBits::BadOverlap;
721 }
722 };
723 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
724 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
725 if (!overlaps_found.contains(overlap_id)) {
726 if (True(overlap.flags & ImageFlagBits::Remapped)) {
727 ignore_textures.insert(overlap_id);
728 }
729 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
730 ignore_textures.insert(overlap_id);
731 }
732 }
733 };
734 ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
735 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
736 Image& new_image = slot_images[new_image_id];
737
738 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
739 new_image.flags |= ImageFlagBits::Sparse;
740 }
741
742 for (const ImageId overlap_id : ignore_textures) {
743 Image& overlap = slot_images[overlap_id];
744 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
745 UNIMPLEMENTED();
746 }
747 if (True(overlap.flags & ImageFlagBits::Tracked)) {
748 UntrackImage(overlap, overlap_id);
749 }
750 UnregisterImage(overlap_id);
751 DeleteImage(overlap_id);
752 }
753
754 // TODO: Only upload what we need
755 RefreshContents(new_image, new_image_id);
756
757 for (const ImageId overlap_id : overlap_ids) {
758 Image& overlap = slot_images[overlap_id];
759 if (overlap.info.num_samples != new_image.info.num_samples) {
760 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
761 } else {
762 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
763 const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
764 runtime.CopyImage(new_image, overlap, copies);
765 }
766 if (True(overlap.flags & ImageFlagBits::Tracked)) {
767 UntrackImage(overlap, overlap_id);
768 }
769 UnregisterImage(overlap_id);
770 DeleteImage(overlap_id);
771 }
772 ImageBase& new_image_base = new_image;
773 for (const ImageId aliased_id : right_aliased_ids) {
774 ImageBase& aliased = slot_images[aliased_id];
775 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
776 new_image.flags |= ImageFlagBits::Alias;
777 }
778 for (const ImageId aliased_id : left_aliased_ids) {
779 ImageBase& aliased = slot_images[aliased_id];
780 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
781 new_image.flags |= ImageFlagBits::Alias;
782 }
783 for (const ImageId aliased_id : bad_overlap_ids) {
784 ImageBase& aliased = slot_images[aliased_id];
785 aliased.overlapping_images.push_back(new_image_id);
786 new_image.overlapping_images.push_back(aliased_id);
787 new_image.flags |= ImageFlagBits::BadOverlap;
788 }
789 RegisterImage(new_image_id);
790 return new_image_id;
791}
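// Summary of the overlap resolution above: resolvable overlaps are copied into the
// new image and deleted, subresource matches on either side become aliases, remapped
// or size-identical sparse matches are dropped via ignore_textures, and anything else
// is flagged BadOverlap so the garbage collector treats it as an early candidate.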
792
793template <class P>
794typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
795 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
796 static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
797 const GPUVAddr dst_addr = dst.Address();
798 const GPUVAddr src_addr = src.Address();
799 ImageInfo dst_info(dst);
800 ImageInfo src_info(src);
801 ImageId dst_id;
802 ImageId src_id;
803 do {
804 has_deleted_images = false;
805 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
806 src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
807 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
808 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
809 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
810 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
811 continue;
812 }
813 if (!dst_id) {
814 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
815 }
816 if (!src_id) {
817 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
818 }
819 } while (has_deleted_images);
820 return BlitImages{
821 .dst_id = dst_id,
822 .src_id = src_id,
823 .dst_format = dst_info.format,
824 .src_format = src_info.format,
825 };
826}
827
828template <class P>
829SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
830 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
831 return NULL_SAMPLER_ID;
832 }
833 const auto [pair, is_new] = samplers.try_emplace(config);
834 if (is_new) {
835 pair->second = slot_samplers.insert(runtime, config);
836 }
837 return pair->second;
838}
839
840template <class P>
841ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
842 const auto& regs = maxwell3d.regs;
843 if (index >= regs.rt_control.count) {
844 return ImageViewId{};
845 }
846 const auto& rt = regs.rt[index];
847 const GPUVAddr gpu_addr = rt.Address();
848 if (gpu_addr == 0) {
849 return ImageViewId{};
850 }
851 if (rt.format == Tegra::RenderTargetFormat::NONE) {
852 return ImageViewId{};
853 }
854 const ImageInfo info(regs, index);
855 return FindRenderTargetView(info, gpu_addr, is_clear);
856}
857
858template <class P>
859ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
860 const auto& regs = maxwell3d.regs;
861 if (!regs.zeta_enable) {
862 return ImageViewId{};
863 }
864 const GPUVAddr gpu_addr = regs.zeta.Address();
865 if (gpu_addr == 0) {
866 return ImageViewId{};
867 }
868 const ImageInfo info(regs);
869 return FindRenderTargetView(info, gpu_addr, is_clear);
870}
871
872template <class P>
873ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
874 bool is_clear) {
875 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
876 const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
877 if (!image_id) {
878 return NULL_IMAGE_VIEW_ID;
879 }
880 Image& image = slot_images[image_id];
881 const ImageViewType view_type = RenderTargetImageViewType(info);
882 SubresourceBase base;
883 if (image.info.type == ImageType::Linear) {
884 base = SubresourceBase{.level = 0, .layer = 0};
885 } else {
886 base = image.TryFindBase(gpu_addr).value();
887 }
888 const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
889 const SubresourceRange range{
890 .base = base,
891 .extent = {.levels = 1, .layers = layers},
892 };
893 return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
894}
895
896template <class P>
897template <typename Func>
898void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
899 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
900 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
901 boost::container::small_vector<ImageId, 32> images;
902 boost::container::small_vector<ImageMapId, 32> maps;
903 ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
904 const auto it = page_table.find(page);
905 if (it == page_table.end()) {
906 if constexpr (BOOL_BREAK) {
907 return false;
908 } else {
909 return;
910 }
911 }
912 for (const ImageMapId map_id : it->second) {
913 ImageMapView& map = slot_map_views[map_id];
914 if (map.picked) {
915 continue;
916 }
917 if (!map.Overlaps(cpu_addr, size)) {
918 continue;
919 }
920 map.picked = true;
921 maps.push_back(map_id);
922 Image& image = slot_images[map.image_id];
923 if (True(image.flags & ImageFlagBits::Picked)) {
924 continue;
925 }
926 image.flags |= ImageFlagBits::Picked;
927 images.push_back(map.image_id);
928 if constexpr (BOOL_BREAK) {
929 if (func(map.image_id, image)) {
930 return true;
931 }
932 } else {
933 func(map.image_id, image);
934 }
935 }
936 if constexpr (BOOL_BREAK) {
937 return false;
938 }
939 });
940 for (const ImageId image_id : images) {
941 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
942 }
943 for (const ImageMapId map_id : maps) {
944 slot_map_views[map_id].picked = false;
945 }
946}
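// Note on the Picked bookkeeping above: an image (or map view) that spans several CPU
// pages appears in multiple page_table buckets, so it is flagged on first visit and
// func runs at most once per image; both sets of flags are cleared again after the
// page walk finishes.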
947
948template <class P>
949template <typename Func>
950void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
951 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
952 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
953 boost::container::small_vector<ImageId, 8> images;
954 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
955 const auto it = gpu_page_table.find(page);
956 if (it == gpu_page_table.end()) {
957 if constexpr (BOOL_BREAK) {
958 return false;
959 } else {
960 return;
961 }
962 }
963 for (const ImageId image_id : it->second) {
964 Image& image = slot_images[image_id];
965 if (True(image.flags & ImageFlagBits::Picked)) {
966 continue;
967 }
968 if (!image.OverlapsGPU(gpu_addr, size)) {
969 continue;
970 }
971 image.flags |= ImageFlagBits::Picked;
972 images.push_back(image_id);
973 if constexpr (BOOL_BREAK) {
974 if (func(image_id, image)) {
975 return true;
976 }
977 } else {
978 func(image_id, image);
979 }
980 }
981 if constexpr (BOOL_BREAK) {
982 return false;
983 }
984 });
985 for (const ImageId image_id : images) {
986 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
987 }
988}
989
990template <class P>
991template <typename Func>
992void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
993 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
994 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
995 boost::container::small_vector<ImageId, 8> images;
996 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
997 const auto it = sparse_page_table.find(page);
998 if (it == sparse_page_table.end()) {
999 if constexpr (BOOL_BREAK) {
1000 return false;
1001 } else {
1002 return;
1003 }
1004 }
1005 for (const ImageId image_id : it->second) {
1006 Image& image = slot_images[image_id];
1007 if (True(image.flags & ImageFlagBits::Picked)) {
1008 continue;
1009 }
1010 if (!image.OverlapsGPU(gpu_addr, size)) {
1011 continue;
1012 }
1013 image.flags |= ImageFlagBits::Picked;
1014 images.push_back(image_id);
1015 if constexpr (BOOL_BREAK) {
1016 if (func(image_id, image)) {
1017 return true;
1018 }
1019 } else {
1020 func(image_id, image);
1021 }
1022 }
1023 if constexpr (BOOL_BREAK) {
1024 return false;
1025 }
1026 });
1027 for (const ImageId image_id : images) {
1028 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1029 }
1030}
1031
1032template <class P>
1033template <typename Func>
1034void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1035 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1036 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1037 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1038 for (auto& segment : segments) {
1039 const auto gpu_addr = segment.first;
1040 const auto size = segment.second;
1041 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1042 ASSERT(cpu_addr);
1043 if constexpr (RETURNS_BOOL) {
1044 if (func(gpu_addr, *cpu_addr, size)) {
1045 break;
1046 }
1047 } else {
1048 func(gpu_addr, *cpu_addr, size);
1049 }
1050 }
1051}
1052
1053template <class P>
1054ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1055 Image& image = slot_images[image_id];
1056 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1057 return image_view_id;
1058 }
1059 const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1060 image.InsertView(info, image_view_id);
1061 return image_view_id;
1062}
1063
1064template <class P>
1065void TextureCache<P>::RegisterImage(ImageId image_id) {
1066 ImageBase& image = slot_images[image_id];
1067 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1068 "Trying to register an already registered image");
1069 image.flags |= ImageFlagBits::Registered;
1070 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1071 if ((IsPixelFormatASTC(image.info.format) &&
1072 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1073 True(image.flags & ImageFlagBits::Converted)) {
1074 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1075 }
1076 total_used_memory += Common::AlignUp(tentative_size, 1024);
1077 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1078 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1079 if (False(image.flags & ImageFlagBits::Sparse)) {
1080 auto map_id =
1081 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
1082 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
1083 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1084 image.map_view_id = map_id;
1085 return;
1086 }
1087 std::vector<ImageViewId> sparse_maps{};
1088 ForEachSparseSegment(
1089 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1090 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1091 ForEachCPUPage(cpu_addr, size,
1092 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1093 sparse_maps.push_back(map_id);
1094 });
1095 sparse_views.emplace(image_id, std::move(sparse_maps));
1096 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1097 [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
1098}
1099
1100template <class P>
1101void TextureCache<P>::UnregisterImage(ImageId image_id) {
1102 Image& image = slot_images[image_id];
1103 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1104 "Trying to unregister an image that was not registered");
1105 image.flags &= ~ImageFlagBits::Registered;
1106 image.flags &= ~ImageFlagBits::BadOverlap;
1107 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1108 if ((IsPixelFormatASTC(image.info.format) &&
1109 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1110 True(image.flags & ImageFlagBits::Converted)) {
1111 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1112 }
1113 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1114 const auto& clear_page_table =
1115 [this, image_id](
1116 u64 page,
1117 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
1118 const auto page_it = selected_page_table.find(page);
1119 if (page_it == selected_page_table.end()) {
1120 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1121 return;
1122 }
1123 std::vector<ImageId>& image_ids = page_it->second;
1124 const auto vector_it = std::ranges::find(image_ids, image_id);
1125 if (vector_it == image_ids.end()) {
1126 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1127 page << PAGE_BITS);
1128 return;
1129 }
1130 image_ids.erase(vector_it);
1131 };
1132 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1133 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
1134 if (False(image.flags & ImageFlagBits::Sparse)) {
1135 const auto map_id = image.map_view_id;
1136 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
1137 const auto page_it = page_table.find(page);
1138 if (page_it == page_table.end()) {
1139 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1140 return;
1141 }
1142 std::vector<ImageMapId>& image_map_ids = page_it->second;
1143 const auto vector_it = std::ranges::find(image_map_ids, map_id);
1144 if (vector_it == image_map_ids.end()) {
1145 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1146 page << PAGE_BITS);
1147 return;
1148 }
1149 image_map_ids.erase(vector_it);
1150 });
1151 slot_map_views.erase(map_id);
1152 return;
1153 }
1154 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1155 clear_page_table(page, sparse_page_table);
1156 });
1157 auto it = sparse_views.find(image_id);
1158 ASSERT(it != sparse_views.end());
1159 auto& sparse_maps = it->second;
1160 for (auto& map_view_id : sparse_maps) {
1161 const auto& map_range = slot_map_views[map_view_id];
1162 const VAddr cpu_addr = map_range.cpu_addr;
1163 const std::size_t size = map_range.size;
1164 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
1165 const auto page_it = page_table.find(page);
1166 if (page_it == page_table.end()) {
1167 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1168 return;
1169 }
1170 std::vector<ImageMapId>& image_map_ids = page_it->second;
1171 auto vector_it = image_map_ids.begin();
1172 while (vector_it != image_map_ids.end()) {
1173 ImageMapView& map = slot_map_views[*vector_it];
1174 if (map.image_id != image_id) {
1175 vector_it++;
1176 continue;
1177 }
1178 if (!map.picked) {
1179 map.picked = true;
1180 }
1181 vector_it = image_map_ids.erase(vector_it);
1182 }
1183 });
1184 slot_map_views.erase(map_view_id);
1185 }
1186 sparse_views.erase(it);
1187}
1188
1189template <class P>
1190void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
1191 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1192 image.flags |= ImageFlagBits::Tracked;
1193 if (False(image.flags & ImageFlagBits::Sparse)) {
1194 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1195 return;
1196 }
1197 if (True(image.flags & ImageFlagBits::Registered)) {
1198 auto it = sparse_views.find(image_id);
1199 ASSERT(it != sparse_views.end());
1200 auto& sparse_maps = it->second;
1201 for (auto& map_view_id : sparse_maps) {
1202 const auto& map = slot_map_views[map_view_id];
1203 const VAddr cpu_addr = map.cpu_addr;
1204 const std::size_t size = map.size;
1205 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1206 }
1207 return;
1208 }
1209 ForEachSparseSegment(image,
1210 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1211 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1212 });
1213}
1214
1215template <class P>
1216void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
1217 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1218 image.flags &= ~ImageFlagBits::Tracked;
1219 if (False(image.flags & ImageFlagBits::Sparse)) {
1220 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1221 return;
1222 }
1223 ASSERT(True(image.flags & ImageFlagBits::Registered));
1224 auto it = sparse_views.find(image_id);
1225 ASSERT(it != sparse_views.end());
1226 auto& sparse_maps = it->second;
1227 for (auto& map_view_id : sparse_maps) {
1228 const auto& map = slot_map_views[map_view_id];
1229 const VAddr cpu_addr = map.cpu_addr;
1230 const std::size_t size = map.size;
1231 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
1232 }
1233}
1234
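// Destroys an image that has already been untracked and unregistered: detaches it from the
// bound render targets, drops cached image views and framebuffers that reference it, unlinks
// aliases and overlaps on both sides, and defers destruction through the sentenced queues.
// Descriptor tables are invalidated because any cached view index may now be stale.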
1235template <class P>
1236void TextureCache<P>::DeleteImage(ImageId image_id) {
1237 ImageBase& image = slot_images[image_id];
1238 const GPUVAddr gpu_addr = image.gpu_addr;
1239 const auto alloc_it = image_allocs_table.find(gpu_addr);
1240 if (alloc_it == image_allocs_table.end()) {
1241 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1242 gpu_addr);
1243 return;
1244 }
1245 const ImageAllocId alloc_id = alloc_it->second;
1246 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1247 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1248 if (alloc_image_it == alloc_images.end()) {
1249 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1250 return;
1251 }
1252 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1253 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1254
1255 // Mark render targets as dirty
1256 auto& dirty = maxwell3d.dirty.flags;
1257 dirty[Dirty::RenderTargets] = true;
1258 dirty[Dirty::ZetaBuffer] = true;
1259 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1260 dirty[Dirty::ColorBuffer0 + rt] = true;
1261 }
1262 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1263 for (const ImageViewId image_view_id : image_view_ids) {
1264 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1265 if (render_targets.depth_buffer_id == image_view_id) {
1266 render_targets.depth_buffer_id = ImageViewId{};
1267 }
1268 }
1269 RemoveImageViewReferences(image_view_ids);
1270 RemoveFramebuffers(image_view_ids);
1271
1272 for (const AliasedImage& alias : image.aliased_images) {
1273 ImageBase& other_image = slot_images[alias.id];
1274 [[maybe_unused]] const size_t num_removed_aliases =
1275 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1276 return other_alias.id == image_id;
1277 });
1278 other_image.CheckAliasState();
1279 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1280 num_removed_aliases);
1281 }
1282 for (const ImageId overlap_id : image.overlapping_images) {
1283 ImageBase& other_image = slot_images[overlap_id];
1284 [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
1285 other_image.overlapping_images,
1286 [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
1287 other_image.CheckBadOverlapState();
1288 ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}",
1289 num_removed_overlaps);
1290 }
1291 for (const ImageViewId image_view_id : image_view_ids) {
1292 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1293 slot_image_views.erase(image_view_id);
1294 }
1295 sentenced_images.Push(std::move(slot_images[image_id]));
1296 slot_images.erase(image_id);
1297
1298 alloc_images.erase(alloc_image_it);
1299 if (alloc_images.empty()) {
1300 image_allocs_table.erase(alloc_it);
1301 }
1302 if constexpr (ENABLE_VALIDATION) {
1303 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1304 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1305 }
1306 graphics_image_table.Invalidate();
1307 compute_image_table.Invalidate();
1308 has_deleted_images = true;
1309}
1310
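// Drops entries in the image view cache that resolve to any of the removed image views.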
1311template <class P>
1312void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1313 auto it = image_views.begin();
1314 while (it != image_views.end()) {
1315 const auto found = std::ranges::find(removed_views, it->second);
1316 if (found != removed_views.end()) {
1317 it = image_views.erase(it);
1318 } else {
1319 ++it;
1320 }
1321 }
1322}
1323
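// Drops cached framebuffers whose render target key contains any of the removed image views.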
1324template <class P>
1325void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1326 auto it = framebuffers.begin();
1327 while (it != framebuffers.end()) {
1328 if (it->first.Contains(removed_views)) {
1329 it = framebuffers.erase(it);
1330 } else {
1331 ++it;
1332 }
1333 }
1334}
1335
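// Flags the image as GPU modified and stamps it with a new monotonic modification tick, which
// SynchronizeAliases uses to order copies between aliased images.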
1336template <class P>
1337void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1338 image.flags |= ImageFlagBits::GpuModified;
1339 image.modification_tick = ++modification_tick;
1340}
1341
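// Brings an image up to date with any aliased images that carry a newer modification tick.
// Matching copies are replayed oldest-first so that, where aliases overlap, the most recently
// modified data wins. For example, with aliases A (tick 5) and B (tick 7) both newer than this
// image (tick 3), A's copies are applied before B's.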
1342template <class P>
1343void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1344 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1345 ImageBase& image = slot_images[image_id];
1346 u64 most_recent_tick = image.modification_tick;
1347 for (const AliasedImage& aliased : image.aliased_images) {
1348 ImageBase& aliased_image = slot_images[aliased.id];
1349 if (image.modification_tick < aliased_image.modification_tick) {
1350 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1351 aliased_images.push_back(&aliased);
1352 }
1353 }
1354 if (aliased_images.empty()) {
1355 return;
1356 }
1357 image.modification_tick = most_recent_tick;
1358 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1359 const ImageBase& lhs_image = slot_images[lhs->id];
1360 const ImageBase& rhs_image = slot_images[rhs->id];
1361 return lhs_image.modification_tick < rhs_image.modification_tick;
1362 });
1363 for (const AliasedImage* const aliased : aliased_images) {
1364 CopyImage(image_id, aliased->id, aliased->copies);
1365 }
1366}
1367
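// Makes an image ready for GPU use. When invalidate is set, the guest copy is assumed to be
// about to be fully overwritten, so pending CPU/GPU modifications are discarded and tracking is
// (re)established; otherwise the contents are refreshed from guest memory and aliased images
// are synchronized first. is_modification additionally marks the image as GPU modified.
// Illustrative sketch only (not part of this change): a sampled image would typically be
// prepared as PrepareImage(id, false, false), while a render target that is fully cleared
// could use PrepareImage(id, true, true) to skip the guest-memory refresh.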
1368template <class P>
1369void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1370 Image& image = slot_images[image_id];
1371 if (invalidate) {
1372 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1373 if (False(image.flags & ImageFlagBits::Tracked)) {
1374 TrackImage(image, image_id);
1375 }
1376 } else {
1377 RefreshContents(image, image_id);
1378 SynchronizeAliases(image_id);
1379 }
1380 if (is_modification) {
1381 MarkModification(image);
1382 }
1383 image.frame_tick = frame_tick;
1384}
1385
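// Convenience wrapper: null handles and buffer views are ignored, everything else forwards to
// PrepareImage on the view's backing image.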
1386template <class P>
1387void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1388 bool invalidate) {
1389 if (!image_view_id) {
1390 return;
1391 }
1392 const ImageViewBase& image_view = slot_image_views[image_view_id];
1393 if (image_view.IsBuffer()) {
1394 return;
1395 }
1396 PrepareImage(image_view.image_id, is_modification, invalidate);
1397}
1398
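// Copies between images of the same surface type go through the runtime (or its emulated copy
// path when the backend reports the pair as not directly copyable). Copies across surface types
// fall back to a per-region format conversion: each region builds a framebuffer from the
// destination and converts the source view into it, currently limited to single-layer 2D
// copies with zero offsets.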
1399template <class P>
1400void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1401 Image& dst = slot_images[dst_id];
1402 Image& src = slot_images[src_id];
1403 const auto dst_format_type = GetFormatType(dst.info.format);
1404 const auto src_format_type = GetFormatType(src.info.format);
1405 if (src_format_type == dst_format_type) {
1406 if constexpr (HAS_EMULATED_COPIES) {
1407 if (!runtime.CanImageBeCopied(dst, src)) {
1408 return runtime.EmulateCopyImage(dst, src, copies);
1409 }
1410 }
1411 return runtime.CopyImage(dst, src, copies);
1412 }
1413 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1414 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1415 for (const ImageCopy& copy : copies) {
1416 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1417 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1418 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1419 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1420
1421 const SubresourceBase dst_base{
1422 .level = copy.dst_subresource.base_level,
1423 .layer = copy.dst_subresource.base_layer,
1424 };
1425 const SubresourceBase src_base{
1426 .level = copy.src_subresource.base_level,
1427 .layer = copy.src_subresource.base_layer,
1428 };
1429 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1430 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1431 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1432 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1433 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1434 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1435 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1436 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1437 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1438 ImageView& dst_view = slot_image_views[dst_view_id];
1439 ImageView& src_view = slot_image_views[src_view_id];
1440 [[maybe_unused]] const Extent3D expected_size{
1441 .width = std::min(dst_view.size.width, src_view.size.width),
1442 .height = std::min(dst_view.size.height, src_view.size.height),
1443 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1444 };
1445 UNIMPLEMENTED_IF(copy.extent != expected_size);
1446
1447 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1448 }
1449}
1450
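// Updates a render target binding. When the previously bound view is flagged for preemptive
// download, its image is queued in uncommitted_downloads before the new view id is stored.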
1451template <class P>
1452void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1453 if (*old_id == new_id) {
1454 return;
1455 }
1456 if (*old_id) {
1457 const ImageViewBase& old_view = slot_image_views[*old_id];
1458 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1459 uncommitted_downloads.push_back(old_view.image_id);
1460 }
1461 }
1462 *old_id = new_id;
1463}
1464
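// Builds (or reuses) an image view for the given subresource and a framebuffer that uses it as
// either the color or the depth attachment, depending on the format's surface type. The
// framebuffer size is the mip-level extent divided by the MSAA sample counts.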
1465template <class P>
1466std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1467 ImageId image_id, const ImageViewInfo& view_info) {
1468 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1469 const ImageBase& image = slot_images[image_id];
1470 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1471 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1472 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1473 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1474 const u32 num_samples = image.info.num_samples;
1475 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1476 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1477 .color_buffer_ids = {color_view_id},
1478 .depth_buffer_id = depth_view_id,
1479 .size = {extent.width >> samples_x, extent.height >> samples_y},
1480 });
1481 return {framebuffer_id, view_id};
1482}
1483
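// Returns true when a clear on this view is guaranteed to touch every texel: a null view is
// trivially "full", multi-level or multi-layer images never are, and with scissor testing
// enabled the scissor rectangle must start at the origin and cover the whole view.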
1484template <class P>
1485bool TextureCache<P>::IsFullClear(ImageViewId id) {
1486 if (!id) {
1487 return true;
1488 }
1489 const ImageViewBase& image_view = slot_image_views[id];
1490 const ImageBase& image = slot_images[image_view.image_id];
1491 const Extent3D size = image_view.size;
1492 const auto& regs = maxwell3d.regs;
1493 const auto& scissor = regs.scissor_test[0];
1494 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1495 // Images with multiple resources can't be cleared in a single call
1496 return false;
1497 }
1498 if (regs.clear_flags.scissor == 0) {
1499 // If scissor testing is disabled, the clear is always full
1500 return true;
1501 }
1502 // Make sure the clear covers all texels in the subresource
1503 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1504 scissor.max_y >= size.height;
1505}
1506
1507} // namespace VideoCommon