Diffstat (limited to 'src')

 src/video_core/CMakeLists.txt                            |    6
 src/video_core/renderer_opengl/gl_graphics_pipeline.cpp  |    2
 src/video_core/renderer_opengl/gl_rasterizer.cpp         |    2
 src/video_core/renderer_opengl/gl_texture_cache.h        |    2
 src/video_core/renderer_opengl/gl_texture_cache_base.cpp |    2 (renamed from src/video_core/renderer_opengl/gl_texture_cache_templates.cpp)
 src/video_core/renderer_vulkan/vk_rasterizer.cpp         |    2
 src/video_core/renderer_vulkan/vk_texture_cache.h        |    2
 src/video_core/renderer_vulkan/vk_texture_cache_base.cpp |    2 (renamed from src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp)
 src/video_core/texture_cache/image_view_info.cpp         |    2
 src/video_core/texture_cache/texture_cache.h             | 1711
 src/video_core/texture_cache/texture_cache_base.h        |  402
 src/video_core/texture_cache/texture_cache_templates.h   | 1507
 12 files changed, 1821 insertions(+), 1821 deletions(-)
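Note: this change applies the standard explicit-instantiation pattern for heavy class templates. The class declaration moves into texture_cache_base.h, which is cheap for consumers to include; the template member definitions move into texture_cache.h, which is included only by the translation units that instantiate the template; and each renderer pins one explicit instantiation (template class VideoCommon::TextureCache<...Params>) in its *_texture_cache_base.cpp. A minimal single-file sketch of the pattern follows; the Widget and Params names are hypothetical illustrations, not code from this commit:

// widget_base.h: cheap header that consumers include; declares the template only.
template <class P>
class Widget {
public:
    void Frob();
};

// widget.h: heavy header with the template member definitions; included only
// by the translation units that explicitly instantiate the template.
template <class P>
void Widget<P>::Frob() {
    // ... backend-independent implementation ...
}

// backend_widget.cpp: one explicit instantiation per backend, so the member
// definitions are compiled once instead of in every file that uses Widget.
struct Params {};
template class Widget<Params>;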
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 1250cca6f..2f6cdd216 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -97,7 +97,7 @@ add_library(video_core STATIC
     renderer_opengl/gl_stream_buffer.h
     renderer_opengl/gl_texture_cache.cpp
     renderer_opengl/gl_texture_cache.h
-    renderer_opengl/gl_texture_cache_templates.cpp
+    renderer_opengl/gl_texture_cache_base.cpp
     renderer_opengl/gl_query_cache.cpp
     renderer_opengl/gl_query_cache.h
     renderer_opengl/maxwell_to_gl.h
@@ -156,7 +156,7 @@ add_library(video_core STATIC
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture_cache.cpp
     renderer_vulkan/vk_texture_cache.h
-    renderer_vulkan/vk_texture_cache_templates.cpp
+    renderer_vulkan/vk_texture_cache_base.cpp
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
     shader_cache.cpp
@@ -188,7 +188,7 @@ add_library(video_core STATIC
    texture_cache/samples_helper.h
    texture_cache/slot_vector.h
    texture_cache/texture_cache.h
-    texture_cache/texture_cache_templates.h
+    texture_cache/texture_cache_base.h
    texture_cache/types.h
    texture_cache/util.cpp
    texture_cache/util.h
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index fac0034fb..bccb37a58 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -15,7 +15,7 @@
 #include "video_core/renderer_opengl/gl_shader_util.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/shader_notify.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 
 #if defined(_MSC_VER) && defined(NDEBUG)
 #define LAMBDA_FORCEINLINE [[msvc::forceinline]]
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 41d2b73f4..b909c387e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -32,7 +32,7 @@
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/shader_cache.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 
 namespace OpenGL {
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 921072ebe..4a4f6301c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -12,7 +12,7 @@
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/util_shaders.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 
 namespace OpenGL {
 
diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
index 00ed06447..385358fea 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/texture_cache/texture_cache_templates.h"
+#include "video_core/texture_cache/texture_cache.h"
 
 namespace VideoCommon {
 template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 23cef2996..3ac18ea54 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -32,7 +32,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0b73d55f8..5fe6b7ba3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -9,7 +9,7 @@
 
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
index fd8978954..44e688342 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/texture_cache/texture_cache_templates.h"
+#include "video_core/texture_cache/texture_cache.h"
 
 namespace VideoCommon {
 template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
index faf5b151f..f14a92565 100644
--- a/src/video_core/texture_cache/image_view_info.cpp
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -6,7 +6,7 @@
 
 #include "common/assert.h"
 #include "video_core/texture_cache/image_view_info.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a4f6e9422..5884fa16e 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -4,48 +4,7 @@
 
 #pragma once
 
-#include <algorithm>
-#include <array>
-#include <bit>
-#include <memory>
-#include <mutex>
-#include <optional>
-#include <span>
-#include <type_traits>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include <boost/container/small_vector.hpp>
-
-#include "common/alignment.h"
-#include "common/common_types.h"
-#include "common/literals.h"
-#include "common/logging/log.h"
-#include "common/settings.h"
-#include "video_core/compatible_formats.h"
-#include "video_core/delayed_destruction_ring.h"
-#include "video_core/dirty_flags.h"
-#include "video_core/engines/fermi_2d.h"
-#include "video_core/engines/kepler_compute.h"
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
-#include "video_core/surface.h"
-#include "video_core/texture_cache/descriptor_table.h"
-#include "video_core/texture_cache/format_lookup_table.h"
-#include "video_core/texture_cache/formatter.h"
-#include "video_core/texture_cache/image_base.h"
-#include "video_core/texture_cache/image_info.h"
-#include "video_core/texture_cache/image_view_base.h"
-#include "video_core/texture_cache/image_view_info.h"
-#include "video_core/texture_cache/render_targets.h"
-#include "video_core/texture_cache/samples_helper.h"
-#include "video_core/texture_cache/slot_vector.h"
-#include "video_core/texture_cache/types.h"
-#include "video_core/texture_cache/util.h"
-#include "video_core/textures/texture.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 
 namespace VideoCommon {
 
@@ -62,341 +21,1487 @@ using VideoCore::Surface::SurfaceType;
62using namespace Common::Literals; 21using namespace Common::Literals;
63 22
64template <class P> 23template <class P>
65class TextureCache { 24TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
66 /// Address shift for caching images into a hash table 25 Tegra::Engines::Maxwell3D& maxwell3d_,
67 static constexpr u64 PAGE_BITS = 20; 26 Tegra::Engines::KeplerCompute& kepler_compute_,
68 27 Tegra::MemoryManager& gpu_memory_)
69 /// Enables debugging features to the texture cache 28 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
70 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; 29 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
71 /// Implement blits as copies between framebuffers 30 // Configure null sampler
72 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; 31 TSCEntry sampler_descriptor{};
73 /// True when some copies have to be emulated 32 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
74 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; 33 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
75 /// True when the API can provide info about the memory of the device. 34 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
76 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; 35 sampler_descriptor.cubemap_anisotropy.Assign(1);
77 36
78 /// Image view ID for null descriptors 37 // Make sure the first index is reserved for the null resources
79 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; 38 // This way the null resource becomes a compile time constant
80 /// Sampler ID for bugged sampler ids 39 void(slot_image_views.insert(runtime, NullImageParams{}));
81 static constexpr SamplerId NULL_SAMPLER_ID{0}; 40 void(slot_samplers.insert(runtime, sampler_descriptor));
82 41
83 static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; 42 deletion_iterator = slot_images.begin();
84 static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; 43
85 44 if constexpr (HAS_DEVICE_MEMORY_INFO) {
86 using Runtime = typename P::Runtime; 45 const auto device_memory = runtime.GetDeviceLocalMemory();
87 using Image = typename P::Image; 46 const u64 possible_expected_memory = (device_memory * 3) / 10;
88 using ImageAlloc = typename P::ImageAlloc; 47 const u64 possible_critical_memory = (device_memory * 6) / 10;
89 using ImageView = typename P::ImageView; 48 expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
90 using Sampler = typename P::Sampler; 49 critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
91 using Framebuffer = typename P::Framebuffer; 50 minimum_memory = 0;
92 51 } else {
93 struct BlitImages { 52 // on OGL we can be more conservatives as the driver takes care.
94 ImageId dst_id; 53 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
95 ImageId src_id; 54 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
96 PixelFormat dst_format; 55 minimum_memory = expected_memory;
97 PixelFormat src_format; 56 }
98 }; 57}
99 58
100 template <typename T> 59template <class P>
101 struct IdentityHash { 60void TextureCache<P>::RunGarbageCollector() {
102 [[nodiscard]] size_t operator()(T value) const noexcept { 61 const bool high_priority_mode = total_used_memory >= expected_memory;
103 return static_cast<size_t>(value); 62 const bool aggressive_mode = total_used_memory >= critical_memory;
63 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
64 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
65 for (; num_iterations > 0; --num_iterations) {
66 if (deletion_iterator == slot_images.end()) {
67 deletion_iterator = slot_images.begin();
68 if (deletion_iterator == slot_images.end()) {
69 break;
70 }
104 } 71 }
105 }; 72 auto [image_id, image_tmp] = *deletion_iterator;
106 73 Image* image = image_tmp; // fix clang error.
107public: 74 const bool is_alias = True(image->flags & ImageFlagBits::Alias);
108 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, 75 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
109 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); 76 const bool must_download = image->IsSafeDownload();
110 77 bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
111 /// Notify the cache that a new frame has been queued 78 const u64 ticks_needed =
112 void TickFrame(); 79 is_bad_overlap
113 80 ? ticks_to_destroy >> 4
114 /// Return a constant reference to the given image view id 81 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
115 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; 82 should_care |= aggressive_mode;
116 83 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
117 /// Return a reference to the given image view id 84 if (is_bad_overlap) {
118 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; 85 const bool overlap_check = std::ranges::all_of(
119 86 image->overlapping_images, [&, image](const ImageId& overlap_id) {
120 /// Mark an image as modified from the GPU 87 auto& overlap = slot_images[overlap_id];
121 void MarkModification(ImageId id) noexcept; 88 return overlap.frame_tick >= image->frame_tick;
122 89 });
123 /// Fill image_view_ids with the graphics images in indices 90 if (!overlap_check) {
124 void FillGraphicsImageViews(std::span<const u32> indices, 91 ++deletion_iterator;
125 std::span<ImageViewId> image_view_ids); 92 continue;
93 }
94 }
95 if (!is_bad_overlap && must_download) {
96 const bool alias_check = std::ranges::none_of(
97 image->aliased_images, [&, image](const AliasedImage& alias) {
98 auto& alias_image = slot_images[alias.id];
99 return (alias_image.frame_tick < image->frame_tick) ||
100 (alias_image.modification_tick < image->modification_tick);
101 });
102
103 if (alias_check) {
104 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
105 const auto copies = FullDownloadCopies(image->info);
106 image->DownloadMemory(map, copies);
107 runtime.Finish();
108 SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
109 }
110 }
111 if (True(image->flags & ImageFlagBits::Tracked)) {
112 UntrackImage(*image, image_id);
113 }
114 UnregisterImage(image_id);
115 DeleteImage(image_id);
116 if (is_bad_overlap) {
117 ++num_iterations;
118 }
119 }
120 ++deletion_iterator;
121 }
122}
126 123
127 /// Fill image_view_ids with the compute images in indices 124template <class P>
128 void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); 125void TextureCache<P>::TickFrame() {
126 if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
127 RunGarbageCollector();
128 }
129 sentenced_images.Tick();
130 sentenced_framebuffers.Tick();
131 sentenced_image_view.Tick();
132 ++frame_tick;
133}
129 134
130 /// Get the sampler from the graphics descriptor table in the specified index 135template <class P>
131 Sampler* GetGraphicsSampler(u32 index); 136const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
137 return slot_image_views[id];
138}
132 139
133 /// Get the sampler from the compute descriptor table in the specified index 140template <class P>
134 Sampler* GetComputeSampler(u32 index); 141typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
142 return slot_image_views[id];
143}
135 144
136 /// Refresh the state for graphics image view and sampler descriptors 145template <class P>
137 void SynchronizeGraphicsDescriptors(); 146void TextureCache<P>::MarkModification(ImageId id) noexcept {
147 MarkModification(slot_images[id]);
148}
138 149
139 /// Refresh the state for compute image view and sampler descriptors 150template <class P>
140 void SynchronizeComputeDescriptors(); 151void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
152 std::span<ImageViewId> image_view_ids) {
153 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
154}
141 155
142 /// Update bound render targets and upload memory if necessary 156template <class P>
143 /// @param is_clear True when the render targets are being used for clears 157void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
144 void UpdateRenderTargets(bool is_clear); 158 std::span<ImageViewId> image_view_ids) {
159 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
160}
145 161
146 /// Find a framebuffer with the currently bound render targets 162template <class P>
147 /// UpdateRenderTargets should be called before this 163typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
148 Framebuffer* GetFramebuffer(); 164 if (index > graphics_sampler_table.Limit()) {
165 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
166 return &slot_samplers[NULL_SAMPLER_ID];
167 }
168 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
169 SamplerId& id = graphics_sampler_ids[index];
170 if (is_new) {
171 id = FindSampler(descriptor);
172 }
173 return &slot_samplers[id];
174}
149 175
150 /// Mark images in a range as modified from the CPU 176template <class P>
151 void WriteMemory(VAddr cpu_addr, size_t size); 177typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
178 if (index > compute_sampler_table.Limit()) {
179 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
180 return &slot_samplers[NULL_SAMPLER_ID];
181 }
182 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
183 SamplerId& id = compute_sampler_ids[index];
184 if (is_new) {
185 id = FindSampler(descriptor);
186 }
187 return &slot_samplers[id];
188}
152 189
153 /// Download contents of host images to guest memory in a region 190template <class P>
154 void DownloadMemory(VAddr cpu_addr, size_t size); 191void TextureCache<P>::SynchronizeGraphicsDescriptors() {
192 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
193 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
194 const u32 tic_limit = maxwell3d.regs.tic.limit;
195 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
196 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
197 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
198 }
199 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
200 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
201 }
202}
155 203
156 /// Remove images in a region 204template <class P>
157 void UnmapMemory(VAddr cpu_addr, size_t size); 205void TextureCache<P>::SynchronizeComputeDescriptors() {
206 const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
207 const u32 tic_limit = kepler_compute.regs.tic.limit;
208 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
209 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
210 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
211 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
212 }
213 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
214 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
215 }
216}
158 217
159 /// Remove images in a region 218template <class P>
160 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); 219void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
220 using namespace VideoCommon::Dirty;
221 auto& flags = maxwell3d.dirty.flags;
222 if (!flags[Dirty::RenderTargets]) {
223 for (size_t index = 0; index < NUM_RT; ++index) {
224 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
225 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
226 }
227 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
228 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
229 return;
230 }
231 flags[Dirty::RenderTargets] = false;
161 232
162 /// Blit an image with the given parameters 233 // Render target control is used on all render targets, so force look ups when this one is up
163 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 234 const bool force = flags[Dirty::RenderTargetControl];
164 const Tegra::Engines::Fermi2D::Surface& src, 235 flags[Dirty::RenderTargetControl] = false;
165 const Tegra::Engines::Fermi2D::Config& copy);
166 236
167 /// Try to find a cached image view in the given CPU address 237 for (size_t index = 0; index < NUM_RT; ++index) {
168 [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); 238 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
239 if (flags[Dirty::ColorBuffer0 + index] || force) {
240 flags[Dirty::ColorBuffer0 + index] = false;
241 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
242 }
243 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
244 }
245 if (flags[Dirty::ZetaBuffer] || force) {
246 flags[Dirty::ZetaBuffer] = false;
247 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
248 }
249 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
250 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
169 251
170 /// Return true when there are uncommitted images to be downloaded 252 for (size_t index = 0; index < NUM_RT; ++index) {
171 [[nodiscard]] bool HasUncommittedFlushes() const noexcept; 253 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
254 }
255 render_targets.size = Extent2D{
256 maxwell3d.regs.render_area.width,
257 maxwell3d.regs.render_area.height,
258 };
259}
172 260
173 /// Return true when the caller should wait for async downloads 261template <class P>
174 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; 262typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
263 return &slot_framebuffers[GetFramebufferId(render_targets)];
264}
175 265
176 /// Commit asynchronous downloads 266template <class P>
177 void CommitAsyncFlushes(); 267void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
268 std::span<ImageViewId> cached_image_view_ids,
269 std::span<const u32> indices,
270 std::span<ImageViewId> image_view_ids) {
271 ASSERT(indices.size() <= image_view_ids.size());
272 do {
273 has_deleted_images = false;
274 std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
275 return VisitImageView(table, cached_image_view_ids, index);
276 });
277 } while (has_deleted_images);
278}
178 279
179 /// Pop asynchronous downloads 280template <class P>
180 void PopAsyncFlushes(); 281ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
282 std::span<ImageViewId> cached_image_view_ids,
283 u32 index) {
284 if (index > table.Limit()) {
285 LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
286 return NULL_IMAGE_VIEW_ID;
287 }
288 const auto [descriptor, is_new] = table.Read(index);
289 ImageViewId& image_view_id = cached_image_view_ids[index];
290 if (is_new) {
291 image_view_id = FindImageView(descriptor);
292 }
293 if (image_view_id != NULL_IMAGE_VIEW_ID) {
294 PrepareImageView(image_view_id, false, false);
295 }
296 return image_view_id;
297}
181 298
182 /// Return true when a CPU region is modified from the GPU 299template <class P>
183 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 300FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
301 const auto [pair, is_new] = framebuffers.try_emplace(key);
302 FramebufferId& framebuffer_id = pair->second;
303 if (!is_new) {
304 return framebuffer_id;
305 }
306 std::array<ImageView*, NUM_RT> color_buffers;
307 std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
308 [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
309 ImageView* const depth_buffer =
310 key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
311 framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
312 return framebuffer_id;
313}
184 314
185 std::mutex mutex; 315template <class P>
316void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
317 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
318 if (True(image.flags & ImageFlagBits::CpuModified)) {
319 return;
320 }
321 image.flags |= ImageFlagBits::CpuModified;
322 if (True(image.flags & ImageFlagBits::Tracked)) {
323 UntrackImage(image, image_id);
324 }
325 });
326}
186 327
187private: 328template <class P>
188 /// Iterate over all page indices in a range 329void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
189 template <typename Func> 330 std::vector<ImageId> images;
190 static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { 331 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
191 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; 332 if (!image.IsSafeDownload()) {
192 const u64 page_end = (addr + size - 1) >> PAGE_BITS; 333 return;
193 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
194 if constexpr (RETURNS_BOOL) {
195 if (func(page)) {
196 break;
197 }
198 } else {
199 func(page);
200 }
201 } 334 }
335 image.flags &= ~ImageFlagBits::GpuModified;
336 images.push_back(image_id);
337 });
338 if (images.empty()) {
339 return;
202 } 340 }
341 std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
342 return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
343 });
344 for (const ImageId image_id : images) {
345 Image& image = slot_images[image_id];
346 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
347 const auto copies = FullDownloadCopies(image.info);
348 image.DownloadMemory(map, copies);
349 runtime.Finish();
350 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
351 }
352}
203 353
204 template <typename Func> 354template <class P>
205 static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { 355void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
206 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; 356 std::vector<ImageId> deleted_images;
207 const u64 page_end = (addr + size - 1) >> PAGE_BITS; 357 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
208 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { 358 for (const ImageId id : deleted_images) {
209 if constexpr (RETURNS_BOOL) { 359 Image& image = slot_images[id];
210 if (func(page)) { 360 if (True(image.flags & ImageFlagBits::Tracked)) {
211 break; 361 UntrackImage(image, id);
212 }
213 } else {
214 func(page);
215 }
216 } 362 }
363 UnregisterImage(id);
364 DeleteImage(id);
217 } 365 }
366}
218 367
219 /// Runs the Garbage Collector. 368template <class P>
220 void RunGarbageCollector(); 369void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
221 370 std::vector<ImageId> deleted_images;
222 /// Fills image_view_ids in the image views in indices 371 ForEachImageInRegionGPU(gpu_addr, size,
223 void FillImageViews(DescriptorTable<TICEntry>& table, 372 [&](ImageId id, Image&) { deleted_images.push_back(id); });
224 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, 373 for (const ImageId id : deleted_images) {
225 std::span<ImageViewId> image_view_ids); 374 Image& image = slot_images[id];
226 375 if (True(image.flags & ImageFlagBits::Remapped)) {
227 /// Find or create an image view in the guest descriptor table 376 continue;
228 ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, 377 }
229 std::span<ImageViewId> cached_image_view_ids, u32 index); 378 image.flags |= ImageFlagBits::Remapped;
230 379 if (True(image.flags & ImageFlagBits::Tracked)) {
231 /// Find or create a framebuffer with the given render target parameters 380 UntrackImage(image, id);
232 FramebufferId GetFramebufferId(const RenderTargets& key); 381 }
382 }
383}
233 384
234 /// Refresh the contents (pixel data) of an image 385template <class P>
235 void RefreshContents(Image& image, ImageId image_id); 386void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
387 const Tegra::Engines::Fermi2D::Surface& src,
388 const Tegra::Engines::Fermi2D::Config& copy) {
389 const BlitImages images = GetBlitImages(dst, src);
390 const ImageId dst_id = images.dst_id;
391 const ImageId src_id = images.src_id;
392 PrepareImage(src_id, false, false);
393 PrepareImage(dst_id, true, false);
394
395 ImageBase& dst_image = slot_images[dst_id];
396 const ImageBase& src_image = slot_images[src_id];
397
398 // TODO: Deduplicate
399 const std::optional src_base = src_image.TryFindBase(src.Address());
400 const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
401 const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
402 const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
403 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
404 const Region2D src_region{
405 Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
406 Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
407 };
236 408
237 /// Upload data from guest to an image 409 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
238 template <typename StagingBuffer> 410 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
239 void UploadImageContents(Image& image, StagingBuffer& staging_buffer); 411 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
412 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
413 const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
414 const Region2D dst_region{
415 Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
416 Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
417 };
240 418
241 /// Find or create an image view from a guest descriptor 419 // Always call this after src_framebuffer_id was queried, as the address might be invalidated.
242 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); 420 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
421 if constexpr (FRAMEBUFFER_BLITS) {
422 // OpenGL blits from framebuffers, not images
423 Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
424 runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
425 copy.filter, copy.operation);
426 } else {
427 // Vulkan can blit images, but it lacks format reinterpretations
428 // Provide a framebuffer in case it's necessary
429 ImageView& dst_view = slot_image_views[dst_view_id];
430 ImageView& src_view = slot_image_views[src_view_id];
431 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
432 copy.operation);
433 }
434}
243 435
244 /// Create a new image view from a guest descriptor 436template <class P>
245 [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); 437typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
438 // TODO: Properly implement this
439 const auto it = page_table.find(cpu_addr >> PAGE_BITS);
440 if (it == page_table.end()) {
441 return nullptr;
442 }
443 const auto& image_map_ids = it->second;
444 for (const ImageMapId map_id : image_map_ids) {
445 const ImageMapView& map = slot_map_views[map_id];
446 const ImageBase& image = slot_images[map.image_id];
447 if (image.cpu_addr != cpu_addr) {
448 continue;
449 }
450 if (image.image_view_ids.empty()) {
451 continue;
452 }
453 return &slot_image_views[image.image_view_ids.at(0)];
454 }
455 return nullptr;
456}
246 457
247 /// Find or create an image from the given parameters 458template <class P>
248 [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, 459bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
249 RelaxedOptions options = RelaxedOptions{}); 460 return !uncommitted_downloads.empty();
461}
250 462
251 /// Find an image from the given parameters 463template <class P>
252 [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, 464bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
253 RelaxedOptions options); 465 return !committed_downloads.empty() && !committed_downloads.front().empty();
466}
254 467
255 /// Create an image from the given parameters 468template <class P>
256 [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, 469void TextureCache<P>::CommitAsyncFlushes() {
257 RelaxedOptions options); 470 // This is intentionally passing the value by copy
471 committed_downloads.push(uncommitted_downloads);
472 uncommitted_downloads.clear();
473}
258 474
259 /// Create a new image and join perfectly matching existing images 475template <class P>
260 /// Remove joined images from the cache 476void TextureCache<P>::PopAsyncFlushes() {
261 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); 477 if (committed_downloads.empty()) {
478 return;
479 }
480 const std::span<const ImageId> download_ids = committed_downloads.front();
481 if (download_ids.empty()) {
482 committed_downloads.pop();
483 return;
484 }
485 size_t total_size_bytes = 0;
486 for (const ImageId image_id : download_ids) {
487 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
488 }
489 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
490 const size_t original_offset = download_map.offset;
491 for (const ImageId image_id : download_ids) {
492 Image& image = slot_images[image_id];
493 const auto copies = FullDownloadCopies(image.info);
494 image.DownloadMemory(download_map, copies);
495 download_map.offset += image.unswizzled_size_bytes;
496 }
497 // Wait for downloads to finish
498 runtime.Finish();
499
500 download_map.offset = original_offset;
501 std::span<u8> download_span = download_map.mapped_span;
502 for (const ImageId image_id : download_ids) {
503 const ImageBase& image = slot_images[image_id];
504 const auto copies = FullDownloadCopies(image.info);
505 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
506 download_map.offset += image.unswizzled_size_bytes;
507 download_span = download_span.subspan(image.unswizzled_size_bytes);
508 }
509 committed_downloads.pop();
510}
262 511
263 /// Return a blit image pair from the given guest blit parameters 512template <class P>
264 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, 513bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
265 const Tegra::Engines::Fermi2D::Surface& src); 514 bool is_modified = false;
515 ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
516 if (False(image.flags & ImageFlagBits::GpuModified)) {
517 return false;
518 }
519 is_modified = true;
520 return true;
521 });
522 return is_modified;
523}
266 524
267 /// Find or create a sampler from a guest descriptor sampler 525template <class P>
268 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); 526void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
527 if (False(image.flags & ImageFlagBits::CpuModified)) {
528 // Only upload modified images
529 return;
530 }
531 image.flags &= ~ImageFlagBits::CpuModified;
532 TrackImage(image, image_id);
269 533
270 /// Find or create an image view for the given color buffer index 534 if (image.info.num_samples > 1) {
271 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); 535 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
536 return;
537 }
538 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
539 UploadImageContents(image, staging);
540 runtime.InsertUploadMemoryBarrier();
541}
272 542
273 /// Find or create an image view for the depth buffer 543template <class P>
274 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); 544template <typename StagingBuffer>
545void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
546 const std::span<u8> mapped_span = staging.mapped_span;
547 const GPUVAddr gpu_addr = image.gpu_addr;
548
549 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
550 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
551 const auto uploads = FullUploadSwizzles(image.info);
552 runtime.AccelerateImageUpload(image, staging, uploads);
553 } else if (True(image.flags & ImageFlagBits::Converted)) {
554 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
555 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
556 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
557 image.UploadMemory(staging, copies);
558 } else {
559 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
560 image.UploadMemory(staging, copies);
561 }
562}
275 563
276 /// Find or create a view for a render target with the given image parameters 564template <class P>
277 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, 565ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
278 bool is_clear); 566 if (!IsValidEntry(gpu_memory, config)) {
567 return NULL_IMAGE_VIEW_ID;
568 }
569 const auto [pair, is_new] = image_views.try_emplace(config);
570 ImageViewId& image_view_id = pair->second;
571 if (is_new) {
572 image_view_id = CreateImageView(config);
573 }
574 return image_view_id;
575}
279 576
280 /// Iterates over all the images in a region calling func 577template <class P>
281 template <typename Func> 578ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
282 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); 579 const ImageInfo info(config);
580 if (info.type == ImageType::Buffer) {
581 const ImageViewInfo view_info(config, 0);
582 return slot_image_views.insert(runtime, info, view_info, config.Address());
583 }
584 const u32 layer_offset = config.BaseLayer() * info.layer_stride;
585 const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
586 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
587 if (!image_id) {
588 return NULL_IMAGE_VIEW_ID;
589 }
590 ImageBase& image = slot_images[image_id];
591 const SubresourceBase base = image.TryFindBase(config.Address()).value();
592 ASSERT(base.level == 0);
593 const ImageViewInfo view_info(config, base.layer);
594 const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
595 ImageViewBase& image_view = slot_image_views[image_view_id];
596 image_view.flags |= ImageViewFlagBits::Strong;
597 image.flags |= ImageFlagBits::Strong;
598 return image_view_id;
599}
283 600
284 template <typename Func> 601template <class P>
285 void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); 602ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
603 RelaxedOptions options) {
604 if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
605 return image_id;
606 }
607 return InsertImage(info, gpu_addr, options);
608}
286 609
287 template <typename Func> 610template <class P>
288 void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); 611ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
612 RelaxedOptions options) {
613 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
614 if (!cpu_addr) {
615 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
616 if (!cpu_addr) {
617 return ImageId{};
618 }
619 }
620 const bool broken_views = runtime.HasBrokenTextureViewFormats();
621 const bool native_bgr = runtime.HasNativeBgr();
622 ImageId image_id;
623 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
624 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
625 return false;
626 }
627 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
628 const bool strict_size = False(options & RelaxedOptions::Size) &&
629 True(existing_image.flags & ImageFlagBits::Strong);
630 const ImageInfo& existing = existing_image.info;
631 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
632 existing.pitch == info.pitch &&
633 IsPitchLinearSameSize(existing, info, strict_size) &&
634 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
635 image_id = existing_image_id;
636 return true;
637 }
638 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
639 native_bgr)) {
640 image_id = existing_image_id;
641 return true;
642 }
643 return false;
644 };
645 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
646 return image_id;
647}
289 648
290 /// Iterates over all the images in a region calling func 649template <class P>
291 template <typename Func> 650ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
292 void ForEachSparseSegment(ImageBase& image, Func&& func); 651 RelaxedOptions options) {
652 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
653 if (!cpu_addr) {
654 const auto size = CalculateGuestSizeInBytes(info);
655 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
656 if (!cpu_addr) {
657 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
658 virtual_invalid_space += Common::AlignUp(size, 32);
659 cpu_addr = std::optional<VAddr>(fake_addr);
660 }
661 }
662 ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
663 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
664 const Image& image = slot_images[image_id];
665 // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
666 const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
667 if (is_new) {
668 it->second = slot_image_allocs.insert();
669 }
670 slot_image_allocs[it->second].images.push_back(image_id);
671 return image_id;
672}
293 673
294 /// Find or create an image view in the given image with the passed parameters 674template <class P>
295 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); 675ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
676 ImageInfo new_info = info;
677 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
678 const bool broken_views = runtime.HasBrokenTextureViewFormats();
679 const bool native_bgr = runtime.HasNativeBgr();
680 std::vector<ImageId> overlap_ids;
681 std::unordered_set<ImageId> overlaps_found;
682 std::vector<ImageId> left_aliased_ids;
683 std::vector<ImageId> right_aliased_ids;
684 std::unordered_set<ImageId> ignore_textures;
685 std::vector<ImageId> bad_overlap_ids;
686 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
687 if (True(overlap.flags & ImageFlagBits::Remapped)) {
688 ignore_textures.insert(overlap_id);
689 return;
690 }
691 if (info.type == ImageType::Linear) {
692 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
693 // Alias linear images with the same pitch
694 left_aliased_ids.push_back(overlap_id);
695 }
696 return;
697 }
698 overlaps_found.insert(overlap_id);
699 static constexpr bool strict_size = true;
700 const std::optional<OverlapResult> solution = ResolveOverlap(
701 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
702 if (solution) {
703 gpu_addr = solution->gpu_addr;
704 cpu_addr = solution->cpu_addr;
705 new_info.resources = solution->resources;
706 overlap_ids.push_back(overlap_id);
707 return;
708 }
709 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
710 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
711 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
712 left_aliased_ids.push_back(overlap_id);
713 overlap.flags |= ImageFlagBits::Alias;
714 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
715 broken_views, native_bgr)) {
716 right_aliased_ids.push_back(overlap_id);
717 overlap.flags |= ImageFlagBits::Alias;
718 } else {
719 bad_overlap_ids.push_back(overlap_id);
720 overlap.flags |= ImageFlagBits::BadOverlap;
721 }
722 };
723 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
724 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
725 if (!overlaps_found.contains(overlap_id)) {
726 if (True(overlap.flags & ImageFlagBits::Remapped)) {
727 ignore_textures.insert(overlap_id);
728 }
729 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
730 ignore_textures.insert(overlap_id);
731 }
732 }
733 };
734 ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
735 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
736 Image& new_image = slot_images[new_image_id];
296 737
297 /// Register image in the page table 738 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
298 void RegisterImage(ImageId image); 739 new_image.flags |= ImageFlagBits::Sparse;
740 }
299 741
300 /// Unregister image from the page table 742 for (const ImageId overlap_id : ignore_textures) {
301 void UnregisterImage(ImageId image); 743 Image& overlap = slot_images[overlap_id];
744 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
745 UNIMPLEMENTED();
746 }
747 if (True(overlap.flags & ImageFlagBits::Tracked)) {
748 UntrackImage(overlap, overlap_id);
749 }
750 UnregisterImage(overlap_id);
751 DeleteImage(overlap_id);
752 }
302 753
303 /// Track CPU reads and writes for image 754 // TODO: Only upload what we need
304 void TrackImage(ImageBase& image, ImageId image_id); 755 RefreshContents(new_image, new_image_id);
756
757 for (const ImageId overlap_id : overlap_ids) {
758 Image& overlap = slot_images[overlap_id];
759 if (overlap.info.num_samples != new_image.info.num_samples) {
760 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
761 } else {
762 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
763 const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
764 runtime.CopyImage(new_image, overlap, copies);
765 }
766 if (True(overlap.flags & ImageFlagBits::Tracked)) {
767 UntrackImage(overlap, overlap_id);
768 }
769 UnregisterImage(overlap_id);
770 DeleteImage(overlap_id);
771 }
772 ImageBase& new_image_base = new_image;
773 for (const ImageId aliased_id : right_aliased_ids) {
774 ImageBase& aliased = slot_images[aliased_id];
775 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
776 new_image.flags |= ImageFlagBits::Alias;
777 }
778 for (const ImageId aliased_id : left_aliased_ids) {
779 ImageBase& aliased = slot_images[aliased_id];
780 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
781 new_image.flags |= ImageFlagBits::Alias;
782 }
783 for (const ImageId aliased_id : bad_overlap_ids) {
784 ImageBase& aliased = slot_images[aliased_id];
785 aliased.overlapping_images.push_back(new_image_id);
786 new_image.overlapping_images.push_back(aliased_id);
787 new_image.flags |= ImageFlagBits::BadOverlap;
788 }
789 RegisterImage(new_image_id);
790 return new_image_id;
791}
305 792
306 /// Stop tracking CPU reads and writes for image 793template <class P>
307 void UntrackImage(ImageBase& image, ImageId image_id); 794typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
795 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
796 static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
797 const GPUVAddr dst_addr = dst.Address();
798 const GPUVAddr src_addr = src.Address();
799 ImageInfo dst_info(dst);
800 ImageInfo src_info(src);
801 ImageId dst_id;
802 ImageId src_id;
803 do {
804 has_deleted_images = false;
805 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
806 src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
807 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
808 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
809 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
810 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
811 continue;
812 }
813 if (!dst_id) {
814 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
815 }
816 if (!src_id) {
817 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
818 }
819 } while (has_deleted_images);
820 return BlitImages{
821 .dst_id = dst_id,
822 .src_id = src_id,
823 .dst_format = dst_info.format,
824 .src_format = src_info.format,
825 };
826}
308 827
309 /// Delete image from the cache 828template <class P>
310 void DeleteImage(ImageId image); 829SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
830 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
831 return NULL_SAMPLER_ID;
832 }
833 const auto [pair, is_new] = samplers.try_emplace(config);
834 if (is_new) {
835 pair->second = slot_samplers.insert(runtime, config);
836 }
837 return pair->second;
838}
311 839
312 /// Remove image views references from the cache 840template <class P>
313 void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); 841ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
842 const auto& regs = maxwell3d.regs;
843 if (index >= regs.rt_control.count) {
844 return ImageViewId{};
845 }
846 const auto& rt = regs.rt[index];
847 const GPUVAddr gpu_addr = rt.Address();
848 if (gpu_addr == 0) {
849 return ImageViewId{};
850 }
851 if (rt.format == Tegra::RenderTargetFormat::NONE) {
852 return ImageViewId{};
853 }
854 const ImageInfo info(regs, index);
855 return FindRenderTargetView(info, gpu_addr, is_clear);
856}
314 857
315 /// Remove framebuffers using the given image views from the cache 858template <class P>
316 void RemoveFramebuffers(std::span<const ImageViewId> removed_views); 859ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
860 const auto& regs = maxwell3d.regs;
861 if (!regs.zeta_enable) {
862 return ImageViewId{};
863 }
864 const GPUVAddr gpu_addr = regs.zeta.Address();
865 if (gpu_addr == 0) {
866 return ImageViewId{};
867 }
868 const ImageInfo info(regs);
869 return FindRenderTargetView(info, gpu_addr, is_clear);
870}
317 871
318 /// Mark an image as modified from the GPU 872template <class P>
319 void MarkModification(ImageBase& image) noexcept; 873ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
874 bool is_clear) {
875 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
876 const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
877 if (!image_id) {
878 return NULL_IMAGE_VIEW_ID;
879 }
880 Image& image = slot_images[image_id];
881 const ImageViewType view_type = RenderTargetImageViewType(info);
882 SubresourceBase base;
883 if (image.info.type == ImageType::Linear) {
884 base = SubresourceBase{.level = 0, .layer = 0};
885 } else {
886 base = image.TryFindBase(gpu_addr).value();
887 }
888 const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
889 const SubresourceRange range{
890 .base = base,
891 .extent = {.levels = 1, .layers = layers},
892 };
893 return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
894}
320 895
321 /// Synchronize image aliases, copying data if needed 896template <class P>
322 void SynchronizeAliases(ImageId image_id); 897template <typename Func>
898void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
899 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
900 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
901 boost::container::small_vector<ImageId, 32> images;
902 boost::container::small_vector<ImageMapId, 32> maps;
903 ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
904 const auto it = page_table.find(page);
905 if (it == page_table.end()) {
906 if constexpr (BOOL_BREAK) {
907 return false;
908 } else {
909 return;
910 }
911 }
912 for (const ImageMapId map_id : it->second) {
913 ImageMapView& map = slot_map_views[map_id];
914 if (map.picked) {
915 continue;
916 }
917 if (!map.Overlaps(cpu_addr, size)) {
918 continue;
919 }
920 map.picked = true;
921 maps.push_back(map_id);
922 Image& image = slot_images[map.image_id];
923 if (True(image.flags & ImageFlagBits::Picked)) {
924 continue;
925 }
926 image.flags |= ImageFlagBits::Picked;
927 images.push_back(map.image_id);
928 if constexpr (BOOL_BREAK) {
929 if (func(map.image_id, image)) {
930 return true;
931 }
932 } else {
933 func(map.image_id, image);
934 }
935 }
936 if constexpr (BOOL_BREAK) {
937 return false;
938 }
939 });
940 for (const ImageId image_id : images) {
941 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
942 }
943 for (const ImageMapId map_id : maps) {
944 slot_map_views[map_id].picked = false;
945 }
946}
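The ForEachImageInRegion* walkers share one visitor protocol: a callback that returns bool can stop the walk early, while a void callback always visits every candidate, and the choice is made at compile time from the callback's return type. A minimal self-contained sketch of that dispatch, using toy types rather than the cache's own:

    #include <type_traits>
    #include <vector>

    template <typename Func>
    void ForEach(const std::vector<int>& items, Func&& func) {
        // Select early-exit or plain iteration from the callback's return type
        using FuncReturn = std::invoke_result_t<Func, int>;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        for (const int item : items) {
            if constexpr (BOOL_BREAK) {
                if (func(item)) {
                    return; // Returning true requests an early stop
                }
            } else {
                func(item);
            }
        }
    }

The Picked flag in the real walker serves a second purpose: an image that spans several pages is visited only once, and the flag is cleared again once the walk finishes.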
323
324 /// Prepare an image to be used
325 void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
947
948template <class P>
949template <typename Func>
950void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
951 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
952 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
953 boost::container::small_vector<ImageId, 8> images;
954 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
955 const auto it = gpu_page_table.find(page);
956 if (it == gpu_page_table.end()) {
957 if constexpr (BOOL_BREAK) {
958 return false;
959 } else {
960 return;
961 }
962 }
963 for (const ImageId image_id : it->second) {
964 Image& image = slot_images[image_id];
965 if (True(image.flags & ImageFlagBits::Picked)) {
966 continue;
967 }
968 if (!image.OverlapsGPU(gpu_addr, size)) {
969 continue;
970 }
971 image.flags |= ImageFlagBits::Picked;
972 images.push_back(image_id);
973 if constexpr (BOOL_BREAK) {
974 if (func(image_id, image)) {
975 return true;
976 }
977 } else {
978 func(image_id, image);
979 }
980 }
981 if constexpr (BOOL_BREAK) {
982 return false;
983 }
984 });
985 for (const ImageId image_id : images) {
986 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
987 }
988}
326
327 /// Prepare an image view to be used
328 void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
989
990template <class P>
991template <typename Func>
992void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
993 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
994 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
995 boost::container::small_vector<ImageId, 8> images;
996 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
997 const auto it = sparse_page_table.find(page);
998 if (it == sparse_page_table.end()) {
999 if constexpr (BOOL_BREAK) {
1000 return false;
1001 } else {
1002 return;
1003 }
1004 }
1005 for (const ImageId image_id : it->second) {
1006 Image& image = slot_images[image_id];
1007 if (True(image.flags & ImageFlagBits::Picked)) {
1008 continue;
1009 }
1010 if (!image.OverlapsGPU(gpu_addr, size)) {
1011 continue;
1012 }
1013 image.flags |= ImageFlagBits::Picked;
1014 images.push_back(image_id);
1015 if constexpr (BOOL_BREAK) {
1016 if (func(image_id, image)) {
1017 return true;
1018 }
1019 } else {
1020 func(image_id, image);
1021 }
1022 }
1023 if constexpr (BOOL_BREAK) {
1024 return false;
1025 }
1026 });
1027 for (const ImageId image_id : images) {
1028 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1029 }
1030}
329
330 /// Execute copies from one image to the other, even if they are incompatible
331 void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
1031
1032template <class P>
1033template <typename Func>
1034void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1035 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1036 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1037 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1038 for (auto& segment : segments) {
1039 const auto gpu_addr = segment.first;
1040 const auto size = segment.second;
1041 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1042 ASSERT(cpu_addr);
1043 if constexpr (RETURNS_BOOL) {
1044 if (func(gpu_addr, *cpu_addr, size)) {
1045 break;
1046 }
1047 } else {
1048 func(gpu_addr, *cpu_addr, size);
1049 }
1050 }
1051}
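ForEachSparseSegment visits only the populated mappings of a sparse image and translates each one to its CPU mirror. A rough sketch of the same walk, where ToyMemoryManager is a stand-in that only mimics the GetSubmappedRange/GpuToCpuAddress calls used above:

    #include <cstddef>
    #include <cstdint>
    #include <optional>
    #include <utility>
    #include <vector>

    using GPUVAddr = std::uint64_t;
    using VAddr = std::uint64_t;

    struct ToyMemoryManager {
        // Return only the (gpu_addr, size) ranges that are actually backed
        std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr, std::size_t) {
            return {{0x10000, 0x800}, {0x30000, 0x400}};
        }
        std::optional<VAddr> GpuToCpuAddress(GPUVAddr gpu_addr) {
            return gpu_addr + 0x100000; // toy translation
        }
    };

    template <typename Func>
    void ForEachSegment(ToyMemoryManager& mm, GPUVAddr addr, std::size_t size, Func&& func) {
        for (const auto& [seg_addr, seg_size] : mm.GetSubmappedRange(addr, size)) {
            if (const auto cpu_addr = mm.GpuToCpuAddress(seg_addr)) {
                func(seg_addr, *cpu_addr, seg_size);
            }
        }
    }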
332
333 /// Bind an image view as render target, downloading resources preemptively if needed
334 void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
1052
1053template <class P>
1054ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1055 Image& image = slot_images[image_id];
1056 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1057 return image_view_id;
1058 }
1059 const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1060 image.InsertView(info, image_view_id);
1061 return image_view_id;
1062}
335
336 /// Create a render target from a given image and image view parameters
337 [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
338 ImageId, const ImageViewInfo& view_info);
1063
1064template <class P>
1065void TextureCache<P>::RegisterImage(ImageId image_id) {
1066 ImageBase& image = slot_images[image_id];
1067 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1068 "Trying to register an already registered image");
1069 image.flags |= ImageFlagBits::Registered;
1070 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1071 if ((IsPixelFormatASTC(image.info.format) &&
1072 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1073 True(image.flags & ImageFlagBits::Converted)) {
1074 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1075 }
1076 total_used_memory += Common::AlignUp(tentative_size, 1024);
1077 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1078 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1079 if (False(image.flags & ImageFlagBits::Sparse)) {
1080 auto map_id =
1081 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
1082 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
1083 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1084 image.map_view_id = map_id;
1085 return;
1086 }
1087 std::vector<ImageViewId> sparse_maps{};
1088 ForEachSparseSegment(
1089 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1090 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1091 ForEachCPUPage(cpu_addr, size,
1092 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1093 sparse_maps.push_back(map_id);
1094 });
1095 sparse_views.emplace(image_id, std::move(sparse_maps));
1096 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1097 [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
1098}
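RegisterImage shards an image's address range across 1 MiB page buckets so that later region queries only scan buckets the range can touch. The bucket bookkeeping in isolation, with plain standard types and hypothetical Register/Unregister helpers (Unregister mirrors the UNREACHABLE-guarded removal in UnregisterImage below):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    constexpr std::uint64_t PAGE_BITS = 20;

    struct IdentityHash {
        std::size_t operator()(std::uint64_t value) const noexcept {
            return static_cast<std::size_t>(value); // page indices are already well distributed
        }
    };

    using PageTable = std::unordered_map<std::uint64_t, std::vector<int>, IdentityHash>;

    void Register(PageTable& table, std::uint64_t addr, std::uint64_t size, int id) {
        const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
            table[page].push_back(id); // one bucket entry per touched page
        }
    }

    void Unregister(PageTable& table, std::uint64_t addr, std::uint64_t size, int id) {
        const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
            std::erase(table[page], id);
        }
    }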
339
340 /// Returns true if the current clear parameters clear the whole image of a given image view
341 [[nodiscard]] bool IsFullClear(ImageViewId id);
1099
1100template <class P>
1101void TextureCache<P>::UnregisterImage(ImageId image_id) {
1102 Image& image = slot_images[image_id];
1103 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1104 "Trying to unregister an image that is not registered");
1105 image.flags &= ~ImageFlagBits::Registered;
1106 image.flags &= ~ImageFlagBits::BadOverlap;
1107 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1108 if ((IsPixelFormatASTC(image.info.format) &&
1109 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1110 True(image.flags & ImageFlagBits::Converted)) {
1111 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1112 }
1113 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1114 const auto& clear_page_table =
1115 [this, image_id](
1116 u64 page,
1117 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
1118 const auto page_it = selected_page_table.find(page);
1119 if (page_it == selected_page_table.end()) {
1120 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1121 return;
1122 }
1123 std::vector<ImageId>& image_ids = page_it->second;
1124 const auto vector_it = std::ranges::find(image_ids, image_id);
1125 if (vector_it == image_ids.end()) {
1126 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1127 page << PAGE_BITS);
1128 return;
1129 }
1130 image_ids.erase(vector_it);
1131 };
1132 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1133 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
1134 if (False(image.flags & ImageFlagBits::Sparse)) {
1135 const auto map_id = image.map_view_id;
1136 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
1137 const auto page_it = page_table.find(page);
1138 if (page_it == page_table.end()) {
1139 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1140 return;
1141 }
1142 std::vector<ImageMapId>& image_map_ids = page_it->second;
1143 const auto vector_it = std::ranges::find(image_map_ids, map_id);
1144 if (vector_it == image_map_ids.end()) {
1145 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1146 page << PAGE_BITS);
1147 return;
1148 }
1149 image_map_ids.erase(vector_it);
1150 });
1151 slot_map_views.erase(map_id);
1152 return;
1153 }
1154 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1155 clear_page_table(page, sparse_page_table);
1156 });
1157 auto it = sparse_views.find(image_id);
1158 ASSERT(it != sparse_views.end());
1159 auto& sparse_maps = it->second;
1160 for (auto& map_view_id : sparse_maps) {
1161 const auto& map_range = slot_map_views[map_view_id];
1162 const VAddr cpu_addr = map_range.cpu_addr;
1163 const std::size_t size = map_range.size;
1164 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
1165 const auto page_it = page_table.find(page);
1166 if (page_it == page_table.end()) {
1167 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1168 return;
1169 }
1170 std::vector<ImageMapId>& image_map_ids = page_it->second;
1171 auto vector_it = image_map_ids.begin();
1172 while (vector_it != image_map_ids.end()) {
1173 ImageMapView& map = slot_map_views[*vector_it];
1174 if (map.image_id != image_id) {
1175 vector_it++;
1176 continue;
1177 }
1178 if (!map.picked) {
1179 map.picked = true;
1180 }
1181 vector_it = image_map_ids.erase(vector_it);
1182 }
1183 });
1184 slot_map_views.erase(map_view_id);
1185 }
1186 sparse_views.erase(it);
1187}
342
343 Runtime& runtime;
344 VideoCore::RasterizerInterface& rasterizer;
345 Tegra::Engines::Maxwell3D& maxwell3d;
346 Tegra::Engines::KeplerCompute& kepler_compute;
347 Tegra::MemoryManager& gpu_memory;
1188
1189template <class P>
1190void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
1191 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1192 image.flags |= ImageFlagBits::Tracked;
1193 if (False(image.flags & ImageFlagBits::Sparse)) {
1194 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1195 return;
1196 }
1197 if (True(image.flags & ImageFlagBits::Registered)) {
1198 auto it = sparse_views.find(image_id);
1199 ASSERT(it != sparse_views.end());
1200 auto& sparse_maps = it->second;
1201 for (auto& map_view_id : sparse_maps) {
1202 const auto& map = slot_map_views[map_view_id];
1203 const VAddr cpu_addr = map.cpu_addr;
1204 const std::size_t size = map.size;
1205 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1206 }
1207 return;
1208 }
1209 ForEachSparseSegment(image,
1210 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1211 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1212 });
1213}
348
349 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
350 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
351 std::vector<SamplerId> graphics_sampler_ids;
352 std::vector<ImageViewId> graphics_image_view_ids;
1214
1215template <class P>
1216void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
1217 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1218 image.flags &= ~ImageFlagBits::Tracked;
1219 if (False(image.flags & ImageFlagBits::Sparse)) {
1220 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1221 return;
1222 }
1223 ASSERT(True(image.flags & ImageFlagBits::Registered));
1224 auto it = sparse_views.find(image_id);
1225 ASSERT(it != sparse_views.end());
1226 auto& sparse_maps = it->second;
1227 for (auto& map_view_id : sparse_maps) {
1228 const auto& map = slot_map_views[map_view_id];
1229 const VAddr cpu_addr = map.cpu_addr;
1230 const std::size_t size = map.size;
1231 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
1232 }
1233}
353
354 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
355 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
356 std::vector<SamplerId> compute_sampler_ids;
357 std::vector<ImageViewId> compute_image_view_ids;
1234
1235template <class P>
1236void TextureCache<P>::DeleteImage(ImageId image_id) {
1237 ImageBase& image = slot_images[image_id];
1238 const GPUVAddr gpu_addr = image.gpu_addr;
1239 const auto alloc_it = image_allocs_table.find(gpu_addr);
1240 if (alloc_it == image_allocs_table.end()) {
1241 UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}",
1242 gpu_addr);
1243 return;
1244 }
1245 const ImageAllocId alloc_id = alloc_it->second;
1246 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1247 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1248 if (alloc_image_it == alloc_images.end()) {
1249 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1250 return;
1251 }
1252 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1253 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1254
1255 // Mark render targets as dirty
1256 auto& dirty = maxwell3d.dirty.flags;
1257 dirty[Dirty::RenderTargets] = true;
1258 dirty[Dirty::ZetaBuffer] = true;
1259 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1260 dirty[Dirty::ColorBuffer0 + rt] = true;
1261 }
1262 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1263 for (const ImageViewId image_view_id : image_view_ids) {
1264 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1265 if (render_targets.depth_buffer_id == image_view_id) {
1266 render_targets.depth_buffer_id = ImageViewId{};
1267 }
1268 }
1269 RemoveImageViewReferences(image_view_ids);
1270 RemoveFramebuffers(image_view_ids);
1271
1272 for (const AliasedImage& alias : image.aliased_images) {
1273 ImageBase& other_image = slot_images[alias.id];
1274 [[maybe_unused]] const size_t num_removed_aliases =
1275 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1276 return other_alias.id == image_id;
1277 });
1278 other_image.CheckAliasState();
1279 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1280 num_removed_aliases);
1281 }
1282 for (const ImageId overlap_id : image.overlapping_images) {
1283 ImageBase& other_image = slot_images[overlap_id];
1284 [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
1285 other_image.overlapping_images,
1286 [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
1287 other_image.CheckBadOverlapState();
1288 ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}",
1289 num_removed_overlaps);
1290 }
1291 for (const ImageViewId image_view_id : image_view_ids) {
1292 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1293 slot_image_views.erase(image_view_id);
1294 }
1295 sentenced_images.Push(std::move(slot_images[image_id]));
1296 slot_images.erase(image_id);
358
359 RenderTargets render_targets;
1297
1298 alloc_images.erase(alloc_image_it);
1299 if (alloc_images.empty()) {
1300 image_allocs_table.erase(alloc_it);
1301 }
1302 if constexpr (ENABLE_VALIDATION) {
1303 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1304 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1305 }
1306 graphics_image_table.Invalidate();
1307 compute_image_table.Invalidate();
1308 has_deleted_images = true;
1309}
360
361 std::unordered_map<TICEntry, ImageViewId> image_views;
362 std::unordered_map<TSCEntry, SamplerId> samplers;
363 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
1310
1311template <class P>
1312void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1313 auto it = image_views.begin();
1314 while (it != image_views.end()) {
1315 const auto found = std::ranges::find(removed_views, it->second);
1316 if (found != removed_views.end()) {
1317 it = image_views.erase(it);
1318 } else {
1319 ++it;
1320 }
1321 }
1322}
364
365 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
366 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
367 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
1323
1324template <class P>
1325void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1326 auto it = framebuffers.begin();
1327 while (it != framebuffers.end()) {
1328 if (it->first.Contains(removed_views)) {
1329 it = framebuffers.erase(it);
1330 } else {
1331 ++it;
1332 }
1333 }
1334}
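Both removal helpers above rely on the erase-while-iterating idiom: unordered_map::erase returns the iterator that follows the erased element, which keeps the loop well defined while entries are dropped in place. In general form, over a toy map:

    #include <string>
    #include <unordered_map>

    void EraseEmptyValues(std::unordered_map<int, std::string>& map) {
        for (auto it = map.begin(); it != map.end();) {
            if (it->second.empty()) {
                it = map.erase(it); // advance via the returned iterator
            } else {
                ++it;
            }
        }
    }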
368
369 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
1335
1336template <class P>
1337void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1338 image.flags |= ImageFlagBits::GpuModified;
1339 image.modification_tick = ++modification_tick;
1340}
370
371 VAddr virtual_invalid_space{};
1341
1342template <class P>
1343void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1344 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1345 ImageBase& image = slot_images[image_id];
1346 u64 most_recent_tick = image.modification_tick;
1347 for (const AliasedImage& aliased : image.aliased_images) {
1348 ImageBase& aliased_image = slot_images[aliased.id];
1349 if (image.modification_tick < aliased_image.modification_tick) {
1350 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1351 aliased_images.push_back(&aliased);
1352 }
1353 }
1354 if (aliased_images.empty()) {
1355 return;
1356 }
1357 image.modification_tick = most_recent_tick;
1358 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1359 const ImageBase& lhs_image = slot_images[lhs->id];
1360 const ImageBase& rhs_image = slot_images[rhs->id];
1361 return lhs_image.modification_tick < rhs_image.modification_tick;
1362 });
1363 for (const AliasedImage* const aliased : aliased_images) {
1364 CopyImage(image_id, aliased->id, aliased->copies);
1365 }
1366}
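SynchronizeAliases applies the pending alias copies in ascending modification order so the most recently written data lands last and wins any overlap. The ordering idea in isolation, with a hypothetical Alias record and ApplyOrder helper:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Alias {
        std::uint64_t modification_tick;
        int image_id;
    };

    std::vector<int> ApplyOrder(std::vector<Alias> pending) {
        // Oldest writers first; a texel touched by several aliases ends up
        // holding the newest copy
        std::ranges::sort(pending, {}, &Alias::modification_tick);
        std::vector<int> order;
        for (const Alias& alias : pending) {
            order.push_back(alias.image_id); // CopyImage runs in this order
        }
        return order;
    }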
372
373 bool has_deleted_images = false;
374 u64 total_used_memory = 0;
375 u64 minimum_memory;
376 u64 expected_memory;
377 u64 critical_memory;
1367
1368template <class P>
1369void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1370 Image& image = slot_images[image_id];
1371 if (invalidate) {
1372 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1373 if (False(image.flags & ImageFlagBits::Tracked)) {
1374 TrackImage(image, image_id);
1375 }
1376 } else {
1377 RefreshContents(image, image_id);
1378 SynchronizeAliases(image_id);
1379 }
1380 if (is_modification) {
1381 MarkModification(image);
1382 }
1383 image.frame_tick = frame_tick;
1384}
378
379 SlotVector<Image> slot_images;
380 SlotVector<ImageMapView> slot_map_views;
381 SlotVector<ImageView> slot_image_views;
382 SlotVector<ImageAlloc> slot_image_allocs;
383 SlotVector<Sampler> slot_samplers;
384 SlotVector<Framebuffer> slot_framebuffers;
1385
1386template <class P>
1387void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1388 bool invalidate) {
1389 if (!image_view_id) {
1390 return;
1391 }
1392 const ImageViewBase& image_view = slot_image_views[image_view_id];
1393 if (image_view.IsBuffer()) {
1394 return;
1395 }
1396 PrepareImage(image_view.image_id, is_modification, invalidate);
1397}
385
386 // TODO: This data structure is not optimal and it should be reworked
387 std::vector<ImageId> uncommitted_downloads;
388 std::queue<std::vector<ImageId>> committed_downloads;
1398
1399template <class P>
1400void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1401 Image& dst = slot_images[dst_id];
1402 Image& src = slot_images[src_id];
1403 const auto dst_format_type = GetFormatType(dst.info.format);
1404 const auto src_format_type = GetFormatType(src.info.format);
1405 if (src_format_type == dst_format_type) {
1406 if constexpr (HAS_EMULATED_COPIES) {
1407 if (!runtime.CanImageBeCopied(dst, src)) {
1408 return runtime.EmulateCopyImage(dst, src, copies);
1409 }
1410 }
1411 return runtime.CopyImage(dst, src, copies);
1412 }
1413 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1414 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1415 for (const ImageCopy& copy : copies) {
1416 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1417 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1418 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1419 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1420
1421 const SubresourceBase dst_base{
1422 .level = copy.dst_subresource.base_level,
1423 .layer = copy.dst_subresource.base_layer,
1424 };
1425 const SubresourceBase src_base{
1426 .level = copy.src_subresource.base_level,
1427 .layer = copy.src_subresource.base_layer,
1428 };
1429 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1430 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1431 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1432 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1433 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1434 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1435 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1436 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1437 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1438 ImageView& dst_view = slot_image_views[dst_view_id];
1439 ImageView& src_view = slot_image_views[src_view_id];
1440 [[maybe_unused]] const Extent3D expected_size{
1441 .width = std::min(dst_view.size.width, src_view.size.width),
1442 .height = std::min(dst_view.size.height, src_view.size.height),
1443 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1444 };
1445 UNIMPLEMENTED_IF(copy.extent != expected_size);
1446
1447 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1448 }
1449}
389
390 static constexpr size_t TICKS_TO_DESTROY = 6;
391 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
392 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
393 DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
1450
1451template <class P>
1452void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1453 if (*old_id == new_id) {
1454 return;
1455 }
1456 if (*old_id) {
1457 const ImageViewBase& old_view = slot_image_views[*old_id];
1458 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1459 uncommitted_downloads.push_back(old_view.image_id);
1460 }
1461 }
1462 *old_id = new_id;
1463}
394
395 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
1464
1465template <class P>
1466std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1467 ImageId image_id, const ImageViewInfo& view_info) {
1468 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1469 const ImageBase& image = slot_images[image_id];
1470 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1471 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1472 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1473 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1474 const u32 num_samples = image.info.num_samples;
1475 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1476 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1477 .color_buffer_ids = {color_view_id},
1478 .depth_buffer_id = depth_view_id,
1479 .size = {extent.width >> samples_x, extent.height >> samples_y},
1480 });
1481 return {framebuffer_id, view_id};
1482}
396
397 u64 modification_tick = 0;
398 u64 frame_tick = 0;
399 typename SlotVector<Image>::Iterator deletion_iterator;
400};
1483
1484template <class P>
1485bool TextureCache<P>::IsFullClear(ImageViewId id) {
1486 if (!id) {
1487 return true;
1488 }
1489 const ImageViewBase& image_view = slot_image_views[id];
1490 const ImageBase& image = slot_images[image_view.image_id];
1491 const Extent3D size = image_view.size;
1492 const auto& regs = maxwell3d.regs;
1493 const auto& scissor = regs.scissor_test[0];
1494 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1495 // Images with multiple resources can't be cleared in a single call
1496 return false;
1497 }
1498 if (regs.clear_flags.scissor == 0) {
1499 // If scissor testing is disabled, the clear is always full
1500 return true;
1501 }
1502 // Make sure the clear covers all texels in the subresource
1503 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1504 scissor.max_y >= size.height;
1505}
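The predicate above reduces to two cases once multi-level and multi-layer images are excluded: scissor testing disabled, or a scissor rectangle covering every texel. The same check over plain structs, as a minimal sketch:

    #include <cstdint>

    struct Extent2D {
        std::uint32_t width;
        std::uint32_t height;
    };

    struct Scissor {
        std::uint32_t min_x, min_y, max_x, max_y;
    };

    bool CoversWholeTarget(bool scissor_enabled, const Scissor& s, const Extent2D& size) {
        if (!scissor_enabled) {
            return true; // clears are unclipped without scissor testing
        }
        return s.min_x == 0 && s.min_y == 0 && s.max_x >= size.width && s.max_y >= size.height;
    }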
401
402} // namespace VideoCommon
1506
1507} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
new file mode 100644
index 000000000..a4f6e9422
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -0,0 +1,402 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include <algorithm>
8#include <array>
9#include <bit>
10#include <memory>
11#include <mutex>
12#include <optional>
13#include <span>
14#include <type_traits>
15#include <unordered_map>
16#include <unordered_set>
17#include <utility>
18#include <vector>
19
20#include <boost/container/small_vector.hpp>
21
22#include "common/alignment.h"
23#include "common/common_types.h"
24#include "common/literals.h"
25#include "common/logging/log.h"
26#include "common/settings.h"
27#include "video_core/compatible_formats.h"
28#include "video_core/delayed_destruction_ring.h"
29#include "video_core/dirty_flags.h"
30#include "video_core/engines/fermi_2d.h"
31#include "video_core/engines/kepler_compute.h"
32#include "video_core/engines/maxwell_3d.h"
33#include "video_core/memory_manager.h"
34#include "video_core/rasterizer_interface.h"
35#include "video_core/surface.h"
36#include "video_core/texture_cache/descriptor_table.h"
37#include "video_core/texture_cache/format_lookup_table.h"
38#include "video_core/texture_cache/formatter.h"
39#include "video_core/texture_cache/image_base.h"
40#include "video_core/texture_cache/image_info.h"
41#include "video_core/texture_cache/image_view_base.h"
42#include "video_core/texture_cache/image_view_info.h"
43#include "video_core/texture_cache/render_targets.h"
44#include "video_core/texture_cache/samples_helper.h"
45#include "video_core/texture_cache/slot_vector.h"
46#include "video_core/texture_cache/types.h"
47#include "video_core/texture_cache/util.h"
48#include "video_core/textures/texture.h"
49
50namespace VideoCommon {
51
52using Tegra::Texture::SwizzleSource;
53using Tegra::Texture::TextureType;
54using Tegra::Texture::TICEntry;
55using Tegra::Texture::TSCEntry;
56using VideoCore::Surface::GetFormatType;
57using VideoCore::Surface::IsCopyCompatible;
58using VideoCore::Surface::PixelFormat;
59using VideoCore::Surface::PixelFormatFromDepthFormat;
60using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
61using VideoCore::Surface::SurfaceType;
62using namespace Common::Literals;
63
64template <class P>
65class TextureCache {
66 /// Address shift for caching images into a hash table
67 static constexpr u64 PAGE_BITS = 20;
68
69 /// Enables debugging features to the texture cache
70 static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
71 /// Implement blits as copies between framebuffers
72 static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
73 /// True when some copies have to be emulated
74 static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
75 /// True when the API can provide info about the memory of the device.
76 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
77
78 /// Image view ID for null descriptors
79 static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
80 /// Sampler ID returned for invalid or out-of-bounds sampler indices
81 static constexpr SamplerId NULL_SAMPLER_ID{0};
82
83 static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
84 static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
85
86 using Runtime = typename P::Runtime;
87 using Image = typename P::Image;
88 using ImageAlloc = typename P::ImageAlloc;
89 using ImageView = typename P::ImageView;
90 using Sampler = typename P::Sampler;
91 using Framebuffer = typename P::Framebuffer;
92
93 struct BlitImages {
94 ImageId dst_id;
95 ImageId src_id;
96 PixelFormat dst_format;
97 PixelFormat src_format;
98 };
99
100 template <typename T>
101 struct IdentityHash {
102 [[nodiscard]] size_t operator()(T value) const noexcept {
103 return static_cast<size_t>(value);
104 }
105 };
106
107public:
108 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
109 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
110
111 /// Notify the cache that a new frame has been queued
112 void TickFrame();
113
114 /// Return a constant reference to the given image view id
115 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
116
117 /// Return a reference to the given image view id
118 [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
119
120 /// Mark an image as modified from the GPU
121 void MarkModification(ImageId id) noexcept;
122
123 /// Fill image_view_ids with the graphics images in indices
124 void FillGraphicsImageViews(std::span<const u32> indices,
125 std::span<ImageViewId> image_view_ids);
126
127 /// Fill image_view_ids with the compute images in indices
128 void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
129
130 /// Get the sampler from the graphics descriptor table in the specified index
131 Sampler* GetGraphicsSampler(u32 index);
132
133 /// Get the sampler from the compute descriptor table in the specified index
134 Sampler* GetComputeSampler(u32 index);
135
136 /// Refresh the state for graphics image view and sampler descriptors
137 void SynchronizeGraphicsDescriptors();
138
139 /// Refresh the state for compute image view and sampler descriptors
140 void SynchronizeComputeDescriptors();
141
142 /// Update bound render targets and upload memory if necessary
143 /// @param is_clear True when the render targets are being used for clears
144 void UpdateRenderTargets(bool is_clear);
145
146 /// Find a framebuffer with the currently bound render targets
147 /// UpdateRenderTargets should be called before this
148 Framebuffer* GetFramebuffer();
149
150 /// Mark images in a range as modified from the CPU
151 void WriteMemory(VAddr cpu_addr, size_t size);
152
153 /// Download contents of host images to guest memory in a region
154 void DownloadMemory(VAddr cpu_addr, size_t size);
155
156 /// Remove images in a region
157 void UnmapMemory(VAddr cpu_addr, size_t size);
158
159 /// Mark images in a GPU address region as remapped
160 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
161
162 /// Blit an image with the given parameters
163 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
164 const Tegra::Engines::Fermi2D::Surface& src,
165 const Tegra::Engines::Fermi2D::Config& copy);
166
167 /// Try to find a cached image view in the given CPU address
168 [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
169
170 /// Return true when there are uncommitted images to be downloaded
171 [[nodiscard]] bool HasUncommittedFlushes() const noexcept;
172
173 /// Return true when the caller should wait for async downloads
174 [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
175
176 /// Commit asynchronous downloads
177 void CommitAsyncFlushes();
178
179 /// Pop asynchronous downloads
180 void PopAsyncFlushes();
181
182 /// Return true when a CPU region is modified from the GPU
183 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
184
185 std::mutex mutex;
186
187private:
188 /// Iterate over all page indices in a range
189 template <typename Func>
190 static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
191 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
192 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
193 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
194 if constexpr (RETURNS_BOOL) {
195 if (func(page)) {
196 break;
197 }
198 } else {
199 func(page);
200 }
201 }
202 }
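With PAGE_BITS = 20 each bucket covers 1 MiB, and a range touches every page between addr >> PAGE_BITS and (addr + size - 1) >> PAGE_BITS inclusive. A worked check with made-up numbers:

    #include <cstdint>

    int main() {
        constexpr std::uint64_t PAGE_BITS = 20;
        constexpr std::uint64_t addr = 0x0FFF'F800; // range straddles a page boundary
        constexpr std::uint64_t size = 0x2000;
        static_assert((addr >> PAGE_BITS) == 0xFF);               // first touched page
        static_assert(((addr + size - 1) >> PAGE_BITS) == 0x100); // last touched page
    }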
203
204 template <typename Func>
205 static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
206 static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
207 const u64 page_end = (addr + size - 1) >> PAGE_BITS;
208 for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
209 if constexpr (RETURNS_BOOL) {
210 if (func(page)) {
211 break;
212 }
213 } else {
214 func(page);
215 }
216 }
217 }
218
219 /// Runs the Garbage Collector.
220 void RunGarbageCollector();
221
222 /// Fill image_view_ids with the image views referenced by the given indices
223 void FillImageViews(DescriptorTable<TICEntry>& table,
224 std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
225 std::span<ImageViewId> image_view_ids);
226
227 /// Find or create an image view in the guest descriptor table
228 ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
229 std::span<ImageViewId> cached_image_view_ids, u32 index);
230
231 /// Find or create a framebuffer with the given render target parameters
232 FramebufferId GetFramebufferId(const RenderTargets& key);
233
234 /// Refresh the contents (pixel data) of an image
235 void RefreshContents(Image& image, ImageId image_id);
236
237 /// Upload data from guest to an image
238 template <typename StagingBuffer>
239 void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
240
241 /// Find or create an image view from a guest descriptor
242 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
243
244 /// Create a new image view from a guest descriptor
245 [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
246
247 /// Find or create an image from the given parameters
248 [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
249 RelaxedOptions options = RelaxedOptions{});
250
251 /// Find an image from the given parameters
252 [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
253 RelaxedOptions options);
254
255 /// Create an image from the given parameters
256 [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
257 RelaxedOptions options);
258
259 /// Create a new image and join perfectly matching existing images
260 /// Remove joined images from the cache
261 [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
262
263 /// Return a blit image pair from the given guest blit parameters
264 [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
265 const Tegra::Engines::Fermi2D::Surface& src);
266
267 /// Find or create a sampler from a guest descriptor sampler
268 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
269
270 /// Find or create an image view for the given color buffer index
271 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
272
273 /// Find or create an image view for the depth buffer
274 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
275
276 /// Find or create a view for a render target with the given image parameters
277 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
278 bool is_clear);
279
280 /// Iterates over all the images in a region calling func
281 template <typename Func>
282 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
283
284 template <typename Func>
285 void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
286
287 template <typename Func>
288 void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
289
290 /// Iterates over all the sparse segments of an image calling func
291 template <typename Func>
292 void ForEachSparseSegment(ImageBase& image, Func&& func);
293
294 /// Find or create an image view in the given image with the passed parameters
295 [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
296
297 /// Register image in the page table
298 void RegisterImage(ImageId image);
299
300 /// Unregister image from the page table
301 void UnregisterImage(ImageId image);
302
303 /// Track CPU reads and writes for image
304 void TrackImage(ImageBase& image, ImageId image_id);
305
306 /// Stop tracking CPU reads and writes for image
307 void UntrackImage(ImageBase& image, ImageId image_id);
308
309 /// Delete image from the cache
310 void DeleteImage(ImageId image);
311
312 /// Remove image view references from the cache
313 void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
314
315 /// Remove framebuffers using the given image views from the cache
316 void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
317
318 /// Mark an image as modified from the GPU
319 void MarkModification(ImageBase& image) noexcept;
320
321 /// Synchronize image aliases, copying data if needed
322 void SynchronizeAliases(ImageId image_id);
323
324 /// Prepare an image to be used
325 void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
326
327 /// Prepare an image view to be used
328 void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
329
330 /// Execute copies from one image to the other, even if they are incompatible
331 void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
332
333 /// Bind an image view as render target, downloading resources preemptively if needed
334 void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
335
336 /// Create a render target from a given image and image view parameters
337 [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
338 ImageId, const ImageViewInfo& view_info);
339
340 /// Returns true if the current clear parameters clear the whole image of a given image view
341 [[nodiscard]] bool IsFullClear(ImageViewId id);
342
343 Runtime& runtime;
344 VideoCore::RasterizerInterface& rasterizer;
345 Tegra::Engines::Maxwell3D& maxwell3d;
346 Tegra::Engines::KeplerCompute& kepler_compute;
347 Tegra::MemoryManager& gpu_memory;
348
349 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
350 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
351 std::vector<SamplerId> graphics_sampler_ids;
352 std::vector<ImageViewId> graphics_image_view_ids;
353
354 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
355 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
356 std::vector<SamplerId> compute_sampler_ids;
357 std::vector<ImageViewId> compute_image_view_ids;
358
359 RenderTargets render_targets;
360
361 std::unordered_map<TICEntry, ImageViewId> image_views;
362 std::unordered_map<TSCEntry, SamplerId> samplers;
363 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
364
365 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
366 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
367 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
368
369 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
370
371 VAddr virtual_invalid_space{};
372
373 bool has_deleted_images = false;
374 u64 total_used_memory = 0;
375 u64 minimum_memory;
376 u64 expected_memory;
377 u64 critical_memory;
378
379 SlotVector<Image> slot_images;
380 SlotVector<ImageMapView> slot_map_views;
381 SlotVector<ImageView> slot_image_views;
382 SlotVector<ImageAlloc> slot_image_allocs;
383 SlotVector<Sampler> slot_samplers;
384 SlotVector<Framebuffer> slot_framebuffers;
385
386 // TODO: This data structure is not optimal and it should be reworked
387 std::vector<ImageId> uncommitted_downloads;
388 std::queue<std::vector<ImageId>> committed_downloads;
389
390 static constexpr size_t TICKS_TO_DESTROY = 6;
391 DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
392 DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
393 DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
394
395 std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
396
397 u64 modification_tick = 0;
398 u64 frame_tick = 0;
399 typename SlotVector<Image>::Iterator deletion_iterator;
400};
401
402} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h
deleted file mode 100644
index 8440d23d1..000000000
--- a/src/video_core/texture_cache/texture_cache_templates.h
+++ /dev/null
@@ -1,1507 +0,0 @@
1// Copyright 2021 yuzu Emulator Project
2// Licensed under GPLv2 or any later version
3// Refer to the license.txt file included.
4
5#pragma once
6
7#include "video_core/texture_cache/texture_cache.h"
8
9namespace VideoCommon {
10
11using Tegra::Texture::SwizzleSource;
12using Tegra::Texture::TextureType;
13using Tegra::Texture::TICEntry;
14using Tegra::Texture::TSCEntry;
15using VideoCore::Surface::GetFormatType;
16using VideoCore::Surface::IsCopyCompatible;
17using VideoCore::Surface::PixelFormat;
18using VideoCore::Surface::PixelFormatFromDepthFormat;
19using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
20using VideoCore::Surface::SurfaceType;
21using namespace Common::Literals;
22
23template <class P>
24TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
25 Tegra::Engines::Maxwell3D& maxwell3d_,
26 Tegra::Engines::KeplerCompute& kepler_compute_,
27 Tegra::MemoryManager& gpu_memory_)
28 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
29 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
30 // Configure null sampler
31 TSCEntry sampler_descriptor{};
32 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
33 sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear);
34 sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
35 sampler_descriptor.cubemap_anisotropy.Assign(1);
36
37 // Make sure the first index is reserved for the null resources
38 // This way the null resource becomes a compile time constant
39 void(slot_image_views.insert(runtime, NullImageParams{}));
40 void(slot_samplers.insert(runtime, sampler_descriptor));
41
42 deletion_iterator = slot_images.begin();
43
44 if constexpr (HAS_DEVICE_MEMORY_INFO) {
45 const auto device_memory = runtime.GetDeviceLocalMemory();
46 const u64 possible_expected_memory = (device_memory * 3) / 10;
47 const u64 possible_critical_memory = (device_memory * 6) / 10;
48 expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
49 critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
50 minimum_memory = 0;
51 } else {
52 // On OpenGL we can be more conservative, as the driver takes care of memory management.
53 expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
54 critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
55 minimum_memory = expected_memory;
56 }
57}
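The constructor derives its garbage-collection thresholds from the device-local heap when the backend reports it: start collecting at 30% usage, collect aggressively at 60%, and never drop below the 1 GiB / 2 GiB defaults. The same arithmetic in isolation, with a hypothetical ComputeBudgets helper:

    #include <algorithm>
    #include <cstdint>

    using u64 = std::uint64_t;
    constexpr u64 DEFAULT_EXPECTED_MEMORY = u64{1} << 30; // 1 GiB
    constexpr u64 DEFAULT_CRITICAL_MEMORY = u64{2} << 30; // 2 GiB

    void ComputeBudgets(u64 device_memory, u64& expected, u64& critical) {
        expected = std::max((device_memory * 3) / 10, DEFAULT_EXPECTED_MEMORY);
        critical = std::max((device_memory * 6) / 10, DEFAULT_CRITICAL_MEMORY);
    }

For an 8 GiB card this yields roughly 2.4 GiB and 4.8 GiB budgets.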
58
59template <class P>
60void TextureCache<P>::RunGarbageCollector() {
61 const bool high_priority_mode = total_used_memory >= expected_memory;
62 const bool aggressive_mode = total_used_memory >= critical_memory;
63 const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
64 int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
65 for (; num_iterations > 0; --num_iterations) {
66 if (deletion_iterator == slot_images.end()) {
67 deletion_iterator = slot_images.begin();
68 if (deletion_iterator == slot_images.end()) {
69 break;
70 }
71 }
72 auto [image_id, image_tmp] = *deletion_iterator;
73 Image* image = image_tmp; // Work around clang rejecting structured-binding captures in lambdas.
74 const bool is_alias = True(image->flags & ImageFlagBits::Alias);
75 const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
76 const bool must_download = image->IsSafeDownload();
77 bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
78 const u64 ticks_needed =
79 is_bad_overlap
80 ? ticks_to_destroy >> 4
81 : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
82 should_care |= aggressive_mode;
83 if (should_care && image->frame_tick + ticks_needed < frame_tick) {
84 if (is_bad_overlap) {
85 const bool overlap_check = std::ranges::all_of(
86 image->overlapping_images, [&, image](const ImageId& overlap_id) {
87 auto& overlap = slot_images[overlap_id];
88 return overlap.frame_tick >= image->frame_tick;
89 });
90 if (!overlap_check) {
91 ++deletion_iterator;
92 continue;
93 }
94 }
95 if (!is_bad_overlap && must_download) {
96 const bool alias_check = std::ranges::none_of(
97 image->aliased_images, [&, image](const AliasedImage& alias) {
98 auto& alias_image = slot_images[alias.id];
99 return (alias_image.frame_tick < image->frame_tick) ||
100 (alias_image.modification_tick < image->modification_tick);
101 });
102
103 if (alias_check) {
104 auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
105 const auto copies = FullDownloadCopies(image->info);
106 image->DownloadMemory(map, copies);
107 runtime.Finish();
108 SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
109 }
110 }
111 if (True(image->flags & ImageFlagBits::Tracked)) {
112 UntrackImage(*image, image_id);
113 }
114 UnregisterImage(image_id);
115 DeleteImage(image_id);
116 if (is_bad_overlap) {
117 ++num_iterations;
118 }
119 }
120 ++deletion_iterator;
121 }
122}
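The collector's eviction deadline shrinks under memory pressure: the base window is 60 ticks in high-priority mode (100 otherwise), bad overlaps are evicted about sixteen times sooner, and images the collector cares about lose half the window in aggressive mode. The same policy isolated in a hypothetical TicksNeeded helper:

    #include <cstdint>

    std::uint64_t TicksNeeded(bool high_priority, bool aggressive, bool bad_overlap,
                              bool should_care) {
        const std::uint64_t ticks_to_destroy = high_priority ? 60 : 100;
        if (bad_overlap) {
            return ticks_to_destroy >> 4; // ~16x sooner
        }
        return (should_care && aggressive) ? ticks_to_destroy >> 1 : ticks_to_destroy;
    }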
123
124template <class P>
125void TextureCache<P>::TickFrame() {
126 if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
127 RunGarbageCollector();
128 }
129 sentenced_images.Tick();
130 sentenced_framebuffers.Tick();
131 sentenced_image_view.Tick();
132 ++frame_tick;
133}
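The sentenced_* members ticked above delay destruction until the GPU can no longer reference a resource. A minimal sketch of the ring's assumed shape (the project's real DelayedDestructionRing may differ): objects pushed now are destroyed TICKS frames later, when the ring wraps back to their bucket.

    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    template <typename T, std::size_t TICKS>
    class DestructionRing {
    public:
        void Tick() {
            index = (index + 1) % TICKS;
            buckets[index].clear(); // destroys objects pushed TICKS ticks ago
        }
        void Push(T&& object) {
            buckets[index].push_back(std::move(object));
        }

    private:
        std::size_t index = 0;
        std::array<std::vector<T>, TICKS> buckets;
    };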
134
135template <class P>
136const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
137 return slot_image_views[id];
138}
139
140template <class P>
141typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept {
142 return slot_image_views[id];
143}
144
145template <class P>
146void TextureCache<P>::MarkModification(ImageId id) noexcept {
147 MarkModification(slot_images[id]);
148}
149
150template <class P>
151void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices,
152 std::span<ImageViewId> image_view_ids) {
153 FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids);
154}
155
156template <class P>
157void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices,
158 std::span<ImageViewId> image_view_ids) {
159 FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids);
160}
161
162template <class P>
163typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
164 if (index > graphics_sampler_table.Limit()) {
165 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
166 return &slot_samplers[NULL_SAMPLER_ID];
167 }
168 const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
169 SamplerId& id = graphics_sampler_ids[index];
170 if (is_new) {
171 id = FindSampler(descriptor);
172 }
173 return &slot_samplers[id];
174}
175
176template <class P>
177typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
178 if (index > compute_sampler_table.Limit()) {
179 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
180 return &slot_samplers[NULL_SAMPLER_ID];
181 }
182 const auto [descriptor, is_new] = compute_sampler_table.Read(index);
183 SamplerId& id = compute_sampler_ids[index];
184 if (is_new) {
185 id = FindSampler(descriptor);
186 }
187 return &slot_samplers[id];
188}
189
190template <class P>
191void TextureCache<P>::SynchronizeGraphicsDescriptors() {
192 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
193 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
194 const u32 tic_limit = maxwell3d.regs.tic.limit;
195 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
196 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
197 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
198 }
199 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
200 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
201 }
202}
203
204template <class P>
205void TextureCache<P>::SynchronizeComputeDescriptors() {
206 const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
207 const u32 tic_limit = kepler_compute.regs.tic.limit;
208 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
209 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
210 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
211 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
212 }
213 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
214 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
215 }
216}
217
218template <class P>
219void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
220 using namespace VideoCommon::Dirty;
221 auto& flags = maxwell3d.dirty.flags;
222 if (!flags[Dirty::RenderTargets]) {
223 for (size_t index = 0; index < NUM_RT; ++index) {
224 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
225 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
226 }
227 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
228 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
229 return;
230 }
231 flags[Dirty::RenderTargets] = false;
232
233 // Render target control affects all render targets, so force lookups when it is dirty
234 const bool force = flags[Dirty::RenderTargetControl];
235 flags[Dirty::RenderTargetControl] = false;
236
237 for (size_t index = 0; index < NUM_RT; ++index) {
238 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
239 if (flags[Dirty::ColorBuffer0 + index] || force) {
240 flags[Dirty::ColorBuffer0 + index] = false;
241 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear));
242 }
243 PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id));
244 }
245 if (flags[Dirty::ZetaBuffer] || force) {
246 flags[Dirty::ZetaBuffer] = false;
247 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear));
248 }
249 const ImageViewId depth_buffer_id = render_targets.depth_buffer_id;
250 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
251
252 for (size_t index = 0; index < NUM_RT; ++index) {
253 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
254 }
255 render_targets.size = Extent2D{
256 maxwell3d.regs.render_area.width,
257 maxwell3d.regs.render_area.height,
258 };
259}
260
261template <class P>
262typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
263 return &slot_framebuffers[GetFramebufferId(render_targets)];
264}
265
266template <class P>
267void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
268 std::span<ImageViewId> cached_image_view_ids,
269 std::span<const u32> indices,
270 std::span<ImageViewId> image_view_ids) {
271 ASSERT(indices.size() <= image_view_ids.size());
272 do {
273 has_deleted_images = false;
274 std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) {
275 return VisitImageView(table, cached_image_view_ids, index);
276 });
277 } while (has_deleted_images);
278}
279
280template <class P>
281ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table,
282 std::span<ImageViewId> cached_image_view_ids,
283 u32 index) {
284 if (index > table.Limit()) {
285 LOG_DEBUG(HW_GPU, "Invalid image view index={}", index);
286 return NULL_IMAGE_VIEW_ID;
287 }
288 const auto [descriptor, is_new] = table.Read(index);
289 ImageViewId& image_view_id = cached_image_view_ids[index];
290 if (is_new) {
291 image_view_id = FindImageView(descriptor);
292 }
293 if (image_view_id != NULL_IMAGE_VIEW_ID) {
294 PrepareImageView(image_view_id, false, false);
295 }
296 return image_view_id;
297}
298
299template <class P>
300FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) {
301 const auto [pair, is_new] = framebuffers.try_emplace(key);
302 FramebufferId& framebuffer_id = pair->second;
303 if (!is_new) {
304 return framebuffer_id;
305 }
306 std::array<ImageView*, NUM_RT> color_buffers;
307 std::ranges::transform(key.color_buffer_ids, color_buffers.begin(),
308 [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; });
309 ImageView* const depth_buffer =
310 key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr;
311 framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key);
312 return framebuffer_id;
313}
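GetFramebufferId leans on try_emplace to do the lookup and the reservation with a single hash: the returned flag says whether the slot is new, so the expensive construction runs at most once per key. The pattern in general form, with a stand-in build step:

    #include <string>
    #include <unordered_map>

    int GetOrBuild(std::unordered_map<std::string, int>& cache, const std::string& key) {
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = static_cast<int>(key.size()); // stand-in for the expensive build
        }
        return it->second;
    }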
314
315template <class P>
316void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
317 ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) {
318 if (True(image.flags & ImageFlagBits::CpuModified)) {
319 return;
320 }
321 image.flags |= ImageFlagBits::CpuModified;
322 if (True(image.flags & ImageFlagBits::Tracked)) {
323 UntrackImage(image, image_id);
324 }
325 });
326}
327
328template <class P>
329void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
330 std::vector<ImageId> images;
331 ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) {
332 if (!image.IsSafeDownload()) {
333 return;
334 }
335 image.flags &= ~ImageFlagBits::GpuModified;
336 images.push_back(image_id);
337 });
338 if (images.empty()) {
339 return;
340 }
341 std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) {
342 return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
343 });
344 for (const ImageId image_id : images) {
345 Image& image = slot_images[image_id];
346 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
347 const auto copies = FullDownloadCopies(image.info);
348 image.DownloadMemory(map, copies);
349 runtime.Finish();
350 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
351 }
352}
353
354template <class P>
355void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
356 std::vector<ImageId> deleted_images;
357 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
358 for (const ImageId id : deleted_images) {
359 Image& image = slot_images[id];
360 if (True(image.flags & ImageFlagBits::Tracked)) {
361 UntrackImage(image, id);
362 }
363 UnregisterImage(id);
364 DeleteImage(id);
365 }
366}
367
368template <class P>
369void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
370 std::vector<ImageId> deleted_images;
371 ForEachImageInRegionGPU(gpu_addr, size,
372 [&](ImageId id, Image&) { deleted_images.push_back(id); });
373 for (const ImageId id : deleted_images) {
374 Image& image = slot_images[id];
375 if (True(image.flags & ImageFlagBits::Remapped)) {
376 continue;
377 }
378 image.flags |= ImageFlagBits::Remapped;
379 if (True(image.flags & ImageFlagBits::Tracked)) {
380 UntrackImage(image, id);
381 }
382 }
383}
384
385template <class P>
386void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
387 const Tegra::Engines::Fermi2D::Surface& src,
388 const Tegra::Engines::Fermi2D::Config& copy) {
389 const BlitImages images = GetBlitImages(dst, src);
390 const ImageId dst_id = images.dst_id;
391 const ImageId src_id = images.src_id;
392 PrepareImage(src_id, false, false);
393 PrepareImage(dst_id, true, false);
394
395 ImageBase& dst_image = slot_images[dst_id];
396 const ImageBase& src_image = slot_images[src_id];
397
398 // TODO: Deduplicate
399 const std::optional src_base = src_image.TryFindBase(src.Address());
400 const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
401 const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
402 const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
403 const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples);
404 const Region2D src_region{
405 Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y},
406 Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
407 };
408
409 const std::optional dst_base = dst_image.TryFindBase(dst.Address());
410 const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}};
411 const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range);
412 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
413 const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples);
414 const Region2D dst_region{
415 Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y},
416 Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y},
417 };
418
419    // Always call this after src_framebuffer_id has been queried, as the address might be invalidated.
420 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
421 if constexpr (FRAMEBUFFER_BLITS) {
422 // OpenGL blits from framebuffers, not images
423 Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id];
424 runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region,
425 copy.filter, copy.operation);
426 } else {
427 // Vulkan can blit images, but it lacks format reinterpretations
428 // Provide a framebuffer in case it's necessary
429 ImageView& dst_view = slot_image_views[dst_view_id];
430 ImageView& src_view = slot_image_views[src_view_id];
431 runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter,
432 copy.operation);
433 }
434}
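
// A minimal sketch of the coordinate shift BlitImage performs, assuming the
// common Maxwell sample layouts (2x = 2x1, 4x = 2x2, 8x = 4x2, 16x = 4x4); the
// real mapping lives in SamplesLog2. Blit coordinates arrive in sample space and
// are shifted down by the per-axis log2 sample count to land in pixel space.
#include <utility>

constexpr std::pair<int, int> SketchSamplesLog2(int num_samples) {
    switch (num_samples) {
    case 2:
        return {1, 0};
    case 4:
        return {1, 1};
    case 8:
        return {2, 1};
    case 16:
        return {2, 2};
    default:
        return {0, 0}; // Single sampled
    }
}

// e.g. src_x1 = 512 on a 4x MSAA source shifts right by 1 and maps to pixel 256
constexpr int ToPixels(int sample_coord, int samples_log2) {
    return sample_coord >> samples_log2;
}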
435
436template <class P>
437typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
438 // TODO: Properly implement this
439 const auto it = page_table.find(cpu_addr >> PAGE_BITS);
440 if (it == page_table.end()) {
441 return nullptr;
442 }
443 const auto& image_map_ids = it->second;
444 for (const ImageMapId map_id : image_map_ids) {
445 const ImageMapView& map = slot_map_views[map_id];
446 const ImageBase& image = slot_images[map.image_id];
447 if (image.cpu_addr != cpu_addr) {
448 continue;
449 }
450 if (image.image_view_ids.empty()) {
451 continue;
452 }
453 return &slot_image_views[image.image_view_ids.at(0)];
454 }
455 return nullptr;
456}
457
458template <class P>
459bool TextureCache<P>::HasUncommittedFlushes() const noexcept {
460 return !uncommitted_downloads.empty();
461}
462
463template <class P>
464bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept {
465 return !committed_downloads.empty() && !committed_downloads.front().empty();
466}
467
468template <class P>
469void TextureCache<P>::CommitAsyncFlushes() {
470    // Intentionally pass the value by copy
471 committed_downloads.push(uncommitted_downloads);
472 uncommitted_downloads.clear();
473}
474
475template <class P>
476void TextureCache<P>::PopAsyncFlushes() {
477 if (committed_downloads.empty()) {
478 return;
479 }
480 const std::span<const ImageId> download_ids = committed_downloads.front();
481 if (download_ids.empty()) {
482 committed_downloads.pop();
483 return;
484 }
485 size_t total_size_bytes = 0;
486 for (const ImageId image_id : download_ids) {
487 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
488 }
489 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
490 const size_t original_offset = download_map.offset;
491 for (const ImageId image_id : download_ids) {
492 Image& image = slot_images[image_id];
493 const auto copies = FullDownloadCopies(image.info);
494 image.DownloadMemory(download_map, copies);
495 download_map.offset += image.unswizzled_size_bytes;
496 }
497 // Wait for downloads to finish
498 runtime.Finish();
499
500 download_map.offset = original_offset;
501 std::span<u8> download_span = download_map.mapped_span;
502 for (const ImageId image_id : download_ids) {
503 const ImageBase& image = slot_images[image_id];
504 const auto copies = FullDownloadCopies(image.info);
505 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
506 download_map.offset += image.unswizzled_size_bytes;
507 download_span = download_span.subspan(image.unswizzled_size_bytes);
508 }
509 committed_downloads.pop();
510}
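
// A minimal sketch of the staging walk in PopAsyncFlushes, with hypothetical
// SketchDownload/ConsumeDownloads names: one buffer sized to the sum of all
// images is filled back-to-back, and after the GPU fence the same mapped span is
// consumed in the same order, advancing by each image's size so producer and
// consumer offsets stay in lockstep.
#include <cstddef>
#include <cstdint>
#include <span>
#include <vector>

struct SketchDownload {
    std::size_t size_bytes;
};

inline void ConsumeDownloads(std::span<std::uint8_t> mapped,
                             const std::vector<SketchDownload>& downloads) {
    for (const SketchDownload& download : downloads) {
        [[maybe_unused]] const std::span<std::uint8_t> bytes = mapped.first(download.size_bytes);
        // ... swizzle `bytes` back into guest memory here ...
        mapped = mapped.subspan(download.size_bytes); // Advance past this image
    }
}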
511
512template <class P>
513bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
514 bool is_modified = false;
515 ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
516 if (False(image.flags & ImageFlagBits::GpuModified)) {
517 return false;
518 }
519 is_modified = true;
520 return true;
521 });
522 return is_modified;
523}
524
525template <class P>
526void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
527 if (False(image.flags & ImageFlagBits::CpuModified)) {
528 // Only upload modified images
529 return;
530 }
531 image.flags &= ~ImageFlagBits::CpuModified;
532 TrackImage(image, image_id);
533
534 if (image.info.num_samples > 1) {
535 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
536 return;
537 }
538 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
539 UploadImageContents(image, staging);
540 runtime.InsertUploadMemoryBarrier();
541}
542
543template <class P>
544template <typename StagingBuffer>
545void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) {
546 const std::span<u8> mapped_span = staging.mapped_span;
547 const GPUVAddr gpu_addr = image.gpu_addr;
548
549 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
550 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
551 const auto uploads = FullUploadSwizzles(image.info);
552 runtime.AccelerateImageUpload(image, staging, uploads);
553 } else if (True(image.flags & ImageFlagBits::Converted)) {
554 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
555 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
556 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
557 image.UploadMemory(staging, copies);
558 } else {
559 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
560 image.UploadMemory(staging, copies);
561 }
562}
563
564template <class P>
565ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
566 if (!IsValidEntry(gpu_memory, config)) {
567 return NULL_IMAGE_VIEW_ID;
568 }
569 const auto [pair, is_new] = image_views.try_emplace(config);
570 ImageViewId& image_view_id = pair->second;
571 if (is_new) {
572 image_view_id = CreateImageView(config);
573 }
574 return image_view_id;
575}
576
577template <class P>
578ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) {
579 const ImageInfo info(config);
580 if (info.type == ImageType::Buffer) {
581 const ImageViewInfo view_info(config, 0);
582 return slot_image_views.insert(runtime, info, view_info, config.Address());
583 }
584 const u32 layer_offset = config.BaseLayer() * info.layer_stride;
585 const GPUVAddr image_gpu_addr = config.Address() - layer_offset;
586 const ImageId image_id = FindOrInsertImage(info, image_gpu_addr);
587 if (!image_id) {
588 return NULL_IMAGE_VIEW_ID;
589 }
590 ImageBase& image = slot_images[image_id];
591 const SubresourceBase base = image.TryFindBase(config.Address()).value();
592 ASSERT(base.level == 0);
593 const ImageViewInfo view_info(config, base.layer);
594 const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info);
595 ImageViewBase& image_view = slot_image_views[image_view_id];
596 image_view.flags |= ImageViewFlagBits::Strong;
597 image.flags |= ImageFlagBits::Strong;
598 return image_view_id;
599}
600
601template <class P>
602ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
603 RelaxedOptions options) {
604 if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) {
605 return image_id;
606 }
607 return InsertImage(info, gpu_addr, options);
608}
609
610template <class P>
611ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
612 RelaxedOptions options) {
613 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
614 if (!cpu_addr) {
615 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
616 if (!cpu_addr) {
617 return ImageId{};
618 }
619 }
620 const bool broken_views = runtime.HasBrokenTextureViewFormats();
621 const bool native_bgr = runtime.HasNativeBgr();
622 ImageId image_id;
623 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
624 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
625 return false;
626 }
627 if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
628 const bool strict_size = False(options & RelaxedOptions::Size) &&
629 True(existing_image.flags & ImageFlagBits::Strong);
630 const ImageInfo& existing = existing_image.info;
631 if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
632 existing.pitch == info.pitch &&
633 IsPitchLinearSameSize(existing, info, strict_size) &&
634 IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
635 image_id = existing_image_id;
636 return true;
637 }
638 } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
639 native_bgr)) {
640 image_id = existing_image_id;
641 return true;
642 }
643 return false;
644 };
645 ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda);
646 return image_id;
647}
648
649template <class P>
650ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
651 RelaxedOptions options) {
652 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
653 if (!cpu_addr) {
654 const auto size = CalculateGuestSizeInBytes(info);
655 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
656 if (!cpu_addr) {
657 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
658 virtual_invalid_space += Common::AlignUp(size, 32);
659 cpu_addr = std::optional<VAddr>(fake_addr);
660 }
661 }
662    ASSERT_MSG(cpu_addr, "Tried to insert an image at an invalid gpu_addr=0x{:x}", gpu_addr);
663 const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
664 const Image& image = slot_images[image_id];
665 // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different
666 const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr);
667 if (is_new) {
668 it->second = slot_image_allocs.insert();
669 }
670 slot_image_allocs[it->second].images.push_back(image_id);
671 return image_id;
672}
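
// A minimal sketch of the fake-address fallback in InsertImage, with illustrative
// names: images whose GPU memory has no CPU mapping still need a unique VAddr key,
// so one is carved from a bump allocator anchored outside the guest's 40-bit
// address space, mirroring the ~(1ULL << 40ULL) base and 32-byte alignment above.
#include <cstdint>

class SketchInvalidSpace {
public:
    std::uint64_t Allocate(std::uint64_t size) {
        const std::uint64_t addr = BASE + next_offset;
        next_offset += (size + 31) & ~std::uint64_t{31}; // AlignUp(size, 32)
        return addr;
    }

private:
    static constexpr std::uint64_t BASE = ~(std::uint64_t{1} << 40);
    std::uint64_t next_offset = 0;
};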
673
674template <class P>
675ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) {
676 ImageInfo new_info = info;
677 const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
678 const bool broken_views = runtime.HasBrokenTextureViewFormats();
679 const bool native_bgr = runtime.HasNativeBgr();
680 std::vector<ImageId> overlap_ids;
681 std::unordered_set<ImageId> overlaps_found;
682 std::vector<ImageId> left_aliased_ids;
683 std::vector<ImageId> right_aliased_ids;
684 std::unordered_set<ImageId> ignore_textures;
685 std::vector<ImageId> bad_overlap_ids;
686 const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
687 if (True(overlap.flags & ImageFlagBits::Remapped)) {
688 ignore_textures.insert(overlap_id);
689 return;
690 }
691 if (info.type == ImageType::Linear) {
692 if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
693 // Alias linear images with the same pitch
694 left_aliased_ids.push_back(overlap_id);
695 }
696 return;
697 }
698 overlaps_found.insert(overlap_id);
699 static constexpr bool strict_size = true;
700 const std::optional<OverlapResult> solution = ResolveOverlap(
701 new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
702 if (solution) {
703 gpu_addr = solution->gpu_addr;
704 cpu_addr = solution->cpu_addr;
705 new_info.resources = solution->resources;
706 overlap_ids.push_back(overlap_id);
707 return;
708 }
709 static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
710 const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
711 if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
712 left_aliased_ids.push_back(overlap_id);
713 overlap.flags |= ImageFlagBits::Alias;
714 } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
715 broken_views, native_bgr)) {
716 right_aliased_ids.push_back(overlap_id);
717 overlap.flags |= ImageFlagBits::Alias;
718 } else {
719 bad_overlap_ids.push_back(overlap_id);
720 overlap.flags |= ImageFlagBits::BadOverlap;
721 }
722 };
723 ForEachImageInRegion(cpu_addr, size_bytes, region_check);
724 const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
725 if (!overlaps_found.contains(overlap_id)) {
726 if (True(overlap.flags & ImageFlagBits::Remapped)) {
727 ignore_textures.insert(overlap_id);
728 }
729 if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
730 ignore_textures.insert(overlap_id);
731 }
732 }
733 };
734 ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
735 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
736 Image& new_image = slot_images[new_image_id];
737
738 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
739 new_image.flags |= ImageFlagBits::Sparse;
740 }
741
742 for (const ImageId overlap_id : ignore_textures) {
743 Image& overlap = slot_images[overlap_id];
744 if (True(overlap.flags & ImageFlagBits::GpuModified)) {
745 UNIMPLEMENTED();
746 }
747 if (True(overlap.flags & ImageFlagBits::Tracked)) {
748 UntrackImage(overlap, overlap_id);
749 }
750 UnregisterImage(overlap_id);
751 DeleteImage(overlap_id);
752 }
753
754 // TODO: Only upload what we need
755 RefreshContents(new_image, new_image_id);
756
757 for (const ImageId overlap_id : overlap_ids) {
758 Image& overlap = slot_images[overlap_id];
759 if (overlap.info.num_samples != new_image.info.num_samples) {
760 LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented");
761 } else {
762 const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
763 const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base);
764 runtime.CopyImage(new_image, overlap, copies);
765 }
766 if (True(overlap.flags & ImageFlagBits::Tracked)) {
767 UntrackImage(overlap, overlap_id);
768 }
769 UnregisterImage(overlap_id);
770 DeleteImage(overlap_id);
771 }
772 ImageBase& new_image_base = new_image;
773 for (const ImageId aliased_id : right_aliased_ids) {
774 ImageBase& aliased = slot_images[aliased_id];
775 AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
776 new_image.flags |= ImageFlagBits::Alias;
777 }
778 for (const ImageId aliased_id : left_aliased_ids) {
779 ImageBase& aliased = slot_images[aliased_id];
780 AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
781 new_image.flags |= ImageFlagBits::Alias;
782 }
783 for (const ImageId aliased_id : bad_overlap_ids) {
784 ImageBase& aliased = slot_images[aliased_id];
785 aliased.overlapping_images.push_back(new_image_id);
786 new_image.overlapping_images.push_back(aliased_id);
787 new_image.flags |= ImageFlagBits::BadOverlap;
788 }
789 RegisterImage(new_image_id);
790 return new_image_id;
791}
792
793template <class P>
794typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
795 const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
796 static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
797 const GPUVAddr dst_addr = dst.Address();
798 const GPUVAddr src_addr = src.Address();
799 ImageInfo dst_info(dst);
800 ImageInfo src_info(src);
801 ImageId dst_id;
802 ImageId src_id;
803 do {
804 has_deleted_images = false;
805 dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS);
806 src_id = FindImage(src_info, src_addr, FIND_OPTIONS);
807 const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr;
808 const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr;
809 DeduceBlitImages(dst_info, src_info, dst_image, src_image);
810 if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) {
811 continue;
812 }
813 if (!dst_id) {
814 dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{});
815 }
816 if (!src_id) {
817 src_id = InsertImage(src_info, src_addr, RelaxedOptions{});
818 }
819 } while (has_deleted_images);
820 return BlitImages{
821 .dst_id = dst_id,
822 .src_id = src_id,
823 .dst_format = dst_info.format,
824 .src_format = src_info.format,
825 };
826}
827
828template <class P>
829SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
830 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
831 return NULL_SAMPLER_ID;
832 }
833 const auto [pair, is_new] = samplers.try_emplace(config);
834 if (is_new) {
835 pair->second = slot_samplers.insert(runtime, config);
836 }
837 return pair->second;
838}
839
840template <class P>
841ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
842 const auto& regs = maxwell3d.regs;
843 if (index >= regs.rt_control.count) {
844 return ImageViewId{};
845 }
846 const auto& rt = regs.rt[index];
847 const GPUVAddr gpu_addr = rt.Address();
848 if (gpu_addr == 0) {
849 return ImageViewId{};
850 }
851 if (rt.format == Tegra::RenderTargetFormat::NONE) {
852 return ImageViewId{};
853 }
854 const ImageInfo info(regs, index);
855 return FindRenderTargetView(info, gpu_addr, is_clear);
856}
857
858template <class P>
859ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
860 const auto& regs = maxwell3d.regs;
861 if (!regs.zeta_enable) {
862 return ImageViewId{};
863 }
864 const GPUVAddr gpu_addr = regs.zeta.Address();
865 if (gpu_addr == 0) {
866 return ImageViewId{};
867 }
868 const ImageInfo info(regs);
869 return FindRenderTargetView(info, gpu_addr, is_clear);
870}
871
872template <class P>
873ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
874 bool is_clear) {
875 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
876 const ImageId image_id = FindOrInsertImage(info, gpu_addr, options);
877 if (!image_id) {
878 return NULL_IMAGE_VIEW_ID;
879 }
880 Image& image = slot_images[image_id];
881 const ImageViewType view_type = RenderTargetImageViewType(info);
882 SubresourceBase base;
883 if (image.info.type == ImageType::Linear) {
884 base = SubresourceBase{.level = 0, .layer = 0};
885 } else {
886 base = image.TryFindBase(gpu_addr).value();
887 }
888 const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers;
889 const SubresourceRange range{
890 .base = base,
891 .extent = {.levels = 1, .layers = layers},
892 };
893 return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range));
894}
895
896template <class P>
897template <typename Func>
898void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) {
899 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
900 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
901 boost::container::small_vector<ImageId, 32> images;
902 boost::container::small_vector<ImageMapId, 32> maps;
903 ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
904 const auto it = page_table.find(page);
905 if (it == page_table.end()) {
906 if constexpr (BOOL_BREAK) {
907 return false;
908 } else {
909 return;
910 }
911 }
912 for (const ImageMapId map_id : it->second) {
913 ImageMapView& map = slot_map_views[map_id];
914 if (map.picked) {
915 continue;
916 }
917 if (!map.Overlaps(cpu_addr, size)) {
918 continue;
919 }
920 map.picked = true;
921 maps.push_back(map_id);
922 Image& image = slot_images[map.image_id];
923 if (True(image.flags & ImageFlagBits::Picked)) {
924 continue;
925 }
926 image.flags |= ImageFlagBits::Picked;
927 images.push_back(map.image_id);
928 if constexpr (BOOL_BREAK) {
929 if (func(map.image_id, image)) {
930 return true;
931 }
932 } else {
933 func(map.image_id, image);
934 }
935 }
936 if constexpr (BOOL_BREAK) {
937 return false;
938 }
939 });
940 for (const ImageId image_id : images) {
941 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
942 }
943 for (const ImageMapId map_id : maps) {
944 slot_map_views[map_id].picked = false;
945 }
946}
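
// A minimal sketch of the BOOL_BREAK dispatch shared by the ForEach* helpers,
// over an illustrative flat vector instead of the page table: std::invoke_result
// detects whether the visitor returns bool, and if constexpr compiles an
// early-exit path only for those visitors, so void-returning lambdas pay nothing.
#include <type_traits>
#include <vector>

template <typename T, typename Func>
void ForEachUntil(std::vector<T>& items, Func&& func) {
    using FuncReturn = std::invoke_result_t<Func, T&>;
    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
    for (T& item : items) {
        if constexpr (BOOL_BREAK) {
            if (func(item)) {
                return; // Visitor asked to stop early
            }
        } else {
            func(item); // Void visitor: always visit everything
        }
    }
}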
947
948template <class P>
949template <typename Func>
950void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
951 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
952 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
953 boost::container::small_vector<ImageId, 8> images;
954 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
955 const auto it = gpu_page_table.find(page);
956 if (it == gpu_page_table.end()) {
957 if constexpr (BOOL_BREAK) {
958 return false;
959 } else {
960 return;
961 }
962 }
963 for (const ImageId image_id : it->second) {
964 Image& image = slot_images[image_id];
965 if (True(image.flags & ImageFlagBits::Picked)) {
966 continue;
967 }
968 if (!image.OverlapsGPU(gpu_addr, size)) {
969 continue;
970 }
971 image.flags |= ImageFlagBits::Picked;
972 images.push_back(image_id);
973 if constexpr (BOOL_BREAK) {
974 if (func(image_id, image)) {
975 return true;
976 }
977 } else {
978 func(image_id, image);
979 }
980 }
981 if constexpr (BOOL_BREAK) {
982 return false;
983 }
984 });
985 for (const ImageId image_id : images) {
986 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
987 }
988}
989
990template <class P>
991template <typename Func>
992void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
993 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
994 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
995 boost::container::small_vector<ImageId, 8> images;
996 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
997 const auto it = sparse_page_table.find(page);
998 if (it == sparse_page_table.end()) {
999 if constexpr (BOOL_BREAK) {
1000 return false;
1001 } else {
1002 return;
1003 }
1004 }
1005 for (const ImageId image_id : it->second) {
1006 Image& image = slot_images[image_id];
1007 if (True(image.flags & ImageFlagBits::Picked)) {
1008 continue;
1009 }
1010 if (!image.OverlapsGPU(gpu_addr, size)) {
1011 continue;
1012 }
1013 image.flags |= ImageFlagBits::Picked;
1014 images.push_back(image_id);
1015 if constexpr (BOOL_BREAK) {
1016 if (func(image_id, image)) {
1017 return true;
1018 }
1019 } else {
1020 func(image_id, image);
1021 }
1022 }
1023 if constexpr (BOOL_BREAK) {
1024 return false;
1025 }
1026 });
1027 for (const ImageId image_id : images) {
1028 slot_images[image_id].flags &= ~ImageFlagBits::Picked;
1029 }
1030}
1031
1032template <class P>
1033template <typename Func>
1034void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1035 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1036 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1037 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1038 for (auto& segment : segments) {
1039 const auto gpu_addr = segment.first;
1040 const auto size = segment.second;
1041 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
1042 ASSERT(cpu_addr);
1043 if constexpr (RETURNS_BOOL) {
1044 if (func(gpu_addr, *cpu_addr, size)) {
1045 break;
1046 }
1047 } else {
1048 func(gpu_addr, *cpu_addr, size);
1049 }
1050 }
1051}
1052
1053template <class P>
1054ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
1055 Image& image = slot_images[image_id];
1056 if (const ImageViewId image_view_id = image.FindView(info); image_view_id) {
1057 return image_view_id;
1058 }
1059 const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image);
1060 image.InsertView(info, image_view_id);
1061 return image_view_id;
1062}
1063
1064template <class P>
1065void TextureCache<P>::RegisterImage(ImageId image_id) {
1066 ImageBase& image = slot_images[image_id];
1067 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
1068 "Trying to register an already registered image");
1069 image.flags |= ImageFlagBits::Registered;
1070 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1071 if ((IsPixelFormatASTC(image.info.format) &&
1072 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1073 True(image.flags & ImageFlagBits::Converted)) {
1074 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1075 }
1076 total_used_memory += Common::AlignUp(tentative_size, 1024);
1077 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1078 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
1079 if (False(image.flags & ImageFlagBits::Sparse)) {
1080 auto map_id =
1081 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
1082 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
1083 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1084 image.map_view_id = map_id;
1085 return;
1086 }
1087 std::vector<ImageViewId> sparse_maps{};
1088 ForEachSparseSegment(
1089 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1090 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
1091 ForEachCPUPage(cpu_addr, size,
1092 [this, map_id](u64 page) { page_table[page].push_back(map_id); });
1093 sparse_maps.push_back(map_id);
1094 });
1095 sparse_views.emplace(image_id, std::move(sparse_maps));
1096 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1097 [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
1098}
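
// A minimal sketch of the page walk behind ForEachCPUPage/ForEachGPUPage, which
// RegisterImage uses to drop an image into every page-table bucket it touches.
// The 20-bit page size is an assumption standing in for this cache's PAGE_BITS;
// size is assumed to be non-zero.
#include <cstddef>
#include <cstdint>

template <typename Func>
void SketchForEachPage(std::uint64_t addr, std::size_t size, Func&& func) {
    constexpr std::uint64_t SKETCH_PAGE_BITS = 20;
    const std::uint64_t first = addr >> SKETCH_PAGE_BITS;
    const std::uint64_t last = (addr + size - 1) >> SKETCH_PAGE_BITS;
    for (std::uint64_t page = first; page <= last; ++page) {
        func(page); // Visit each overlapped page exactly once
    }
}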
1099
1100template <class P>
1101void TextureCache<P>::UnregisterImage(ImageId image_id) {
1102 Image& image = slot_images[image_id];
1103 ASSERT_MSG(True(image.flags & ImageFlagBits::Registered),
1104               "Trying to unregister an image that is not registered");
1105 image.flags &= ~ImageFlagBits::Registered;
1106 image.flags &= ~ImageFlagBits::BadOverlap;
1107 u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
1108 if ((IsPixelFormatASTC(image.info.format) &&
1109 True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
1110 True(image.flags & ImageFlagBits::Converted)) {
1111 tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
1112 }
1113 total_used_memory -= Common::AlignUp(tentative_size, 1024);
1114 const auto& clear_page_table =
1115 [this, image_id](
1116 u64 page,
1117 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
1118 const auto page_it = selected_page_table.find(page);
1119 if (page_it == selected_page_table.end()) {
1120 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1121 return;
1122 }
1123 std::vector<ImageId>& image_ids = page_it->second;
1124 const auto vector_it = std::ranges::find(image_ids, image_id);
1125 if (vector_it == image_ids.end()) {
1126 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1127 page << PAGE_BITS);
1128 return;
1129 }
1130 image_ids.erase(vector_it);
1131 };
1132 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
1133 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
1134 if (False(image.flags & ImageFlagBits::Sparse)) {
1135 const auto map_id = image.map_view_id;
1136 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
1137 const auto page_it = page_table.find(page);
1138 if (page_it == page_table.end()) {
1139 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1140 return;
1141 }
1142 std::vector<ImageMapId>& image_map_ids = page_it->second;
1143 const auto vector_it = std::ranges::find(image_map_ids, map_id);
1144 if (vector_it == image_map_ids.end()) {
1145 UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
1146 page << PAGE_BITS);
1147 return;
1148 }
1149 image_map_ids.erase(vector_it);
1150 });
1151 slot_map_views.erase(map_id);
1152 return;
1153 }
1154 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1155 clear_page_table(page, sparse_page_table);
1156 });
1157 auto it = sparse_views.find(image_id);
1158 ASSERT(it != sparse_views.end());
1159 auto& sparse_maps = it->second;
1160 for (auto& map_view_id : sparse_maps) {
1161 const auto& map_range = slot_map_views[map_view_id];
1162 const VAddr cpu_addr = map_range.cpu_addr;
1163 const std::size_t size = map_range.size;
1164 ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
1165 const auto page_it = page_table.find(page);
1166 if (page_it == page_table.end()) {
1167 UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
1168 return;
1169 }
1170 std::vector<ImageMapId>& image_map_ids = page_it->second;
1171 auto vector_it = image_map_ids.begin();
1172 while (vector_it != image_map_ids.end()) {
1173 ImageMapView& map = slot_map_views[*vector_it];
1174 if (map.image_id != image_id) {
1175 vector_it++;
1176 continue;
1177 }
1178 if (!map.picked) {
1179 map.picked = true;
1180 }
1181 vector_it = image_map_ids.erase(vector_it);
1182 }
1183 });
1184 slot_map_views.erase(map_view_id);
1185 }
1186 sparse_views.erase(it);
1187}
1188
1189template <class P>
1190void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
1191 ASSERT(False(image.flags & ImageFlagBits::Tracked));
1192 image.flags |= ImageFlagBits::Tracked;
1193 if (False(image.flags & ImageFlagBits::Sparse)) {
1194 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
1195 return;
1196 }
1197 if (True(image.flags & ImageFlagBits::Registered)) {
1198 auto it = sparse_views.find(image_id);
1199 ASSERT(it != sparse_views.end());
1200 auto& sparse_maps = it->second;
1201 for (auto& map_view_id : sparse_maps) {
1202 const auto& map = slot_map_views[map_view_id];
1203 const VAddr cpu_addr = map.cpu_addr;
1204 const std::size_t size = map.size;
1205 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1206 }
1207 return;
1208 }
1209 ForEachSparseSegment(image,
1210 [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1211 rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
1212 });
1213}
1214
1215template <class P>
1216void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
1217 ASSERT(True(image.flags & ImageFlagBits::Tracked));
1218 image.flags &= ~ImageFlagBits::Tracked;
1219 if (False(image.flags & ImageFlagBits::Sparse)) {
1220 rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
1221 return;
1222 }
1223 ASSERT(True(image.flags & ImageFlagBits::Registered));
1224 auto it = sparse_views.find(image_id);
1225 ASSERT(it != sparse_views.end());
1226 auto& sparse_maps = it->second;
1227 for (auto& map_view_id : sparse_maps) {
1228 const auto& map = slot_map_views[map_view_id];
1229 const VAddr cpu_addr = map.cpu_addr;
1230 const std::size_t size = map.size;
1231 rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
1232 }
1233}
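
// A minimal sketch of the +1/-1 tracking above, with a hypothetical counter map
// standing in for rasterizer.UpdatePagesCachedCount: TrackImage adds a reference
// to every page an image touches, UntrackImage removes it, and a page stays
// cache-protected while any image still references it.
#include <cstdint>
#include <unordered_map>

class SketchPageCounter {
public:
    void Update(std::uint64_t page, int delta) {
        const int count = counts[page] += delta;
        if (count == 0) {
            counts.erase(page); // No image references this page anymore
        }
    }

    bool IsCached(std::uint64_t page) const {
        return counts.contains(page);
    }

private:
    std::unordered_map<std::uint64_t, int> counts;
};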
1234
1235template <class P>
1236void TextureCache<P>::DeleteImage(ImageId image_id) {
1237 ImageBase& image = slot_images[image_id];
1238 const GPUVAddr gpu_addr = image.gpu_addr;
1239 const auto alloc_it = image_allocs_table.find(gpu_addr);
1240 if (alloc_it == image_allocs_table.end()) {
1241        UNREACHABLE_MSG("Trying to delete an image alloc that does not exist at address 0x{:x}",
1242 gpu_addr);
1243 return;
1244 }
1245 const ImageAllocId alloc_id = alloc_it->second;
1246 std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
1247 const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
1248 if (alloc_image_it == alloc_images.end()) {
1249 UNREACHABLE_MSG("Trying to delete an image that does not exist");
1250 return;
1251 }
1252 ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
1253 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1254
1255 // Mark render targets as dirty
1256 auto& dirty = maxwell3d.dirty.flags;
1257 dirty[Dirty::RenderTargets] = true;
1258 dirty[Dirty::ZetaBuffer] = true;
1259 for (size_t rt = 0; rt < NUM_RT; ++rt) {
1260 dirty[Dirty::ColorBuffer0 + rt] = true;
1261 }
1262 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
1263 for (const ImageViewId image_view_id : image_view_ids) {
1264 std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
1265 if (render_targets.depth_buffer_id == image_view_id) {
1266 render_targets.depth_buffer_id = ImageViewId{};
1267 }
1268 }
1269 RemoveImageViewReferences(image_view_ids);
1270 RemoveFramebuffers(image_view_ids);
1271
1272 for (const AliasedImage& alias : image.aliased_images) {
1273 ImageBase& other_image = slot_images[alias.id];
1274 [[maybe_unused]] const size_t num_removed_aliases =
1275 std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
1276 return other_alias.id == image_id;
1277 });
1278 other_image.CheckAliasState();
1279 ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
1280 num_removed_aliases);
1281 }
1282 for (const ImageId overlap_id : image.overlapping_images) {
1283 ImageBase& other_image = slot_images[overlap_id];
1284 [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
1285 other_image.overlapping_images,
1286 [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
1287 other_image.CheckBadOverlapState();
1288    ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}",
1289 num_removed_overlaps);
1290 }
1291 for (const ImageViewId image_view_id : image_view_ids) {
1292 sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
1293 slot_image_views.erase(image_view_id);
1294 }
1295 sentenced_images.Push(std::move(slot_images[image_id]));
1296 slot_images.erase(image_id);
1297
1298 alloc_images.erase(alloc_image_it);
1299 if (alloc_images.empty()) {
1300 image_allocs_table.erase(alloc_it);
1301 }
1302 if constexpr (ENABLE_VALIDATION) {
1303 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
1304 std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
1305 }
1306 graphics_image_table.Invalidate();
1307 compute_image_table.Invalidate();
1308 has_deleted_images = true;
1309}
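
// A minimal sketch of the deferred destruction implied by the sentenced_* queues,
// assuming the usual scheme of keeping dead objects alive for a fixed number of
// frames so in-flight GPU work never sees them destroyed early. Names and the
// frame count are illustrative.
#include <array>
#include <cstddef>
#include <utility>
#include <vector>

template <typename T, std::size_t FRAMES = 3>
class SketchDestructionRing {
public:
    void Tick() {
        cursor = (cursor + 1) % FRAMES;
        buckets[cursor].clear(); // Destroys objects pushed FRAMES ticks ago
    }

    void Push(T&& object) {
        buckets[cursor].push_back(std::move(object));
    }

private:
    std::size_t cursor = 0;
    std::array<std::vector<T>, FRAMES> buckets;
};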
1310
1311template <class P>
1312void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1313 auto it = image_views.begin();
1314 while (it != image_views.end()) {
1315 const auto found = std::ranges::find(removed_views, it->second);
1316 if (found != removed_views.end()) {
1317 it = image_views.erase(it);
1318 } else {
1319 ++it;
1320 }
1321 }
1322}
1323
1324template <class P>
1325void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
1326 auto it = framebuffers.begin();
1327 while (it != framebuffers.end()) {
1328 if (it->first.Contains(removed_views)) {
1329 it = framebuffers.erase(it);
1330 } else {
1331 ++it;
1332 }
1333 }
1334}
1335
1336template <class P>
1337void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
1338 image.flags |= ImageFlagBits::GpuModified;
1339 image.modification_tick = ++modification_tick;
1340}
1341
1342template <class P>
1343void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1344 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1345 ImageBase& image = slot_images[image_id];
1346 u64 most_recent_tick = image.modification_tick;
1347 for (const AliasedImage& aliased : image.aliased_images) {
1348 ImageBase& aliased_image = slot_images[aliased.id];
1349 if (image.modification_tick < aliased_image.modification_tick) {
1350 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1351 aliased_images.push_back(&aliased);
1352 }
1353 }
1354 if (aliased_images.empty()) {
1355 return;
1356 }
1357 image.modification_tick = most_recent_tick;
1358 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1359 const ImageBase& lhs_image = slot_images[lhs->id];
1360 const ImageBase& rhs_image = slot_images[rhs->id];
1361 return lhs_image.modification_tick < rhs_image.modification_tick;
1362 });
1363 for (const AliasedImage* const aliased : aliased_images) {
1364 CopyImage(image_id, aliased->id, aliased->copies);
1365 }
1366}
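
// A minimal sketch of the tick-ordered flush in SynchronizeAliases, with
// hypothetical SketchAlias/ReplayNewerAliases names: only aliases newer than the
// target are kept, and they are replayed oldest-first so the most recent write is
// the one that lands last.
#include <algorithm>
#include <cstdint>
#include <vector>

struct SketchAlias {
    std::uint64_t modification_tick;
};

inline void ReplayNewerAliases(std::uint64_t target_tick, std::vector<SketchAlias> aliases) {
    std::erase_if(aliases, [target_tick](const SketchAlias& alias) {
        return alias.modification_tick <= target_tick; // Already up to date
    });
    std::ranges::sort(aliases, {}, &SketchAlias::modification_tick);
    for ([[maybe_unused]] const SketchAlias& alias : aliases) {
        // ... copy this alias's contents into the target image here ...
    }
}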
1367
1368template <class P>
1369void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
1370 Image& image = slot_images[image_id];
1371 if (invalidate) {
1372 image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
1373 if (False(image.flags & ImageFlagBits::Tracked)) {
1374 TrackImage(image, image_id);
1375 }
1376 } else {
1377 RefreshContents(image, image_id);
1378 SynchronizeAliases(image_id);
1379 }
1380 if (is_modification) {
1381 MarkModification(image);
1382 }
1383 image.frame_tick = frame_tick;
1384}
1385
1386template <class P>
1387void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
1388 bool invalidate) {
1389 if (!image_view_id) {
1390 return;
1391 }
1392 const ImageViewBase& image_view = slot_image_views[image_view_id];
1393 if (image_view.IsBuffer()) {
1394 return;
1395 }
1396 PrepareImage(image_view.image_id, is_modification, invalidate);
1397}
1398
1399template <class P>
1400void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
1401 Image& dst = slot_images[dst_id];
1402 Image& src = slot_images[src_id];
1403 const auto dst_format_type = GetFormatType(dst.info.format);
1404 const auto src_format_type = GetFormatType(src.info.format);
1405 if (src_format_type == dst_format_type) {
1406 if constexpr (HAS_EMULATED_COPIES) {
1407 if (!runtime.CanImageBeCopied(dst, src)) {
1408 return runtime.EmulateCopyImage(dst, src, copies);
1409 }
1410 }
1411 return runtime.CopyImage(dst, src, copies);
1412 }
1413 UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
1414 UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
1415 for (const ImageCopy& copy : copies) {
1416 UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
1417 UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
1418 UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
1419 UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});
1420
1421 const SubresourceBase dst_base{
1422 .level = copy.dst_subresource.base_level,
1423 .layer = copy.dst_subresource.base_layer,
1424 };
1425 const SubresourceBase src_base{
1426 .level = copy.src_subresource.base_level,
1427 .layer = copy.src_subresource.base_layer,
1428 };
1429 const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
1430 const SubresourceExtent src_extent{.levels = 1, .layers = 1};
1431 const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
1432 const SubresourceRange src_range{.base = src_base, .extent = src_extent};
1433 const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
1434 const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
1435 const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
1436 Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
1437 const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
1438 ImageView& dst_view = slot_image_views[dst_view_id];
1439 ImageView& src_view = slot_image_views[src_view_id];
1440 [[maybe_unused]] const Extent3D expected_size{
1441 .width = std::min(dst_view.size.width, src_view.size.width),
1442 .height = std::min(dst_view.size.height, src_view.size.height),
1443 .depth = std::min(dst_view.size.depth, src_view.size.depth),
1444 };
1445 UNIMPLEMENTED_IF(copy.extent != expected_size);
1446
1447 runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
1448 }
1449}
1450
1451template <class P>
1452void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
1453 if (*old_id == new_id) {
1454 return;
1455 }
1456 if (*old_id) {
1457 const ImageViewBase& old_view = slot_image_views[*old_id];
1458 if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
1459 uncommitted_downloads.push_back(old_view.image_id);
1460 }
1461 }
1462 *old_id = new_id;
1463}
1464
1465template <class P>
1466std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1467 ImageId image_id, const ImageViewInfo& view_info) {
1468 const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
1469 const ImageBase& image = slot_images[image_id];
1470 const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
1471 const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
1472 const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
1473 const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
1474 const u32 num_samples = image.info.num_samples;
1475 const auto [samples_x, samples_y] = SamplesLog2(num_samples);
1476 const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
1477 .color_buffer_ids = {color_view_id},
1478 .depth_buffer_id = depth_view_id,
1479 .size = {extent.width >> samples_x, extent.height >> samples_y},
1480 });
1481 return {framebuffer_id, view_id};
1482}
1483
1484template <class P>
1485bool TextureCache<P>::IsFullClear(ImageViewId id) {
1486 if (!id) {
1487 return true;
1488 }
1489 const ImageViewBase& image_view = slot_image_views[id];
1490 const ImageBase& image = slot_images[image_view.image_id];
1491 const Extent3D size = image_view.size;
1492 const auto& regs = maxwell3d.regs;
1493 const auto& scissor = regs.scissor_test[0];
1494 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1495 // Images with multiple resources can't be cleared in a single call
1496 return false;
1497 }
1498 if (regs.clear_flags.scissor == 0) {
1499 // If scissor testing is disabled, the clear is always full
1500 return true;
1501 }
1502 // Make sure the clear covers all texels in the subresource
1503 return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
1504 scissor.max_y >= size.height;
1505}
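
// A worked example of the decision above with illustrative numbers: a 1280x720
// single-level, single-layer view with scissor testing enabled is a full clear
// only when the scissor rectangle covers every texel.
#include <cstdint>

struct SketchScissor {
    std::uint32_t min_x, min_y, max_x, max_y;
};

constexpr bool SketchIsFullClear(bool scissor_enable, SketchScissor scissor,
                                 std::uint32_t width, std::uint32_t height) {
    if (!scissor_enable) {
        return true; // No scissor test: the clear covers the whole subresource
    }
    return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= width &&
           scissor.max_y >= height;
}

static_assert(SketchIsFullClear(true, {0, 0, 1280, 720}, 1280, 720));
static_assert(!SketchIsFullClear(true, {0, 0, 640, 720}, 1280, 720));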
1506
1507} // namespace VideoCommon