Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt | 6
-rw-r--r-- src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.h | 2
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache_base.cpp (renamed from src/video_core/renderer_opengl/gl_texture_cache_templates.cpp) | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.h | 2
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache_base.cpp (renamed from src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp) | 2
-rw-r--r-- src/video_core/texture_cache/image_view_info.cpp | 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 1711
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h | 402
-rw-r--r-- src/video_core/texture_cache/texture_cache_templates.h | 1507
12 files changed, 1821 insertions(+), 1821 deletions(-)
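The diffstat already tells the story: the TextureCache class template is split into a declaration header (texture_cache_base.h, included by every consumer) and a definition header (texture_cache.h, included only by the per-backend *_texture_cache_base.cpp files, which each anchor one explicit instantiation). A minimal sketch of this compile-time-saving pattern, using hypothetical names rather than the actual yuzu headers:

    // cache_base.h - declarations only; cheap for every consumer to include.
    template <class P>
    class Cache {
    public:
        void Tick();
    };

    // cache.h - the heavyweight member definitions; only the
    // explicit-instantiation translation units include this.
    //   #include "cache_base.h"
    template <class P>
    void Cache<P>::Tick() {
        // ... expensive template code, compiled once per backend ...
    }

    // gl_cache_base.cpp - emits every member of Cache<GLParams> exactly once.
    //   #include "cache.h"
    struct GLParams {};
    template class Cache<GLParams>;

Every other translation unit sees only the declaration, so the template body is parsed and instantiated once per backend instead of once per includer.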
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 1250cca6f..2f6cdd216 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -97,7 +97,7 @@ add_library(video_core STATIC
     renderer_opengl/gl_stream_buffer.h
     renderer_opengl/gl_texture_cache.cpp
     renderer_opengl/gl_texture_cache.h
-    renderer_opengl/gl_texture_cache_templates.cpp
+    renderer_opengl/gl_texture_cache_base.cpp
     renderer_opengl/gl_query_cache.cpp
     renderer_opengl/gl_query_cache.h
     renderer_opengl/maxwell_to_gl.h
@@ -156,7 +156,7 @@ add_library(video_core STATIC
     renderer_vulkan/vk_swapchain.h
     renderer_vulkan/vk_texture_cache.cpp
     renderer_vulkan/vk_texture_cache.h
-    renderer_vulkan/vk_texture_cache_templates.cpp
+    renderer_vulkan/vk_texture_cache_base.cpp
     renderer_vulkan/vk_update_descriptor.cpp
     renderer_vulkan/vk_update_descriptor.h
     shader_cache.cpp
@@ -188,7 +188,7 @@ add_library(video_core STATIC
     texture_cache/samples_helper.h
     texture_cache/slot_vector.h
     texture_cache/texture_cache.h
-    texture_cache/texture_cache_templates.h
+    texture_cache/texture_cache_base.h
     texture_cache/types.h
     texture_cache/util.cpp
     texture_cache/util.h
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index fac0034fb..bccb37a58 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -15,7 +15,7 @@
 #include "video_core/renderer_opengl/gl_shader_util.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/shader_notify.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 #if defined(_MSC_VER) && defined(NDEBUG)
 #define LAMBDA_FORCEINLINE [[msvc::forceinline]]
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 41d2b73f4..b909c387e 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -32,7 +32,7 @@
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
 #include "video_core/shader_cache.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 namespace OpenGL {

diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 921072ebe..4a4f6301c 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -12,7 +12,7 @@
 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/util_shaders.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"

 namespace OpenGL {

diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
index 00ed06447..385358fea 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include "video_core/renderer_opengl/gl_texture_cache.h"
-#include "video_core/texture_cache/texture_cache_templates.h"
+#include "video_core/texture_cache/texture_cache.h"

 namespace VideoCommon {
 template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 23cef2996..3ac18ea54 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -32,7 +32,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader_cache.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 0b73d55f8..5fe6b7ba3 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -9,7 +9,7 @@

 #include "shader_recompiler/shader_info.h"
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"

diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
index fd8978954..44e688342 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.

 #include "video_core/renderer_vulkan/vk_texture_cache.h"
-#include "video_core/texture_cache/texture_cache_templates.h"
+#include "video_core/texture_cache/texture_cache.h"

 namespace VideoCommon {
 template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
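Both backends follow the same two-line pattern: include the definition header, then anchor the instantiation with "template class". Consumer translation units need only the declaration header; the calls they emit are resolved at link time against the symbols produced by these instantiations. A hypothetical consumer (not part of this commit), sketched under that assumption:

    // some_consumer.cpp - compiles against the declaration alone.
    #include "video_core/texture_cache/texture_cache_base.h"

    void EndFrame(VideoCommon::TextureCache<Vulkan::TextureCacheParams>& cache) {
        // The definition of TickFrame() is emitted by the explicit
        // instantiation in vk_texture_cache_base.cpp; the linker finds it there.
        cache.TickFrame();
    }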
diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp
index faf5b151f..f14a92565 100644
--- a/src/video_core/texture_cache/image_view_info.cpp
+++ b/src/video_core/texture_cache/image_view_info.cpp
@@ -6,7 +6,7 @@

 #include "common/assert.h"
 #include "video_core/texture_cache/image_view_info.h"
-#include "video_core/texture_cache/texture_cache.h"
+#include "video_core/texture_cache/texture_cache_base.h"
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a4f6e9422..5884fa16e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -4,48 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #pragma once | 5 | #pragma once |
| 6 | 6 | ||
| 7 | #include <algorithm> | 7 | #include "video_core/texture_cache/texture_cache_base.h" |
| 8 | #include <array> | ||
| 9 | #include <bit> | ||
| 10 | #include <memory> | ||
| 11 | #include <mutex> | ||
| 12 | #include <optional> | ||
| 13 | #include <span> | ||
| 14 | #include <type_traits> | ||
| 15 | #include <unordered_map> | ||
| 16 | #include <unordered_set> | ||
| 17 | #include <utility> | ||
| 18 | #include <vector> | ||
| 19 | |||
| 20 | #include <boost/container/small_vector.hpp> | ||
| 21 | |||
| 22 | #include "common/alignment.h" | ||
| 23 | #include "common/common_types.h" | ||
| 24 | #include "common/literals.h" | ||
| 25 | #include "common/logging/log.h" | ||
| 26 | #include "common/settings.h" | ||
| 27 | #include "video_core/compatible_formats.h" | ||
| 28 | #include "video_core/delayed_destruction_ring.h" | ||
| 29 | #include "video_core/dirty_flags.h" | ||
| 30 | #include "video_core/engines/fermi_2d.h" | ||
| 31 | #include "video_core/engines/kepler_compute.h" | ||
| 32 | #include "video_core/engines/maxwell_3d.h" | ||
| 33 | #include "video_core/memory_manager.h" | ||
| 34 | #include "video_core/rasterizer_interface.h" | ||
| 35 | #include "video_core/surface.h" | ||
| 36 | #include "video_core/texture_cache/descriptor_table.h" | ||
| 37 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 38 | #include "video_core/texture_cache/formatter.h" | ||
| 39 | #include "video_core/texture_cache/image_base.h" | ||
| 40 | #include "video_core/texture_cache/image_info.h" | ||
| 41 | #include "video_core/texture_cache/image_view_base.h" | ||
| 42 | #include "video_core/texture_cache/image_view_info.h" | ||
| 43 | #include "video_core/texture_cache/render_targets.h" | ||
| 44 | #include "video_core/texture_cache/samples_helper.h" | ||
| 45 | #include "video_core/texture_cache/slot_vector.h" | ||
| 46 | #include "video_core/texture_cache/types.h" | ||
| 47 | #include "video_core/texture_cache/util.h" | ||
| 48 | #include "video_core/textures/texture.h" | ||
| 49 | 8 | ||
| 50 | namespace VideoCommon { | 9 | namespace VideoCommon { |
| 51 | 10 | ||
| @@ -62,341 +21,1487 @@ using VideoCore::Surface::SurfaceType; | |||
| 62 | using namespace Common::Literals; | 21 | using namespace Common::Literals; |
| 63 | 22 | ||
| 64 | template <class P> | 23 | template <class P> |
| 65 | class TextureCache { | 24 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, |
| 66 | /// Address shift for caching images into a hash table | 25 | Tegra::Engines::Maxwell3D& maxwell3d_, |
| 67 | static constexpr u64 PAGE_BITS = 20; | 26 | Tegra::Engines::KeplerCompute& kepler_compute_, |
| 68 | 27 | Tegra::MemoryManager& gpu_memory_) | |
| 69 | /// Enables debugging features to the texture cache | 28 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, |
| 70 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; | 29 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { |
| 71 | /// Implement blits as copies between framebuffers | 30 | // Configure null sampler |
| 72 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | 31 | TSCEntry sampler_descriptor{}; |
| 73 | /// True when some copies have to be emulated | 32 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); |
| 74 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | 33 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); |
| 75 | /// True when the API can provide info about the memory of the device. | 34 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); |
| 76 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | 35 | sampler_descriptor.cubemap_anisotropy.Assign(1); |
| 77 | 36 | ||
| 78 | /// Image view ID for null descriptors | 37 | // Make sure the first index is reserved for the null resources |
| 79 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | 38 | // This way the null resource becomes a compile time constant |
| 80 | /// Sampler ID for bugged sampler ids | 39 | void(slot_image_views.insert(runtime, NullImageParams{})); |
| 81 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | 40 | void(slot_samplers.insert(runtime, sampler_descriptor)); |
| 82 | 41 | ||
| 83 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | 42 | deletion_iterator = slot_images.begin(); |
| 84 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | 43 | |
| 85 | 44 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | |
| 86 | using Runtime = typename P::Runtime; | 45 | const auto device_memory = runtime.GetDeviceLocalMemory(); |
| 87 | using Image = typename P::Image; | 46 | const u64 possible_expected_memory = (device_memory * 3) / 10; |
| 88 | using ImageAlloc = typename P::ImageAlloc; | 47 | const u64 possible_critical_memory = (device_memory * 6) / 10; |
| 89 | using ImageView = typename P::ImageView; | 48 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); |
| 90 | using Sampler = typename P::Sampler; | 49 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); |
| 91 | using Framebuffer = typename P::Framebuffer; | 50 | minimum_memory = 0; |
| 92 | 51 | } else { | |
| 93 | struct BlitImages { | 52 | // on OGL we can be more conservatives as the driver takes care. |
| 94 | ImageId dst_id; | 53 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; |
| 95 | ImageId src_id; | 54 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; |
| 96 | PixelFormat dst_format; | 55 | minimum_memory = expected_memory; |
| 97 | PixelFormat src_format; | 56 | } |
| 98 | }; | 57 | } |
| 99 | 58 | ||
| 100 | template <typename T> | 59 | template <class P> |
| 101 | struct IdentityHash { | 60 | void TextureCache<P>::RunGarbageCollector() { |
| 102 | [[nodiscard]] size_t operator()(T value) const noexcept { | 61 | const bool high_priority_mode = total_used_memory >= expected_memory; |
| 103 | return static_cast<size_t>(value); | 62 | const bool aggressive_mode = total_used_memory >= critical_memory; |
| 63 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 64 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 65 | for (; num_iterations > 0; --num_iterations) { | ||
| 66 | if (deletion_iterator == slot_images.end()) { | ||
| 67 | deletion_iterator = slot_images.begin(); | ||
| 68 | if (deletion_iterator == slot_images.end()) { | ||
| 69 | break; | ||
| 70 | } | ||
| 104 | } | 71 | } |
| 105 | }; | 72 | auto [image_id, image_tmp] = *deletion_iterator; |
| 106 | 73 | Image* image = image_tmp; // fix clang error. | |
| 107 | public: | 74 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); |
| 108 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, | 75 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); |
| 109 | Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); | 76 | const bool must_download = image->IsSafeDownload(); |
| 110 | 77 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | |
| 111 | /// Notify the cache that a new frame has been queued | 78 | const u64 ticks_needed = |
| 112 | void TickFrame(); | 79 | is_bad_overlap |
| 113 | 80 | ? ticks_to_destroy >> 4 | |
| 114 | /// Return a constant reference to the given image view id | 81 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); |
| 115 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | 82 | should_care |= aggressive_mode; |
| 116 | 83 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | |
| 117 | /// Return a reference to the given image view id | 84 | if (is_bad_overlap) { |
| 118 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; | 85 | const bool overlap_check = std::ranges::all_of( |
| 119 | 86 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | |
| 120 | /// Mark an image as modified from the GPU | 87 | auto& overlap = slot_images[overlap_id]; |
| 121 | void MarkModification(ImageId id) noexcept; | 88 | return overlap.frame_tick >= image->frame_tick; |
| 122 | 89 | }); | |
| 123 | /// Fill image_view_ids with the graphics images in indices | 90 | if (!overlap_check) { |
| 124 | void FillGraphicsImageViews(std::span<const u32> indices, | 91 | ++deletion_iterator; |
| 125 | std::span<ImageViewId> image_view_ids); | 92 | continue; |
| 93 | } | ||
| 94 | } | ||
| 95 | if (!is_bad_overlap && must_download) { | ||
| 96 | const bool alias_check = std::ranges::none_of( | ||
| 97 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 98 | auto& alias_image = slot_images[alias.id]; | ||
| 99 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 100 | (alias_image.modification_tick < image->modification_tick); | ||
| 101 | }); | ||
| 102 | |||
| 103 | if (alias_check) { | ||
| 104 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 105 | const auto copies = FullDownloadCopies(image->info); | ||
| 106 | image->DownloadMemory(map, copies); | ||
| 107 | runtime.Finish(); | ||
| 108 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 112 | UntrackImage(*image, image_id); | ||
| 113 | } | ||
| 114 | UnregisterImage(image_id); | ||
| 115 | DeleteImage(image_id); | ||
| 116 | if (is_bad_overlap) { | ||
| 117 | ++num_iterations; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | ++deletion_iterator; | ||
| 121 | } | ||
| 122 | } | ||
| 126 | 123 | ||
| 127 | /// Fill image_view_ids with the compute images in indices | 124 | template <class P> |
| 128 | void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); | 125 | void TextureCache<P>::TickFrame() { |
| 126 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { | ||
| 127 | RunGarbageCollector(); | ||
| 128 | } | ||
| 129 | sentenced_images.Tick(); | ||
| 130 | sentenced_framebuffers.Tick(); | ||
| 131 | sentenced_image_view.Tick(); | ||
| 132 | ++frame_tick; | ||
| 133 | } | ||
| 129 | 134 | ||
| 130 | /// Get the sampler from the graphics descriptor table in the specified index | 135 | template <class P> |
| 131 | Sampler* GetGraphicsSampler(u32 index); | 136 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { |
| 137 | return slot_image_views[id]; | ||
| 138 | } | ||
| 132 | 139 | ||
| 133 | /// Get the sampler from the compute descriptor table in the specified index | 140 | template <class P> |
| 134 | Sampler* GetComputeSampler(u32 index); | 141 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { |
| 142 | return slot_image_views[id]; | ||
| 143 | } | ||
| 135 | 144 | ||
| 136 | /// Refresh the state for graphics image view and sampler descriptors | 145 | template <class P> |
| 137 | void SynchronizeGraphicsDescriptors(); | 146 | void TextureCache<P>::MarkModification(ImageId id) noexcept { |
| 147 | MarkModification(slot_images[id]); | ||
| 148 | } | ||
| 138 | 149 | ||
| 139 | /// Refresh the state for compute image view and sampler descriptors | 150 | template <class P> |
| 140 | void SynchronizeComputeDescriptors(); | 151 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, |
| 152 | std::span<ImageViewId> image_view_ids) { | ||
| 153 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 154 | } | ||
| 141 | 155 | ||
| 142 | /// Update bound render targets and upload memory if necessary | 156 | template <class P> |
| 143 | /// @param is_clear True when the render targets are being used for clears | 157 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, |
| 144 | void UpdateRenderTargets(bool is_clear); | 158 | std::span<ImageViewId> image_view_ids) { |
| 159 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 160 | } | ||
| 145 | 161 | ||
| 146 | /// Find a framebuffer with the currently bound render targets | 162 | template <class P> |
| 147 | /// UpdateRenderTargets should be called before this | 163 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { |
| 148 | Framebuffer* GetFramebuffer(); | 164 | if (index > graphics_sampler_table.Limit()) { |
| 165 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 166 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 167 | } | ||
| 168 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 169 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 170 | if (is_new) { | ||
| 171 | id = FindSampler(descriptor); | ||
| 172 | } | ||
| 173 | return &slot_samplers[id]; | ||
| 174 | } | ||
| 149 | 175 | ||
| 150 | /// Mark images in a range as modified from the CPU | 176 | template <class P> |
| 151 | void WriteMemory(VAddr cpu_addr, size_t size); | 177 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { |
| 178 | if (index > compute_sampler_table.Limit()) { | ||
| 179 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 180 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 181 | } | ||
| 182 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); | ||
| 183 | SamplerId& id = compute_sampler_ids[index]; | ||
| 184 | if (is_new) { | ||
| 185 | id = FindSampler(descriptor); | ||
| 186 | } | ||
| 187 | return &slot_samplers[id]; | ||
| 188 | } | ||
| 152 | 189 | ||
| 153 | /// Download contents of host images to guest memory in a region | 190 | template <class P> |
| 154 | void DownloadMemory(VAddr cpu_addr, size_t size); | 191 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { |
| 192 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; | ||
| 193 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; | ||
| 194 | const u32 tic_limit = maxwell3d.regs.tic.limit; | ||
| 195 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; | ||
| 196 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { | ||
| 197 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 198 | } | ||
| 199 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 200 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 201 | } | ||
| 202 | } | ||
| 155 | 203 | ||
| 156 | /// Remove images in a region | 204 | template <class P> |
| 157 | void UnmapMemory(VAddr cpu_addr, size_t size); | 205 | void TextureCache<P>::SynchronizeComputeDescriptors() { |
| 206 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; | ||
| 207 | const u32 tic_limit = kepler_compute.regs.tic.limit; | ||
| 208 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; | ||
| 209 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); | ||
| 210 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { | ||
| 211 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 212 | } | ||
| 213 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 214 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 215 | } | ||
| 216 | } | ||
| 158 | 217 | ||
| 159 | /// Remove images in a region | 218 | template <class P> |
| 160 | void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); | 219 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { |
| 220 | using namespace VideoCommon::Dirty; | ||
| 221 | auto& flags = maxwell3d.dirty.flags; | ||
| 222 | if (!flags[Dirty::RenderTargets]) { | ||
| 223 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 224 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 225 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 226 | } | ||
| 227 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 228 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | flags[Dirty::RenderTargets] = false; | ||
| 161 | 232 | ||
| 162 | /// Blit an image with the given parameters | 233 | // Render target control is used on all render targets, so force look ups when this one is up |
| 163 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | 234 | const bool force = flags[Dirty::RenderTargetControl]; |
| 164 | const Tegra::Engines::Fermi2D::Surface& src, | 235 | flags[Dirty::RenderTargetControl] = false; |
| 165 | const Tegra::Engines::Fermi2D::Config& copy); | ||
| 166 | 236 | ||
| 167 | /// Try to find a cached image view in the given CPU address | 237 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 168 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); | 238 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 239 | if (flags[Dirty::ColorBuffer0 + index] || force) { | ||
| 240 | flags[Dirty::ColorBuffer0 + index] = false; | ||
| 241 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | ||
| 242 | } | ||
| 243 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 244 | } | ||
| 245 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 246 | flags[Dirty::ZetaBuffer] = false; | ||
| 247 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 248 | } | ||
| 249 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 250 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 169 | 251 | ||
| 170 | /// Return true when there are uncommitted images to be downloaded | 252 | for (size_t index = 0; index < NUM_RT; ++index) { |
| 171 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | 253 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); |
| 254 | } | ||
| 255 | render_targets.size = Extent2D{ | ||
| 256 | maxwell3d.regs.render_area.width, | ||
| 257 | maxwell3d.regs.render_area.height, | ||
| 258 | }; | ||
| 259 | } | ||
| 172 | 260 | ||
| 173 | /// Return true when the caller should wait for async downloads | 261 | template <class P> |
| 174 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | 262 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { |
| 263 | return &slot_framebuffers[GetFramebufferId(render_targets)]; | ||
| 264 | } | ||
| 175 | 265 | ||
| 176 | /// Commit asynchronous downloads | 266 | template <class P> |
| 177 | void CommitAsyncFlushes(); | 267 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, |
| 268 | std::span<ImageViewId> cached_image_view_ids, | ||
| 269 | std::span<const u32> indices, | ||
| 270 | std::span<ImageViewId> image_view_ids) { | ||
| 271 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 272 | do { | ||
| 273 | has_deleted_images = false; | ||
| 274 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 275 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 276 | }); | ||
| 277 | } while (has_deleted_images); | ||
| 278 | } | ||
| 178 | 279 | ||
| 179 | /// Pop asynchronous downloads | 280 | template <class P> |
| 180 | void PopAsyncFlushes(); | 281 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, |
| 282 | std::span<ImageViewId> cached_image_view_ids, | ||
| 283 | u32 index) { | ||
| 284 | if (index > table.Limit()) { | ||
| 285 | LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); | ||
| 286 | return NULL_IMAGE_VIEW_ID; | ||
| 287 | } | ||
| 288 | const auto [descriptor, is_new] = table.Read(index); | ||
| 289 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 290 | if (is_new) { | ||
| 291 | image_view_id = FindImageView(descriptor); | ||
| 292 | } | ||
| 293 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 294 | PrepareImageView(image_view_id, false, false); | ||
| 295 | } | ||
| 296 | return image_view_id; | ||
| 297 | } | ||
| 181 | 298 | ||
| 182 | /// Return true when a CPU region is modified from the GPU | 299 | template <class P> |
| 183 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 300 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { |
| 301 | const auto [pair, is_new] = framebuffers.try_emplace(key); | ||
| 302 | FramebufferId& framebuffer_id = pair->second; | ||
| 303 | if (!is_new) { | ||
| 304 | return framebuffer_id; | ||
| 305 | } | ||
| 306 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 307 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 308 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 309 | ImageView* const depth_buffer = | ||
| 310 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 311 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 312 | return framebuffer_id; | ||
| 313 | } | ||
| 184 | 314 | ||
| 185 | std::mutex mutex; | 315 | template <class P> |
| 316 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | ||
| 317 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | ||
| 318 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | image.flags |= ImageFlagBits::CpuModified; | ||
| 322 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 323 | UntrackImage(image, image_id); | ||
| 324 | } | ||
| 325 | }); | ||
| 326 | } | ||
| 186 | 327 | ||
| 187 | private: | 328 | template <class P> |
| 188 | /// Iterate over all page indices in a range | 329 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 189 | template <typename Func> | 330 | std::vector<ImageId> images; |
| 190 | static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { | 331 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { |
| 191 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | 332 | if (!image.IsSafeDownload()) { |
| 192 | const u64 page_end = (addr + size - 1) >> PAGE_BITS; | 333 | return; |
| 193 | for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { | ||
| 194 | if constexpr (RETURNS_BOOL) { | ||
| 195 | if (func(page)) { | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | } else { | ||
| 199 | func(page); | ||
| 200 | } | ||
| 201 | } | 334 | } |
| 335 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 336 | images.push_back(image_id); | ||
| 337 | }); | ||
| 338 | if (images.empty()) { | ||
| 339 | return; | ||
| 202 | } | 340 | } |
| 341 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 342 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 343 | }); | ||
| 344 | for (const ImageId image_id : images) { | ||
| 345 | Image& image = slot_images[image_id]; | ||
| 346 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | ||
| 347 | const auto copies = FullDownloadCopies(image.info); | ||
| 348 | image.DownloadMemory(map, copies); | ||
| 349 | runtime.Finish(); | ||
| 350 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||
| 351 | } | ||
| 352 | } | ||
| 203 | 353 | ||
| 204 | template <typename Func> | 354 | template <class P> |
| 205 | static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { | 355 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 206 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; | 356 | std::vector<ImageId> deleted_images; |
| 207 | const u64 page_end = (addr + size - 1) >> PAGE_BITS; | 357 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 208 | for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { | 358 | for (const ImageId id : deleted_images) { |
| 209 | if constexpr (RETURNS_BOOL) { | 359 | Image& image = slot_images[id]; |
| 210 | if (func(page)) { | 360 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 211 | break; | 361 | UntrackImage(image, id); |
| 212 | } | ||
| 213 | } else { | ||
| 214 | func(page); | ||
| 215 | } | ||
| 216 | } | 362 | } |
| 363 | UnregisterImage(id); | ||
| 364 | DeleteImage(id); | ||
| 217 | } | 365 | } |
| 366 | } | ||
| 218 | 367 | ||
| 219 | /// Runs the Garbage Collector. | 368 | template <class P> |
| 220 | void RunGarbageCollector(); | 369 | void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { |
| 221 | 370 | std::vector<ImageId> deleted_images; | |
| 222 | /// Fills image_view_ids in the image views in indices | 371 | ForEachImageInRegionGPU(gpu_addr, size, |
| 223 | void FillImageViews(DescriptorTable<TICEntry>& table, | 372 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 224 | std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, | 373 | for (const ImageId id : deleted_images) { |
| 225 | std::span<ImageViewId> image_view_ids); | 374 | Image& image = slot_images[id]; |
| 226 | 375 | if (True(image.flags & ImageFlagBits::Remapped)) { | |
| 227 | /// Find or create an image view in the guest descriptor table | 376 | continue; |
| 228 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, | 377 | } |
| 229 | std::span<ImageViewId> cached_image_view_ids, u32 index); | 378 | image.flags |= ImageFlagBits::Remapped; |
| 230 | 379 | if (True(image.flags & ImageFlagBits::Tracked)) { | |
| 231 | /// Find or create a framebuffer with the given render target parameters | 380 | UntrackImage(image, id); |
| 232 | FramebufferId GetFramebufferId(const RenderTargets& key); | 381 | } |
| 382 | } | ||
| 383 | } | ||
| 233 | 384 | ||
| 234 | /// Refresh the contents (pixel data) of an image | 385 | template <class P> |
| 235 | void RefreshContents(Image& image, ImageId image_id); | 386 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, |
| 387 | const Tegra::Engines::Fermi2D::Surface& src, | ||
| 388 | const Tegra::Engines::Fermi2D::Config& copy) { | ||
| 389 | const BlitImages images = GetBlitImages(dst, src); | ||
| 390 | const ImageId dst_id = images.dst_id; | ||
| 391 | const ImageId src_id = images.src_id; | ||
| 392 | PrepareImage(src_id, false, false); | ||
| 393 | PrepareImage(dst_id, true, false); | ||
| 394 | |||
| 395 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 396 | const ImageBase& src_image = slot_images[src_id]; | ||
| 397 | |||
| 398 | // TODO: Deduplicate | ||
| 399 | const std::optional src_base = src_image.TryFindBase(src.Address()); | ||
| 400 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; | ||
| 401 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); | ||
| 402 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); | ||
| 403 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 404 | const Region2D src_region{ | ||
| 405 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 406 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 407 | }; | ||
| 236 | 408 | ||
| 237 | /// Upload data from guest to an image | 409 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); |
| 238 | template <typename StagingBuffer> | 410 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; |
| 239 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer); | 411 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); |
| 412 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 413 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); | ||
| 414 | const Region2D dst_region{ | ||
| 415 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, | ||
| 416 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, | ||
| 417 | }; | ||
| 240 | 418 | ||
| 241 | /// Find or create an image view from a guest descriptor | 419 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. |
| 242 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | 420 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; |
| 421 | if constexpr (FRAMEBUFFER_BLITS) { | ||
| 422 | // OpenGL blits from framebuffers, not images | ||
| 423 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 424 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 425 | copy.filter, copy.operation); | ||
| 426 | } else { | ||
| 427 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 428 | // Provide a framebuffer in case it's necessary | ||
| 429 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 430 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 431 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 432 | copy.operation); | ||
| 433 | } | ||
| 434 | } | ||
| 243 | 435 | ||
| 244 | /// Create a new image view from a guest descriptor | 436 | template <class P> |
| 245 | [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); | 437 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { |
| 438 | // TODO: Properly implement this | ||
| 439 | const auto it = page_table.find(cpu_addr >> PAGE_BITS); | ||
| 440 | if (it == page_table.end()) { | ||
| 441 | return nullptr; | ||
| 442 | } | ||
| 443 | const auto& image_map_ids = it->second; | ||
| 444 | for (const ImageMapId map_id : image_map_ids) { | ||
| 445 | const ImageMapView& map = slot_map_views[map_id]; | ||
| 446 | const ImageBase& image = slot_images[map.image_id]; | ||
| 447 | if (image.cpu_addr != cpu_addr) { | ||
| 448 | continue; | ||
| 449 | } | ||
| 450 | if (image.image_view_ids.empty()) { | ||
| 451 | continue; | ||
| 452 | } | ||
| 453 | return &slot_image_views[image.image_view_ids.at(0)]; | ||
| 454 | } | ||
| 455 | return nullptr; | ||
| 456 | } | ||
| 246 | 457 | ||
| 247 | /// Find or create an image from the given parameters | 458 | template <class P> |
| 248 | [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 459 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { |
| 249 | RelaxedOptions options = RelaxedOptions{}); | 460 | return !uncommitted_downloads.empty(); |
| 461 | } | ||
| 250 | 462 | ||
| 251 | /// Find an image from the given parameters | 463 | template <class P> |
| 252 | [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | 464 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { |
| 253 | RelaxedOptions options); | 465 | return !committed_downloads.empty() && !committed_downloads.front().empty(); |
| 466 | } | ||
| 254 | 467 | ||
| 255 | /// Create an image from the given parameters | 468 | template <class P> |
| 256 | [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | 469 | void TextureCache<P>::CommitAsyncFlushes() { |
| 257 | RelaxedOptions options); | 470 | // This is intentionally passing the value by copy |
| 471 | committed_downloads.push(uncommitted_downloads); | ||
| 472 | uncommitted_downloads.clear(); | ||
| 473 | } | ||
| 258 | 474 | ||
| 259 | /// Create a new image and join perfectly matching existing images | 475 | template <class P> |
| 260 | /// Remove joined images from the cache | 476 | void TextureCache<P>::PopAsyncFlushes() { |
| 261 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | 477 | if (committed_downloads.empty()) { |
| 478 | return; | ||
| 479 | } | ||
| 480 | const std::span<const ImageId> download_ids = committed_downloads.front(); | ||
| 481 | if (download_ids.empty()) { | ||
| 482 | committed_downloads.pop(); | ||
| 483 | return; | ||
| 484 | } | ||
| 485 | size_t total_size_bytes = 0; | ||
| 486 | for (const ImageId image_id : download_ids) { | ||
| 487 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 488 | } | ||
| 489 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 490 | const size_t original_offset = download_map.offset; | ||
| 491 | for (const ImageId image_id : download_ids) { | ||
| 492 | Image& image = slot_images[image_id]; | ||
| 493 | const auto copies = FullDownloadCopies(image.info); | ||
| 494 | image.DownloadMemory(download_map, copies); | ||
| 495 | download_map.offset += image.unswizzled_size_bytes; | ||
| 496 | } | ||
| 497 | // Wait for downloads to finish | ||
| 498 | runtime.Finish(); | ||
| 499 | |||
| 500 | download_map.offset = original_offset; | ||
| 501 | std::span<u8> download_span = download_map.mapped_span; | ||
| 502 | for (const ImageId image_id : download_ids) { | ||
| 503 | const ImageBase& image = slot_images[image_id]; | ||
| 504 | const auto copies = FullDownloadCopies(image.info); | ||
| 505 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); | ||
| 506 | download_map.offset += image.unswizzled_size_bytes; | ||
| 507 | download_span = download_span.subspan(image.unswizzled_size_bytes); | ||
| 508 | } | ||
| 509 | committed_downloads.pop(); | ||
| 510 | } | ||
| 262 | 511 | ||
| 263 | /// Return a blit image pair from the given guest blit parameters | 512 | template <class P> |
| 264 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, | 513 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { |
| 265 | const Tegra::Engines::Fermi2D::Surface& src); | 514 | bool is_modified = false; |
| 515 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | ||
| 516 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 517 | return false; | ||
| 518 | } | ||
| 519 | is_modified = true; | ||
| 520 | return true; | ||
| 521 | }); | ||
| 522 | return is_modified; | ||
| 523 | } | ||
| 266 | 524 | ||
| 267 | /// Find or create a sampler from a guest descriptor sampler | 525 | template <class P> |
| 268 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | 526 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { |
| 527 | if (False(image.flags & ImageFlagBits::CpuModified)) { | ||
| 528 | // Only upload modified images | ||
| 529 | return; | ||
| 530 | } | ||
| 531 | image.flags &= ~ImageFlagBits::CpuModified; | ||
| 532 | TrackImage(image, image_id); | ||
| 269 | 533 | ||
| 270 | /// Find or create an image view for the given color buffer index | 534 | if (image.info.num_samples > 1) { |
| 271 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | 535 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); |
| 536 | return; | ||
| 537 | } | ||
| 538 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); | ||
| 539 | UploadImageContents(image, staging); | ||
| 540 | runtime.InsertUploadMemoryBarrier(); | ||
| 541 | } | ||
| 272 | 542 | ||
| 273 | /// Find or create an image view for the depth buffer | 543 | template <class P> |
| 274 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | 544 | template <typename StagingBuffer> |
| 545 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { | ||
| 546 | const std::span<u8> mapped_span = staging.mapped_span; | ||
| 547 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 548 | |||
| 549 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 550 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 551 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 552 | runtime.AccelerateImageUpload(image, staging, uploads); | ||
| 553 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 554 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 555 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 556 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 557 | image.UploadMemory(staging, copies); | ||
| 558 | } else { | ||
| 559 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 560 | image.UploadMemory(staging, copies); | ||
| 561 | } | ||
| 562 | } | ||
| 275 | 563 | ||
| 276 | /// Find or create a view for a render target with the given image parameters | 564 | template <class P> |
| 277 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | 565 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { |
| 278 | bool is_clear); | 566 | if (!IsValidEntry(gpu_memory, config)) { |
| 567 | return NULL_IMAGE_VIEW_ID; | ||
| 568 | } | ||
| 569 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 570 | ImageViewId& image_view_id = pair->second; | ||
| 571 | if (is_new) { | ||
| 572 | image_view_id = CreateImageView(config); | ||
| 573 | } | ||
| 574 | return image_view_id; | ||
| 575 | } | ||
| 279 | 576 | ||
| 280 | /// Iterates over all the images in a region calling func | 577 | template <class P> |
| 281 | template <typename Func> | 578 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { |
| 282 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | 579 | const ImageInfo info(config); |
| 580 | if (info.type == ImageType::Buffer) { | ||
| 581 | const ImageViewInfo view_info(config, 0); | ||
| 582 | return slot_image_views.insert(runtime, info, view_info, config.Address()); | ||
| 583 | } | ||
| 584 | const u32 layer_offset = config.BaseLayer() * info.layer_stride; | ||
| 585 | const GPUVAddr image_gpu_addr = config.Address() - layer_offset; | ||
| 586 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); | ||
| 587 | if (!image_id) { | ||
| 588 | return NULL_IMAGE_VIEW_ID; | ||
| 589 | } | ||
| 590 | ImageBase& image = slot_images[image_id]; | ||
| 591 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 592 | ASSERT(base.level == 0); | ||
| 593 | const ImageViewInfo view_info(config, base.layer); | ||
| 594 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 595 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 596 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 597 | image.flags |= ImageFlagBits::Strong; | ||
| 598 | return image_view_id; | ||
| 599 | } | ||
| 283 | 600 | ||
| 284 | template <typename Func> | 601 | template <class P> |
| 285 | void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); | 602 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 603 | RelaxedOptions options) { | ||
| 604 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { | ||
| 605 | return image_id; | ||
| 606 | } | ||
| 607 | return InsertImage(info, gpu_addr, options); | ||
| 608 | } | ||
| 286 | 609 | ||
| 287 | template <typename Func> | 610 | template <class P> |
| 288 | void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); | 611 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 612 | RelaxedOptions options) { | ||
| 613 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 614 | if (!cpu_addr) { | ||
| 615 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||
| 616 | if (!cpu_addr) { | ||
| 617 | return ImageId{}; | ||
| 618 | } | ||
| 619 | } | ||
| 620 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 621 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 622 | ImageId image_id; | ||
| 623 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | ||
| 624 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | ||
| 625 | return false; | ||
| 626 | } | ||
| 627 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | ||
| 628 | const bool strict_size = False(options & RelaxedOptions::Size) && | ||
| 629 | True(existing_image.flags & ImageFlagBits::Strong); | ||
| 630 | const ImageInfo& existing = existing_image.info; | ||
| 631 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && | ||
| 632 | existing.pitch == info.pitch && | ||
| 633 | IsPitchLinearSameSize(existing, info, strict_size) && | ||
| 634 | IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { | ||
| 635 | image_id = existing_image_id; | ||
| 636 | return true; | ||
| 637 | } | ||
| 638 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, | ||
| 639 | native_bgr)) { | ||
| 640 | image_id = existing_image_id; | ||
| 641 | return true; | ||
| 642 | } | ||
| 643 | return false; | ||
| 644 | }; | ||
| 645 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 646 | return image_id; | ||
| 647 | } | ||
| 289 | 648 | ||
| 290 | /// Iterates over all the images in a region calling func | 649 | template <class P> |
| 291 | template <typename Func> | 650 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, |
| 292 | void ForEachSparseSegment(ImageBase& image, Func&& func); | 651 | RelaxedOptions options) { |
| 652 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 653 | if (!cpu_addr) { | ||
| 654 | const auto size = CalculateGuestSizeInBytes(info); | ||
| 655 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); | ||
| 656 | if (!cpu_addr) { | ||
| 657 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||
| 658 | virtual_invalid_space += Common::AlignUp(size, 32); | ||
| 659 | cpu_addr = std::optional<VAddr>(fake_addr); | ||
| 660 | } | ||
| 661 | } | ||
| 662 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 663 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 664 | const Image& image = slot_images[image_id]; | ||
| 665 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 666 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 667 | if (is_new) { | ||
| 668 | it->second = slot_image_allocs.insert(); | ||
| 669 | } | ||
| 670 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 671 | return image_id; | ||
| 672 | } | ||
| 293 | 673 | ||
| 294 | /// Find or create an image view in the given image with the passed parameters | 674 | template <class P> |
| 295 | [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); | 675 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { |
| 676 | ImageInfo new_info = info; | ||
| 677 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 678 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 679 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 680 | std::vector<ImageId> overlap_ids; | ||
| 681 | std::unordered_set<ImageId> overlaps_found; | ||
| 682 | std::vector<ImageId> left_aliased_ids; | ||
| 683 | std::vector<ImageId> right_aliased_ids; | ||
| 684 | std::unordered_set<ImageId> ignore_textures; | ||
| 685 | std::vector<ImageId> bad_overlap_ids; | ||
| 686 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 687 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 688 | ignore_textures.insert(overlap_id); | ||
| 689 | return; | ||
| 690 | } | ||
| 691 | if (info.type == ImageType::Linear) { | ||
| 692 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | ||
| 693 | // Alias linear images with the same pitch | ||
| 694 | left_aliased_ids.push_back(overlap_id); | ||
| 695 | } | ||
| 696 | return; | ||
| 697 | } | ||
| 698 | overlaps_found.insert(overlap_id); | ||
| 699 | static constexpr bool strict_size = true; | ||
| 700 | const std::optional<OverlapResult> solution = ResolveOverlap( | ||
| 701 | new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); | ||
| 702 | if (solution) { | ||
| 703 | gpu_addr = solution->gpu_addr; | ||
| 704 | cpu_addr = solution->cpu_addr; | ||
| 705 | new_info.resources = solution->resources; | ||
| 706 | overlap_ids.push_back(overlap_id); | ||
| 707 | return; | ||
| 708 | } | ||
| 709 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 710 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | ||
| 711 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | ||
| 712 | left_aliased_ids.push_back(overlap_id); | ||
| 713 | overlap.flags |= ImageFlagBits::Alias; | ||
| 714 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||
| 715 | broken_views, native_bgr)) { | ||
| 716 | right_aliased_ids.push_back(overlap_id); | ||
| 717 | overlap.flags |= ImageFlagBits::Alias; | ||
| 718 | } else { | ||
| 719 | bad_overlap_ids.push_back(overlap_id); | ||
| 720 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 721 | } | ||
| 722 | }; | ||
| 723 | ForEachImageInRegion(cpu_addr, size_bytes, region_check); | ||
| 724 | const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 725 | if (!overlaps_found.contains(overlap_id)) { | ||
| 726 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 727 | ignore_textures.insert(overlap_id); | ||
| 728 | } | ||
| 729 | if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { | ||
| 730 | ignore_textures.insert(overlap_id); | ||
| 731 | } | ||
| 732 | } | ||
| 733 | }; | ||
| 734 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | ||
| 735 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 736 | Image& new_image = slot_images[new_image_id]; | ||
| 296 | 737 | ||
| 297 | /// Register image in the page table | 738 | if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { |
| 298 | void RegisterImage(ImageId image); | 739 | new_image.flags |= ImageFlagBits::Sparse; |
| 740 | } | ||
| 299 | 741 | ||
| 300 | /// Unregister image from the page table | 742 | for (const ImageId overlap_id : ignore_textures) { |
| 301 | void UnregisterImage(ImageId image); | 743 | Image& overlap = slot_images[overlap_id]; |
| 744 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 745 | UNIMPLEMENTED(); | ||
| 746 | } | ||
| 747 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 748 | UntrackImage(overlap, overlap_id); | ||
| 749 | } | ||
| 750 | UnregisterImage(overlap_id); | ||
| 751 | DeleteImage(overlap_id); | ||
| 752 | } | ||
| 302 | 753 | ||
| 303 | /// Track CPU reads and writes for image | 754 | // TODO: Only upload what we need |
| 304 | void TrackImage(ImageBase& image, ImageId image_id); | 755 | RefreshContents(new_image, new_image_id); |
| 756 | |||
| 757 | for (const ImageId overlap_id : overlap_ids) { | ||
| 758 | Image& overlap = slot_images[overlap_id]; | ||
| 759 | if (overlap.info.num_samples != new_image.info.num_samples) { | ||
| 760 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||
| 761 | } else { | ||
| 762 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | ||
| 763 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 764 | runtime.CopyImage(new_image, overlap, copies); | ||
| 765 | } | ||
| 766 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 767 | UntrackImage(overlap, overlap_id); | ||
| 768 | } | ||
| 769 | UnregisterImage(overlap_id); | ||
| 770 | DeleteImage(overlap_id); | ||
| 771 | } | ||
| 772 | ImageBase& new_image_base = new_image; | ||
| 773 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 774 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 775 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 776 | new_image.flags |= ImageFlagBits::Alias; | ||
| 777 | } | ||
| 778 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 779 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 780 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 781 | new_image.flags |= ImageFlagBits::Alias; | ||
| 782 | } | ||
| 783 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 784 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 785 | aliased.overlapping_images.push_back(new_image_id); | ||
| 786 | new_image.overlapping_images.push_back(aliased_id); | ||
| 787 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 788 | } | ||
| 789 | RegisterImage(new_image_id); | ||
| 790 | return new_image_id; | ||
| 791 | } | ||
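The region_check lambda above sorts every overlap into buckets: overlaps that resolve into the new image are subsumed (copied from, then deleted), overlaps that survive become aliases in one of two directions depending on which image is a subresource of the other, and anything else is a bad overlap that is only tracked. A hedged standalone sketch of that decision order, with illustrative names that are not taken from the cache:

    enum class OverlapAction { Subsume, AliasNewInsideOld, AliasOldInsideNew, BadOverlap };

    // Stand-ins for the ResolveOverlap/IsSubresource results computed above.
    OverlapAction Classify(bool resolves_into_new, bool new_is_subresource_of_old,
                           bool old_is_subresource_of_new) {
        if (resolves_into_new) {
            return OverlapAction::Subsume; // joins overlap_ids: copied into the new image
        }
        if (new_is_subresource_of_old) {
            return OverlapAction::AliasNewInsideOld; // joins left_aliased_ids
        }
        if (old_is_subresource_of_new) {
            return OverlapAction::AliasOldInsideNew; // joins right_aliased_ids
        }
        return OverlapAction::BadOverlap; // incompatible; tracked conservatively
    }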
| 305 | 792 | ||
| 306 | /// Stop tracking CPU reads and writes for image | 793 | template <class P> |
| 307 | void UntrackImage(ImageBase& image, ImageId image_id); | 794 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( |
| 795 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { | ||
| 796 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; | ||
| 797 | const GPUVAddr dst_addr = dst.Address(); | ||
| 798 | const GPUVAddr src_addr = src.Address(); | ||
| 799 | ImageInfo dst_info(dst); | ||
| 800 | ImageInfo src_info(src); | ||
| 801 | ImageId dst_id; | ||
| 802 | ImageId src_id; | ||
| 803 | do { | ||
| 804 | has_deleted_images = false; | ||
| 805 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 806 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 807 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 808 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 809 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 810 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 811 | continue; | ||
| 812 | } | ||
| 813 | if (!dst_id) { | ||
| 814 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 815 | } | ||
| 816 | if (!src_id) { | ||
| 817 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 818 | } | ||
| 819 | } while (has_deleted_images); | ||
| 820 | return BlitImages{ | ||
| 821 | .dst_id = dst_id, | ||
| 822 | .src_id = src_id, | ||
| 823 | .dst_format = dst_info.format, | ||
| 824 | .src_format = src_info.format, | ||
| 825 | }; | ||
| 826 | } | ||
| 308 | 827 | ||
| 309 | /// Delete image from the cache | 828 | template <class P> |
| 310 | void DeleteImage(ImageId image); | 829 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { |
| 830 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 831 | return NULL_SAMPLER_ID; | ||
| 832 | } | ||
| 833 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 834 | if (is_new) { | ||
| 835 | pair->second = slot_samplers.insert(runtime, config); | ||
| 836 | } | ||
| 837 | return pair->second; | ||
| 838 | } | ||
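FindSampler relies on the standard insert-if-absent idiom: try_emplace performs the lookup and only default-constructs the slot when the key is missing, so lookup and insertion share a single hash probe. A minimal standalone sketch of the same idiom with a hypothetical map and value type:

    #include <cstdint>
    #include <string>
    #include <unordered_map>

    std::unordered_map<std::uint64_t, std::string> cache;

    const std::string& FindOrCreate(std::uint64_t key) {
        // try_emplace returns the existing slot, or default-constructs a
        // new one and reports is_new == true.
        const auto [it, is_new] = cache.try_emplace(key);
        if (is_new) {
            it->second = "value built once for key " + std::to_string(key);
        }
        return it->second;
    }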
| 311 | 839 | ||
| 312 | /// Remove image views references from the cache | 840 | template <class P> |
| 313 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); | 841 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { |
| 842 | const auto& regs = maxwell3d.regs; | ||
| 843 | if (index >= regs.rt_control.count) { | ||
| 844 | return ImageViewId{}; | ||
| 845 | } | ||
| 846 | const auto& rt = regs.rt[index]; | ||
| 847 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 848 | if (gpu_addr == 0) { | ||
| 849 | return ImageViewId{}; | ||
| 850 | } | ||
| 851 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 852 | return ImageViewId{}; | ||
| 853 | } | ||
| 854 | const ImageInfo info(regs, index); | ||
| 855 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 856 | } | ||
| 314 | 857 | ||
| 315 | /// Remove framebuffers using the given image views from the cache | 858 | template <class P> |
| 316 | void RemoveFramebuffers(std::span<const ImageViewId> removed_views); | 859 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { |
| 860 | const auto& regs = maxwell3d.regs; | ||
| 861 | if (!regs.zeta_enable) { | ||
| 862 | return ImageViewId{}; | ||
| 863 | } | ||
| 864 | const GPUVAddr gpu_addr = regs.zeta.Address(); | ||
| 865 | if (gpu_addr == 0) { | ||
| 866 | return ImageViewId{}; | ||
| 867 | } | ||
| 868 | const ImageInfo info(regs); | ||
| 869 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 870 | } | ||
| 317 | 871 | ||
| 318 | /// Mark an image as modified from the GPU | 872 | template <class P> |
| 319 | void MarkModification(ImageBase& image) noexcept; | 873 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, |
| 874 | bool is_clear) { | ||
| 875 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | ||
| 876 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); | ||
| 877 | if (!image_id) { | ||
| 878 | return NULL_IMAGE_VIEW_ID; | ||
| 879 | } | ||
| 880 | Image& image = slot_images[image_id]; | ||
| 881 | const ImageViewType view_type = RenderTargetImageViewType(info); | ||
| 882 | SubresourceBase base; | ||
| 883 | if (image.info.type == ImageType::Linear) { | ||
| 884 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 885 | } else { | ||
| 886 | base = image.TryFindBase(gpu_addr).value(); | ||
| 887 | } | ||
| 888 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 889 | const SubresourceRange range{ | ||
| 890 | .base = base, | ||
| 891 | .extent = {.levels = 1, .layers = layers}, | ||
| 892 | }; | ||
| 893 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 894 | } | ||
| 320 | 895 | ||
| 321 | /// Synchronize image aliases, copying data if needed | 896 | template <class P> |
| 322 | void SynchronizeAliases(ImageId image_id); | 897 | template <typename Func> |
| 898 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | ||
| 899 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 900 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 901 | boost::container::small_vector<ImageId, 32> images; | ||
| 902 | boost::container::small_vector<ImageMapId, 32> maps; | ||
| 903 | ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { | ||
| 904 | const auto it = page_table.find(page); | ||
| 905 | if (it == page_table.end()) { | ||
| 906 | if constexpr (BOOL_BREAK) { | ||
| 907 | return false; | ||
| 908 | } else { | ||
| 909 | return; | ||
| 910 | } | ||
| 911 | } | ||
| 912 | for (const ImageMapId map_id : it->second) { | ||
| 913 | ImageMapView& map = slot_map_views[map_id]; | ||
| 914 | if (map.picked) { | ||
| 915 | continue; | ||
| 916 | } | ||
| 917 | if (!map.Overlaps(cpu_addr, size)) { | ||
| 918 | continue; | ||
| 919 | } | ||
| 920 | map.picked = true; | ||
| 921 | maps.push_back(map_id); | ||
| 922 | Image& image = slot_images[map.image_id]; | ||
| 923 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 924 | continue; | ||
| 925 | } | ||
| 926 | image.flags |= ImageFlagBits::Picked; | ||
| 927 | images.push_back(map.image_id); | ||
| 928 | if constexpr (BOOL_BREAK) { | ||
| 929 | if (func(map.image_id, image)) { | ||
| 930 | return true; | ||
| 931 | } | ||
| 932 | } else { | ||
| 933 | func(map.image_id, image); | ||
| 934 | } | ||
| 935 | } | ||
| 936 | if constexpr (BOOL_BREAK) { | ||
| 937 | return false; | ||
| 938 | } | ||
| 939 | }); | ||
| 940 | for (const ImageId image_id : images) { | ||
| 941 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 942 | } | ||
| 943 | for (const ImageMapId map_id : maps) { | ||
| 944 | slot_map_views[map_id].picked = false; | ||
| 945 | } | ||
| 946 | } | ||
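The ForEach*InRegion helpers all share the BOOL_BREAK trick: the visitor's return type is inspected at compile time, so a bool-returning visitor can stop the walk early while a void visitor always sees every element. A reduced sketch of the pattern, assuming this generic shape is all the helpers depend on:

    #include <type_traits>
    #include <vector>

    template <typename Func>
    void ForEach(const std::vector<int>& items, Func&& func) {
        using FuncReturn = std::invoke_result_t<Func, int>;
        static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
        for (const int item : items) {
            if constexpr (BOOL_BREAK) {
                if (func(item)) {
                    return; // bool visitor requested an early stop
                }
            } else {
                func(item); // void visitor: visit everything
            }
        }
    }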
| 323 | 947 | ||
| 324 | /// Prepare an image to be used | 948 | template <class P> |
| 325 | void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); | 949 | template <typename Func> |
| 950 | void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 951 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 952 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 953 | boost::container::small_vector<ImageId, 8> images; | ||
| 954 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 955 | const auto it = gpu_page_table.find(page); | ||
| 956 | if (it == gpu_page_table.end()) { | ||
| 957 | if constexpr (BOOL_BREAK) { | ||
| 958 | return false; | ||
| 959 | } else { | ||
| 960 | return; | ||
| 961 | } | ||
| 962 | } | ||
| 963 | for (const ImageId image_id : it->second) { | ||
| 964 | Image& image = slot_images[image_id]; | ||
| 965 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 966 | continue; | ||
| 967 | } | ||
| 968 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 969 | continue; | ||
| 970 | } | ||
| 971 | image.flags |= ImageFlagBits::Picked; | ||
| 972 | images.push_back(image_id); | ||
| 973 | if constexpr (BOOL_BREAK) { | ||
| 974 | if (func(image_id, image)) { | ||
| 975 | return true; | ||
| 976 | } | ||
| 977 | } else { | ||
| 978 | func(image_id, image); | ||
| 979 | } | ||
| 980 | } | ||
| 981 | if constexpr (BOOL_BREAK) { | ||
| 982 | return false; | ||
| 983 | } | ||
| 984 | }); | ||
| 985 | for (const ImageId image_id : images) { | ||
| 986 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 987 | } | ||
| 988 | } | ||
| 326 | 989 | ||
| 327 | /// Prepare an image view to be used | 990 | template <class P> |
| 328 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); | 991 | template <typename Func> |
| 992 | void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 993 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 994 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 995 | boost::container::small_vector<ImageId, 8> images; | ||
| 996 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 997 | const auto it = sparse_page_table.find(page); | ||
| 998 | if (it == sparse_page_table.end()) { | ||
| 999 | if constexpr (BOOL_BREAK) { | ||
| 1000 | return false; | ||
| 1001 | } else { | ||
| 1002 | return; | ||
| 1003 | } | ||
| 1004 | } | ||
| 1005 | for (const ImageId image_id : it->second) { | ||
| 1006 | Image& image = slot_images[image_id]; | ||
| 1007 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1008 | continue; | ||
| 1009 | } | ||
| 1010 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 1011 | continue; | ||
| 1012 | } | ||
| 1013 | image.flags |= ImageFlagBits::Picked; | ||
| 1014 | images.push_back(image_id); | ||
| 1015 | if constexpr (BOOL_BREAK) { | ||
| 1016 | if (func(image_id, image)) { | ||
| 1017 | return true; | ||
| 1018 | } | ||
| 1019 | } else { | ||
| 1020 | func(image_id, image); | ||
| 1021 | } | ||
| 1022 | } | ||
| 1023 | if constexpr (BOOL_BREAK) { | ||
| 1024 | return false; | ||
| 1025 | } | ||
| 1026 | }); | ||
| 1027 | for (const ImageId image_id : images) { | ||
| 1028 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1029 | } | ||
| 1030 | } | ||
| 329 | 1031 | ||
| 330 | /// Execute copies from one image to the other, even if they are incompatible | 1032 | template <class P> |
| 331 | void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); | 1033 | template <typename Func> |
| 1034 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||
| 1035 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||
| 1036 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||
| 1037 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||
| 1038 | for (auto& segment : segments) { | ||
| 1039 | const auto gpu_addr = segment.first; | ||
| 1040 | const auto size = segment.second; | ||
| 1041 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1042 | ASSERT(cpu_addr); | ||
| 1043 | if constexpr (RETURNS_BOOL) { | ||
| 1044 | if (func(gpu_addr, *cpu_addr, size)) { | ||
| 1045 | break; | ||
| 1046 | } | ||
| 1047 | } else { | ||
| 1048 | func(gpu_addr, *cpu_addr, size); | ||
| 1049 | } | ||
| 1050 | } | ||
| 1051 | } | ||
| 332 | 1052 | ||
| 333 | /// Bind an image view as render target, downloading resources preemptively if needed | 1053 | template <class P>
| 334 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); | 1054 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { |
| 1055 | Image& image = slot_images[image_id]; | ||
| 1056 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1057 | return image_view_id; | ||
| 1058 | } | ||
| 1059 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1060 | image.InsertView(info, image_view_id); | ||
| 1061 | return image_view_id; | ||
| 1062 | } | ||
| 335 | 1063 | ||
| 336 | /// Create a render target from a given image and image view parameters | 1064 | template <class P> |
| 337 | [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( | 1065 | void TextureCache<P>::RegisterImage(ImageId image_id) { |
| 338 | ImageId, const ImageViewInfo& view_info); | 1066 | ImageBase& image = slot_images[image_id]; |
| 1067 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1068 | "Trying to register an already registered image"); | ||
| 1069 | image.flags |= ImageFlagBits::Registered; | ||
| 1070 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1071 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1072 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1073 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1074 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1075 | } | ||
| 1076 | total_used_memory += Common::AlignUp(tentative_size, 1024); | ||
| 1077 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1078 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||
| 1079 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1080 | auto map_id = | ||
| 1081 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | ||
| 1082 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1083 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1084 | image.map_view_id = map_id; | ||
| 1085 | return; | ||
| 1086 | } | ||
| 1087 | std::vector<ImageViewId> sparse_maps{}; | ||
| 1088 | ForEachSparseSegment( | ||
| 1089 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1090 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||
| 1091 | ForEachCPUPage(cpu_addr, size, | ||
| 1092 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1093 | sparse_maps.push_back(map_id); | ||
| 1094 | }); | ||
| 1095 | sparse_views.emplace(image_id, std::move(sparse_maps)); | ||
| 1096 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1097 | [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||
| 1098 | } | ||
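RegisterImage spreads an image across every page-table bucket its guest range touches; with PAGE_BITS = 20 each bucket covers 1 MiB of address space. A small standalone sketch of the address-to-page arithmetic that ForEachCPUPage/ForEachGPUPage perform:

    #include <cstdint>
    #include <cstdio>

    constexpr std::uint64_t PAGE_BITS = 20; // 1 MiB buckets

    void PrintTouchedPages(std::uint64_t addr, std::uint64_t size) {
        const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
        for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
            std::printf("bucket 0x%llx\n", static_cast<unsigned long long>(page));
        }
    }

    int main() {
        // An 8 KiB range starting 4 KiB before the 1 MiB boundary touches
        // buckets 0x0 and 0x1.
        PrintTouchedPages(0xFF000, 0x2000);
    }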
| 339 | 1099 | ||
| 340 | /// Returns true if the current clear parameters clear the whole image of a given image view | 1100 | template <class P> |
| 341 | [[nodiscard]] bool IsFullClear(ImageViewId id); | 1101 | void TextureCache<P>::UnregisterImage(ImageId image_id) { |
| 1102 | Image& image = slot_images[image_id]; | ||
| 1103 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | ||
| 1104 | "Trying to unregister an already registered image"); | ||
| 1105 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1106 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1107 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1108 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1109 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1110 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1111 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1112 | } | ||
| 1113 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1114 | const auto& clear_page_table = | ||
| 1115 | [this, image_id]( | ||
| 1116 | u64 page, | ||
| 1117 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { | ||
| 1118 | const auto page_it = selected_page_table.find(page); | ||
| 1119 | if (page_it == selected_page_table.end()) { | ||
| 1120 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1121 | return; | ||
| 1122 | } | ||
| 1123 | std::vector<ImageId>& image_ids = page_it->second; | ||
| 1124 | const auto vector_it = std::ranges::find(image_ids, image_id); | ||
| 1125 | if (vector_it == image_ids.end()) { | ||
| 1126 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1127 | page << PAGE_BITS); | ||
| 1128 | return; | ||
| 1129 | } | ||
| 1130 | image_ids.erase(vector_it); | ||
| 1131 | }; | ||
| 1132 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1133 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | ||
| 1134 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1135 | const auto map_id = image.map_view_id; | ||
| 1136 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | ||
| 1137 | const auto page_it = page_table.find(page); | ||
| 1138 | if (page_it == page_table.end()) { | ||
| 1139 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1140 | return; | ||
| 1141 | } | ||
| 1142 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1143 | const auto vector_it = std::ranges::find(image_map_ids, map_id); | ||
| 1144 | if (vector_it == image_map_ids.end()) { | ||
| 1145 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1146 | page << PAGE_BITS); | ||
| 1147 | return; | ||
| 1148 | } | ||
| 1149 | image_map_ids.erase(vector_it); | ||
| 1150 | }); | ||
| 1151 | slot_map_views.erase(map_id); | ||
| 1152 | return; | ||
| 1153 | } | ||
| 1154 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { | ||
| 1155 | clear_page_table(page, sparse_page_table); | ||
| 1156 | }); | ||
| 1157 | auto it = sparse_views.find(image_id); | ||
| 1158 | ASSERT(it != sparse_views.end()); | ||
| 1159 | auto& sparse_maps = it->second; | ||
| 1160 | for (auto& map_view_id : sparse_maps) { | ||
| 1161 | const auto& map_range = slot_map_views[map_view_id]; | ||
| 1162 | const VAddr cpu_addr = map_range.cpu_addr; | ||
| 1163 | const std::size_t size = map_range.size; | ||
| 1164 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||
| 1165 | const auto page_it = page_table.find(page); | ||
| 1166 | if (page_it == page_table.end()) { | ||
| 1167 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1168 | return; | ||
| 1169 | } | ||
| 1170 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1171 | auto vector_it = image_map_ids.begin(); | ||
| 1172 | while (vector_it != image_map_ids.end()) { | ||
| 1173 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1174 | if (map.image_id != image_id) { | ||
| 1175 | vector_it++; | ||
| 1176 | continue; | ||
| 1177 | } | ||
| 1178 | if (!map.picked) { | ||
| 1179 | map.picked = true; | ||
| 1180 | } | ||
| 1181 | vector_it = image_map_ids.erase(vector_it); | ||
| 1182 | } | ||
| 1183 | }); | ||
| 1184 | slot_map_views.erase(map_view_id); | ||
| 1185 | } | ||
| 1186 | sparse_views.erase(it); | ||
| 1187 | } | ||
| 342 | 1188 | ||
| 343 | Runtime& runtime; | 1189 | template <class P> |
| 344 | VideoCore::RasterizerInterface& rasterizer; | 1190 | void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { |
| 345 | Tegra::Engines::Maxwell3D& maxwell3d; | 1191 | ASSERT(False(image.flags & ImageFlagBits::Tracked)); |
| 346 | Tegra::Engines::KeplerCompute& kepler_compute; | 1192 | image.flags |= ImageFlagBits::Tracked; |
| 347 | Tegra::MemoryManager& gpu_memory; | 1193 | if (False(image.flags & ImageFlagBits::Sparse)) { |
| 1194 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); | ||
| 1195 | return; | ||
| 1196 | } | ||
| 1197 | if (True(image.flags & ImageFlagBits::Registered)) { | ||
| 1198 | auto it = sparse_views.find(image_id); | ||
| 1199 | ASSERT(it != sparse_views.end()); | ||
| 1200 | auto& sparse_maps = it->second; | ||
| 1201 | for (auto& map_view_id : sparse_maps) { | ||
| 1202 | const auto& map = slot_map_views[map_view_id]; | ||
| 1203 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1204 | const std::size_t size = map.size; | ||
| 1205 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1206 | } | ||
| 1207 | return; | ||
| 1208 | } | ||
| 1209 | ForEachSparseSegment(image, | ||
| 1210 | [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1211 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); | ||
| 1212 | }); | ||
| 1213 | } | ||
| 348 | 1214 | ||
| 349 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | 1215 | template <class P> |
| 350 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | 1216 | void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { |
| 351 | std::vector<SamplerId> graphics_sampler_ids; | 1217 | ASSERT(True(image.flags & ImageFlagBits::Tracked)); |
| 352 | std::vector<ImageViewId> graphics_image_view_ids; | 1218 | image.flags &= ~ImageFlagBits::Tracked; |
| 1219 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1220 | rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); | ||
| 1221 | return; | ||
| 1222 | } | ||
| 1223 | ASSERT(True(image.flags & ImageFlagBits::Registered)); | ||
| 1224 | auto it = sparse_views.find(image_id); | ||
| 1225 | ASSERT(it != sparse_views.end()); | ||
| 1226 | auto& sparse_maps = it->second; | ||
| 1227 | for (auto& map_view_id : sparse_maps) { | ||
| 1228 | const auto& map = slot_map_views[map_view_id]; | ||
| 1229 | const VAddr cpu_addr = map.cpu_addr; | ||
| 1230 | const std::size_t size = map.size; | ||
| 1231 | rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); | ||
| 1232 | } | ||
| 1233 | } | ||
| 353 | 1234 | ||
| 354 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; | 1235 | template <class P> |
| 355 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; | 1236 | void TextureCache<P>::DeleteImage(ImageId image_id) { |
| 356 | std::vector<SamplerId> compute_sampler_ids; | 1237 | ImageBase& image = slot_images[image_id]; |
| 357 | std::vector<ImageViewId> compute_image_view_ids; | 1238 | const GPUVAddr gpu_addr = image.gpu_addr; |
| 1239 | const auto alloc_it = image_allocs_table.find(gpu_addr); | ||
| 1240 | if (alloc_it == image_allocs_table.end()) { | ||
| 1241 | UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", | ||
| 1242 | gpu_addr); | ||
| 1243 | return; | ||
| 1244 | } | ||
| 1245 | const ImageAllocId alloc_id = alloc_it->second; | ||
| 1246 | std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; | ||
| 1247 | const auto alloc_image_it = std::ranges::find(alloc_images, image_id); | ||
| 1248 | if (alloc_image_it == alloc_images.end()) { | ||
| 1249 | UNREACHABLE_MSG("Trying to delete an image that does not exist"); | ||
| 1250 | return; | ||
| 1251 | } | ||
| 1252 | ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); | ||
| 1253 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); | ||
| 1254 | |||
| 1255 | // Mark render targets as dirty | ||
| 1256 | auto& dirty = maxwell3d.dirty.flags; | ||
| 1257 | dirty[Dirty::RenderTargets] = true; | ||
| 1258 | dirty[Dirty::ZetaBuffer] = true; | ||
| 1259 | for (size_t rt = 0; rt < NUM_RT; ++rt) { | ||
| 1260 | dirty[Dirty::ColorBuffer0 + rt] = true; | ||
| 1261 | } | ||
| 1262 | const std::span<const ImageViewId> image_view_ids = image.image_view_ids; | ||
| 1263 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1264 | std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); | ||
| 1265 | if (render_targets.depth_buffer_id == image_view_id) { | ||
| 1266 | render_targets.depth_buffer_id = ImageViewId{}; | ||
| 1267 | } | ||
| 1268 | } | ||
| 1269 | RemoveImageViewReferences(image_view_ids); | ||
| 1270 | RemoveFramebuffers(image_view_ids); | ||
| 1271 | |||
| 1272 | for (const AliasedImage& alias : image.aliased_images) { | ||
| 1273 | ImageBase& other_image = slot_images[alias.id]; | ||
| 1274 | [[maybe_unused]] const size_t num_removed_aliases = | ||
| 1275 | std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { | ||
| 1276 | return other_alias.id == image_id; | ||
| 1277 | }); | ||
| 1278 | other_image.CheckAliasState(); | ||
| 1279 | ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", | ||
| 1280 | num_removed_aliases); | ||
| 1281 | } | ||
| 1282 | for (const ImageId overlap_id : image.overlapping_images) { | ||
| 1283 | ImageBase& other_image = slot_images[overlap_id]; | ||
| 1284 | [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( | ||
| 1285 | other_image.overlapping_images, | ||
| 1286 | [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); | ||
| 1287 | other_image.CheckBadOverlapState(); | ||
| 1288 | ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}", | ||
| 1289 | num_removed_overlaps); | ||
| 1290 | } | ||
| 1291 | for (const ImageViewId image_view_id : image_view_ids) { | ||
| 1292 | sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); | ||
| 1293 | slot_image_views.erase(image_view_id); | ||
| 1294 | } | ||
| 1295 | sentenced_images.Push(std::move(slot_images[image_id])); | ||
| 1296 | slot_images.erase(image_id); | ||
| 358 | 1297 | ||
| 359 | RenderTargets render_targets; | 1298 | alloc_images.erase(alloc_image_it); |
| 1299 | if (alloc_images.empty()) { | ||
| 1300 | image_allocs_table.erase(alloc_it); | ||
| 1301 | } | ||
| 1302 | if constexpr (ENABLE_VALIDATION) { | ||
| 1303 | std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); | ||
| 1304 | std::ranges::fill(compute_image_view_ids, CORRUPT_ID); | ||
| 1305 | } | ||
| 1306 | graphics_image_table.Invalidate(); | ||
| 1307 | compute_image_table.Invalidate(); | ||
| 1308 | has_deleted_images = true; | ||
| 1309 | } | ||
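Note that DeleteImage never destroys host objects immediately: views and images are pushed into the sentenced_* rings and survive for TICKS_TO_DESTROY more frames, so in-flight GPU work keeps its resources alive. A simplified sketch of what such a ring could look like (an assumed shape; the real DelayedDestructionRing is defined elsewhere):

    #include <array>
    #include <cstddef>
    #include <utility>
    #include <vector>

    template <typename T, std::size_t TICKS>
    class SimpleDestructionRing {
    public:
        // Called once per frame: advance, then destroy everything that
        // was pushed TICKS frames ago.
        void Tick() {
            index = (index + 1) % TICKS;
            rings[index].clear();
        }

        void Push(T&& object) {
            rings[index].push_back(std::move(object));
        }

    private:
        std::size_t index = 0;
        std::array<std::vector<T>, TICKS> rings;
    };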
| 360 | 1310 | ||
| 361 | std::unordered_map<TICEntry, ImageViewId> image_views; | 1311 | template <class P> |
| 362 | std::unordered_map<TSCEntry, SamplerId> samplers; | 1312 | void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { |
| 363 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | 1313 | auto it = image_views.begin(); |
| 1314 | while (it != image_views.end()) { | ||
| 1315 | const auto found = std::ranges::find(removed_views, it->second); | ||
| 1316 | if (found != removed_views.end()) { | ||
| 1317 | it = image_views.erase(it); | ||
| 1318 | } else { | ||
| 1319 | ++it; | ||
| 1320 | } | ||
| 1321 | } | ||
| 1322 | } | ||
| 364 | 1323 | ||
| 365 | std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; | 1324 | template <class P> |
| 366 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; | 1325 | void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { |
| 367 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; | 1326 | auto it = framebuffers.begin(); |
| 1327 | while (it != framebuffers.end()) { | ||
| 1328 | if (it->first.Contains(removed_views)) { | ||
| 1329 | it = framebuffers.erase(it); | ||
| 1330 | } else { | ||
| 1331 | ++it; | ||
| 1332 | } | ||
| 1333 | } | ||
| 1334 | } | ||
| 368 | 1335 | ||
| 369 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 1336 | template <class P> |
| 1337 | void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | ||
| 1338 | image.flags |= ImageFlagBits::GpuModified; | ||
| 1339 | image.modification_tick = ++modification_tick; | ||
| 1340 | } | ||
| 370 | 1341 | ||
| 371 | VAddr virtual_invalid_space{}; | 1342 | template <class P> |
| 1343 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | ||
| 1344 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | ||
| 1345 | ImageBase& image = slot_images[image_id]; | ||
| 1346 | u64 most_recent_tick = image.modification_tick; | ||
| 1347 | for (const AliasedImage& aliased : image.aliased_images) { | ||
| 1348 | ImageBase& aliased_image = slot_images[aliased.id]; | ||
| 1349 | if (image.modification_tick < aliased_image.modification_tick) { | ||
| 1350 | most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); | ||
| 1351 | aliased_images.push_back(&aliased); | ||
| 1352 | } | ||
| 1353 | } | ||
| 1354 | if (aliased_images.empty()) { | ||
| 1355 | return; | ||
| 1356 | } | ||
| 1357 | image.modification_tick = most_recent_tick; | ||
| 1358 | std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { | ||
| 1359 | const ImageBase& lhs_image = slot_images[lhs->id]; | ||
| 1360 | const ImageBase& rhs_image = slot_images[rhs->id]; | ||
| 1361 | return lhs_image.modification_tick < rhs_image.modification_tick; | ||
| 1362 | }); | ||
| 1363 | for (const AliasedImage* const aliased : aliased_images) { | ||
| 1364 | CopyImage(image_id, aliased->id, aliased->copies); | ||
| 1365 | } | ||
| 1366 | } | ||
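SynchronizeAliases only replays copies from aliases that are strictly newer than the image, sorted oldest-first so the most recently modified alias is copied last and defines the final contents. A hedged sketch of that ordering step in isolation:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct AliasedSource {
        std::uint64_t modification_tick;
    };

    void SortForReplay(std::vector<const AliasedSource*>& newer_aliases) {
        // Oldest ticks first; the last copy executed wins.
        std::sort(newer_aliases.begin(), newer_aliases.end(),
                  [](const AliasedSource* lhs, const AliasedSource* rhs) {
                      return lhs->modification_tick < rhs->modification_tick;
                  });
    }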
| 372 | 1367 | ||
| 373 | bool has_deleted_images = false; | 1368 | template <class P> |
| 374 | u64 total_used_memory = 0; | 1369 | void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { |
| 375 | u64 minimum_memory; | 1370 | Image& image = slot_images[image_id]; |
| 376 | u64 expected_memory; | 1371 | if (invalidate) { |
| 377 | u64 critical_memory; | 1372 | image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); |
| 1373 | if (False(image.flags & ImageFlagBits::Tracked)) { | ||
| 1374 | TrackImage(image, image_id); | ||
| 1375 | } | ||
| 1376 | } else { | ||
| 1377 | RefreshContents(image, image_id); | ||
| 1378 | SynchronizeAliases(image_id); | ||
| 1379 | } | ||
| 1380 | if (is_modification) { | ||
| 1381 | MarkModification(image); | ||
| 1382 | } | ||
| 1383 | image.frame_tick = frame_tick; | ||
| 1384 | } | ||
| 378 | 1385 | ||
| 379 | SlotVector<Image> slot_images; | 1386 | template <class P> |
| 380 | SlotVector<ImageMapView> slot_map_views; | 1387 | void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, |
| 381 | SlotVector<ImageView> slot_image_views; | 1388 | bool invalidate) { |
| 382 | SlotVector<ImageAlloc> slot_image_allocs; | 1389 | if (!image_view_id) { |
| 383 | SlotVector<Sampler> slot_samplers; | 1390 | return; |
| 384 | SlotVector<Framebuffer> slot_framebuffers; | 1391 | } |
| 1392 | const ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 1393 | if (image_view.IsBuffer()) { | ||
| 1394 | return; | ||
| 1395 | } | ||
| 1396 | PrepareImage(image_view.image_id, is_modification, invalidate); | ||
| 1397 | } | ||
| 385 | 1398 | ||
| 386 | // TODO: This data structure is not optimal and it should be reworked | 1399 | template <class P> |
| 387 | std::vector<ImageId> uncommitted_downloads; | 1400 | void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { |
| 388 | std::queue<std::vector<ImageId>> committed_downloads; | 1401 | Image& dst = slot_images[dst_id]; |
| 1402 | Image& src = slot_images[src_id]; | ||
| 1403 | const auto dst_format_type = GetFormatType(dst.info.format); | ||
| 1404 | const auto src_format_type = GetFormatType(src.info.format); | ||
| 1405 | if (src_format_type == dst_format_type) { | ||
| 1406 | if constexpr (HAS_EMULATED_COPIES) { | ||
| 1407 | if (!runtime.CanImageBeCopied(dst, src)) { | ||
| 1408 | return runtime.EmulateCopyImage(dst, src, copies); | ||
| 1409 | } | ||
| 1410 | } | ||
| 1411 | return runtime.CopyImage(dst, src, copies); | ||
| 1412 | } | ||
| 1413 | UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); | ||
| 1414 | UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); | ||
| 1415 | for (const ImageCopy& copy : copies) { | ||
| 1416 | UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); | ||
| 1417 | UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); | ||
| 1418 | UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); | ||
| 1419 | UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); | ||
| 1420 | |||
| 1421 | const SubresourceBase dst_base{ | ||
| 1422 | .level = copy.dst_subresource.base_level, | ||
| 1423 | .layer = copy.dst_subresource.base_layer, | ||
| 1424 | }; | ||
| 1425 | const SubresourceBase src_base{ | ||
| 1426 | .level = copy.src_subresource.base_level, | ||
| 1427 | .layer = copy.src_subresource.base_layer, | ||
| 1428 | }; | ||
| 1429 | const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; | ||
| 1430 | const SubresourceExtent src_extent{.levels = 1, .layers = 1}; | ||
| 1431 | const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; | ||
| 1432 | const SubresourceRange src_range{.base = src_base, .extent = src_extent}; | ||
| 1433 | const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); | ||
| 1434 | const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); | ||
| 1435 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 1436 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 1437 | const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); | ||
| 1438 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 1439 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 1440 | [[maybe_unused]] const Extent3D expected_size{ | ||
| 1441 | .width = std::min(dst_view.size.width, src_view.size.width), | ||
| 1442 | .height = std::min(dst_view.size.height, src_view.size.height), | ||
| 1443 | .depth = std::min(dst_view.size.depth, src_view.size.depth), | ||
| 1444 | }; | ||
| 1445 | UNIMPLEMENTED_IF(copy.extent != expected_size); | ||
| 1446 | |||
| 1447 | runtime.ConvertImage(dst_framebuffer, dst_view, src_view); | ||
| 1448 | } | ||
| 1449 | } | ||
| 389 | 1450 | ||
| 390 | static constexpr size_t TICKS_TO_DESTROY = 6; | 1451 | template <class P> |
| 391 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | 1452 | void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { |
| 392 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | 1453 | if (*old_id == new_id) { |
| 393 | DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; | 1454 | return; |
| 1455 | } | ||
| 1456 | if (*old_id) { | ||
| 1457 | const ImageViewBase& old_view = slot_image_views[*old_id]; | ||
| 1458 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | ||
| 1459 | uncommitted_downloads.push_back(old_view.image_id); | ||
| 1460 | } | ||
| 1461 | } | ||
| 1462 | *old_id = new_id; | ||
| 1463 | } | ||
| 394 | 1464 | ||
| 395 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | 1465 | template <class P> |
| 1466 | std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( | ||
| 1467 | ImageId image_id, const ImageViewInfo& view_info) { | ||
| 1468 | const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 1469 | const ImageBase& image = slot_images[image_id]; | ||
| 1470 | const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; | ||
| 1471 | const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; | ||
| 1472 | const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; | ||
| 1473 | const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); | ||
| 1474 | const u32 num_samples = image.info.num_samples; | ||
| 1475 | const auto [samples_x, samples_y] = SamplesLog2(num_samples); | ||
| 1476 | const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ | ||
| 1477 | .color_buffer_ids = {color_view_id}, | ||
| 1478 | .depth_buffer_id = depth_view_id, | ||
| 1479 | .size = {extent.width >> samples_x, extent.height >> samples_y}, | ||
| 1480 | }); | ||
| 1481 | return {framebuffer_id, view_id}; | ||
| 1482 | } | ||
| 396 | 1483 | ||
| 397 | u64 modification_tick = 0; | 1484 | template <class P> |
| 398 | u64 frame_tick = 0; | 1485 | bool TextureCache<P>::IsFullClear(ImageViewId id) { |
| 399 | typename SlotVector<Image>::Iterator deletion_iterator; | 1486 | if (!id) { |
| 400 | }; | 1487 | return true; |
| 1488 | } | ||
| 1489 | const ImageViewBase& image_view = slot_image_views[id]; | ||
| 1490 | const ImageBase& image = slot_images[image_view.image_id]; | ||
| 1491 | const Extent3D size = image_view.size; | ||
| 1492 | const auto& regs = maxwell3d.regs; | ||
| 1493 | const auto& scissor = regs.scissor_test[0]; | ||
| 1494 | if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { | ||
| 1495 | // Images with multiple resources can't be cleared in a single call | ||
| 1496 | return false; | ||
| 1497 | } | ||
| 1498 | if (regs.clear_flags.scissor == 0) { | ||
| 1499 | // If scissor testing is disabled, the clear is always full | ||
| 1500 | return true; | ||
| 1501 | } | ||
| 1502 | // Make sure the clear covers all texels in the subresource | ||
| 1503 | return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && | ||
| 1504 | scissor.max_y >= size.height; | ||
| 1505 | } | ||
| 401 | 1506 | ||
| 402 | } // namespace VideoCommon | 1507 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h new file mode 100644 index 000000000..a4f6e9422 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -0,0 +1,402 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include <algorithm> | ||
| 8 | #include <array> | ||
| 9 | #include <bit> | ||
| 10 | #include <memory> | ||
| 11 | #include <mutex> | ||
| 12 | #include <optional> | ||
| 13 | #include <span> | ||
| 14 | #include <type_traits> | ||
| 15 | #include <unordered_map> | ||
| 16 | #include <unordered_set> | ||
| 17 | #include <utility> | ||
| 18 | #include <vector> | ||
| 19 | |||
| 20 | #include <boost/container/small_vector.hpp> | ||
| 21 | |||
| 22 | #include "common/alignment.h" | ||
| 23 | #include "common/common_types.h" | ||
| 24 | #include "common/literals.h" | ||
| 25 | #include "common/logging/log.h" | ||
| 26 | #include "common/settings.h" | ||
| 27 | #include "video_core/compatible_formats.h" | ||
| 28 | #include "video_core/delayed_destruction_ring.h" | ||
| 29 | #include "video_core/dirty_flags.h" | ||
| 30 | #include "video_core/engines/fermi_2d.h" | ||
| 31 | #include "video_core/engines/kepler_compute.h" | ||
| 32 | #include "video_core/engines/maxwell_3d.h" | ||
| 33 | #include "video_core/memory_manager.h" | ||
| 34 | #include "video_core/rasterizer_interface.h" | ||
| 35 | #include "video_core/surface.h" | ||
| 36 | #include "video_core/texture_cache/descriptor_table.h" | ||
| 37 | #include "video_core/texture_cache/format_lookup_table.h" | ||
| 38 | #include "video_core/texture_cache/formatter.h" | ||
| 39 | #include "video_core/texture_cache/image_base.h" | ||
| 40 | #include "video_core/texture_cache/image_info.h" | ||
| 41 | #include "video_core/texture_cache/image_view_base.h" | ||
| 42 | #include "video_core/texture_cache/image_view_info.h" | ||
| 43 | #include "video_core/texture_cache/render_targets.h" | ||
| 44 | #include "video_core/texture_cache/samples_helper.h" | ||
| 45 | #include "video_core/texture_cache/slot_vector.h" | ||
| 46 | #include "video_core/texture_cache/types.h" | ||
| 47 | #include "video_core/texture_cache/util.h" | ||
| 48 | #include "video_core/textures/texture.h" | ||
| 49 | |||
| 50 | namespace VideoCommon { | ||
| 51 | |||
| 52 | using Tegra::Texture::SwizzleSource; | ||
| 53 | using Tegra::Texture::TextureType; | ||
| 54 | using Tegra::Texture::TICEntry; | ||
| 55 | using Tegra::Texture::TSCEntry; | ||
| 56 | using VideoCore::Surface::GetFormatType; | ||
| 57 | using VideoCore::Surface::IsCopyCompatible; | ||
| 58 | using VideoCore::Surface::PixelFormat; | ||
| 59 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 60 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 61 | using VideoCore::Surface::SurfaceType; | ||
| 62 | using namespace Common::Literals; | ||
| 63 | |||
| 64 | template <class P> | ||
| 65 | class TextureCache { | ||
| 66 | /// Address shift for caching images into a hash table | ||
| 67 | static constexpr u64 PAGE_BITS = 20; | ||
| 68 | |||
| 69 | /// Enables debugging features in the texture cache | ||
| 70 | static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; | ||
| 71 | /// Implement blits as copies between framebuffers | ||
| 72 | static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; | ||
| 73 | /// True when some copies have to be emulated | ||
| 74 | static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; | ||
| 75 | /// True when the API can provide info about the memory of the device. | ||
| 76 | static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; | ||
| 77 | |||
| 78 | /// Image view ID for null descriptors | ||
| 79 | static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; | ||
| 80 | /// Sampler ID for invalid sampler descriptors | ||
| 81 | static constexpr SamplerId NULL_SAMPLER_ID{0}; | ||
| 82 | |||
| 83 | static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; | ||
| 84 | static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; | ||
| 85 | |||
| 86 | using Runtime = typename P::Runtime; | ||
| 87 | using Image = typename P::Image; | ||
| 88 | using ImageAlloc = typename P::ImageAlloc; | ||
| 89 | using ImageView = typename P::ImageView; | ||
| 90 | using Sampler = typename P::Sampler; | ||
| 91 | using Framebuffer = typename P::Framebuffer; | ||
| 92 | |||
| 93 | struct BlitImages { | ||
| 94 | ImageId dst_id; | ||
| 95 | ImageId src_id; | ||
| 96 | PixelFormat dst_format; | ||
| 97 | PixelFormat src_format; | ||
| 98 | }; | ||
| 99 | |||
| 100 | template <typename T> | ||
| 101 | struct IdentityHash { | ||
| 102 | [[nodiscard]] size_t operator()(T value) const noexcept { | ||
| 103 | return static_cast<size_t>(value); | ||
| 104 | } | ||
| 105 | }; | ||
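Page indices produced by the >> PAGE_BITS shift are already well distributed, so the page tables below pass IdentityHash to std::unordered_map and skip a redundant mixing step. A standalone usage sketch under that assumption:

    #include <cstddef>
    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    template <typename T>
    struct IdentityHash {
        std::size_t operator()(T value) const noexcept {
            return static_cast<std::size_t>(value);
        }
    };

    // Same shape as the cache's page tables: page index -> ids on that page.
    using PageTable = std::unordered_map<std::uint64_t, std::vector<int>,
                                         IdentityHash<std::uint64_t>>;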
| 106 | |||
| 107 | public: | ||
| 108 | explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, | ||
| 109 | Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); | ||
| 110 | |||
| 111 | /// Notify the cache that a new frame has been queued | ||
| 112 | void TickFrame(); | ||
| 113 | |||
| 114 | /// Return a constant reference to the given image view id | ||
| 115 | [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; | ||
| 116 | |||
| 117 | /// Return a reference to the given image view id | ||
| 118 | [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; | ||
| 119 | |||
| 120 | /// Mark an image as modified from the GPU | ||
| 121 | void MarkModification(ImageId id) noexcept; | ||
| 122 | |||
| 123 | /// Fill image_view_ids with the graphics images in indices | ||
| 124 | void FillGraphicsImageViews(std::span<const u32> indices, | ||
| 125 | std::span<ImageViewId> image_view_ids); | ||
| 126 | |||
| 127 | /// Fill image_view_ids with the compute images in indices | ||
| 128 | void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); | ||
| 129 | |||
| 130 | /// Get the sampler from the graphics descriptor table in the specified index | ||
| 131 | Sampler* GetGraphicsSampler(u32 index); | ||
| 132 | |||
| 133 | /// Get the sampler from the compute descriptor table in the specified index | ||
| 134 | Sampler* GetComputeSampler(u32 index); | ||
| 135 | |||
| 136 | /// Refresh the state for graphics image view and sampler descriptors | ||
| 137 | void SynchronizeGraphicsDescriptors(); | ||
| 138 | |||
| 139 | /// Refresh the state for compute image view and sampler descriptors | ||
| 140 | void SynchronizeComputeDescriptors(); | ||
| 141 | |||
| 142 | /// Update bound render targets and upload memory if necessary | ||
| 143 | /// @param is_clear True when the render targets are being used for clears | ||
| 144 | void UpdateRenderTargets(bool is_clear); | ||
| 145 | |||
| 146 | /// Find a framebuffer with the currently bound render targets | ||
| 147 | /// UpdateRenderTargets should be called before this | ||
| 148 | Framebuffer* GetFramebuffer(); | ||
| 149 | |||
| 150 | /// Mark images in a range as modified from the CPU | ||
| 151 | void WriteMemory(VAddr cpu_addr, size_t size); | ||
| 152 | |||
| 153 | /// Download contents of host images to guest memory in a region | ||
| 154 | void DownloadMemory(VAddr cpu_addr, size_t size); | ||
| 155 | |||
| 156 | /// Remove images in a CPU address region | ||
| 157 | void UnmapMemory(VAddr cpu_addr, size_t size); | ||
| 158 | |||
| 159 | /// Remove images in a GPU address region | ||
| 160 | void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); | ||
| 161 | |||
| 162 | /// Blit an image with the given parameters | ||
| 163 | void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||
| 164 | const Tegra::Engines::Fermi2D::Surface& src, | ||
| 165 | const Tegra::Engines::Fermi2D::Config& copy); | ||
| 166 | |||
| 167 | /// Try to find a cached image view in the given CPU address | ||
| 168 | [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); | ||
| 169 | |||
| 170 | /// Return true when there are uncommitted images to be downloaded | ||
| 171 | [[nodiscard]] bool HasUncommittedFlushes() const noexcept; | ||
| 172 | |||
| 173 | /// Return true when the caller should wait for async downloads | ||
| 174 | [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; | ||
| 175 | |||
| 176 | /// Commit asynchronous downloads | ||
| 177 | void CommitAsyncFlushes(); | ||
| 178 | |||
| 179 | /// Pop asynchronous downloads | ||
| 180 | void PopAsyncFlushes(); | ||
| 181 | |||
| 182 | /// Return true when a CPU region is modified from the GPU | ||
| 183 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||
| 184 | |||
| 185 | std::mutex mutex; | ||
| 186 | |||
| 187 | private: | ||
| 188 | /// Iterate over all page indices in a range | ||
| 189 | template <typename Func> | ||
| 190 | static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { | ||
| 191 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>; | ||
| 192 | const u64 page_end = (addr + size - 1) >> PAGE_BITS; | ||
| 193 | for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { | ||
| 194 | if constexpr (RETURNS_BOOL) { | ||
| 195 | if (func(page)) { | ||
| 196 | break; | ||
| 197 | } | ||
| 198 | } else { | ||
| 199 | func(page); | ||
| 200 | } | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | template <typename Func> | ||
| 205 | static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { | ||
| 206 | static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>; | ||
| 207 | const u64 page_end = (addr + size - 1) >> PAGE_BITS; | ||
| 208 | for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { | ||
| 209 | if constexpr (RETURNS_BOOL) { | ||
| 210 | if (func(page)) { | ||
| 211 | break; | ||
| 212 | } | ||
| 213 | } else { | ||
| 214 | func(page); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | } | ||
| 218 | |||
| 219 | /// Runs the Garbage Collector. | ||
| 220 | void RunGarbageCollector(); | ||
| 221 | |||
| 222 | /// Fill image_view_ids with the image views at the given indices | ||
| 223 | void FillImageViews(DescriptorTable<TICEntry>& table, | ||
| 224 | std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, | ||
| 225 | std::span<ImageViewId> image_view_ids); | ||
| 226 | |||
| 227 | /// Find or create an image view in the guest descriptor table | ||
| 228 | ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, | ||
| 229 | std::span<ImageViewId> cached_image_view_ids, u32 index); | ||
| 230 | |||
| 231 | /// Find or create a framebuffer with the given render target parameters | ||
| 232 | FramebufferId GetFramebufferId(const RenderTargets& key); | ||
| 233 | |||
| 234 | /// Refresh the contents (pixel data) of an image | ||
| 235 | void RefreshContents(Image& image, ImageId image_id); | ||
| 236 | |||
| 237 | /// Upload data from guest to an image | ||
| 238 | template <typename StagingBuffer> | ||
| 239 | void UploadImageContents(Image& image, StagingBuffer& staging_buffer); | ||
| 240 | |||
| 241 | /// Find or create an image view from a guest descriptor | ||
| 242 | [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); | ||
| 243 | |||
| 244 | /// Create a new image view from a guest descriptor | ||
| 245 | [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); | ||
| 246 | |||
| 247 | /// Find or create an image from the given parameters | ||
| 248 | [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 249 | RelaxedOptions options = RelaxedOptions{}); | ||
| 250 | |||
| 251 | /// Find an image from the given parameters | ||
| 252 | [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 253 | RelaxedOptions options); | ||
| 254 | |||
| 255 | /// Create an image from the given parameters | ||
| 256 | [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 257 | RelaxedOptions options); | ||
| 258 | |||
| 259 | /// Create a new image and join perfectly matching existing images | ||
| 260 | /// Remove joined images from the cache | ||
| 261 | [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||
| 262 | |||
| 263 | /// Return a blit image pair from the given guest blit parameters | ||
| 264 | [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, | ||
| 265 | const Tegra::Engines::Fermi2D::Surface& src); | ||
| 266 | |||
| 267 | /// Find or create a sampler from a guest descriptor sampler | ||
| 268 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | ||
| 269 | |||
| 270 | /// Find or create an image view for the given color buffer index | ||
| 271 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | ||
| 272 | |||
| 273 | /// Find or create an image view for the depth buffer | ||
| 274 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | ||
| 275 | |||
| 276 | /// Find or create a view for a render target with the given image parameters | ||
| 277 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 278 | bool is_clear); | ||
| 279 | |||
| 280 | /// Iterates over all the images in a region calling func | ||
| 281 | template <typename Func> | ||
| 282 | void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); | ||
| 283 | |||
| 284 | template <typename Func> | ||
| 285 | void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); | ||
| 286 | |||
| 287 | template <typename Func> | ||
| 288 | void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); | ||
| 289 | |||
| 290 | /// Iterates over all the mapped sparse segments of an image calling func | ||
| 291 | template <typename Func> | ||
| 292 | void ForEachSparseSegment(ImageBase& image, Func&& func); | ||
| 293 | |||
| 294 | /// Find or create an image view in the given image with the passed parameters | ||
| 295 | [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); | ||
| 296 | |||
| 297 | /// Register image in the page table | ||
| 298 | void RegisterImage(ImageId image); | ||
| 299 | |||
| 300 | /// Unregister image from the page table | ||
| 301 | void UnregisterImage(ImageId image); | ||
| 302 | |||
| 303 | /// Track CPU reads and writes for image | ||
| 304 | void TrackImage(ImageBase& image, ImageId image_id); | ||
| 305 | |||
| 306 | /// Stop tracking CPU reads and writes for image | ||
| 307 | void UntrackImage(ImageBase& image, ImageId image_id); | ||
| 308 | |||
| 309 | /// Delete image from the cache | ||
| 310 | void DeleteImage(ImageId image); | ||
| 311 | |||
| 312 | /// Remove image views references from the cache | ||
| 313 | void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); | ||
| 314 | |||
| 315 | /// Remove framebuffers using the given image views from the cache | ||
| 316 | void RemoveFramebuffers(std::span<const ImageViewId> removed_views); | ||
| 317 | |||
| 318 | /// Mark an image as modified from the GPU | ||
| 319 | void MarkModification(ImageBase& image) noexcept; | ||
| 320 | |||
| 321 | /// Synchronize image aliases, copying data if needed | ||
| 322 | void SynchronizeAliases(ImageId image_id); | ||
| 323 | |||
| 324 | /// Prepare an image to be used | ||
| 325 | void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); | ||
| 326 | |||
| 327 | /// Prepare an image view to be used | ||
| 328 | void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); | ||
| 329 | |||
| 330 | /// Execute copies from one image to the other, even if they are incompatible | ||
| 331 | void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); | ||
| 332 | |||
| 333 | /// Bind an image view as render target, downloading resources preemptively if needed | ||
| 334 | void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); | ||
| 335 | |||
| 336 | /// Create a render target from a given image and image view parameters | ||
| 337 | [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( | ||
| 338 | ImageId, const ImageViewInfo& view_info); | ||
| 339 | |||
| 340 | /// Returns true if the current clear parameters clear the whole image of a given image view | ||
| 341 | [[nodiscard]] bool IsFullClear(ImageViewId id); | ||
| 342 | |||
| 343 | Runtime& runtime; | ||
| 344 | VideoCore::RasterizerInterface& rasterizer; | ||
| 345 | Tegra::Engines::Maxwell3D& maxwell3d; | ||
| 346 | Tegra::Engines::KeplerCompute& kepler_compute; | ||
| 347 | Tegra::MemoryManager& gpu_memory; | ||
| 348 | |||
| 349 | DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; | ||
| 350 | DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; | ||
| 351 | std::vector<SamplerId> graphics_sampler_ids; | ||
| 352 | std::vector<ImageViewId> graphics_image_view_ids; | ||
| 353 | |||
| 354 | DescriptorTable<TICEntry> compute_image_table{gpu_memory}; | ||
| 355 | DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; | ||
| 356 | std::vector<SamplerId> compute_sampler_ids; | ||
| 357 | std::vector<ImageViewId> compute_image_view_ids; | ||
| 358 | |||
| 359 | RenderTargets render_targets; | ||
| 360 | |||
| 361 | std::unordered_map<TICEntry, ImageViewId> image_views; | ||
| 362 | std::unordered_map<TSCEntry, SamplerId> samplers; | ||
| 363 | std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||
| 364 | |||
| 365 | std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; | ||
| 366 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; | ||
| 367 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; | ||
| 368 | |||
| 369 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | ||
| 370 | |||
| 371 | VAddr virtual_invalid_space{}; | ||
| 372 | |||
| 373 | bool has_deleted_images = false; | ||
| 374 | u64 total_used_memory = 0; | ||
| 375 | u64 minimum_memory; | ||
| 376 | u64 expected_memory; | ||
| 377 | u64 critical_memory; | ||
| 378 | |||
| 379 | SlotVector<Image> slot_images; | ||
| 380 | SlotVector<ImageMapView> slot_map_views; | ||
| 381 | SlotVector<ImageView> slot_image_views; | ||
| 382 | SlotVector<ImageAlloc> slot_image_allocs; | ||
| 383 | SlotVector<Sampler> slot_samplers; | ||
| 384 | SlotVector<Framebuffer> slot_framebuffers; | ||
| 385 | |||
| 386 | // TODO: This data structure is not optimal and it should be reworked | ||
| 387 | std::vector<ImageId> uncommitted_downloads; | ||
| 388 | std::queue<std::vector<ImageId>> committed_downloads; | ||
| 389 | |||
| 390 | static constexpr size_t TICKS_TO_DESTROY = 6; | ||
| 391 | DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; | ||
| 392 | DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; | ||
| 393 | DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; | ||
| 394 | |||
| 395 | std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; | ||
| 396 | |||
| 397 | u64 modification_tick = 0; | ||
| 398 | u64 frame_tick = 0; | ||
| 399 | typename SlotVector<Image>::Iterator deletion_iterator; | ||
| 400 | }; | ||
| 401 | |||
| 402 | } // namespace VideoCommon | ||
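| | |||
| | // [Editor's note] Illustrative sketch (not in the original header) of how a | ||
| | // backend rasterizer might drive this interface per draw; `texture_cache` and | ||
| | // `indices` are hypothetical locals, the member functions are the ones declared above: | ||
| | // | ||
| | //   texture_cache.SynchronizeGraphicsDescriptors(); | ||
| | //   std::vector<ImageViewId> views(indices.size()); | ||
| | //   texture_cache.FillGraphicsImageViews(indices, views); | ||
| | //   texture_cache.UpdateRenderTargets(false); // regular draw, not a clear | ||
| | //   Framebuffer* framebuffer = texture_cache.GetFramebuffer(); | ||
| | //   // bind views and framebuffer, then issue the draw | ||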
diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h deleted file mode 100644 index 8440d23d1..000000000 --- a/src/video_core/texture_cache/texture_cache_templates.h +++ /dev/null | |||
| @@ -1,1507 +0,0 @@ | |||
| 1 | // Copyright 2021 yuzu Emulator Project | ||
| 2 | // Licensed under GPLv2 or any later version | ||
| 3 | // Refer to the license.txt file included. | ||
| 4 | |||
| 5 | #pragma once | ||
| 6 | |||
| 7 | #include "video_core/texture_cache/texture_cache.h" | ||
| 8 | |||
| 9 | namespace VideoCommon { | ||
| 10 | |||
| 11 | using Tegra::Texture::SwizzleSource; | ||
| 12 | using Tegra::Texture::TextureType; | ||
| 13 | using Tegra::Texture::TICEntry; | ||
| 14 | using Tegra::Texture::TSCEntry; | ||
| 15 | using VideoCore::Surface::GetFormatType; | ||
| 16 | using VideoCore::Surface::IsCopyCompatible; | ||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 19 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 20 | using VideoCore::Surface::SurfaceType; | ||
| 21 | using namespace Common::Literals; | ||
| 22 | |||
| 23 | template <class P> | ||
| 24 | TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, | ||
| 25 | Tegra::Engines::Maxwell3D& maxwell3d_, | ||
| 26 | Tegra::Engines::KeplerCompute& kepler_compute_, | ||
| 27 | Tegra::MemoryManager& gpu_memory_) | ||
| 28 | : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||
| 29 | kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { | ||
| 30 | // Configure null sampler | ||
| 31 | TSCEntry sampler_descriptor{}; | ||
| 32 | sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 33 | sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); | ||
| 34 | sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); | ||
| 35 | sampler_descriptor.cubemap_anisotropy.Assign(1); | ||
| 36 | |||
| 37 | // Make sure the first index is reserved for the null resources | ||
| 38 | // This way the null resource becomes a compile-time constant | ||
| 39 | void(slot_image_views.insert(runtime, NullImageParams{})); | ||
| 40 | void(slot_samplers.insert(runtime, sampler_descriptor)); | ||
| 41 | |||
| 42 | deletion_iterator = slot_images.begin(); | ||
| 43 | |||
| 44 | if constexpr (HAS_DEVICE_MEMORY_INFO) { | ||
| 45 | const auto device_memory = runtime.GetDeviceLocalMemory(); | ||
| 46 | const u64 possible_expected_memory = (device_memory * 3) / 10; | ||
| 47 | const u64 possible_critical_memory = (device_memory * 6) / 10; | ||
| 48 | expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); | ||
| 49 | critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); | ||
| 50 | minimum_memory = 0; | ||
| 51 | } else { | ||
| 52 | // On OpenGL we can be more conservative, as the driver takes care of it. | ||
| 53 | expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; | ||
| 54 | critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; | ||
| 55 | minimum_memory = expected_memory; | ||
| 56 | } | ||
| 57 | } | ||
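| | |||
| | // [Editor's note] Worked example for the device-memory branch above, with a | ||
| | // hypothetical 10 GiB device: possible_expected_memory = 3 GiB and | ||
| | // possible_critical_memory = 6 GiB, both taken via std::max over the defaults; | ||
| | // on a small device the defaults (DEFAULT_EXPECTED_MEMORY / | ||
| | // DEFAULT_CRITICAL_MEMORY, defined elsewhere in this header) win instead. | ||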
| 58 | |||
| 59 | template <class P> | ||
| 60 | void TextureCache<P>::RunGarbageCollector() { | ||
| 61 | const bool high_priority_mode = total_used_memory >= expected_memory; | ||
| 62 | const bool aggressive_mode = total_used_memory >= critical_memory; | ||
| 63 | const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; | ||
| 64 | int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); | ||
| 65 | for (; num_iterations > 0; --num_iterations) { | ||
| 66 | if (deletion_iterator == slot_images.end()) { | ||
| 67 | deletion_iterator = slot_images.begin(); | ||
| 68 | if (deletion_iterator == slot_images.end()) { | ||
| 69 | break; | ||
| 70 | } | ||
| 71 | } | ||
| 72 | auto [image_id, image_tmp] = *deletion_iterator; | ||
| 73 | Image* image = image_tmp; // Local copy: the lambdas below cannot capture the structured binding (clang error). | ||
| 74 | const bool is_alias = True(image->flags & ImageFlagBits::Alias); | ||
| 75 | const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); | ||
| 76 | const bool must_download = image->IsSafeDownload(); | ||
| 77 | bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); | ||
| 78 | const u64 ticks_needed = | ||
| 79 | is_bad_overlap | ||
| 80 | ? ticks_to_destroy >> 4 | ||
| 81 | : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); | ||
| 82 | should_care |= aggressive_mode; | ||
| 83 | if (should_care && image->frame_tick + ticks_needed < frame_tick) { | ||
| 84 | if (is_bad_overlap) { | ||
| 85 | const bool overlap_check = std::ranges::all_of( | ||
| 86 | image->overlapping_images, [&, image](const ImageId& overlap_id) { | ||
| 87 | auto& overlap = slot_images[overlap_id]; | ||
| 88 | return overlap.frame_tick >= image->frame_tick; | ||
| 89 | }); | ||
| 90 | if (!overlap_check) { | ||
| 91 | ++deletion_iterator; | ||
| 92 | continue; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | if (!is_bad_overlap && must_download) { | ||
| 96 | const bool alias_check = std::ranges::none_of( | ||
| 97 | image->aliased_images, [&, image](const AliasedImage& alias) { | ||
| 98 | auto& alias_image = slot_images[alias.id]; | ||
| 99 | return (alias_image.frame_tick < image->frame_tick) || | ||
| 100 | (alias_image.modification_tick < image->modification_tick); | ||
| 101 | }); | ||
| 102 | |||
| 103 | if (alias_check) { | ||
| 104 | auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); | ||
| 105 | const auto copies = FullDownloadCopies(image->info); | ||
| 106 | image->DownloadMemory(map, copies); | ||
| 107 | runtime.Finish(); | ||
| 108 | SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); | ||
| 109 | } | ||
| 110 | } | ||
| 111 | if (True(image->flags & ImageFlagBits::Tracked)) { | ||
| 112 | UntrackImage(*image, image_id); | ||
| 113 | } | ||
| 114 | UnregisterImage(image_id); | ||
| 115 | DeleteImage(image_id); | ||
| 116 | if (is_bad_overlap) { | ||
| 117 | ++num_iterations; | ||
| 118 | } | ||
| 119 | } | ||
| 120 | ++deletion_iterator; | ||
| 121 | } | ||
| 122 | } | ||
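| | |||
| | // [Editor's note] Ages implied above: ticks_to_destroy is 100 normally and 60 | ||
| | // once usage passes expected_memory; past critical_memory every image becomes | ||
| | // a candidate and the window halves to 30 ticks, while bad overlaps wait only | ||
| | // ticks_to_destroy >> 4 (about 3-6 ticks) before deletion. | ||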
| 123 | |||
| 124 | template <class P> | ||
| 125 | void TextureCache<P>::TickFrame() { | ||
| 126 | if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { | ||
| 127 | RunGarbageCollector(); | ||
| 128 | } | ||
| 129 | sentenced_images.Tick(); | ||
| 130 | sentenced_framebuffers.Tick(); | ||
| 131 | sentenced_image_view.Tick(); | ||
| 132 | ++frame_tick; | ||
| 133 | } | ||
| 134 | |||
| 135 | template <class P> | ||
| 136 | const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { | ||
| 137 | return slot_image_views[id]; | ||
| 138 | } | ||
| 139 | |||
| 140 | template <class P> | ||
| 141 | typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { | ||
| 142 | return slot_image_views[id]; | ||
| 143 | } | ||
| 144 | |||
| 145 | template <class P> | ||
| 146 | void TextureCache<P>::MarkModification(ImageId id) noexcept { | ||
| 147 | MarkModification(slot_images[id]); | ||
| 148 | } | ||
| 149 | |||
| 150 | template <class P> | ||
| 151 | void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, | ||
| 152 | std::span<ImageViewId> image_view_ids) { | ||
| 153 | FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); | ||
| 154 | } | ||
| 155 | |||
| 156 | template <class P> | ||
| 157 | void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, | ||
| 158 | std::span<ImageViewId> image_view_ids) { | ||
| 159 | FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); | ||
| 160 | } | ||
| 161 | |||
| 162 | template <class P> | ||
| 163 | typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { | ||
| 164 | if (index > graphics_sampler_table.Limit()) { | ||
| 165 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 166 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 167 | } | ||
| 168 | const auto [descriptor, is_new] = graphics_sampler_table.Read(index); | ||
| 169 | SamplerId& id = graphics_sampler_ids[index]; | ||
| 170 | if (is_new) { | ||
| 171 | id = FindSampler(descriptor); | ||
| 172 | } | ||
| 173 | return &slot_samplers[id]; | ||
| 174 | } | ||
| 175 | |||
| 176 | template <class P> | ||
| 177 | typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { | ||
| 178 | if (index > compute_sampler_table.Limit()) { | ||
| 179 | LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); | ||
| 180 | return &slot_samplers[NULL_SAMPLER_ID]; | ||
| 181 | } | ||
| 182 | const auto [descriptor, is_new] = compute_sampler_table.Read(index); | ||
| 183 | SamplerId& id = compute_sampler_ids[index]; | ||
| 184 | if (is_new) { | ||
| 185 | id = FindSampler(descriptor); | ||
| 186 | } | ||
| 187 | return &slot_samplers[id]; | ||
| 188 | } | ||
| 189 | |||
| 190 | template <class P> | ||
| 191 | void TextureCache<P>::SynchronizeGraphicsDescriptors() { | ||
| 192 | using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; | ||
| 193 | const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; | ||
| 194 | const u32 tic_limit = maxwell3d.regs.tic.limit; | ||
| 195 | const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; | ||
| 196 | if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { | ||
| 197 | graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 198 | } | ||
| 199 | if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { | ||
| 200 | graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | template <class P> | ||
| 205 | void TextureCache<P>::SynchronizeComputeDescriptors() { | ||
| 206 | const bool linked_tsc = kepler_compute.launch_description.linked_tsc; | ||
| 207 | const u32 tic_limit = kepler_compute.regs.tic.limit; | ||
| 208 | const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; | ||
| 209 | const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); | ||
| 210 | if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { | ||
| 211 | compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); | ||
| 212 | } | ||
| 213 | if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { | ||
| 214 | compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); | ||
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | template <class P> | ||
| 219 | void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | ||
| 220 | using namespace VideoCommon::Dirty; | ||
| 221 | auto& flags = maxwell3d.dirty.flags; | ||
| 222 | if (!flags[Dirty::RenderTargets]) { | ||
| 223 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 224 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 225 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 226 | } | ||
| 227 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 228 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 229 | return; | ||
| 230 | } | ||
| 231 | flags[Dirty::RenderTargets] = false; | ||
| 232 | |||
| 233 | // Render target control affects all render targets, so force lookups when it is dirty | ||
| 234 | const bool force = flags[Dirty::RenderTargetControl]; | ||
| 235 | flags[Dirty::RenderTargetControl] = false; | ||
| 236 | |||
| 237 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 238 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | ||
| 239 | if (flags[Dirty::ColorBuffer0 + index] || force) { | ||
| 240 | flags[Dirty::ColorBuffer0 + index] = false; | ||
| 241 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | ||
| 242 | } | ||
| 243 | PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); | ||
| 244 | } | ||
| 245 | if (flags[Dirty::ZetaBuffer] || force) { | ||
| 246 | flags[Dirty::ZetaBuffer] = false; | ||
| 247 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | ||
| 248 | } | ||
| 249 | const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; | ||
| 250 | PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); | ||
| 251 | |||
| 252 | for (size_t index = 0; index < NUM_RT; ++index) { | ||
| 253 | render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); | ||
| 254 | } | ||
| 255 | render_targets.size = Extent2D{ | ||
| 256 | maxwell3d.regs.render_area.width, | ||
| 257 | maxwell3d.regs.render_area.height, | ||
| 258 | }; | ||
| 259 | } | ||
| 260 | |||
| 261 | template <class P> | ||
| 262 | typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { | ||
| 263 | return &slot_framebuffers[GetFramebufferId(render_targets)]; | ||
| 264 | } | ||
| 265 | |||
| 266 | template <class P> | ||
| 267 | void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, | ||
| 268 | std::span<ImageViewId> cached_image_view_ids, | ||
| 269 | std::span<const u32> indices, | ||
| 270 | std::span<ImageViewId> image_view_ids) { | ||
| 271 | ASSERT(indices.size() <= image_view_ids.size()); | ||
| 272 | do { | ||
| 273 | has_deleted_images = false; | ||
| 274 | std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { | ||
| 275 | return VisitImageView(table, cached_image_view_ids, index); | ||
| 276 | }); | ||
| 277 | } while (has_deleted_images); | ||
| 278 | } | ||
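| | |||
| | // [Editor's note] The do/while above restarts the whole transform whenever a | ||
| | // lookup creates a new image and, in doing so, deletes overlapping ones | ||
| | // (the deletion path sets has_deleted_images): ids written into | ||
| | // image_view_ids earlier in the same pass may have been invalidated. | ||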
| 279 | |||
| 280 | template <class P> | ||
| 281 | ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, | ||
| 282 | std::span<ImageViewId> cached_image_view_ids, | ||
| 283 | u32 index) { | ||
| 284 | if (index > table.Limit()) { | ||
| 285 | LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); | ||
| 286 | return NULL_IMAGE_VIEW_ID; | ||
| 287 | } | ||
| 288 | const auto [descriptor, is_new] = table.Read(index); | ||
| 289 | ImageViewId& image_view_id = cached_image_view_ids[index]; | ||
| 290 | if (is_new) { | ||
| 291 | image_view_id = FindImageView(descriptor); | ||
| 292 | } | ||
| 293 | if (image_view_id != NULL_IMAGE_VIEW_ID) { | ||
| 294 | PrepareImageView(image_view_id, false, false); | ||
| 295 | } | ||
| 296 | return image_view_id; | ||
| 297 | } | ||
| 298 | |||
| 299 | template <class P> | ||
| 300 | FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { | ||
| 301 | const auto [pair, is_new] = framebuffers.try_emplace(key); | ||
| 302 | FramebufferId& framebuffer_id = pair->second; | ||
| 303 | if (!is_new) { | ||
| 304 | return framebuffer_id; | ||
| 305 | } | ||
| 306 | std::array<ImageView*, NUM_RT> color_buffers; | ||
| 307 | std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), | ||
| 308 | [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); | ||
| 309 | ImageView* const depth_buffer = | ||
| 310 | key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; | ||
| 311 | framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); | ||
| 312 | return framebuffer_id; | ||
| 313 | } | ||
| 314 | |||
| 315 | template <class P> | ||
| 316 | void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | ||
| 317 | ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { | ||
| 318 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 319 | return; | ||
| 320 | } | ||
| 321 | image.flags |= ImageFlagBits::CpuModified; | ||
| 322 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 323 | UntrackImage(image, image_id); | ||
| 324 | } | ||
| 325 | }); | ||
| 326 | } | ||
| 327 | |||
| 328 | template <class P> | ||
| 329 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | ||
| 330 | std::vector<ImageId> images; | ||
| 331 | ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { | ||
| 332 | if (!image.IsSafeDownload()) { | ||
| 333 | return; | ||
| 334 | } | ||
| 335 | image.flags &= ~ImageFlagBits::GpuModified; | ||
| 336 | images.push_back(image_id); | ||
| 337 | }); | ||
| 338 | if (images.empty()) { | ||
| 339 | return; | ||
| 340 | } | ||
| 341 | std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { | ||
| 342 | return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; | ||
| 343 | }); | ||
| 344 | for (const ImageId image_id : images) { | ||
| 345 | Image& image = slot_images[image_id]; | ||
| 346 | auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); | ||
| 347 | const auto copies = FullDownloadCopies(image.info); | ||
| 348 | image.DownloadMemory(map, copies); | ||
| 349 | runtime.Finish(); | ||
| 350 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); | ||
| 351 | } | ||
| 352 | } | ||
| 353 | |||
| 354 | template <class P> | ||
| 355 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | ||
| 356 | std::vector<ImageId> deleted_images; | ||
| 357 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||
| 358 | for (const ImageId id : deleted_images) { | ||
| 359 | Image& image = slot_images[id]; | ||
| 360 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 361 | UntrackImage(image, id); | ||
| 362 | } | ||
| 363 | UnregisterImage(id); | ||
| 364 | DeleteImage(id); | ||
| 365 | } | ||
| 366 | } | ||
| 367 | |||
| 368 | template <class P> | ||
| 369 | void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { | ||
| 370 | std::vector<ImageId> deleted_images; | ||
| 371 | ForEachImageInRegionGPU(gpu_addr, size, | ||
| 372 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | ||
| 373 | for (const ImageId id : deleted_images) { | ||
| 374 | Image& image = slot_images[id]; | ||
| 375 | if (True(image.flags & ImageFlagBits::Remapped)) { | ||
| 376 | continue; | ||
| 377 | } | ||
| 378 | image.flags |= ImageFlagBits::Remapped; | ||
| 379 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 380 | UntrackImage(image, id); | ||
| 381 | } | ||
| 382 | } | ||
| 383 | } | ||
| 384 | |||
| 385 | template <class P> | ||
| 386 | void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, | ||
| 387 | const Tegra::Engines::Fermi2D::Surface& src, | ||
| 388 | const Tegra::Engines::Fermi2D::Config& copy) { | ||
| 389 | const BlitImages images = GetBlitImages(dst, src); | ||
| 390 | const ImageId dst_id = images.dst_id; | ||
| 391 | const ImageId src_id = images.src_id; | ||
| 392 | PrepareImage(src_id, false, false); | ||
| 393 | PrepareImage(dst_id, true, false); | ||
| 394 | |||
| 395 | ImageBase& dst_image = slot_images[dst_id]; | ||
| 396 | const ImageBase& src_image = slot_images[src_id]; | ||
| 397 | |||
| 398 | // TODO: Deduplicate | ||
| 399 | const std::optional src_base = src_image.TryFindBase(src.Address()); | ||
| 400 | const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; | ||
| 401 | const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); | ||
| 402 | const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); | ||
| 403 | const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); | ||
| 404 | const Region2D src_region{ | ||
| 405 | Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, | ||
| 406 | Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, | ||
| 407 | }; | ||
| 408 | |||
| 409 | const std::optional dst_base = dst_image.TryFindBase(dst.Address()); | ||
| 410 | const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; | ||
| 411 | const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); | ||
| 412 | const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); | ||
| 413 | const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); | ||
| 414 | const Region2D dst_region{ | ||
| 415 | Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, | ||
| 416 | Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, | ||
| 417 | }; | ||
| 418 | |||
| 419 | // Always call this after src_framebuffer_id was queried, as the address might be invalidated. | ||
| 420 | Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; | ||
| 421 | if constexpr (FRAMEBUFFER_BLITS) { | ||
| 422 | // OpenGL blits from framebuffers, not images | ||
| 423 | Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; | ||
| 424 | runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, | ||
| 425 | copy.filter, copy.operation); | ||
| 426 | } else { | ||
| 427 | // Vulkan can blit images, but it lacks format reinterpretations | ||
| 428 | // Provide a framebuffer in case it's necessary | ||
| 429 | ImageView& dst_view = slot_image_views[dst_view_id]; | ||
| 430 | ImageView& src_view = slot_image_views[src_view_id]; | ||
| 431 | runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, | ||
| 432 | copy.operation); | ||
| 433 | } | ||
| 434 | } | ||
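| | |||
| | // [Editor's note] FRAMEBUFFER_BLITS is a compile-time trait of the backend | ||
| | // parameter P, so the branch above is resolved at compilation: the OpenGL | ||
| | // backend blits via framebuffers, the Vulkan backend via images (the trait | ||
| | // definitions live in the backend headers, not shown here). | ||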
| 435 | |||
| 436 | template <class P> | ||
| 437 | typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { | ||
| 438 | // TODO: Properly implement this | ||
| 439 | const auto it = page_table.find(cpu_addr >> PAGE_BITS); | ||
| 440 | if (it == page_table.end()) { | ||
| 441 | return nullptr; | ||
| 442 | } | ||
| 443 | const auto& image_map_ids = it->second; | ||
| 444 | for (const ImageMapId map_id : image_map_ids) { | ||
| 445 | const ImageMapView& map = slot_map_views[map_id]; | ||
| 446 | const ImageBase& image = slot_images[map.image_id]; | ||
| 447 | if (image.cpu_addr != cpu_addr) { | ||
| 448 | continue; | ||
| 449 | } | ||
| 450 | if (image.image_view_ids.empty()) { | ||
| 451 | continue; | ||
| 452 | } | ||
| 453 | return &slot_image_views[image.image_view_ids.at(0)]; | ||
| 454 | } | ||
| 455 | return nullptr; | ||
| 456 | } | ||
| 457 | |||
| 458 | template <class P> | ||
| 459 | bool TextureCache<P>::HasUncommittedFlushes() const noexcept { | ||
| 460 | return !uncommitted_downloads.empty(); | ||
| 461 | } | ||
| 462 | |||
| 463 | template <class P> | ||
| 464 | bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { | ||
| 465 | return !committed_downloads.empty() && !committed_downloads.front().empty(); | ||
| 466 | } | ||
| 467 | |||
| 468 | template <class P> | ||
| 469 | void TextureCache<P>::CommitAsyncFlushes() { | ||
| 470 | // This intentionally passes the value by copy | ||
| 471 | committed_downloads.push(uncommitted_downloads); | ||
| 472 | uncommitted_downloads.clear(); | ||
| 473 | } | ||
| 474 | |||
| 475 | template <class P> | ||
| 476 | void TextureCache<P>::PopAsyncFlushes() { | ||
| 477 | if (committed_downloads.empty()) { | ||
| 478 | return; | ||
| 479 | } | ||
| 480 | const std::span<const ImageId> download_ids = committed_downloads.front(); | ||
| 481 | if (download_ids.empty()) { | ||
| 482 | committed_downloads.pop(); | ||
| 483 | return; | ||
| 484 | } | ||
| 485 | size_t total_size_bytes = 0; | ||
| 486 | for (const ImageId image_id : download_ids) { | ||
| 487 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | ||
| 488 | } | ||
| 489 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | ||
| 490 | const size_t original_offset = download_map.offset; | ||
| 491 | for (const ImageId image_id : download_ids) { | ||
| 492 | Image& image = slot_images[image_id]; | ||
| 493 | const auto copies = FullDownloadCopies(image.info); | ||
| 494 | image.DownloadMemory(download_map, copies); | ||
| 495 | download_map.offset += image.unswizzled_size_bytes; | ||
| 496 | } | ||
| 497 | // Wait for downloads to finish | ||
| 498 | runtime.Finish(); | ||
| 499 | |||
| 500 | download_map.offset = original_offset; | ||
| 501 | std::span<u8> download_span = download_map.mapped_span; | ||
| 502 | for (const ImageId image_id : download_ids) { | ||
| 503 | const ImageBase& image = slot_images[image_id]; | ||
| 504 | const auto copies = FullDownloadCopies(image.info); | ||
| 505 | SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); | ||
| 506 | download_map.offset += image.unswizzled_size_bytes; | ||
| 507 | download_span = download_span.subspan(image.unswizzled_size_bytes); | ||
| 508 | } | ||
| 509 | committed_downloads.pop(); | ||
| 510 | } | ||
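| | |||
| | // [Editor's note] Staging layout above: the first loop packs every download | ||
| | // back-to-back into one staging buffer, advancing download_map.offset; after | ||
| | // runtime.Finish() the offset is rewound to original_offset and the second | ||
| | // loop walks the identical layout, re-swizzling each image into guest memory | ||
| | // from download_span, which advances in lockstep via subspan(). | ||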
| 511 | |||
| 512 | template <class P> | ||
| 513 | bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { | ||
| 514 | bool is_modified = false; | ||
| 515 | ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { | ||
| 516 | if (False(image.flags & ImageFlagBits::GpuModified)) { | ||
| 517 | return false; | ||
| 518 | } | ||
| 519 | is_modified = true; | ||
| 520 | return true; | ||
| 521 | }); | ||
| 522 | return is_modified; | ||
| 523 | } | ||
| 524 | |||
| 525 | template <class P> | ||
| 526 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | ||
| 527 | if (False(image.flags & ImageFlagBits::CpuModified)) { | ||
| 528 | // Only upload modified images | ||
| 529 | return; | ||
| 530 | } | ||
| 531 | image.flags &= ~ImageFlagBits::CpuModified; | ||
| 532 | TrackImage(image, image_id); | ||
| 533 | |||
| 534 | if (image.info.num_samples > 1) { | ||
| 535 | LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | ||
| 536 | return; | ||
| 537 | } | ||
| 538 | auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); | ||
| 539 | UploadImageContents(image, staging); | ||
| 540 | runtime.InsertUploadMemoryBarrier(); | ||
| 541 | } | ||
| 542 | |||
| 543 | template <class P> | ||
| 544 | template <typename StagingBuffer> | ||
| 545 | void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { | ||
| 546 | const std::span<u8> mapped_span = staging.mapped_span; | ||
| 547 | const GPUVAddr gpu_addr = image.gpu_addr; | ||
| 548 | |||
| 549 | if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { | ||
| 550 | gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); | ||
| 551 | const auto uploads = FullUploadSwizzles(image.info); | ||
| 552 | runtime.AccelerateImageUpload(image, staging, uploads); | ||
| 553 | } else if (True(image.flags & ImageFlagBits::Converted)) { | ||
| 554 | std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); | ||
| 555 | auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); | ||
| 556 | ConvertImage(unswizzled_data, image.info, mapped_span, copies); | ||
| 557 | image.UploadMemory(staging, copies); | ||
| 558 | } else { | ||
| 559 | const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); | ||
| 560 | image.UploadMemory(staging, copies); | ||
| 561 | } | ||
| 562 | } | ||
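| | |||
| | // [Editor's note] Three upload paths above: accelerated uploads copy raw | ||
| | // (still swizzled) guest bytes and let the GPU deswizzle; converted formats | ||
| | // are deswizzled on the CPU into a scratch buffer and then converted into the | ||
| | // staging map; the common path deswizzles straight into the staging map. | ||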
| 563 | |||
| 564 | template <class P> | ||
| 565 | ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { | ||
| 566 | if (!IsValidEntry(gpu_memory, config)) { | ||
| 567 | return NULL_IMAGE_VIEW_ID; | ||
| 568 | } | ||
| 569 | const auto [pair, is_new] = image_views.try_emplace(config); | ||
| 570 | ImageViewId& image_view_id = pair->second; | ||
| 571 | if (is_new) { | ||
| 572 | image_view_id = CreateImageView(config); | ||
| 573 | } | ||
| 574 | return image_view_id; | ||
| 575 | } | ||
| 576 | |||
| 577 | template <class P> | ||
| 578 | ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { | ||
| 579 | const ImageInfo info(config); | ||
| 580 | if (info.type == ImageType::Buffer) { | ||
| 581 | const ImageViewInfo view_info(config, 0); | ||
| 582 | return slot_image_views.insert(runtime, info, view_info, config.Address()); | ||
| 583 | } | ||
| 584 | const u32 layer_offset = config.BaseLayer() * info.layer_stride; | ||
| 585 | const GPUVAddr image_gpu_addr = config.Address() - layer_offset; | ||
| 586 | const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); | ||
| 587 | if (!image_id) { | ||
| 588 | return NULL_IMAGE_VIEW_ID; | ||
| 589 | } | ||
| 590 | ImageBase& image = slot_images[image_id]; | ||
| 591 | const SubresourceBase base = image.TryFindBase(config.Address()).value(); | ||
| 592 | ASSERT(base.level == 0); | ||
| 593 | const ImageViewInfo view_info(config, base.layer); | ||
| 594 | const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); | ||
| 595 | ImageViewBase& image_view = slot_image_views[image_view_id]; | ||
| 596 | image_view.flags |= ImageViewFlagBits::Strong; | ||
| 597 | image.flags |= ImageFlagBits::Strong; | ||
| 598 | return image_view_id; | ||
| 599 | } | ||
| 600 | |||
| 601 | template <class P> | ||
| 602 | ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 603 | RelaxedOptions options) { | ||
| 604 | if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { | ||
| 605 | return image_id; | ||
| 606 | } | ||
| 607 | return InsertImage(info, gpu_addr, options); | ||
| 608 | } | ||
| 609 | |||
| 610 | template <class P> | ||
| 611 | ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 612 | RelaxedOptions options) { | ||
| 613 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 614 | if (!cpu_addr) { | ||
| 615 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||
| 616 | if (!cpu_addr) { | ||
| 617 | return ImageId{}; | ||
| 618 | } | ||
| 619 | } | ||
| 620 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 621 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 622 | ImageId image_id; | ||
| 623 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | ||
| 624 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | ||
| 625 | return false; | ||
| 626 | } | ||
| 627 | if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { | ||
| 628 | const bool strict_size = False(options & RelaxedOptions::Size) && | ||
| 629 | True(existing_image.flags & ImageFlagBits::Strong); | ||
| 630 | const ImageInfo& existing = existing_image.info; | ||
| 631 | if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && | ||
| 632 | existing.pitch == info.pitch && | ||
| 633 | IsPitchLinearSameSize(existing, info, strict_size) && | ||
| 634 | IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { | ||
| 635 | image_id = existing_image_id; | ||
| 636 | return true; | ||
| 637 | } | ||
| 638 | } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, | ||
| 639 | native_bgr)) { | ||
| 640 | image_id = existing_image_id; | ||
| 641 | return true; | ||
| 642 | } | ||
| 643 | return false; | ||
| 644 | }; | ||
| 645 | ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||
| 646 | return image_id; | ||
| 647 | } | ||
| 648 | |||
| 649 | template <class P> | ||
| 650 | ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 651 | RelaxedOptions options) { | ||
| 652 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 653 | if (!cpu_addr) { | ||
| 654 | const auto size = CalculateGuestSizeInBytes(info); | ||
| 655 | cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); | ||
| 656 | if (!cpu_addr) { | ||
| 657 | const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; | ||
| 658 | virtual_invalid_space += Common::AlignUp(size, 32); | ||
| 659 | cpu_addr = std::optional<VAddr>(fake_addr); | ||
| 660 | } | ||
| 661 | } | ||
| 662 | ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); | ||
| 663 | const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); | ||
| 664 | const Image& image = slot_images[image_id]; | ||
| 665 | // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different | ||
| 666 | const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); | ||
| 667 | if (is_new) { | ||
| 668 | it->second = slot_image_allocs.insert(); | ||
| 669 | } | ||
| 670 | slot_image_allocs[it->second].images.push_back(image_id); | ||
| 671 | return image_id; | ||
| 672 | } | ||
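| | |||
| | // [Editor's note] On the fallback path above, images whose GPU range has no | ||
| | // CPU mapping are parked at a huge fake virtual address far outside the | ||
| | // guest's range, spaced apart by virtual_invalid_space, so they can still be | ||
| | // registered in the CPU page table without colliding with real addresses. | ||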
| 673 | |||
| 674 | template <class P> | ||
| 675 | ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { | ||
| 676 | ImageInfo new_info = info; | ||
| 677 | const size_t size_bytes = CalculateGuestSizeInBytes(new_info); | ||
| 678 | const bool broken_views = runtime.HasBrokenTextureViewFormats(); | ||
| 679 | const bool native_bgr = runtime.HasNativeBgr(); | ||
| 680 | std::vector<ImageId> overlap_ids; | ||
| 681 | std::unordered_set<ImageId> overlaps_found; | ||
| 682 | std::vector<ImageId> left_aliased_ids; | ||
| 683 | std::vector<ImageId> right_aliased_ids; | ||
| 684 | std::unordered_set<ImageId> ignore_textures; | ||
| 685 | std::vector<ImageId> bad_overlap_ids; | ||
| 686 | const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 687 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 688 | ignore_textures.insert(overlap_id); | ||
| 689 | return; | ||
| 690 | } | ||
| 691 | if (info.type == ImageType::Linear) { | ||
| 692 | if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { | ||
| 693 | // Alias linear images with the same pitch | ||
| 694 | left_aliased_ids.push_back(overlap_id); | ||
| 695 | } | ||
| 696 | return; | ||
| 697 | } | ||
| 698 | overlaps_found.insert(overlap_id); | ||
| 699 | static constexpr bool strict_size = true; | ||
| 700 | const std::optional<OverlapResult> solution = ResolveOverlap( | ||
| 701 | new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); | ||
| 702 | if (solution) { | ||
| 703 | gpu_addr = solution->gpu_addr; | ||
| 704 | cpu_addr = solution->cpu_addr; | ||
| 705 | new_info.resources = solution->resources; | ||
| 706 | overlap_ids.push_back(overlap_id); | ||
| 707 | return; | ||
| 708 | } | ||
| 709 | static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; | ||
| 710 | const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); | ||
| 711 | if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { | ||
| 712 | left_aliased_ids.push_back(overlap_id); | ||
| 713 | overlap.flags |= ImageFlagBits::Alias; | ||
| 714 | } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, | ||
| 715 | broken_views, native_bgr)) { | ||
| 716 | right_aliased_ids.push_back(overlap_id); | ||
| 717 | overlap.flags |= ImageFlagBits::Alias; | ||
| 718 | } else { | ||
| 719 | bad_overlap_ids.push_back(overlap_id); | ||
| 720 | overlap.flags |= ImageFlagBits::BadOverlap; | ||
| 721 | } | ||
| 722 | }; | ||
| 723 | ForEachImageInRegion(cpu_addr, size_bytes, region_check); | ||
| 724 | const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { | ||
| 725 | if (!overlaps_found.contains(overlap_id)) { | ||
| 726 | if (True(overlap.flags & ImageFlagBits::Remapped)) { | ||
| 727 | ignore_textures.insert(overlap_id); | ||
| 728 | } | ||
| 729 | if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { | ||
| 730 | ignore_textures.insert(overlap_id); | ||
| 731 | } | ||
| 732 | } | ||
| 733 | }; | ||
| 734 | ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | ||
| 735 | const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); | ||
| 736 | Image& new_image = slot_images[new_image_id]; | ||
| 737 | |||
| 738 | if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { | ||
| 739 | new_image.flags |= ImageFlagBits::Sparse; | ||
| 740 | } | ||
| 741 | |||
| 742 | for (const ImageId overlap_id : ignore_textures) { | ||
| 743 | Image& overlap = slot_images[overlap_id]; | ||
| 744 | if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||
| 745 | UNIMPLEMENTED(); | ||
| 746 | } | ||
| 747 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 748 | UntrackImage(overlap, overlap_id); | ||
| 749 | } | ||
| 750 | UnregisterImage(overlap_id); | ||
| 751 | DeleteImage(overlap_id); | ||
| 752 | } | ||
| 753 | |||
| 754 | // TODO: Only upload what we need | ||
| 755 | RefreshContents(new_image, new_image_id); | ||
| 756 | |||
| 757 | for (const ImageId overlap_id : overlap_ids) { | ||
| 758 | Image& overlap = slot_images[overlap_id]; | ||
| 759 | if (overlap.info.num_samples != new_image.info.num_samples) { | ||
| 760 | LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||
| 761 | } else { | ||
| 762 | const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | ||
| 763 | const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); | ||
| 764 | runtime.CopyImage(new_image, overlap, copies); | ||
| 765 | } | ||
| 766 | if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||
| 767 | UntrackImage(overlap, overlap_id); | ||
| 768 | } | ||
| 769 | UnregisterImage(overlap_id); | ||
| 770 | DeleteImage(overlap_id); | ||
| 771 | } | ||
| 772 | ImageBase& new_image_base = new_image; | ||
| 773 | for (const ImageId aliased_id : right_aliased_ids) { | ||
| 774 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 775 | AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); | ||
| 776 | new_image.flags |= ImageFlagBits::Alias; | ||
| 777 | } | ||
| 778 | for (const ImageId aliased_id : left_aliased_ids) { | ||
| 779 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 780 | AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); | ||
| 781 | new_image.flags |= ImageFlagBits::Alias; | ||
| 782 | } | ||
| 783 | for (const ImageId aliased_id : bad_overlap_ids) { | ||
| 784 | ImageBase& aliased = slot_images[aliased_id]; | ||
| 785 | aliased.overlapping_images.push_back(new_image_id); | ||
| 786 | new_image.overlapping_images.push_back(aliased_id); | ||
| 787 | new_image.flags |= ImageFlagBits::BadOverlap; | ||
| 788 | } | ||
| 789 | RegisterImage(new_image_id); | ||
| 790 | return new_image_id; | ||
| 791 | } | ||
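| | |||
| | // [Editor's note] Recap of the classification above: overlap_ids are fully | ||
| | // resolved into the new image (contents copied, originals deleted); | ||
| | // left/right_aliased_ids survive as aliases to be synchronized lazily; | ||
| | // bad_overlap_ids stay registered but are cross-linked and flagged so the | ||
| | // garbage collector can prioritize them; ignore_textures (remapped or exact | ||
| | // sparse duplicates) are unregistered and deleted outright. | ||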
| 792 | |||
| 793 | template <class P> | ||
| 794 | typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( | ||
| 795 | const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { | ||
| 796 | static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; | ||
| 797 | const GPUVAddr dst_addr = dst.Address(); | ||
| 798 | const GPUVAddr src_addr = src.Address(); | ||
| 799 | ImageInfo dst_info(dst); | ||
| 800 | ImageInfo src_info(src); | ||
| 801 | ImageId dst_id; | ||
| 802 | ImageId src_id; | ||
| 803 | do { | ||
| 804 | has_deleted_images = false; | ||
| 805 | dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); | ||
| 806 | src_id = FindImage(src_info, src_addr, FIND_OPTIONS); | ||
| 807 | const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; | ||
| 808 | const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; | ||
| 809 | DeduceBlitImages(dst_info, src_info, dst_image, src_image); | ||
| 810 | if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { | ||
| 811 | continue; | ||
| 812 | } | ||
| 813 | if (!dst_id) { | ||
| 814 | dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); | ||
| 815 | } | ||
| 816 | if (!src_id) { | ||
| 817 | src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); | ||
| 818 | } | ||
| 819 | } while (has_deleted_images); | ||
| 820 | return BlitImages{ | ||
| 821 | .dst_id = dst_id, | ||
| 822 | .src_id = src_id, | ||
| 823 | .dst_format = dst_info.format, | ||
| 824 | .src_format = src_info.format, | ||
| 825 | }; | ||
| 826 | } | ||
| 827 | |||
| 828 | template <class P> | ||
| 829 | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||
| 830 | if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||
| 831 | return NULL_SAMPLER_ID; | ||
| 832 | } | ||
| 833 | const auto [pair, is_new] = samplers.try_emplace(config); | ||
| 834 | if (is_new) { | ||
| 835 | pair->second = slot_samplers.insert(runtime, config); | ||
| 836 | } | ||
| 837 | return pair->second; | ||
| 838 | } | ||
| 839 | |||
| 840 | template <class P> | ||
| 841 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | ||
| 842 | const auto& regs = maxwell3d.regs; | ||
| 843 | if (index >= regs.rt_control.count) { | ||
| 844 | return ImageViewId{}; | ||
| 845 | } | ||
| 846 | const auto& rt = regs.rt[index]; | ||
| 847 | const GPUVAddr gpu_addr = rt.Address(); | ||
| 848 | if (gpu_addr == 0) { | ||
| 849 | return ImageViewId{}; | ||
| 850 | } | ||
| 851 | if (rt.format == Tegra::RenderTargetFormat::NONE) { | ||
| 852 | return ImageViewId{}; | ||
| 853 | } | ||
| 854 | const ImageInfo info(regs, index); | ||
| 855 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 856 | } | ||
| 857 | |||
| 858 | template <class P> | ||
| 859 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | ||
| 860 | const auto& regs = maxwell3d.regs; | ||
| 861 | if (!regs.zeta_enable) { | ||
| 862 | return ImageViewId{}; | ||
| 863 | } | ||
| 864 | const GPUVAddr gpu_addr = regs.zeta.Address(); | ||
| 865 | if (gpu_addr == 0) { | ||
| 866 | return ImageViewId{}; | ||
| 867 | } | ||
| 868 | const ImageInfo info(regs); | ||
| 869 | return FindRenderTargetView(info, gpu_addr, is_clear); | ||
| 870 | } | ||
| 871 | |||
| 872 | template <class P> | ||
| 873 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | ||
| 874 | bool is_clear) { | ||
| 875 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | ||
| 876 | const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); | ||
| 877 | if (!image_id) { | ||
| 878 | return NULL_IMAGE_VIEW_ID; | ||
| 879 | } | ||
| 880 | Image& image = slot_images[image_id]; | ||
| 881 | const ImageViewType view_type = RenderTargetImageViewType(info); | ||
| 882 | SubresourceBase base; | ||
| 883 | if (image.info.type == ImageType::Linear) { | ||
| 884 | base = SubresourceBase{.level = 0, .layer = 0}; | ||
| 885 | } else { | ||
| 886 | base = image.TryFindBase(gpu_addr).value(); | ||
| 887 | } | ||
| 888 | const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; | ||
| 889 | const SubresourceRange range{ | ||
| 890 | .base = base, | ||
| 891 | .extent = {.levels = 1, .layers = layers}, | ||
| 892 | }; | ||
| 893 | return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); | ||
| 894 | } | ||
| 895 | |||
| 896 | template <class P> | ||
| 897 | template <typename Func> | ||
| 898 | void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { | ||
| 899 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 900 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 901 | boost::container::small_vector<ImageId, 32> images; | ||
| 902 | boost::container::small_vector<ImageMapId, 32> maps; | ||
| 903 | ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { | ||
| 904 | const auto it = page_table.find(page); | ||
| 905 | if (it == page_table.end()) { | ||
| 906 | if constexpr (BOOL_BREAK) { | ||
| 907 | return false; | ||
| 908 | } else { | ||
| 909 | return; | ||
| 910 | } | ||
| 911 | } | ||
| 912 | for (const ImageMapId map_id : it->second) { | ||
| 913 | ImageMapView& map = slot_map_views[map_id]; | ||
| 914 | if (map.picked) { | ||
| 915 | continue; | ||
| 916 | } | ||
| 917 | if (!map.Overlaps(cpu_addr, size)) { | ||
| 918 | continue; | ||
| 919 | } | ||
| 920 | map.picked = true; | ||
| 921 | maps.push_back(map_id); | ||
| 922 | Image& image = slot_images[map.image_id]; | ||
| 923 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 924 | continue; | ||
| 925 | } | ||
| 926 | image.flags |= ImageFlagBits::Picked; | ||
| 927 | images.push_back(map.image_id); | ||
| 928 | if constexpr (BOOL_BREAK) { | ||
| 929 | if (func(map.image_id, image)) { | ||
| 930 | return true; | ||
| 931 | } | ||
| 932 | } else { | ||
| 933 | func(map.image_id, image); | ||
| 934 | } | ||
| 935 | } | ||
| 936 | if constexpr (BOOL_BREAK) { | ||
| 937 | return false; | ||
| 938 | } | ||
| 939 | }); | ||
| 940 | for (const ImageId image_id : images) { | ||
| 941 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 942 | } | ||
| 943 | for (const ImageMapId map_id : maps) { | ||
| 944 | slot_map_views[map_id].picked = false; | ||
| 945 | } | ||
| 946 | } | ||
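| | |||
| | // [Editor's note] Functor contract, as exercised by IsRegionGpuModified | ||
| | // above (hypothetical caller shown): a bool-returning functor short-circuits | ||
| | // the walk on true; a void-returning functor visits every overlapping image. | ||
| | // | ||
| | //   bool found = false; | ||
| | //   ForEachImageInRegion(addr, size, [&](ImageId, ImageBase& image) { | ||
| | //       found = True(image.flags & ImageFlagBits::GpuModified); | ||
| | //       return found; // returning true stops the iteration early | ||
| | //   }); | ||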
| 947 | |||
| 948 | template <class P> | ||
| 949 | template <typename Func> | ||
| 950 | void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 951 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 952 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 953 | boost::container::small_vector<ImageId, 8> images; | ||
| 954 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 955 | const auto it = gpu_page_table.find(page); | ||
| 956 | if (it == gpu_page_table.end()) { | ||
| 957 | if constexpr (BOOL_BREAK) { | ||
| 958 | return false; | ||
| 959 | } else { | ||
| 960 | return; | ||
| 961 | } | ||
| 962 | } | ||
| 963 | for (const ImageId image_id : it->second) { | ||
| 964 | Image& image = slot_images[image_id]; | ||
| 965 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 966 | continue; | ||
| 967 | } | ||
| 968 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 969 | continue; | ||
| 970 | } | ||
| 971 | image.flags |= ImageFlagBits::Picked; | ||
| 972 | images.push_back(image_id); | ||
| 973 | if constexpr (BOOL_BREAK) { | ||
| 974 | if (func(image_id, image)) { | ||
| 975 | return true; | ||
| 976 | } | ||
| 977 | } else { | ||
| 978 | func(image_id, image); | ||
| 979 | } | ||
| 980 | } | ||
| 981 | if constexpr (BOOL_BREAK) { | ||
| 982 | return false; | ||
| 983 | } | ||
| 984 | }); | ||
| 985 | for (const ImageId image_id : images) { | ||
| 986 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 987 | } | ||
| 988 | } | ||
| 989 | |||
| 990 | template <class P> | ||
| 991 | template <typename Func> | ||
| 992 | void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||
| 993 | using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||
| 994 | static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||
| 995 | boost::container::small_vector<ImageId, 8> images; | ||
| 996 | ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||
| 997 | const auto it = sparse_page_table.find(page); | ||
| 998 | if (it == sparse_page_table.end()) { | ||
| 999 | if constexpr (BOOL_BREAK) { | ||
| 1000 | return false; | ||
| 1001 | } else { | ||
| 1002 | return; | ||
| 1003 | } | ||
| 1004 | } | ||
| 1005 | for (const ImageId image_id : it->second) { | ||
| 1006 | Image& image = slot_images[image_id]; | ||
| 1007 | if (True(image.flags & ImageFlagBits::Picked)) { | ||
| 1008 | continue; | ||
| 1009 | } | ||
| 1010 | if (!image.OverlapsGPU(gpu_addr, size)) { | ||
| 1011 | continue; | ||
| 1012 | } | ||
| 1013 | image.flags |= ImageFlagBits::Picked; | ||
| 1014 | images.push_back(image_id); | ||
| 1015 | if constexpr (BOOL_BREAK) { | ||
| 1016 | if (func(image_id, image)) { | ||
| 1017 | return true; | ||
| 1018 | } | ||
| 1019 | } else { | ||
| 1020 | func(image_id, image); | ||
| 1021 | } | ||
| 1022 | } | ||
| 1023 | if constexpr (BOOL_BREAK) { | ||
| 1024 | return false; | ||
| 1025 | } | ||
| 1026 | }); | ||
| 1027 | for (const ImageId image_id : images) { | ||
| 1028 | slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||
| 1029 | } | ||
| 1030 | } | ||
| 1031 | |||
| 1032 | template <class P> | ||
| 1033 | template <typename Func> | ||
| 1034 | void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { | ||
| 1035 | using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; | ||
| 1036 | static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; | ||
| 1037 | const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); | ||
| 1038 | for (auto& segment : segments) { | ||
| 1039 | const auto gpu_addr = segment.first; | ||
| 1040 | const auto size = segment.second; | ||
| 1041 | std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); | ||
| 1042 | ASSERT(cpu_addr); | ||
| 1043 | if constexpr (RETURNS_BOOL) { | ||
| 1044 | if (func(gpu_addr, *cpu_addr, size)) { | ||
| 1045 | break; | ||
| 1046 | } | ||
| 1047 | } else { | ||
| 1048 | func(gpu_addr, *cpu_addr, size); | ||
| 1049 | } | ||
| 1050 | } | ||
| 1051 | } | ||
| 1052 | |||
| 1053 | template <class P> | ||
| 1054 | ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { | ||
| 1055 | Image& image = slot_images[image_id]; | ||
| 1056 | if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { | ||
| 1057 | return image_view_id; | ||
| 1058 | } | ||
| 1059 | const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); | ||
| 1060 | image.InsertView(info, image_view_id); | ||
| 1061 | return image_view_id; | ||
| 1062 | } | ||
| 1063 | |||
| 1064 | template <class P> | ||
| 1065 | void TextureCache<P>::RegisterImage(ImageId image_id) { | ||
| 1066 | ImageBase& image = slot_images[image_id]; | ||
| 1067 | ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), | ||
| 1068 | "Trying to register an already registered image"); | ||
| 1069 | image.flags |= ImageFlagBits::Registered; | ||
| 1070 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1071 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1072 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1073 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1074 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1075 | } | ||
| 1076 | total_used_memory += Common::AlignUp(tentative_size, 1024); | ||
| 1077 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1078 | [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); | ||
| 1079 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1080 | auto map_id = | ||
| 1081 | slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); | ||
| 1082 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, | ||
| 1083 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1084 | image.map_view_id = map_id; | ||
| 1085 | return; | ||
| 1086 | } | ||
| 1087 | std::vector<ImageViewId> sparse_maps{}; | ||
| 1088 | ForEachSparseSegment( | ||
| 1089 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | ||
| 1090 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | ||
| 1091 | ForEachCPUPage(cpu_addr, size, | ||
| 1092 | [this, map_id](u64 page) { page_table[page].push_back(map_id); }); | ||
| 1093 | sparse_maps.push_back(map_id); | ||
| 1094 | }); | ||
| 1095 | sparse_views.emplace(image_id, std::move(sparse_maps)); | ||
| 1096 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1097 | [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | template <class P> | ||
| 1101 | void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||
| 1102 | Image& image = slot_images[image_id]; | ||
| 1103 | ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), | ||
| 1104 | "Trying to unregister an already registered image"); | ||
| 1105 | image.flags &= ~ImageFlagBits::Registered; | ||
| 1106 | image.flags &= ~ImageFlagBits::BadOverlap; | ||
| 1107 | u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); | ||
| 1108 | if ((IsPixelFormatASTC(image.info.format) && | ||
| 1109 | True(image.flags & ImageFlagBits::AcceleratedUpload)) || | ||
| 1110 | True(image.flags & ImageFlagBits::Converted)) { | ||
| 1111 | tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); | ||
| 1112 | } | ||
| 1113 | total_used_memory -= Common::AlignUp(tentative_size, 1024); | ||
| 1114 | const auto& clear_page_table = | ||
| 1115 | [this, image_id]( | ||
| 1116 | u64 page, | ||
| 1117 | std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { | ||
| 1118 | const auto page_it = selected_page_table.find(page); | ||
| 1119 | if (page_it == selected_page_table.end()) { | ||
| 1120 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1121 | return; | ||
| 1122 | } | ||
| 1123 | std::vector<ImageId>& image_ids = page_it->second; | ||
| 1124 | const auto vector_it = std::ranges::find(image_ids, image_id); | ||
| 1125 | if (vector_it == image_ids.end()) { | ||
| 1126 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1127 | page << PAGE_BITS); | ||
| 1128 | return; | ||
| 1129 | } | ||
| 1130 | image_ids.erase(vector_it); | ||
| 1131 | }; | ||
| 1132 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||
| 1133 | [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); | ||
| 1134 | if (False(image.flags & ImageFlagBits::Sparse)) { | ||
| 1135 | const auto map_id = image.map_view_id; | ||
| 1136 | ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { | ||
| 1137 | const auto page_it = page_table.find(page); | ||
| 1138 | if (page_it == page_table.end()) { | ||
| 1139 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1140 | return; | ||
| 1141 | } | ||
| 1142 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1143 | const auto vector_it = std::ranges::find(image_map_ids, map_id); | ||
| 1144 | if (vector_it == image_map_ids.end()) { | ||
| 1145 | UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", | ||
| 1146 | page << PAGE_BITS); | ||
| 1147 | return; | ||
| 1148 | } | ||
| 1149 | image_map_ids.erase(vector_it); | ||
| 1150 | }); | ||
| 1151 | slot_map_views.erase(map_id); | ||
| 1152 | return; | ||
| 1153 | } | ||
| 1154 | ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { | ||
| 1155 | clear_page_table(page, sparse_page_table); | ||
| 1156 | }); | ||
| 1157 | auto it = sparse_views.find(image_id); | ||
| 1158 | ASSERT(it != sparse_views.end()); | ||
| 1159 | auto& sparse_maps = it->second; | ||
| 1160 | for (auto& map_view_id : sparse_maps) { | ||
| 1161 | const auto& map_range = slot_map_views[map_view_id]; | ||
| 1162 | const VAddr cpu_addr = map_range.cpu_addr; | ||
| 1163 | const std::size_t size = map_range.size; | ||
| 1164 | ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { | ||
| 1165 | const auto page_it = page_table.find(page); | ||
| 1166 | if (page_it == page_table.end()) { | ||
| 1167 | UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); | ||
| 1168 | return; | ||
| 1169 | } | ||
| 1170 | std::vector<ImageMapId>& image_map_ids = page_it->second; | ||
| 1171 | auto vector_it = image_map_ids.begin(); | ||
| 1172 | while (vector_it != image_map_ids.end()) { | ||
| 1173 | ImageMapView& map = slot_map_views[*vector_it]; | ||
| 1174 | if (map.image_id != image_id) { | ||
| 1175 | vector_it++; | ||
| 1176 | continue; | ||
| 1177 | } | ||
| 1178 | if (!map.picked) { | ||
| 1179 | map.picked = true; | ||
| 1180 | } | ||
| 1181 | vector_it = image_map_ids.erase(vector_it); | ||
| 1182 | } | ||
| 1183 | }); | ||
| 1184 | slot_map_views.erase(map_view_id); | ||
| 1185 | } | ||
| 1186 | sparse_views.erase(it); | ||
| 1187 | } | ||
| 1188 | |||
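// Starts CPU write tracking over the image's backing memory. Sparse images that are not yet
// registered have no map views (those are created in RegisterImage), so their mapped segments
// are walked directly instead.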
template <class P>
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
    ASSERT(False(image.flags & ImageFlagBits::Tracked));
    image.flags |= ImageFlagBits::Tracked;
    if (False(image.flags & ImageFlagBits::Sparse)) {
        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
        return;
    }
    if (True(image.flags & ImageFlagBits::Registered)) {
        auto it = sparse_views.find(image_id);
        ASSERT(it != sparse_views.end());
        auto& sparse_maps = it->second;
        for (auto& map_view_id : sparse_maps) {
            const auto& map = slot_map_views[map_view_id];
            const VAddr cpu_addr = map.cpu_addr;
            const std::size_t size = map.size;
            rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
        }
        return;
    }
    ForEachSparseSegment(image,
                         [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
                             rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
                         });
}

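// Stops CPU write tracking over the image's backing memory, decrementing the cached-page counts
// that TrackImage incremented.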
template <class P>
void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
    ASSERT(True(image.flags & ImageFlagBits::Tracked));
    image.flags &= ~ImageFlagBits::Tracked;
    if (False(image.flags & ImageFlagBits::Sparse)) {
        rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
        return;
    }
    ASSERT(True(image.flags & ImageFlagBits::Registered));
    auto it = sparse_views.find(image_id);
    ASSERT(it != sparse_views.end());
    auto& sparse_maps = it->second;
    for (auto& map_view_id : sparse_maps) {
        const auto& map = slot_map_views[map_view_id];
        const VAddr cpu_addr = map.cpu_addr;
        const std::size_t size = map.size;
        rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
    }
}

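// Destroys an untracked, unregistered image: dirties the render target state, drops every view,
// framebuffer, alias and overlap that references it, and queues the objects for deferred
// destruction through the sentenced queues.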
template <class P>
void TextureCache<P>::DeleteImage(ImageId image_id) {
    ImageBase& image = slot_images[image_id];
    const GPUVAddr gpu_addr = image.gpu_addr;
    const auto alloc_it = image_allocs_table.find(gpu_addr);
    if (alloc_it == image_allocs_table.end()) {
        UNREACHABLE_MSG("Trying to delete an image alloc that does not exist at address 0x{:x}",
                        gpu_addr);
        return;
    }
    const ImageAllocId alloc_id = alloc_it->second;
    std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images;
    const auto alloc_image_it = std::ranges::find(alloc_images, image_id);
    if (alloc_image_it == alloc_images.end()) {
        UNREACHABLE_MSG("Trying to delete an image that does not exist");
        return;
    }
    ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked");
    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");

    // Mark render targets as dirty
    auto& dirty = maxwell3d.dirty.flags;
    dirty[Dirty::RenderTargets] = true;
    dirty[Dirty::ZetaBuffer] = true;
    for (size_t rt = 0; rt < NUM_RT; ++rt) {
        dirty[Dirty::ColorBuffer0 + rt] = true;
    }
    const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
    for (const ImageViewId image_view_id : image_view_ids) {
        std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{});
        if (render_targets.depth_buffer_id == image_view_id) {
            render_targets.depth_buffer_id = ImageViewId{};
        }
    }
    RemoveImageViewReferences(image_view_ids);
    RemoveFramebuffers(image_view_ids);

    for (const AliasedImage& alias : image.aliased_images) {
        ImageBase& other_image = slot_images[alias.id];
        [[maybe_unused]] const size_t num_removed_aliases =
            std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) {
                return other_alias.id == image_id;
            });
        other_image.CheckAliasState();
        ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}",
                   num_removed_aliases);
    }
    for (const ImageId overlap_id : image.overlapping_images) {
        ImageBase& other_image = slot_images[overlap_id];
        [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if(
            other_image.overlapping_images,
            [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; });
        other_image.CheckBadOverlapState();
        ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlaps: {}",
                   num_removed_overlaps);
    }
    for (const ImageViewId image_view_id : image_view_ids) {
        sentenced_image_view.Push(std::move(slot_image_views[image_view_id]));
        slot_image_views.erase(image_view_id);
    }
    sentenced_images.Push(std::move(slot_images[image_id]));
    slot_images.erase(image_id);

    alloc_images.erase(alloc_image_it);
    if (alloc_images.empty()) {
        image_allocs_table.erase(alloc_it);
    }
    if constexpr (ENABLE_VALIDATION) {
        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
    }
    graphics_image_table.Invalidate();
    compute_image_table.Invalidate();
    has_deleted_images = true;
}

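// Drops any entries in the image_views lookup table that resolve to one of the removed views.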
template <class P>
void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
    auto it = image_views.begin();
    while (it != image_views.end()) {
        const auto found = std::ranges::find(removed_views, it->second);
        if (found != removed_views.end()) {
            it = image_views.erase(it);
        } else {
            ++it;
        }
    }
}

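// Drops any cached framebuffer whose key contains one of the removed image views.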
template <class P>
void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) {
    auto it = framebuffers.begin();
    while (it != framebuffers.end()) {
        if (it->first.Contains(removed_views)) {
            it = framebuffers.erase(it);
        } else {
            ++it;
        }
    }
}

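// Flags the image as GPU-modified and stamps it with a fresh modification tick; the tick is what
// SynchronizeAliases below uses to order copies between aliases.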
template <class P>
void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
    image.flags |= ImageFlagBits::GpuModified;
    image.modification_tick = ++modification_tick;
}

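// Copies data into the image from every alias that was modified more recently than it, oldest
// tick first, so overlapping views observe each other's writes.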
template <class P>
void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
    boost::container::small_vector<const AliasedImage*, 1> aliased_images;
    ImageBase& image = slot_images[image_id];
    u64 most_recent_tick = image.modification_tick;
    for (const AliasedImage& aliased : image.aliased_images) {
        ImageBase& aliased_image = slot_images[aliased.id];
        if (image.modification_tick < aliased_image.modification_tick) {
            most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
            aliased_images.push_back(&aliased);
        }
    }
    if (aliased_images.empty()) {
        return;
    }
    image.modification_tick = most_recent_tick;
    std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
        const ImageBase& lhs_image = slot_images[lhs->id];
        const ImageBase& rhs_image = slot_images[rhs->id];
        return lhs_image.modification_tick < rhs_image.modification_tick;
    });
    for (const AliasedImage* const aliased : aliased_images) {
        CopyImage(image_id, aliased->id, aliased->copies);
    }
}

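// Makes the image ready for GPU use: either invalidates its contents outright or refreshes them
// from guest memory and synchronizes aliases, optionally marking the image as modified.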
template <class P>
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
    Image& image = slot_images[image_id];
    if (invalidate) {
        image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
        if (False(image.flags & ImageFlagBits::Tracked)) {
            TrackImage(image, image_id);
        }
    } else {
        RefreshContents(image, image_id);
        SynchronizeAliases(image_id);
    }
    if (is_modification) {
        MarkModification(image);
    }
    image.frame_tick = frame_tick;
}

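// Convenience wrapper that resolves an image view to its backing image and prepares it; null ids
// and buffer views have no image to prepare.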
template <class P>
void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification,
                                       bool invalidate) {
    if (!image_view_id) {
        return;
    }
    const ImageViewBase& image_view = slot_image_views[image_view_id];
    if (image_view.IsBuffer()) {
        return;
    }
    PrepareImage(image_view.image_id, is_modification, invalidate);
}

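// Copies between images of the same format type directly (or through the emulated path when the
// runtime cannot copy them); across format types it falls back to a per-subresource conversion
// draw through a render target.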
template <class P>
void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) {
    Image& dst = slot_images[dst_id];
    Image& src = slot_images[src_id];
    const auto dst_format_type = GetFormatType(dst.info.format);
    const auto src_format_type = GetFormatType(src.info.format);
    if (src_format_type == dst_format_type) {
        if constexpr (HAS_EMULATED_COPIES) {
            if (!runtime.CanImageBeCopied(dst, src)) {
                return runtime.EmulateCopyImage(dst, src, copies);
            }
        }
        return runtime.CopyImage(dst, src, copies);
    }
    UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D);
    UNIMPLEMENTED_IF(src.info.type != ImageType::e2D);
    for (const ImageCopy& copy : copies) {
        UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1);
        UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1);
        UNIMPLEMENTED_IF(copy.src_offset != Offset3D{});
        UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{});

        const SubresourceBase dst_base{
            .level = copy.dst_subresource.base_level,
            .layer = copy.dst_subresource.base_layer,
        };
        const SubresourceBase src_base{
            .level = copy.src_subresource.base_level,
            .layer = copy.src_subresource.base_layer,
        };
        const SubresourceExtent dst_extent{.levels = 1, .layers = 1};
        const SubresourceExtent src_extent{.levels = 1, .layers = 1};
        const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent};
        const SubresourceRange src_range{.base = src_base, .extent = src_extent};
        const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range);
        const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range);
        const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info);
        Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id];
        const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info);
        ImageView& dst_view = slot_image_views[dst_view_id];
        ImageView& src_view = slot_image_views[src_view_id];
        [[maybe_unused]] const Extent3D expected_size{
            .width = std::min(dst_view.size.width, src_view.size.width),
            .height = std::min(dst_view.size.height, src_view.size.height),
            .depth = std::min(dst_view.size.depth, src_view.size.depth),
        };
        UNIMPLEMENTED_IF(copy.extent != expected_size);

        runtime.ConvertImage(dst_framebuffer, dst_view, src_view);
    }
}

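// Updates a cached render target binding, queueing a download of the outgoing view's image when
// that view was flagged for preemptive download.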
template <class P>
void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) {
    if (*old_id == new_id) {
        return;
    }
    if (*old_id) {
        const ImageViewBase& old_view = slot_image_views[*old_id];
        if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) {
            uncommitted_downloads.push_back(old_view.image_id);
        }
    }
    *old_id = new_id;
}

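// Builds a single-attachment framebuffer around a view of the image, placing the view in the
// color or depth slot according to its format's surface type.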
template <class P>
std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
    ImageId image_id, const ImageViewInfo& view_info) {
    const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info);
    const ImageBase& image = slot_images[image_id];
    const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture;
    const ImageViewId color_view_id = is_color ? view_id : ImageViewId{};
    const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id;
    const Extent3D extent = MipSize(image.info.size, view_info.range.base.level);
    const u32 num_samples = image.info.num_samples;
    const auto [samples_x, samples_y] = SamplesLog2(num_samples);
    const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{
        .color_buffer_ids = {color_view_id},
        .depth_buffer_id = depth_view_id,
        .size = {extent.width >> samples_x, extent.height >> samples_y},
    });
    return {framebuffer_id, view_id};
}

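// Returns true when a clear is guaranteed to cover every texel of the view, e.g. so the previous
// contents of the subresource can be discarded instead of loaded.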
template <class P>
bool TextureCache<P>::IsFullClear(ImageViewId id) {
    if (!id) {
        return true;
    }
    const ImageViewBase& image_view = slot_image_views[id];
    const ImageBase& image = slot_images[image_view.image_id];
    const Extent3D size = image_view.size;
    const auto& regs = maxwell3d.regs;
    const auto& scissor = regs.scissor_test[0];
    if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
        // Images with multiple resources can't be cleared in a single call
        return false;
    }
    if (regs.clear_flags.scissor == 0) {
        // If scissor testing is disabled, the clear is always full
        return true;
    }
    // Make sure the clear covers all texels in the subresource
    return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width &&
           scissor.max_y >= size.height;
}

} // namespace VideoCommon