summary | refs | log | tree | commit | diff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
author: Fernando S, 2022-10-06 21:29:53 +0200
committer: GitHub, 2022-10-06 21:29:53 +0200
commit 1effa578f12f79d7816e3543291f302f126cc1d2 (patch)
tree 14803b31b6817294d40d57446f6fa94c5ff3fe9a /src/video_core/texture_cache
parent Merge pull request #9025 from FernandoS27/slava-ukrayini (diff)
parent vulkan_blitter: Fix pool allocation double free. (diff)
download yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.tar.gz
yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.tar.xz
yuzu-1effa578f12f79d7816e3543291f302f126cc1d2.zip
Merge pull request #8467 from FernandoS27/yfc-rel-1
Project yuzu Fried Chicken (Y.F.C.) Part 1
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/format_lookup_table.cpp 2
-rw-r--r-- src/video_core/texture_cache/formatter.h 4
-rw-r--r-- src/video_core/texture_cache/image_base.cpp 13
-rw-r--r-- src/video_core/texture_cache/image_base.h 3
-rw-r--r-- src/video_core/texture_cache/render_targets.h 1
-rw-r--r-- src/video_core/texture_cache/texture_cache.cpp 15
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 222
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h 85
-rw-r--r-- src/video_core/texture_cache/util.cpp 3
9 files changed, 214 insertions, 134 deletions
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
index c71694d2a..ad935d386 100644
--- a/src/video_core/texture_cache/format_lookup_table.cpp
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -63,7 +63,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red,
63 case Hash(TextureFormat::A4B4G4R4, UNORM): 63 case Hash(TextureFormat::A4B4G4R4, UNORM):
64 return PixelFormat::A4B4G4R4_UNORM; 64 return PixelFormat::A4B4G4R4_UNORM;
65 case Hash(TextureFormat::G4R4, UNORM): 65 case Hash(TextureFormat::G4R4, UNORM):
66 return PixelFormat::R4G4_UNORM; 66 return PixelFormat::G4R4_UNORM;
67 case Hash(TextureFormat::A5B5G5R1, UNORM): 67 case Hash(TextureFormat::A5B5G5R1, UNORM):
68 return PixelFormat::A5B5G5R1_UNORM; 68 return PixelFormat::A5B5G5R1_UNORM;
69 case Hash(TextureFormat::R8, UNORM): 69 case Hash(TextureFormat::R8, UNORM):
diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h
index 6881e4c90..acc854715 100644
--- a/src/video_core/texture_cache/formatter.h
+++ b/src/video_core/texture_cache/formatter.h
@@ -153,8 +153,8 @@ struct fmt::formatter<VideoCore::Surface::PixelFormat> : fmt::formatter<fmt::str
153 return "BC7_SRGB"; 153 return "BC7_SRGB";
154 case PixelFormat::A4B4G4R4_UNORM: 154 case PixelFormat::A4B4G4R4_UNORM:
155 return "A4B4G4R4_UNORM"; 155 return "A4B4G4R4_UNORM";
156 case PixelFormat::R4G4_UNORM: 156 case PixelFormat::G4R4_UNORM:
157 return "R4G4_UNORM"; 157 return "G4R4_UNORM";
158 case PixelFormat::ASTC_2D_4X4_SRGB: 158 case PixelFormat::ASTC_2D_4X4_SRGB:
159 return "ASTC_2D_4X4_SRGB"; 159 return "ASTC_2D_4X4_SRGB";
160 case PixelFormat::ASTC_2D_8X8_SRGB: 160 case PixelFormat::ASTC_2D_8X8_SRGB:
diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp
index f61e09ac7..91512022f 100644
--- a/src/video_core/texture_cache/image_base.cpp
+++ b/src/video_core/texture_cache/image_base.cpp
@@ -7,6 +7,7 @@
7#include <vector> 7#include <vector>
8 8
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "common/div_ceil.h"
10#include "video_core/surface.h" 11#include "video_core/surface.h"
11#include "video_core/texture_cache/formatter.h" 12#include "video_core/texture_cache/formatter.h"
12#include "video_core/texture_cache/image_base.h" 13#include "video_core/texture_cache/image_base.h"
@@ -182,10 +183,6 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
182 }; 183 };
183 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; 184 const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1;
184 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; 185 const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1;
185 if (is_lhs_compressed && is_rhs_compressed) {
186 LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented");
187 return;
188 }
189 const s32 lhs_mips = lhs.info.resources.levels; 186 const s32 lhs_mips = lhs.info.resources.levels;
190 const s32 rhs_mips = rhs.info.resources.levels; 187 const s32 rhs_mips = rhs.info.resources.levels;
191 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); 188 const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips);
@@ -199,12 +196,12 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
199 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); 196 Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level);
200 Extent3D rhs_size = MipSize(rhs.info.size, mip_level); 197 Extent3D rhs_size = MipSize(rhs.info.size, mip_level);
201 if (is_lhs_compressed) { 198 if (is_lhs_compressed) {
202 lhs_size.width /= lhs_block.width; 199 lhs_size.width = Common::DivCeil(lhs_size.width, lhs_block.width);
203 lhs_size.height /= lhs_block.height; 200 lhs_size.height = Common::DivCeil(lhs_size.height, lhs_block.height);
204 } 201 }
205 if (is_rhs_compressed) { 202 if (is_rhs_compressed) {
206 rhs_size.width /= rhs_block.width; 203 rhs_size.width = Common::DivCeil(rhs_size.width, rhs_block.width);
207 rhs_size.height /= rhs_block.height; 204 rhs_size.height = Common::DivCeil(rhs_size.height, rhs_block.height);
208 } 205 }
209 const Extent3D copy_size{ 206 const Extent3D copy_size{
210 .width = std::min(lhs_size.width, rhs_size.width), 207 .width = std::min(lhs_size.width, rhs_size.width),
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1f85ec9da..620565684 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -88,6 +88,9 @@ struct ImageBase {
88 u32 scale_rating = 0; 88 u32 scale_rating = 0;
89 u64 scale_tick = 0; 89 u64 scale_tick = 0;
90 bool has_scaled = false; 90 bool has_scaled = false;
91
92 size_t channel = 0;
93
91 ImageFlagBits flags = ImageFlagBits::CpuModified; 94 ImageFlagBits flags = ImageFlagBits::CpuModified;
92 95
93 GPUVAddr gpu_addr = 0; 96 GPUVAddr gpu_addr = 0;
diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h
index da8ffe9ec..1efbd6507 100644
--- a/src/video_core/texture_cache/render_targets.h
+++ b/src/video_core/texture_cache/render_targets.h
@@ -26,6 +26,7 @@ struct RenderTargets {
26 ImageViewId depth_buffer_id{}; 26 ImageViewId depth_buffer_id{};
27 std::array<u8, NUM_RT> draw_buffers{}; 27 std::array<u8, NUM_RT> draw_buffers{};
28 Extent2D size{}; 28 Extent2D size{};
29 bool is_rescaled{};
29}; 30};
30 31
31} // namespace VideoCommon 32} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp
new file mode 100644
index 000000000..8a9a32f44
--- /dev/null
+++ b/src/video_core/texture_cache/texture_cache.cpp
@@ -0,0 +1,15 @@
1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-3.0-or-later
3
4#include "video_core/control/channel_state_cache.inc"
5#include "video_core/texture_cache/texture_cache_base.h"
6
7namespace VideoCommon {
8
9TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept
10 : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory},
11 compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {}
12
13template class VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo>;
14
15} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1dbe01bc0..eaf4a1c95 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1,5 +1,5 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
@@ -7,6 +7,7 @@
7 7
8#include "common/alignment.h" 8#include "common/alignment.h"
9#include "common/settings.h" 9#include "common/settings.h"
10#include "video_core/control/channel_state.h"
10#include "video_core/dirty_flags.h" 11#include "video_core/dirty_flags.h"
11#include "video_core/engines/kepler_compute.h" 12#include "video_core/engines/kepler_compute.h"
12#include "video_core/texture_cache/image_view_base.h" 13#include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
29using namespace Common::Literals; 30using namespace Common::Literals;
30 31
31template <class P> 32template <class P>
32TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, 33TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
33 Tegra::Engines::Maxwell3D& maxwell3d_, 34 : runtime{runtime_}, rasterizer{rasterizer_} {
34 Tegra::Engines::KeplerCompute& kepler_compute_,
35 Tegra::MemoryManager& gpu_memory_)
36 : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
37 kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
38 // Configure null sampler 35 // Configure null sampler
39 TSCEntry sampler_descriptor{}; 36 TSCEntry sampler_descriptor{};
40 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); 37 sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -93,7 +90,7 @@ void TextureCache<P>::RunGarbageCollector() {
93 const auto copies = FullDownloadCopies(image.info); 90 const auto copies = FullDownloadCopies(image.info);
94 image.DownloadMemory(map, copies); 91 image.DownloadMemory(map, copies);
95 runtime.Finish(); 92 runtime.Finish();
96 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 93 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
97 } 94 }
98 if (True(image.flags & ImageFlagBits::Tracked)) { 95 if (True(image.flags & ImageFlagBits::Tracked)) {
99 UntrackImage(image, image_id); 96 UntrackImage(image, image_id);
@@ -152,22 +149,24 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
152template <class P> 149template <class P>
153template <bool has_blacklists> 150template <bool has_blacklists>
154void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) { 151void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
155 FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views); 152 FillImageViews<has_blacklists>(channel_state->graphics_image_table,
153 channel_state->graphics_image_view_ids, views);
156} 154}
157 155
158template <class P> 156template <class P>
159void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { 157void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
160 FillImageViews<true>(compute_image_table, compute_image_view_ids, views); 158 FillImageViews<true>(channel_state->compute_image_table, channel_state->compute_image_view_ids,
159 views);
161} 160}
162 161
163template <class P> 162template <class P>
164typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 163typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
165 if (index > graphics_sampler_table.Limit()) { 164 if (index > channel_state->graphics_sampler_table.Limit()) {
166 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 165 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
167 return &slot_samplers[NULL_SAMPLER_ID]; 166 return &slot_samplers[NULL_SAMPLER_ID];
168 } 167 }
169 const auto [descriptor, is_new] = graphics_sampler_table.Read(index); 168 const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index);
170 SamplerId& id = graphics_sampler_ids[index]; 169 SamplerId& id = channel_state->graphics_sampler_ids[index];
171 if (is_new) { 170 if (is_new) {
172 id = FindSampler(descriptor); 171 id = FindSampler(descriptor);
173 } 172 }
@@ -176,12 +175,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
176 175
177template <class P> 176template <class P>
178typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { 177typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
179 if (index > compute_sampler_table.Limit()) { 178 if (index > channel_state->compute_sampler_table.Limit()) {
180 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 179 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
181 return &slot_samplers[NULL_SAMPLER_ID]; 180 return &slot_samplers[NULL_SAMPLER_ID];
182 } 181 }
183 const auto [descriptor, is_new] = compute_sampler_table.Read(index); 182 const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index);
184 SamplerId& id = compute_sampler_ids[index]; 183 SamplerId& id = channel_state->compute_sampler_ids[index];
185 if (is_new) { 184 if (is_new) {
186 id = FindSampler(descriptor); 185 id = FindSampler(descriptor);
187 } 186 }
@@ -191,34 +190,36 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
191template <class P> 190template <class P>
192void TextureCache<P>::SynchronizeGraphicsDescriptors() { 191void TextureCache<P>::SynchronizeGraphicsDescriptors() {
193 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; 192 using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
194 const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; 193 const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
195 const u32 tic_limit = maxwell3d.regs.tic.limit; 194 const u32 tic_limit = maxwell3d->regs.tic.limit;
196 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; 195 const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
197 if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { 196 if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(),
198 graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); 197 tsc_limit)) {
198 channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
199 } 199 }
200 if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { 200 if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
201 graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); 201 channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
202 } 202 }
203} 203}
204 204
205template <class P> 205template <class P>
206void TextureCache<P>::SynchronizeComputeDescriptors() { 206void TextureCache<P>::SynchronizeComputeDescriptors() {
207 const bool linked_tsc = kepler_compute.launch_description.linked_tsc; 207 const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
208 const u32 tic_limit = kepler_compute.regs.tic.limit; 208 const u32 tic_limit = kepler_compute->regs.tic.limit;
209 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; 209 const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
210 const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); 210 const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
211 if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { 211 if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
212 compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); 212 channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
213 } 213 }
214 if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { 214 if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(),
215 compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); 215 tic_limit)) {
216 channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
216 } 217 }
217} 218}
218 219
219template <class P> 220template <class P>
220bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { 221bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
221 auto& flags = maxwell3d.dirty.flags; 222 auto& flags = maxwell3d->dirty.flags;
222 u32 scale_rating = 0; 223 u32 scale_rating = 0;
223 bool rescaled = false; 224 bool rescaled = false;
224 std::array<ImageId, NUM_RT> tmp_color_images{}; 225 std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +316,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
315template <class P> 316template <class P>
316void TextureCache<P>::UpdateRenderTargets(bool is_clear) { 317void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
317 using namespace VideoCommon::Dirty; 318 using namespace VideoCommon::Dirty;
318 auto& flags = maxwell3d.dirty.flags; 319 auto& flags = maxwell3d->dirty.flags;
319 if (!flags[Dirty::RenderTargets]) { 320 if (!flags[Dirty::RenderTargets]) {
320 for (size_t index = 0; index < NUM_RT; ++index) { 321 for (size_t index = 0; index < NUM_RT; ++index) {
321 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; 322 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +343,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
342 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); 343 PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
343 344
344 for (size_t index = 0; index < NUM_RT; ++index) { 345 for (size_t index = 0; index < NUM_RT; ++index) {
345 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); 346 render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
346 } 347 }
347 u32 up_scale = 1; 348 u32 up_scale = 1;
348 u32 down_shift = 0; 349 u32 down_shift = 0;
@@ -351,9 +352,10 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
351 down_shift = Settings::values.resolution_info.down_shift; 352 down_shift = Settings::values.resolution_info.down_shift;
352 } 353 }
353 render_targets.size = Extent2D{ 354 render_targets.size = Extent2D{
354 (maxwell3d.regs.render_area.width * up_scale) >> down_shift, 355 (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
355 (maxwell3d.regs.render_area.height * up_scale) >> down_shift, 356 (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
356 }; 357 };
358 render_targets.is_rescaled = is_rescaling;
357 359
358 flags[Dirty::DepthBiasGlobal] = true; 360 flags[Dirty::DepthBiasGlobal] = true;
359} 361}
@@ -458,7 +460,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
458 const auto copies = FullDownloadCopies(image.info); 460 const auto copies = FullDownloadCopies(image.info);
459 image.DownloadMemory(map, copies); 461 image.DownloadMemory(map, copies);
460 runtime.Finish(); 462 runtime.Finish();
461 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); 463 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
462 } 464 }
463} 465}
464 466
@@ -477,12 +479,20 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
477} 479}
478 480
479template <class P> 481template <class P>
480void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { 482void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
481 std::vector<ImageId> deleted_images; 483 std::vector<ImageId> deleted_images;
482 ForEachImageInRegionGPU(gpu_addr, size, 484 ForEachImageInRegionGPU(as_id, gpu_addr, size,
483 [&](ImageId id, Image&) { deleted_images.push_back(id); }); 485 [&](ImageId id, Image&) { deleted_images.push_back(id); });
484 for (const ImageId id : deleted_images) { 486 for (const ImageId id : deleted_images) {
485 Image& image = slot_images[id]; 487 Image& image = slot_images[id];
488 if (True(image.flags & ImageFlagBits::CpuModified)) {
489 return;
490 }
491 image.flags |= ImageFlagBits::CpuModified;
492 if (True(image.flags & ImageFlagBits::Tracked)) {
493 UntrackImage(image, id);
494 }
495 /*
486 if (True(image.flags & ImageFlagBits::Remapped)) { 496 if (True(image.flags & ImageFlagBits::Remapped)) {
487 continue; 497 continue;
488 } 498 }
@@ -490,6 +500,7 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
490 if (True(image.flags & ImageFlagBits::Tracked)) { 500 if (True(image.flags & ImageFlagBits::Tracked)) {
491 UntrackImage(image, id); 501 UntrackImage(image, id);
492 } 502 }
503 */
493 } 504 }
494} 505}
495 506
@@ -655,7 +666,7 @@ void TextureCache<P>::PopAsyncFlushes() {
655 for (const ImageId image_id : download_ids) { 666 for (const ImageId image_id : download_ids) {
656 const ImageBase& image = slot_images[image_id]; 667 const ImageBase& image = slot_images[image_id];
657 const auto copies = FullDownloadCopies(image.info); 668 const auto copies = FullDownloadCopies(image.info);
658 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); 669 SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
659 download_map.offset += image.unswizzled_size_bytes; 670 download_map.offset += image.unswizzled_size_bytes;
660 download_span = download_span.subspan(image.unswizzled_size_bytes); 671 download_span = download_span.subspan(image.unswizzled_size_bytes);
661 } 672 }
@@ -714,26 +725,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
714 const GPUVAddr gpu_addr = image.gpu_addr; 725 const GPUVAddr gpu_addr = image.gpu_addr;
715 726
716 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { 727 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
717 gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); 728 gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
718 const auto uploads = FullUploadSwizzles(image.info); 729 const auto uploads = FullUploadSwizzles(image.info);
719 runtime.AccelerateImageUpload(image, staging, uploads); 730 runtime.AccelerateImageUpload(image, staging, uploads);
720 } else if (True(image.flags & ImageFlagBits::Converted)) { 731 } else if (True(image.flags & ImageFlagBits::Converted)) {
721 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); 732 std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
722 auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); 733 auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
723 ConvertImage(unswizzled_data, image.info, mapped_span, copies); 734 ConvertImage(unswizzled_data, image.info, mapped_span, copies);
724 image.UploadMemory(staging, copies); 735 image.UploadMemory(staging, copies);
725 } else { 736 } else {
726 const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); 737 const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
727 image.UploadMemory(staging, copies); 738 image.UploadMemory(staging, copies);
728 } 739 }
729} 740}
730 741
731template <class P> 742template <class P>
732ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { 743ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
733 if (!IsValidEntry(gpu_memory, config)) { 744 if (!IsValidEntry(*gpu_memory, config)) {
734 return NULL_IMAGE_VIEW_ID; 745 return NULL_IMAGE_VIEW_ID;
735 } 746 }
736 const auto [pair, is_new] = image_views.try_emplace(config); 747 const auto [pair, is_new] = channel_state->image_views.try_emplace(config);
737 ImageViewId& image_view_id = pair->second; 748 ImageViewId& image_view_id = pair->second;
738 if (is_new) { 749 if (is_new) {
739 image_view_id = CreateImageView(config); 750 image_view_id = CreateImageView(config);
@@ -777,9 +788,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
777template <class P> 788template <class P>
778ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, 789ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
779 RelaxedOptions options) { 790 RelaxedOptions options) {
780 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 791 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
781 if (!cpu_addr) { 792 if (!cpu_addr) {
782 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); 793 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
783 if (!cpu_addr) { 794 if (!cpu_addr) {
784 return ImageId{}; 795 return ImageId{};
785 } 796 }
@@ -860,7 +871,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
860 image.scale_tick = frame_tick + 1; 871 image.scale_tick = frame_tick + 1;
861 } 872 }
862 const std::span<const ImageViewId> image_view_ids = image.image_view_ids; 873 const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
863 auto& dirty = maxwell3d.dirty.flags; 874 auto& dirty = maxwell3d->dirty.flags;
864 dirty[Dirty::RenderTargets] = true; 875 dirty[Dirty::RenderTargets] = true;
865 dirty[Dirty::ZetaBuffer] = true; 876 dirty[Dirty::ZetaBuffer] = true;
866 for (size_t rt = 0; rt < NUM_RT; ++rt) { 877 for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -880,12 +891,15 @@ void TextureCache<P>::InvalidateScale(Image& image) {
880 } 891 }
881 image.image_view_ids.clear(); 892 image.image_view_ids.clear();
882 image.image_view_infos.clear(); 893 image.image_view_infos.clear();
883 if constexpr (ENABLE_VALIDATION) { 894 for (size_t c : active_channel_ids) {
884 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); 895 auto& channel_info = channel_storage[c];
885 std::ranges::fill(compute_image_view_ids, CORRUPT_ID); 896 if constexpr (ENABLE_VALIDATION) {
897 std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
898 std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
899 }
900 channel_info.graphics_image_table.Invalidate();
901 channel_info.compute_image_table.Invalidate();
886 } 902 }
887 graphics_image_table.Invalidate();
888 compute_image_table.Invalidate();
889 has_deleted_images = true; 903 has_deleted_images = true;
890} 904}
891 905
@@ -929,10 +943,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
929template <class P> 943template <class P>
930ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, 944ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
931 RelaxedOptions options) { 945 RelaxedOptions options) {
932 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 946 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
933 if (!cpu_addr) { 947 if (!cpu_addr) {
934 const auto size = CalculateGuestSizeInBytes(info); 948 const auto size = CalculateGuestSizeInBytes(info);
935 cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); 949 cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
936 if (!cpu_addr) { 950 if (!cpu_addr) {
937 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; 951 const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
938 virtual_invalid_space += Common::AlignUp(size, 32); 952 virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1064,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
1050 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); 1064 const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
1051 Image& new_image = slot_images[new_image_id]; 1065 Image& new_image = slot_images[new_image_id];
1052 1066
1053 if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { 1067 if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
1054 new_image.flags |= ImageFlagBits::Sparse; 1068 new_image.flags |= ImageFlagBits::Sparse;
1055 } 1069 }
1056 1070
@@ -1192,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1192 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { 1206 if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
1193 return NULL_SAMPLER_ID; 1207 return NULL_SAMPLER_ID;
1194 } 1208 }
1195 const auto [pair, is_new] = samplers.try_emplace(config); 1209 const auto [pair, is_new] = channel_state->samplers.try_emplace(config);
1196 if (is_new) { 1210 if (is_new) {
1197 pair->second = slot_samplers.insert(runtime, config); 1211 pair->second = slot_samplers.insert(runtime, config);
1198 } 1212 }
@@ -1201,7 +1215,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1201 1215
1202template <class P> 1216template <class P>
1203ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { 1217ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1204 const auto& regs = maxwell3d.regs; 1218 const auto& regs = maxwell3d->regs;
1205 if (index >= regs.rt_control.count) { 1219 if (index >= regs.rt_control.count) {
1206 return ImageViewId{}; 1220 return ImageViewId{};
1207 } 1221 }
@@ -1219,7 +1233,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1219 1233
1220template <class P> 1234template <class P>
1221ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { 1235ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1222 const auto& regs = maxwell3d.regs; 1236 const auto& regs = maxwell3d->regs;
1223 if (!regs.zeta_enable) { 1237 if (!regs.zeta_enable) {
1224 return ImageViewId{}; 1238 return ImageViewId{};
1225 } 1239 }
@@ -1316,11 +1330,17 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
1316 1330
1317template <class P> 1331template <class P>
1318template <typename Func> 1332template <typename Func>
1319void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { 1333void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size,
1334 Func&& func) {
1320 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; 1335 using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
1321 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; 1336 static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
1322 boost::container::small_vector<ImageId, 8> images; 1337 boost::container::small_vector<ImageId, 8> images;
1323 ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { 1338 auto storage_id = getStorageID(as_id);
1339 if (!storage_id) {
1340 return;
1341 }
1342 auto& gpu_page_table = gpu_page_table_storage[*storage_id];
1343 ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
1324 const auto it = gpu_page_table.find(page); 1344 const auto it = gpu_page_table.find(page);
1325 if (it == gpu_page_table.end()) { 1345 if (it == gpu_page_table.end()) {
1326 if constexpr (BOOL_BREAK) { 1346 if constexpr (BOOL_BREAK) {
@@ -1403,9 +1423,9 @@ template <typename Func>
1403void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { 1423void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
1404 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; 1424 using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
1405 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; 1425 static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
1406 const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); 1426 const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
1407 for (const auto& [gpu_addr, size] : segments) { 1427 for (const auto& [gpu_addr, size] : segments) {
1408 std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); 1428 std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1409 ASSERT(cpu_addr); 1429 ASSERT(cpu_addr);
1410 if constexpr (RETURNS_BOOL) { 1430 if constexpr (RETURNS_BOOL) {
1411 if (func(gpu_addr, *cpu_addr, size)) { 1431 if (func(gpu_addr, *cpu_addr, size)) {
@@ -1448,8 +1468,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1448 } 1468 }
1449 image.lru_index = lru_cache.Insert(image_id, frame_tick); 1469 image.lru_index = lru_cache.Insert(image_id, frame_tick);
1450 1470
1451 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, 1471 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
1452 [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); 1472 (*channel_state->gpu_page_table)[page].push_back(image_id);
1473 });
1453 if (False(image.flags & ImageFlagBits::Sparse)) { 1474 if (False(image.flags & ImageFlagBits::Sparse)) {
1454 auto map_id = 1475 auto map_id =
1455 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); 1476 slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1480,9 +1501,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1480 image.flags &= ~ImageFlagBits::BadOverlap; 1501 image.flags &= ~ImageFlagBits::BadOverlap;
1481 lru_cache.Free(image.lru_index); 1502 lru_cache.Free(image.lru_index);
1482 const auto& clear_page_table = 1503 const auto& clear_page_table =
1483 [this, image_id]( 1504 [this, image_id](u64 page,
1484 u64 page, 1505 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>&
1485 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { 1506 selected_page_table) {
1486 const auto page_it = selected_page_table.find(page); 1507 const auto page_it = selected_page_table.find(page);
1487 if (page_it == selected_page_table.end()) { 1508 if (page_it == selected_page_table.end()) {
1488 ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS); 1509 ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS);
@@ -1497,8 +1518,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
1497 } 1518 }
1498 image_ids.erase(vector_it); 1519 image_ids.erase(vector_it);
1499 }; 1520 };
1500 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, 1521 ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
1501 [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); 1522 clear_page_table(page, (*channel_state->gpu_page_table));
1523 });
1502 if (False(image.flags & ImageFlagBits::Sparse)) { 1524 if (False(image.flags & ImageFlagBits::Sparse)) {
1503 const auto map_id = image.map_view_id; 1525 const auto map_id = image.map_view_id;
1504 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { 1526 ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1653,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
1631 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); 1653 ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
1632 1654
1633 // Mark render targets as dirty 1655 // Mark render targets as dirty
1634 auto& dirty = maxwell3d.dirty.flags; 1656 auto& dirty = maxwell3d->dirty.flags;
1635 dirty[Dirty::RenderTargets] = true; 1657 dirty[Dirty::RenderTargets] = true;
1636 dirty[Dirty::ZetaBuffer] = true; 1658 dirty[Dirty::ZetaBuffer] = true;
1637 for (size_t rt = 0; rt < NUM_RT; ++rt) { 1659 for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,24 +1703,30 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
1681 if (alloc_images.empty()) { 1703 if (alloc_images.empty()) {
1682 image_allocs_table.erase(alloc_it); 1704 image_allocs_table.erase(alloc_it);
1683 } 1705 }
1684 if constexpr (ENABLE_VALIDATION) { 1706 for (size_t c : active_channel_ids) {
1685 std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); 1707 auto& channel_info = channel_storage[c];
1686 std::ranges::fill(compute_image_view_ids, CORRUPT_ID); 1708 if constexpr (ENABLE_VALIDATION) {
1709 std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID);
1710 std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID);
1711 }
1712 channel_info.graphics_image_table.Invalidate();
1713 channel_info.compute_image_table.Invalidate();
1687 } 1714 }
1688 graphics_image_table.Invalidate();
1689 compute_image_table.Invalidate();
1690 has_deleted_images = true; 1715 has_deleted_images = true;
1691} 1716}
1692 1717
1693template <class P> 1718template <class P>
1694void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { 1719void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
1695 auto it = image_views.begin(); 1720 for (size_t c : active_channel_ids) {
1696 while (it != image_views.end()) { 1721 auto& channel_info = channel_storage[c];
1697 const auto found = std::ranges::find(removed_views, it->second); 1722 auto it = channel_info.image_views.begin();
1698 if (found != removed_views.end()) { 1723 while (it != channel_info.image_views.end()) {
1699 it = image_views.erase(it); 1724 const auto found = std::ranges::find(removed_views, it->second);
1700 } else { 1725 if (found != removed_views.end()) {
1701 ++it; 1726 it = channel_info.image_views.erase(it);
1727 } else {
1728 ++it;
1729 }
1702 } 1730 }
1703 } 1731 }
1704} 1732}
@@ -1729,6 +1757,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1729 boost::container::small_vector<const AliasedImage*, 1> aliased_images; 1757 boost::container::small_vector<const AliasedImage*, 1> aliased_images;
1730 Image& image = slot_images[image_id]; 1758 Image& image = slot_images[image_id];
1731 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); 1759 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
1760 bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
1732 u64 most_recent_tick = image.modification_tick; 1761 u64 most_recent_tick = image.modification_tick;
1733 for (const AliasedImage& aliased : image.aliased_images) { 1762 for (const AliasedImage& aliased : image.aliased_images) {
1734 ImageBase& aliased_image = slot_images[aliased.id]; 1763 ImageBase& aliased_image = slot_images[aliased.id];
@@ -1736,9 +1765,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1736 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); 1765 most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
1737 aliased_images.push_back(&aliased); 1766 aliased_images.push_back(&aliased);
1738 any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); 1767 any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
1739 if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { 1768 any_modified |= True(aliased_image.flags & ImageFlagBits::GpuModified);
1740 image.flags |= ImageFlagBits::GpuModified;
1741 }
1742 } 1769 }
1743 } 1770 }
1744 if (aliased_images.empty()) { 1771 if (aliased_images.empty()) {
@@ -1753,6 +1780,9 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
1753 } 1780 }
1754 } 1781 }
1755 image.modification_tick = most_recent_tick; 1782 image.modification_tick = most_recent_tick;
1783 if (any_modified) {
1784 image.flags |= ImageFlagBits::GpuModified;
1785 }
1756 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { 1786 std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
1757 const ImageBase& lhs_image = slot_images[lhs->id]; 1787 const ImageBase& lhs_image = slot_images[lhs->id];
1758 const ImageBase& rhs_image = slot_images[rhs->id]; 1788 const ImageBase& rhs_image = slot_images[rhs->id];
@@ -1931,6 +1961,7 @@ std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage(
1931 .color_buffer_ids = {color_view_id}, 1961 .color_buffer_ids = {color_view_id},
1932 .depth_buffer_id = depth_view_id, 1962 .depth_buffer_id = depth_view_id,
1933 .size = {extent.width >> samples_x, extent.height >> samples_y}, 1963 .size = {extent.width >> samples_x, extent.height >> samples_y},
1964 .is_rescaled = is_rescaled,
1934 }); 1965 });
1935 return {framebuffer_id, view_id}; 1966 return {framebuffer_id, view_id};
1936} 1967}
@@ -1943,7 +1974,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
1943 const ImageViewBase& image_view = slot_image_views[id]; 1974 const ImageViewBase& image_view = slot_image_views[id];
1944 const ImageBase& image = slot_images[image_view.image_id]; 1975 const ImageBase& image = slot_images[image_view.image_id];
1945 const Extent3D size = image_view.size; 1976 const Extent3D size = image_view.size;
1946 const auto& regs = maxwell3d.regs; 1977 const auto& regs = maxwell3d->regs;
1947 const auto& scissor = regs.scissor_test[0]; 1978 const auto& scissor = regs.scissor_test[0];
1948 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { 1979 if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
1949 // Images with multiple resources can't be cleared in a single call 1980 // Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1989,19 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
1958 scissor.max_y >= size.height; 1989 scissor.max_y >= size.height;
1959} 1990}
1960 1991
1992template <class P>
1993void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
1994 VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo>::CreateChannel(channel);
1995 const auto it = channel_map.find(channel.bind_id);
1996 auto* this_state = &channel_storage[it->second];
1997 const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()];
1998 this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id];
1999}
2000
2001/// Bind a channel for execution.
2002template <class P>
2003void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
2004 gpu_page_table_storage.emplace_back();
2005}
2006
1961} // namespace VideoCommon 2007} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7e6c6cef2..2fa8445eb 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -1,8 +1,10 @@
1// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project 1// SPDX-FileCopyrightText: 2021 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-3.0-or-later
3 3
4#pragma once 4#pragma once
5 5
6#include <deque>
7#include <limits>
6#include <mutex> 8#include <mutex>
7#include <span> 9#include <span>
8#include <type_traits> 10#include <type_traits>
@@ -11,9 +13,11 @@
11#include <queue> 13#include <queue>
12 14
13#include "common/common_types.h" 15#include "common/common_types.h"
16#include "common/hash.h"
14#include "common/literals.h" 17#include "common/literals.h"
15#include "common/lru_cache.h" 18#include "common/lru_cache.h"
16#include "video_core/compatible_formats.h" 19#include "video_core/compatible_formats.h"
20#include "video_core/control/channel_state_cache.h"
17#include "video_core/delayed_destruction_ring.h" 21#include "video_core/delayed_destruction_ring.h"
18#include "video_core/engines/fermi_2d.h" 22#include "video_core/engines/fermi_2d.h"
19#include "video_core/surface.h" 23#include "video_core/surface.h"
@@ -26,6 +30,10 @@
26#include "video_core/texture_cache/types.h" 30#include "video_core/texture_cache/types.h"
27#include "video_core/textures/texture.h" 31#include "video_core/textures/texture.h"
28 32
33namespace Tegra::Control {
34struct ChannelState;
35}
36
29namespace VideoCommon { 37namespace VideoCommon {
30 38
31using Tegra::Texture::SwizzleSource; 39using Tegra::Texture::SwizzleSource;
@@ -44,8 +52,35 @@ struct ImageViewInOut {
44 ImageViewId id{}; 52 ImageViewId id{};
45}; 53};
46 54
55using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
56
57class TextureCacheChannelInfo : public ChannelInfo {
58public:
59 TextureCacheChannelInfo() = delete;
60 TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
61 TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
62 TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
63 TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default;
64 TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default;
65
66 DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
67 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
68 std::vector<SamplerId> graphics_sampler_ids;
69 std::vector<ImageViewId> graphics_image_view_ids;
70
71 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
72 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
73 std::vector<SamplerId> compute_sampler_ids;
74 std::vector<ImageViewId> compute_image_view_ids;
75
76 std::unordered_map<TICEntry, ImageViewId> image_views;
77 std::unordered_map<TSCEntry, SamplerId> samplers;
78
79 TextureCacheGPUMap* gpu_page_table;
80};
81
47template <class P> 82template <class P>
48class TextureCache { 83class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
49 /// Address shift for caching images into a hash table 84 /// Address shift for caching images into a hash table
50 static constexpr u64 YUZU_PAGEBITS = 20; 85 static constexpr u64 YUZU_PAGEBITS = 20;
51 86
@@ -58,6 +93,8 @@ class TextureCache {
58 /// True when the API can provide info about the memory of the device. 93 /// True when the API can provide info about the memory of the device.
59 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; 94 static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
60 95
96 static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
97
61 static constexpr s64 TARGET_THRESHOLD = 4_GiB; 98 static constexpr s64 TARGET_THRESHOLD = 4_GiB;
62 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; 99 static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
63 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; 100 static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@@ -77,16 +114,8 @@ class TextureCache {
77 PixelFormat src_format; 114 PixelFormat src_format;
78 }; 115 };
79 116
80 template <typename T>
81 struct IdentityHash {
82 [[nodiscard]] size_t operator()(T value) const noexcept {
83 return static_cast<size_t>(value);
84 }
85 };
86
87public: 117public:
88 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, 118 explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
89 Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
90 119
91 /// Notify the cache that a new frame has been queued 120 /// Notify the cache that a new frame has been queued
92 void TickFrame(); 121 void TickFrame();
@@ -142,7 +171,7 @@ public:
142 void UnmapMemory(VAddr cpu_addr, size_t size); 171 void UnmapMemory(VAddr cpu_addr, size_t size);
143 172
144 /// Remove images in a region 173 /// Remove images in a region
145 void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); 174 void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
146 175
147 /// Blit an image with the given parameters 176 /// Blit an image with the given parameters
148 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, 177 void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
@@ -171,6 +200,9 @@ public:
171 200
172 [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; 201 [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
173 202
203 /// Create channel state.
204 void CreateChannel(Tegra::Control::ChannelState& channel) final override;
205
174 std::mutex mutex; 206 std::mutex mutex;
175 207
176private: 208private:
@@ -205,6 +237,8 @@ private:
205 } 237 }
206 } 238 }
207 239
240 void OnGPUASRegister(size_t map_id) final override;
241
208 /// Runs the Garbage Collector. 242 /// Runs the Garbage Collector.
209 void RunGarbageCollector(); 243 void RunGarbageCollector();
210 244
@@ -273,7 +307,7 @@ private:
273 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); 307 void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
274 308
275 template <typename Func> 309 template <typename Func>
276 void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); 310 void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
277 311
278 template <typename Func> 312 template <typename Func>
279 void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); 313 void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
@@ -338,31 +372,16 @@ private:
338 u64 GetScaledImageSizeBytes(ImageBase& image); 372 u64 GetScaledImageSizeBytes(ImageBase& image);
339 373
340 Runtime& runtime; 374 Runtime& runtime;
341 VideoCore::RasterizerInterface& rasterizer;
342 Tegra::Engines::Maxwell3D& maxwell3d;
343 Tegra::Engines::KeplerCompute& kepler_compute;
344 Tegra::MemoryManager& gpu_memory;
345 375
346 DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; 376 VideoCore::RasterizerInterface& rasterizer;
347 DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; 377 std::deque<TextureCacheGPUMap> gpu_page_table_storage;
348 std::vector<SamplerId> graphics_sampler_ids;
349 std::vector<ImageViewId> graphics_image_view_ids;
350
351 DescriptorTable<TICEntry> compute_image_table{gpu_memory};
352 DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
353 std::vector<SamplerId> compute_sampler_ids;
354 std::vector<ImageViewId> compute_image_view_ids;
355 378
356 RenderTargets render_targets; 379 RenderTargets render_targets;
357 380
358 std::unordered_map<TICEntry, ImageViewId> image_views;
359 std::unordered_map<TSCEntry, SamplerId> samplers;
360 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 381 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
361 382
362 std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; 383 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
363 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; 384 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
364 std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
365
366 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; 385 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
367 386
368 VAddr virtual_invalid_space{}; 387 VAddr virtual_invalid_space{};
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 1820823b2..1223df5a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -517,7 +517,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
517 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; 517 const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block;
518 518
519 UNIMPLEMENTED_IF(info.tile_width_spacing > 0); 519 UNIMPLEMENTED_IF(info.tile_width_spacing > 0);
520
521 UNIMPLEMENTED_IF(copy.image_offset.x != 0); 520 UNIMPLEMENTED_IF(copy.image_offset.x != 0);
522 UNIMPLEMENTED_IF(copy.image_offset.y != 0); 521 UNIMPLEMENTED_IF(copy.image_offset.y != 0);
523 UNIMPLEMENTED_IF(copy.image_offset.z != 0); 522 UNIMPLEMENTED_IF(copy.image_offset.z != 0);
@@ -755,7 +754,7 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
755 if (address == 0) { 754 if (address == 0) {
756 return false; 755 return false;
757 } 756 }
758 if (address > (1ULL << 48)) { 757 if (address >= (1ULL << 40)) {
759 return false; 758 return false;
760 } 759 }
761 if (gpu_memory.GpuToCpuAddress(address).has_value()) { 760 if (gpu_memory.GpuToCpuAddress(address).has_value()) {