path: root/src/video_core/texture_cache
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r--  src/video_core/texture_cache/image_base.h          |   3
-rw-r--r--  src/video_core/texture_cache/texture_cache.h       | 209
-rw-r--r--  src/video_core/texture_cache/texture_cache_base.h  |  73
3 files changed, 195 insertions, 90 deletions
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1f85ec9da..620565684 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -88,6 +88,9 @@ struct ImageBase {
     u32 scale_rating = 0;
     u64 scale_tick = 0;
     bool has_scaled = false;
+
+    size_t channel = 0;
+
     ImageFlagBits flags = ImageFlagBits::CpuModified;
 
     GPUVAddr gpu_addr = 0;
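
Note: the only substantive change in image_base.h is the new `channel` member, a slot index into the per-channel state introduced by the rest of this patch. A hedged illustration of what the tag enables follows; the field name and zero default come from the hunk above, while the ImageStub type and the counting helper are invented for the example:

#include <cstddef>
#include <vector>

// ImageStub mirrors just the new member; everything else here is hypothetical.
struct ImageStub {
    std::size_t channel = 0; // slot index into the cache's per-channel storage
};

// Count images belonging to the currently bound channel, skipping entries
// created under a different channel's descriptor state.
inline std::size_t CountImagesOfChannel(const std::vector<ImageStub>& images,
                                        std::size_t current_channel) {
    std::size_t count = 0;
    for (const ImageStub& image : images) {
        if (image.channel == current_channel) {
            ++count;
        }
    }
    return count;
}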
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1dbe01bc0..2731aead0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
 
 #include "common/alignment.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;
 
 template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
-                              Tegra::Engines::Maxwell3D& maxwell3d_,
-                              Tegra::Engines::KeplerCompute& kepler_compute_,
-                              Tegra::MemoryManager& gpu_memory_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
+    : runtime{runtime_}, rasterizer{rasterizer_} {
     // Configure null sampler
     TSCEntry sampler_descriptor{};
     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
     sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
     sampler_descriptor.cubemap_anisotropy.Assign(1);
 
+    // Setup channels
+    current_channel_id = UNSET_CHANNEL;
+    state = nullptr;
+    maxwell3d = nullptr;
+    kepler_compute = nullptr;
+    gpu_memory = nullptr;
+
     // Make sure the first index is reserved for the null resources
     // This way the null resource becomes a compile time constant
     void(slot_images.insert(NullImageParams{}));
@@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
             const auto copies = FullDownloadCopies(image.info);
             image.DownloadMemory(map, copies);
             runtime.Finish();
-            SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
         }
         if (True(image.flags & ImageFlagBits::Tracked)) {
             UntrackImage(image, image_id);
@@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 template <class P>
 template <bool has_blacklists>
 void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
+    FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
+                                   views);
 }
 
 template <class P>
 void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
+    FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
 }
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
-    if (index > graphics_sampler_table.Limit()) {
+    if (index > state->graphics_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
-    SamplerId& id = graphics_sampler_ids[index];
+    const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
+    SamplerId& id = state->graphics_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
176 181
177template <class P> 182template <class P>
178typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { 183typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
179 if (index > compute_sampler_table.Limit()) { 184 if (index > state->compute_sampler_table.Limit()) {
180 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 185 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
181 return &slot_samplers[NULL_SAMPLER_ID]; 186 return &slot_samplers[NULL_SAMPLER_ID];
182 } 187 }
183 const auto [descriptor, is_new] = compute_sampler_table.Read(index); 188 const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
184 SamplerId& id = compute_sampler_ids[index]; 189 SamplerId& id = state->compute_sampler_ids[index];
185 if (is_new) { 190 if (is_new) {
186 id = FindSampler(descriptor); 191 id = FindSampler(descriptor);
187 } 192 }
@@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
 template <class P>
 void TextureCache<P>::SynchronizeGraphicsDescriptors() {
     using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
-    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
-    const u32 tic_limit = maxwell3d.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
-    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
-        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d->regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
+    if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
+        state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
     }
-    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
-        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+        state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
 template <class P>
 void TextureCache<P>::SynchronizeComputeDescriptors() {
-    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
-    const u32 tic_limit = kepler_compute.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
-    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
-    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
-        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute->regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
+    if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
     }
-    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
-        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
+        state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
 template <class P>
 bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     u32 scale_rating = 0;
     bool rescaled = false;
     std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
 template <class P>
 void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     using namespace VideoCommon::Dirty;
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::RenderTargets]) {
         for (size_t index = 0; index < NUM_RT; ++index) {
             ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
 
     for (size_t index = 0; index < NUM_RT; ++index) {
-        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
     }
     u32 up_scale = 1;
     u32 down_shift = 0;
@@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
         down_shift = Settings::values.resolution_info.down_shift;
     }
     render_targets.size = Extent2D{
-        (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
-        (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
     };
 
     flags[Dirty::DepthBiasGlobal] = true;
@@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
     }
 }
 
@@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
     for (const ImageId image_id : download_ids) {
         const ImageBase& image = slot_images[image_id];
         const auto copies = FullDownloadCopies(image.info);
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
         download_map.offset += image.unswizzled_size_bytes;
         download_span = download_span.subspan(image.unswizzled_size_bytes);
     }
@@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     const GPUVAddr gpu_addr = image.gpu_addr;
 
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
-        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
     } else if (True(image.flags & ImageFlagBits::Converted)) {
         std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
-        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+        auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
         ConvertImage(unswizzled_data, image.info, mapped_span, copies);
         image.UploadMemory(staging, copies);
     } else {
-        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+        const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
         image.UploadMemory(staging, copies);
     }
 }
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidEntry(gpu_memory, config)) {
+    if (!IsValidEntry(*gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
-    const auto [pair, is_new] = image_views.try_emplace(config);
+    const auto [pair, is_new] = state->image_views.try_emplace(config);
     ImageViewId& image_view_id = pair->second;
     if (is_new) {
         image_view_id = CreateImageView(config);
@@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
         if (!cpu_addr) {
             return ImageId{};
         }
@@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
         image.scale_tick = frame_tick + 1;
     }
     const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
     dirty[Dirty::RenderTargets] = true;
     dirty[Dirty::ZetaBuffer] = true;
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
     image.image_view_ids.clear();
     image.image_view_infos.clear();
     if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
     }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
+    state->graphics_image_table.Invalidate();
+    state->compute_image_table.Invalidate();
     has_deleted_images = true;
 }
 
@@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
         const auto size = CalculateGuestSizeInBytes(info);
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
         if (!cpu_addr) {
             const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
             virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+    if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
         new_image.flags |= ImageFlagBits::Sparse;
     }
 
@@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
     if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
         return NULL_SAMPLER_ID;
     }
-    const auto [pair, is_new] = samplers.try_emplace(config);
+    const auto [pair, is_new] = state->samplers.try_emplace(config);
     if (is_new) {
         pair->second = slot_samplers.insert(runtime, config);
     }
@@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
 
 template <class P>
 ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (index >= regs.rt_control.count) {
         return ImageViewId{};
     }
@@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
 
 template <class P>
 ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (!regs.zeta_enable) {
         return ImageViewId{};
     }
@@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
     ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
+        const auto it = state->gpu_page_table.find(page);
+        if (it == state->gpu_page_table.end()) {
             if constexpr (BOOL_BREAK) {
                 return false;
             } else {
@@ -1403,9 +1408,9 @@ template <typename Func>
 void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
-    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
     for (const auto& [gpu_addr, size] : segments) {
-        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
         ASSERT(cpu_addr);
         if constexpr (RETURNS_BOOL) {
             if (func(gpu_addr, *cpu_addr, size)) {
@@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     image.lru_index = lru_cache.Insert(image_id, frame_tick);
 
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+                   [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         auto map_id =
             slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         }
         image_ids.erase(vector_it);
     };
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, state->gpu_page_table);
+    });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         const auto map_id = image.map_view_id;
         ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
 
     // Mark render targets as dirty
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
     dirty[Dirty::RenderTargets] = true;
     dirty[Dirty::ZetaBuffer] = true;
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     if (alloc_images.empty()) {
         image_allocs_table.erase(alloc_it);
     }
-    if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+    for (auto& this_state : channel_storage) {
+        if constexpr (ENABLE_VALIDATION) {
+            std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
+            std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
+        }
+        this_state.graphics_image_table.Invalidate();
+        this_state.compute_image_table.Invalidate();
     }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
     has_deleted_images = true;
 }
 
 template <class P>
 void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
-    auto it = image_views.begin();
-    while (it != image_views.end()) {
+    auto it = state->image_views.begin();
+    while (it != state->image_views.end()) {
         const auto found = std::ranges::find(removed_views, it->second);
         if (found != removed_views.end()) {
-            it = image_views.erase(it);
+            it = state->image_views.erase(it);
         } else {
             ++it;
         }
@@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
     const ImageViewBase& image_view = slot_image_views[id];
     const ImageBase& image = slot_images[image_view.image_id];
     const Extent3D size = image_view.size;
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     const auto& scissor = regs.scissor_test[0];
     if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
         // Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
            scissor.max_y >= size.height;
 }
 
+template <class P>
+TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
+    : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
+      gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
+      graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
+                                                                               gpu_memory} {}
+
+template <class P>
+void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    auto new_id = [this, &channel]() {
+        if (!free_channel_ids.empty()) {
+            auto id = free_channel_ids.front();
+            free_channel_ids.pop_front();
+            new (&channel_storage[id]) ChannelInfo(channel);
+            return id;
+        }
+        channel_storage.emplace_back(channel);
+        return channel_storage.size() - 1;
+    }();
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void TextureCache<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+    state = &channel_storage[current_channel_id];
+    maxwell3d = &state->maxwell3d;
+    kepler_compute = &state->kepler_compute;
+    gpu_memory = &state->gpu_memory;
+}
+
+/// Erase channel's state.
+template <class P>
+void TextureCache<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id);
+    channel_map.erase(it);
+    if (this_id == current_channel_id) {
+        current_channel_id = UNSET_CHANNEL;
+        state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
 } // namespace VideoCommon
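
Note: for orientation, here is a minimal caller-side sketch of the channel API added above. Only CreateChannel, BindToChannel, EraseChannel, and the public mutex come from this patch; the OnChannelOpen/OnChannelClose hooks are hypothetical, and the sketch assumes a ChannelState whose maxwell_3d, kepler_compute, and memory_manager pointers are already valid, since ChannelInfo dereferences them in its constructor:

#include <mutex>

#include "video_core/control/channel_state.h"
#include "video_core/texture_cache/texture_cache_base.h"

// Hypothetical scheduler hooks (not part of this patch).
template <class P>
void OnChannelOpen(VideoCommon::TextureCache<P>& cache, Tegra::Control::ChannelState& channel) {
    std::scoped_lock lock{cache.mutex};
    cache.CreateChannel(channel);         // allocate or recycle a ChannelInfo slot
    cache.BindToChannel(channel.bind_id); // repoint maxwell3d/kepler_compute/gpu_memory at it
}

template <class P>
void OnChannelClose(VideoCommon::TextureCache<P>& cache, s32 bind_id) {
    std::scoped_lock lock{cache.mutex};
    cache.EraseChannel(bind_id); // recycle the slot; clears the engine pointers if it was current
}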
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7e6c6cef2..69efcb718 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <deque>
+#include <limits>
 #include <mutex>
 #include <span>
 #include <type_traits>
@@ -26,6 +28,10 @@
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"
 
+namespace Tegra::Control {
+struct ChannelState;
+}
+
 namespace VideoCommon {
 
 using Tegra::Texture::SwizzleSource;
@@ -58,6 +64,8 @@ class TextureCache {
     /// True when the API can provide info about the memory of the device.
     static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
 
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
     static constexpr s64 TARGET_THRESHOLD = 4_GiB;
     static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
     static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@@ -85,8 +93,7 @@ class TextureCache {
     };
 
 public:
-    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
-                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
+    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
 
     /// Notify the cache that a new frame has been queued
     void TickFrame();
@@ -171,6 +178,15 @@ public:
 
     [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
 
+    /// Create channel state.
+    void CreateChannel(struct Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
     std::mutex mutex;
 
 private:
@@ -338,31 +354,52 @@ private:
     u64 GetScaledImageSizeBytes(ImageBase& image);
 
     Runtime& runtime;
-    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
 
-    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
-    std::vector<SamplerId> graphics_sampler_ids;
-    std::vector<ImageViewId> graphics_image_view_ids;
+    struct ChannelInfo {
+        ChannelInfo() = delete;
+        ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
+        ChannelInfo(const ChannelInfo& state) = delete;
+        ChannelInfo& operator=(const ChannelInfo&) = delete;
+        ChannelInfo(ChannelInfo&& other) noexcept = default;
+        ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
+
+        Tegra::Engines::Maxwell3D& maxwell3d;
+        Tegra::Engines::KeplerCompute& kepler_compute;
+        Tegra::MemoryManager& gpu_memory;
+
+        DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+        DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+        std::vector<SamplerId> graphics_sampler_ids;
+        std::vector<ImageViewId> graphics_image_view_ids;
 
-    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
-    std::vector<SamplerId> compute_sampler_ids;
-    std::vector<ImageViewId> compute_image_view_ids;
+        DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+        DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+        std::vector<SamplerId> compute_sampler_ids;
+        std::vector<ImageViewId> compute_image_view_ids;
+
+        std::unordered_map<TICEntry, ImageViewId> image_views;
+        std::unordered_map<TSCEntry, SamplerId> samplers;
+
+        std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    };
+
+    std::deque<ChannelInfo> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+
+    ChannelInfo* state;
+    size_t current_channel_id{UNSET_CHANNEL};
+    VideoCore::RasterizerInterface& rasterizer;
+    Tegra::Engines::Maxwell3D* maxwell3d;
+    Tegra::Engines::KeplerCompute* kepler_compute;
+    Tegra::MemoryManager* gpu_memory;
 
     RenderTargets render_targets;
 
-    std::unordered_map<TICEntry, ImageViewId> image_views;
-    std::unordered_map<TSCEntry, SamplerId> samplers;
     std::unordered_map<RenderTargets, FramebufferId> framebuffers;
 
     std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
     std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
     std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
 
     VAddr virtual_invalid_space{};
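
Note: the deque-plus-free-list slot recycling that CreateChannel and EraseChannel implement is worth seeing in isolation. The SlotPool below is an illustrative reduction, not a type from the codebase; it relies on the same two properties the patch does: std::deque growth never relocates existing elements, and freed slots are reconstructed in place with placement new, so indices handed out earlier stay stable.

#include <cstddef>
#include <deque>
#include <new>
#include <utility>

// Illustrative reduction of CreateChannel/EraseChannel's slot management.
template <typename T>
class SlotPool {
public:
    template <typename... Args>
    std::size_t Create(Args&&... args) {
        if (!free_ids.empty()) {
            const std::size_t id = free_ids.front();
            free_ids.pop_front();
            // Recycle the slot in place, mirroring `new (&channel_storage[id]) ChannelInfo(...)`.
            new (&storage[id]) T(std::forward<Args>(args)...);
            return id;
        }
        storage.emplace_back(std::forward<Args>(args)...);
        return storage.size() - 1;
    }

    // Like EraseChannel, only the index is recycled here; as in the patch, the
    // old object is simply overwritten by placement new on the next reuse
    // rather than being destroyed first.
    void Release(std::size_t id) {
        free_ids.push_back(id);
    }

    T& operator[](std::size_t id) {
        return storage[id];
    }

private:
    std::deque<T> storage;
    std::deque<std::size_t> free_ids;
};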