From 139ea93512aeead8a4aee3910a3de86eb109a838 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 5 Nov 2021 15:52:31 +0100 Subject: VideoCore: implement channels on gpu caches. --- src/video_core/texture_cache/image_base.h | 3 + src/video_core/texture_cache/texture_cache.h | 209 ++++++++++++++-------- src/video_core/texture_cache/texture_cache_base.h | 73 ++++++-- 3 files changed, 195 insertions(+), 90 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1f85ec9da..620565684 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -88,6 +88,9 @@ struct ImageBase { u32 scale_rating = 0; u64 scale_tick = 0; bool has_scaled = false; + + size_t channel = 0; + ImageFlagBits flags = ImageFlagBits::CpuModified; GPUVAddr gpu_addr = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 1dbe01bc0..2731aead0 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -7,6 +7,7 @@ #include "common/alignment.h" #include "common/settings.h" +#include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" #include "video_core/texture_cache/image_view_base.h" @@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { +TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) + : runtime{runtime_}, rasterizer{rasterizer_} { // Configure null sampler TSCEntry sampler_descriptor{}; sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); @@ -42,6 +39,13 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); sampler_descriptor.cubemap_anisotropy.Assign(1); + // Setup channels + current_channel_id = UNSET_CHANNEL; + state = nullptr; + maxwell3d = nullptr; + kepler_compute = nullptr; + gpu_memory = nullptr; + // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant void(slot_images.insert(NullImageParams{})); @@ -93,7 +97,7 @@ void TextureCache

::RunGarbageCollector() { const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, image_id); @@ -152,22 +156,23 @@ void TextureCache

::MarkModification(ImageId id) noexcept { template template void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(graphics_image_table, graphics_image_view_ids, views); + FillImageViews(state->graphics_image_table, state->graphics_image_view_ids, + views); } template void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(compute_image_table, compute_image_view_ids, views); + FillImageViews(state->compute_image_table, state->compute_image_view_ids, views); } template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { + if (index > state->graphics_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; + const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index); + SamplerId& id = state->graphics_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -176,12 +181,12 @@ typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { template typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { + if (index > state->compute_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; + const auto [descriptor, is_new] = state->compute_sampler_table.Read(index); + SamplerId& id = state->compute_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -191,34 +196,34 @@ typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { template void TextureCache

::SynchronizeGraphicsDescriptors() { using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d->regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit; + if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) { + state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { + state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } template void TextureCache

::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + const bool linked_tsc = kepler_compute->launch_description.linked_tsc; + const u32 tic_limit = kepler_compute->regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); + if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) { + state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } template bool TextureCache

::RescaleRenderTargets(bool is_clear) { - auto& flags = maxwell3d.dirty.flags; + auto& flags = maxwell3d->dirty.flags; u32 scale_rating = 0; bool rescaled = false; std::array tmp_color_images{}; @@ -315,7 +320,7 @@ bool TextureCache

::RescaleRenderTargets(bool is_clear) { template void TextureCache

::UpdateRenderTargets(bool is_clear) { using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; + auto& flags = maxwell3d->dirty.flags; if (!flags[Dirty::RenderTargets]) { for (size_t index = 0; index < NUM_RT; ++index) { ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; @@ -342,7 +347,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); + render_targets.draw_buffers[index] = static_cast(maxwell3d->regs.rt_control.Map(index)); } u32 up_scale = 1; u32 down_shift = 0; @@ -351,8 +356,8 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { down_shift = Settings::values.resolution_info.down_shift; } render_targets.size = Extent2D{ - (maxwell3d.regs.render_area.width * up_scale) >> down_shift, - (maxwell3d.regs.render_area.height * up_scale) >> down_shift, + (maxwell3d->regs.render_area.width * up_scale) >> down_shift, + (maxwell3d->regs.render_area.height * up_scale) >> down_shift, }; flags[Dirty::DepthBiasGlobal] = true; @@ -458,7 +463,7 @@ void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { const auto copies = FullDownloadCopies(image.info); image.DownloadMemory(map, copies); runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } } @@ -655,7 +660,7 @@ void TextureCache

::PopAsyncFlushes() { for (const ImageId image_id : download_ids) { const ImageBase& image = slot_images[image_id]; const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span); download_map.offset += image.unswizzled_size_bytes; download_span = download_span.subspan(image.unswizzled_size_bytes); } @@ -714,26 +719,26 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) const GPUVAddr gpu_addr = image.gpu_addr; if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); const auto uploads = FullUploadSwizzles(image.info); runtime.AccelerateImageUpload(image, staging, uploads); } else if (True(image.flags & ImageFlagBits::Converted)) { std::vector unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data); ConvertImage(unswizzled_data, image.info, mapped_span, copies); image.UploadMemory(staging, copies); } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span); image.UploadMemory(staging, copies); } } template ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { + if (!IsValidEntry(*gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } - const auto [pair, is_new] = image_views.try_emplace(config); + const auto [pair, is_new] = state->image_views.try_emplace(config); ImageViewId& image_view_id = pair->second; if (is_new) { image_view_id = CreateImageView(config); @@ -777,9 +782,9 @@ ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); if (!cpu_addr) { return ImageId{}; } @@ -860,7 +865,7 @@ void TextureCache

::InvalidateScale(Image& image) { image.scale_tick = frame_tick + 1; } const std::span image_view_ids = image.image_view_ids; - auto& dirty = maxwell3d.dirty.flags; + auto& dirty = maxwell3d->dirty.flags; dirty[Dirty::RenderTargets] = true; dirty[Dirty::ZetaBuffer] = true; for (size_t rt = 0; rt < NUM_RT; ++rt) { @@ -881,11 +886,11 @@ void TextureCache

::InvalidateScale(Image& image) { image.image_view_ids.clear(); image.image_view_infos.clear(); if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID); } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); + state->graphics_image_table.Invalidate(); + state->compute_image_table.Invalidate(); has_deleted_images = true; } @@ -929,10 +934,10 @@ bool TextureCache

::ScaleDown(Image& image) { template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); if (!cpu_addr) { const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; virtual_invalid_space += Common::AlignUp(size, 32); @@ -1050,7 +1055,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { new_image.flags |= ImageFlagBits::Sparse; } @@ -1192,7 +1197,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } - const auto [pair, is_new] = samplers.try_emplace(config); + const auto [pair, is_new] = state->samplers.try_emplace(config); if (is_new) { pair->second = slot_samplers.insert(runtime, config); } @@ -1201,7 +1206,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { template ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; if (index >= regs.rt_control.count) { return ImageViewId{}; } @@ -1219,7 +1224,7 @@ ImageViewId TextureCache

::FindColorBuffer(size_t index, bool is_clear) { template ImageViewId TextureCache

::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; if (!regs.zeta_enable) { return ImageViewId{}; } @@ -1321,8 +1326,8 @@ void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { + const auto it = state->gpu_page_table.find(page); + if (it == state->gpu_page_table.end()) { if constexpr (BOOL_BREAK) { return false; } else { @@ -1403,9 +1408,9 @@ template void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); for (const auto& [gpu_addr, size] : segments) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); if constexpr (RETURNS_BOOL) { if (func(gpu_addr, *cpu_addr, size)) { @@ -1449,7 +1454,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.lru_index = lru_cache.Insert(image_id, frame_tick); ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { auto map_id = slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); @@ -1497,8 +1502,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { } image_ids.erase(vector_it); }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, state->gpu_page_table); + }); if (False(image.flags & ImageFlagBits::Sparse)) { const auto map_id = image.map_view_id; ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { @@ -1631,7 +1637,7 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; + auto& dirty = maxwell3d->dirty.flags; dirty[Dirty::RenderTargets] = true; dirty[Dirty::ZetaBuffer] = true; for (size_t rt = 0; rt < NUM_RT; ++rt) { @@ -1681,22 +1687,24 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { if (alloc_images.empty()) { image_allocs_table.erase(alloc_it); } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + for (auto& this_state : channel_storage) { + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID); + } + this_state.graphics_image_table.Invalidate(); + this_state.compute_image_table.Invalidate(); } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); has_deleted_images = true; } template void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { + auto it = state->image_views.begin(); + while (it != state->image_views.end()) { const auto found = std::ranges::find(removed_views, it->second); if (found != removed_views.end()) { - it = image_views.erase(it); + it = state->image_views.erase(it); } else { ++it; } @@ -1943,7 +1951,7 @@ bool TextureCache

::IsFullClear(ImageViewId id) { const ImageViewBase& image_view = slot_image_views[id]; const ImageBase& image = slot_images[image_view.image_id]; const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d->regs; const auto& scissor = regs.scissor_test[0]; if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { // Images with multiple resources can't be cleared in a single call @@ -1958,4 +1966,61 @@ bool TextureCache

::IsFullClear(ImageViewId id) { scissor.max_y >= size.height; } +template +TextureCache

::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept + : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute}, + gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory}, + graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{ + gpu_memory} {} + +template +void TextureCache

::CreateChannel(struct Tegra::Control::ChannelState& channel) { + ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); + auto new_id = [this, &channel]() { + if (!free_channel_ids.empty()) { + auto id = free_channel_ids.front(); + free_channel_ids.pop_front(); + new (&channel_storage[id]) ChannelInfo(channel); + return id; + } + channel_storage.emplace_back(channel); + return channel_storage.size() - 1; + }(); + channel_map.emplace(channel.bind_id, new_id); + if (current_channel_id != UNSET_CHANNEL) { + state = &channel_storage[current_channel_id]; + } +} + +/// Bind a channel for execution. +template +void TextureCache

::BindToChannel(s32 id) { + auto it = channel_map.find(id); + ASSERT(it != channel_map.end() && id >= 0); + current_channel_id = it->second; + state = &channel_storage[current_channel_id]; + maxwell3d = &state->maxwell3d; + kepler_compute = &state->kepler_compute; + gpu_memory = &state->gpu_memory; +} + +/// Erase channel's state. +template +void TextureCache

::EraseChannel(s32 id) { + const auto it = channel_map.find(id); + ASSERT(it != channel_map.end() && id >= 0); + const auto this_id = it->second; + free_channel_ids.push_back(this_id); + channel_map.erase(it); + if (this_id == current_channel_id) { + current_channel_id = UNSET_CHANNEL; + state = nullptr; + maxwell3d = nullptr; + kepler_compute = nullptr; + gpu_memory = nullptr; + } else if (current_channel_id != UNSET_CHANNEL) { + state = &channel_storage[current_channel_id]; + } +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 7e6c6cef2..69efcb718 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -3,6 +3,8 @@ #pragma once +#include +#include #include #include #include @@ -26,6 +28,10 @@ #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" +namespace Tegra::Control { +struct ChannelState; +} + namespace VideoCommon { using Tegra::Texture::SwizzleSource; @@ -58,6 +64,8 @@ class TextureCache { /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; + static constexpr size_t UNSET_CHANNEL{std::numeric_limits::max()}; + static constexpr s64 TARGET_THRESHOLD = 4_GiB; static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; @@ -85,8 +93,7 @@ class TextureCache { }; public: - explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, - Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); /// Notify the cache that a new frame has been queued void TickFrame(); @@ -171,6 +178,15 @@ public: [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; + /// Create channel state. + void CreateChannel(struct Tegra::Control::ChannelState& channel); + + /// Bind a channel for execution. + void BindToChannel(s32 id); + + /// Erase channel's state. + void EraseChannel(s32 id); + std::mutex mutex; private: @@ -338,31 +354,52 @@ private: u64 GetScaledImageSizeBytes(ImageBase& image); Runtime& runtime; - VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector graphics_image_view_ids; + struct ChannelInfo { + ChannelInfo() = delete; + ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept; + ChannelInfo(const ChannelInfo& state) = delete; + ChannelInfo& operator=(const ChannelInfo&) = delete; + ChannelInfo(ChannelInfo&& other) noexcept = default; + ChannelInfo& operator=(ChannelInfo&& other) noexcept = default; + + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; + + std::unordered_map image_views; + std::unordered_map samplers; + + std::unordered_map, IdentityHash> gpu_page_table; + }; + + std::deque channel_storage; + std::deque free_channel_ids; + std::unordered_map channel_map; + + ChannelInfo* state; + size_t current_channel_id{UNSET_CHANNEL}; + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D* maxwell3d; + Tegra::Engines::KeplerCompute* kepler_compute; + Tegra::MemoryManager* gpu_memory; RenderTargets render_targets; - std::unordered_map image_views; - std::unordered_map samplers; std::unordered_map framebuffers; std::unordered_map, IdentityHash> page_table; - std::unordered_map, IdentityHash> gpu_page_table; std::unordered_map, IdentityHash> sparse_page_table; - std::unordered_map> sparse_views; VAddr virtual_invalid_space{}; -- cgit v1.2.3 From 3f8e7a55851a613becf715cbf3016a8e9f63d65f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 7 Nov 2021 17:52:45 +0100 Subject: VideoCore: Fix channels with disk pipeline/shader cache. --- src/video_core/texture_cache/texture_cache.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2731aead0..e8b0b0a3b 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -885,12 +885,14 @@ void TextureCache

::InvalidateScale(Image& image) { } image.image_view_ids.clear(); image.image_view_infos.clear(); - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID); + for (auto& this_state : channel_storage) { + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID); + } + this_state.graphics_image_table.Invalidate(); + this_state.compute_image_table.Invalidate(); } - state->graphics_image_table.Invalidate(); - state->compute_image_table.Invalidate(); has_deleted_images = true; } -- cgit v1.2.3 From 6fc4012396e98a1a6ac455791b314d2280a12a51 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 16 Nov 2021 00:01:40 +0100 Subject: VideoCore: Extra Fixes. --- src/video_core/texture_cache/util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 1820823b2..bea1c27d0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -755,7 +755,7 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config if (address == 0) { return false; } - if (address > (1ULL << 48)) { + if (address >= (1ULL << 40)) { return false; } if (gpu_memory.GpuToCpuAddress(address).has_value()) { -- cgit v1.2.3 From bb74973bba6005cee5f1409b3b5a6c572da3cf66 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 17 Nov 2021 05:44:01 +0100 Subject: General: Rebase fixes. --- src/video_core/texture_cache/texture_cache.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index e8b0b0a3b..0ec999d63 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -885,14 +885,13 @@ void TextureCache

::InvalidateScale(Image& image) { } image.image_view_ids.clear(); image.image_view_infos.clear(); - for (auto& this_state : channel_storage) { - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID); - } - this_state.graphics_image_table.Invalidate(); - this_state.compute_image_table.Invalidate(); + auto& channel_info = channel_storage[image.channel]; + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); } + channel_info.graphics_image_table.Invalidate(); + channel_info.compute_image_table.Invalidate(); has_deleted_images = true; } -- cgit v1.2.3 From e462191482c6507daed67802c6c1d2c50f10c96e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 17 Dec 2021 16:45:06 +0100 Subject: Refactor VideoCore to use AS sepparate from Channel. --- src/video_core/texture_cache/texture_cache.cpp | 16 +++ src/video_core/texture_cache/texture_cache.h | 130 ++++++++-------------- src/video_core/texture_cache/texture_cache_base.h | 96 +++++++--------- 3 files changed, 101 insertions(+), 141 deletions(-) create mode 100644 src/video_core/texture_cache/texture_cache.cpp (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp new file mode 100644 index 000000000..bc905a1a4 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -0,0 +1,16 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#include "video_core/control/channel_state_cache.inc" +#include "video_core/texture_cache/texture_cache_base.h" + +namespace VideoCommon { + +TextureCacheChannelInfo::TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept + : ChannelInfo(state), graphics_image_table{gpu_memory}, graphics_sampler_table{gpu_memory}, + compute_image_table{gpu_memory}, compute_sampler_table{gpu_memory} {} + +template class VideoCommon::ChannelSetupCaches; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0ec999d63..89c5faf88 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,5 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2021 yuzu emulator team +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #pragma once @@ -41,10 +43,6 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // Setup channels current_channel_id = UNSET_CHANNEL; - state = nullptr; - maxwell3d = nullptr; - kepler_compute = nullptr; - gpu_memory = nullptr; // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant @@ -156,23 +154,24 @@ void TextureCache

::MarkModification(ImageId id) noexcept { template template void TextureCache

::FillGraphicsImageViews(std::span views) { - FillImageViews(state->graphics_image_table, state->graphics_image_view_ids, - views); + FillImageViews(channel_state->graphics_image_table, + channel_state->graphics_image_view_ids, views); } template void TextureCache

::FillComputeImageViews(std::span views) { - FillImageViews(state->compute_image_table, state->compute_image_view_ids, views); + FillImageViews(channel_state->compute_image_table, channel_state->compute_image_view_ids, + views); } template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { - if (index > state->graphics_sampler_table.Limit()) { + if (index > channel_state->graphics_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index); - SamplerId& id = state->graphics_sampler_ids[index]; + const auto [descriptor, is_new] = channel_state->graphics_sampler_table.Read(index); + SamplerId& id = channel_state->graphics_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -181,12 +180,12 @@ typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { template typename P::Sampler* TextureCache

::GetComputeSampler(u32 index) { - if (index > state->compute_sampler_table.Limit()) { + if (index > channel_state->compute_sampler_table.Limit()) { LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); return &slot_samplers[NULL_SAMPLER_ID]; } - const auto [descriptor, is_new] = state->compute_sampler_table.Read(index); - SamplerId& id = state->compute_sampler_ids[index]; + const auto [descriptor, is_new] = channel_state->compute_sampler_table.Read(index); + SamplerId& id = channel_state->compute_sampler_ids[index]; if (is_new) { id = FindSampler(descriptor); } @@ -199,11 +198,12 @@ void TextureCache

::SynchronizeGraphicsDescriptors() { const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex; const u32 tic_limit = maxwell3d->regs.tic.limit; const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit; - if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) { - state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + if (channel_state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), + tsc_limit)) { + channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { - state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (channel_state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) { + channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } @@ -213,11 +213,12 @@ void TextureCache

::SynchronizeComputeDescriptors() { const u32 tic_limit = kepler_compute->regs.tic.limit; const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); - if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + if (channel_state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); } - if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) { - state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + if (channel_state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), + tic_limit)) { + channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); } } @@ -738,7 +739,7 @@ ImageViewId TextureCache

::FindImageView(const TICEntry& config) { if (!IsValidEntry(*gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } - const auto [pair, is_new] = state->image_views.try_emplace(config); + const auto [pair, is_new] = channel_state->image_views.try_emplace(config); ImageViewId& image_view_id = pair->second; if (is_new) { image_view_id = CreateImageView(config); @@ -1198,7 +1199,7 @@ SamplerId TextureCache

::FindSampler(const TSCEntry& config) { if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { return NULL_SAMPLER_ID; } - const auto [pair, is_new] = state->samplers.try_emplace(config); + const auto [pair, is_new] = channel_state->samplers.try_emplace(config); if (is_new) { pair->second = slot_samplers.insert(runtime, config); } @@ -1327,8 +1328,8 @@ void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = state->gpu_page_table.find(page); - if (it == state->gpu_page_table.end()) { + const auto it = channel_state->gpu_page_table->find(page); + if (it == channel_state->gpu_page_table->end()) { if constexpr (BOOL_BREAK) { return false; } else { @@ -1454,8 +1455,9 @@ void TextureCache

::RegisterImage(ImageId image_id) { } image.lru_index = lru_cache.Insert(image_id, frame_tick); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); }); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + (*channel_state->gpu_page_table)[page].push_back(image_id); + }); if (False(image.flags & ImageFlagBits::Sparse)) { auto map_id = slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); @@ -1486,9 +1488,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { image.flags &= ~ImageFlagBits::BadOverlap; lru_cache.Free(image.lru_index); const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { + [this, image_id](u64 page, + std::unordered_map, Common::IdentityHash>& + selected_page_table) { const auto page_it = selected_page_table.find(page); if (page_it == selected_page_table.end()) { ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << YUZU_PAGEBITS); @@ -1504,7 +1506,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { image_ids.erase(vector_it); }; ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, state->gpu_page_table); + clear_page_table(page, (*channel_state->gpu_page_table)); }); if (False(image.flags & ImageFlagBits::Sparse)) { const auto map_id = image.map_view_id; @@ -1701,11 +1703,11 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { template void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = state->image_views.begin(); - while (it != state->image_views.end()) { + auto it = channel_state->image_views.begin(); + while (it != channel_state->image_views.end()) { const auto found = std::ranges::find(removed_views, it->second); if (found != removed_views.end()) { - it = state->image_views.erase(it); + it = channel_state->image_views.erase(it); } else { ++it; } @@ -1967,61 +1969,19 @@ bool TextureCache

::IsFullClear(ImageViewId id) { scissor.max_y >= size.height; } -template -TextureCache

::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept - : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute}, - gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory}, - graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{ - gpu_memory} {} - template void TextureCache

::CreateChannel(struct Tegra::Control::ChannelState& channel) { - ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0); - auto new_id = [this, &channel]() { - if (!free_channel_ids.empty()) { - auto id = free_channel_ids.front(); - free_channel_ids.pop_front(); - new (&channel_storage[id]) ChannelInfo(channel); - return id; - } - channel_storage.emplace_back(channel); - return channel_storage.size() - 1; - }(); - channel_map.emplace(channel.bind_id, new_id); - if (current_channel_id != UNSET_CHANNEL) { - state = &channel_storage[current_channel_id]; - } + VideoCommon::ChannelSetupCaches::CreateChannel(channel); + const auto it = channel_map.find(channel.bind_id); + auto* this_state = &channel_storage[it->second]; + const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()]; + this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id]; } /// Bind a channel for execution. template -void TextureCache

::BindToChannel(s32 id) { - auto it = channel_map.find(id); - ASSERT(it != channel_map.end() && id >= 0); - current_channel_id = it->second; - state = &channel_storage[current_channel_id]; - maxwell3d = &state->maxwell3d; - kepler_compute = &state->kepler_compute; - gpu_memory = &state->gpu_memory; -} - -/// Erase channel's state. -template -void TextureCache

::EraseChannel(s32 id) { - const auto it = channel_map.find(id); - ASSERT(it != channel_map.end() && id >= 0); - const auto this_id = it->second; - free_channel_ids.push_back(this_id); - channel_map.erase(it); - if (this_id == current_channel_id) { - current_channel_id = UNSET_CHANNEL; - state = nullptr; - maxwell3d = nullptr; - kepler_compute = nullptr; - gpu_memory = nullptr; - } else if (current_channel_id != UNSET_CHANNEL) { - state = &channel_storage[current_channel_id]; - } +void TextureCache

::OnGPUASRegister([[maybe_unused]] size_t map_id) { + gpu_page_table_storage.emplace_back(); } } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 69efcb718..b24968b03 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,5 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2021 yuzu emulator team +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. #pragma once @@ -13,9 +15,11 @@ #include #include "common/common_types.h" +#include "common/hash.h" #include "common/literals.h" #include "common/lru_cache.h" #include "video_core/compatible_formats.h" +#include "video_core/control/channel_state_cache.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" #include "video_core/surface.h" @@ -50,8 +54,35 @@ struct ImageViewInOut { ImageViewId id{}; }; +using TextureCacheGPUMap = std::unordered_map, Common::IdentityHash>; + +class TextureCacheChannelInfo : public ChannelInfo { +public: + TextureCacheChannelInfo() = delete; + TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept; + TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete; + TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete; + TextureCacheChannelInfo(TextureCacheChannelInfo&& other) noexcept = default; + TextureCacheChannelInfo& operator=(TextureCacheChannelInfo&& other) noexcept = default; + + DescriptorTable graphics_image_table{gpu_memory}; + DescriptorTable graphics_sampler_table{gpu_memory}; + std::vector graphics_sampler_ids; + std::vector graphics_image_view_ids; + + DescriptorTable compute_image_table{gpu_memory}; + DescriptorTable compute_sampler_table{gpu_memory}; + std::vector compute_sampler_ids; + std::vector compute_image_view_ids; + + std::unordered_map image_views; + std::unordered_map samplers; + + TextureCacheGPUMap* gpu_page_table; +}; + template -class TextureCache { +class TextureCache : public VideoCommon::ChannelSetupCaches { /// Address shift for caching images into a hash table static constexpr u64 YUZU_PAGEBITS = 20; @@ -85,13 +116,6 @@ class TextureCache { PixelFormat src_format; }; - template - struct IdentityHash { - [[nodiscard]] size_t operator()(T value) const noexcept { - return static_cast(value); - } - }; - public: explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&); @@ -179,13 +203,7 @@ public: [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept; /// Create channel state. - void CreateChannel(struct Tegra::Control::ChannelState& channel); - - /// Bind a channel for execution. - void BindToChannel(s32 id); - - /// Erase channel's state. - void EraseChannel(s32 id); + void CreateChannel(Tegra::Control::ChannelState& channel) final override; std::mutex mutex; @@ -221,6 +239,8 @@ private: } } + void OnGPUASRegister(size_t map_id) final override; + /// Runs the Garbage Collector. void RunGarbageCollector(); @@ -355,51 +375,15 @@ private: Runtime& runtime; - struct ChannelInfo { - ChannelInfo() = delete; - ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept; - ChannelInfo(const ChannelInfo& state) = delete; - ChannelInfo& operator=(const ChannelInfo&) = delete; - ChannelInfo(ChannelInfo&& other) noexcept = default; - ChannelInfo& operator=(ChannelInfo&& other) noexcept = default; - - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - - DescriptorTable graphics_image_table{gpu_memory}; - DescriptorTable graphics_sampler_table{gpu_memory}; - std::vector graphics_sampler_ids; - std::vector graphics_image_view_ids; - - DescriptorTable compute_image_table{gpu_memory}; - DescriptorTable compute_sampler_table{gpu_memory}; - std::vector compute_sampler_ids; - std::vector compute_image_view_ids; - - std::unordered_map image_views; - std::unordered_map samplers; - - std::unordered_map, IdentityHash> gpu_page_table; - }; - - std::deque channel_storage; - std::deque free_channel_ids; - std::unordered_map channel_map; - - ChannelInfo* state; - size_t current_channel_id{UNSET_CHANNEL}; VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D* maxwell3d; - Tegra::Engines::KeplerCompute* kepler_compute; - Tegra::MemoryManager* gpu_memory; + std::deque gpu_page_table_storage; RenderTargets render_targets; std::unordered_map framebuffers; - std::unordered_map, IdentityHash> page_table; - std::unordered_map, IdentityHash> sparse_page_table; + std::unordered_map, Common::IdentityHash> page_table; + std::unordered_map, Common::IdentityHash> sparse_page_table; std::unordered_map> sparse_views; VAddr virtual_invalid_space{}; -- cgit v1.2.3 From 9cf4c8831d6a8b0d9c6e2a48e89b667fd73accee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 23 Dec 2021 01:40:45 +0100 Subject: Texture cache: Fix dangling references on multichannel. --- src/video_core/texture_cache/texture_cache.h | 45 +++++++++++++++------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 89c5faf88..5ef07d18f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -41,9 +41,6 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); sampler_descriptor.cubemap_anisotropy.Assign(1); - // Setup channels - current_channel_id = UNSET_CHANNEL; - // Make sure the first index is reserved for the null resources // This way the null resource becomes a compile time constant void(slot_images.insert(NullImageParams{})); @@ -886,13 +883,15 @@ void TextureCache

::InvalidateScale(Image& image) { } image.image_view_ids.clear(); image.image_view_infos.clear(); - auto& channel_info = channel_storage[image.channel]; - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); + for (size_t c : active_channel_ids) { + auto& channel_info = channel_storage[c]; + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); + } + channel_info.graphics_image_table.Invalidate(); + channel_info.compute_image_table.Invalidate(); } - channel_info.graphics_image_table.Invalidate(); - channel_info.compute_image_table.Invalidate(); has_deleted_images = true; } @@ -1690,26 +1689,30 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { if (alloc_images.empty()) { image_allocs_table.erase(alloc_it); } - for (auto& this_state : channel_storage) { + for (size_t c : active_channel_ids) { + auto& channel_info = channel_storage[c]; if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID); + std::ranges::fill(channel_info.graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(channel_info.compute_image_view_ids, CORRUPT_ID); } - this_state.graphics_image_table.Invalidate(); - this_state.compute_image_table.Invalidate(); + channel_info.graphics_image_table.Invalidate(); + channel_info.compute_image_table.Invalidate(); } has_deleted_images = true; } template void TextureCache

::RemoveImageViewReferences(std::span removed_views) { - auto it = channel_state->image_views.begin(); - while (it != channel_state->image_views.end()) { - const auto found = std::ranges::find(removed_views, it->second); - if (found != removed_views.end()) { - it = channel_state->image_views.erase(it); - } else { - ++it; + for (size_t c : active_channel_ids) { + auto& channel_info = channel_storage[c]; + auto it = channel_info.image_views.begin(); + while (it != channel_info.image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = channel_info.image_views.erase(it); + } else { + ++it; + } } } } -- cgit v1.2.3 From f350c3d74ea7880fc6d21f7f638b0d4a70a3246b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 1 Jan 2022 22:03:37 +0100 Subject: Texture cache: Fix the remaining issues with memory mnagement and unmapping. --- src/video_core/texture_cache/texture_cache.h | 27 ++++++++++++++++++----- src/video_core/texture_cache/texture_cache_base.h | 4 ++-- 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 5ef07d18f..a483892e4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -480,12 +480,20 @@ void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { } template -void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { +void TextureCache

::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { std::vector deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, + ForEachImageInRegionGPU(as_id, gpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + /* if (True(image.flags & ImageFlagBits::Remapped)) { continue; } @@ -493,6 +501,7 @@ void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { if (True(image.flags & ImageFlagBits::Tracked)) { UntrackImage(image, id); } + */ } } @@ -1322,13 +1331,19 @@ void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f template template -void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { +void TextureCache

::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, + Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = channel_state->gpu_page_table->find(page); - if (it == channel_state->gpu_page_table->end()) { + auto storage_id = getStorageID(as_id); + if (!storage_id) { + return; + } + auto& gpu_page_table = gpu_page_table_storage[*storage_id]; + ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { if constexpr (BOOL_BREAK) { return false; } else { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index b24968b03..2f4db5047 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -173,7 +173,7 @@ public: void UnmapMemory(VAddr cpu_addr, size_t size); /// Remove images in a region - void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, @@ -309,7 +309,7 @@ private: void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); template - void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); template void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); -- cgit v1.2.3 From e44ac8b821833672f7c99ad22dbe57ac9403279d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 6 Jan 2022 19:47:15 +0100 Subject: Texture Cache: Fix GC and GPU Modified on Joins. --- src/video_core/texture_cache/texture_cache.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a483892e4..66de41f04 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1758,6 +1758,7 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { boost::container::small_vector aliased_images; Image& image = slot_images[image_id]; bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); + bool any_modified = True(image.flags & ImageFlagBits::GpuModified); u64 most_recent_tick = image.modification_tick; for (const AliasedImage& aliased : image.aliased_images) { ImageBase& aliased_image = slot_images[aliased.id]; @@ -1765,9 +1766,7 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); aliased_images.push_back(&aliased); any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled); - if (True(aliased_image.flags & ImageFlagBits::GpuModified)) { - image.flags |= ImageFlagBits::GpuModified; - } + any_modified |= True(aliased_image.flags & ImageFlagBits::GpuModified); } } if (aliased_images.empty()) { @@ -1782,6 +1781,9 @@ void TextureCache

::SynchronizeAliases(ImageId image_id) { } } image.modification_tick = most_recent_tick; + if (any_modified) { + image.flags |= ImageFlagBits::GpuModified; + } std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { const ImageBase& lhs_image = slot_images[lhs->id]; const ImageBase& rhs_image = slot_images[rhs->id]; -- cgit v1.2.3 From f5fd6b5c8674fcf64a3e70809ee0a34d3a95beb6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Sun, 14 Aug 2022 02:36:36 -0700 Subject: DMA & InlineToMemory Engines Rework. --- src/video_core/texture_cache/util.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index bea1c27d0..1223df5a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -517,7 +517,6 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr const u32 host_bytes_per_layer = num_blocks_per_layer * bytes_per_block; UNIMPLEMENTED_IF(info.tile_width_spacing > 0); - UNIMPLEMENTED_IF(copy.image_offset.x != 0); UNIMPLEMENTED_IF(copy.image_offset.y != 0); UNIMPLEMENTED_IF(copy.image_offset.z != 0); -- cgit v1.2.3 From fd7afda1e802fffe843ac28811470432949fe7ee Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 13 Apr 2022 20:19:06 +0200 Subject: VideoCore: Implement formats needed for N64 emulation. --- src/video_core/texture_cache/format_lookup_table.cpp | 2 +- src/video_core/texture_cache/formatter.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index c71694d2a..ad935d386 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -63,7 +63,7 @@ PixelFormat PixelFormatFromTextureInfo(TextureFormat format, ComponentType red, case Hash(TextureFormat::A4B4G4R4, UNORM): return PixelFormat::A4B4G4R4_UNORM; case Hash(TextureFormat::G4R4, UNORM): - return PixelFormat::R4G4_UNORM; + return PixelFormat::G4R4_UNORM; case Hash(TextureFormat::A5B5G5R1, UNORM): return PixelFormat::A5B5G5R1_UNORM; case Hash(TextureFormat::R8, UNORM): diff --git a/src/video_core/texture_cache/formatter.h b/src/video_core/texture_cache/formatter.h index 6881e4c90..acc854715 100644 --- a/src/video_core/texture_cache/formatter.h +++ b/src/video_core/texture_cache/formatter.h @@ -153,8 +153,8 @@ struct fmt::formatter : fmt::formatter #include "common/common_types.h" +#include "common/div_ceil.h" #include "video_core/surface.h" #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/image_base.h" @@ -182,10 +183,6 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i }; const bool is_lhs_compressed = lhs_block.width > 1 || lhs_block.height > 1; const bool is_rhs_compressed = rhs_block.width > 1 || rhs_block.height > 1; - if (is_lhs_compressed && is_rhs_compressed) { - LOG_ERROR(HW_GPU, "Compressed to compressed image aliasing is not implemented"); - return; - } const s32 lhs_mips = lhs.info.resources.levels; const s32 rhs_mips = rhs.info.resources.levels; const s32 num_mips = std::min(lhs_mips - base->level, rhs_mips); @@ -199,12 +196,12 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i Extent3D lhs_size = MipSize(lhs.info.size, base->level + mip_level); Extent3D rhs_size = MipSize(rhs.info.size, mip_level); if (is_lhs_compressed) { - lhs_size.width /= lhs_block.width; - lhs_size.height /= lhs_block.height; + lhs_size.width = Common::DivCeil(lhs_size.width, lhs_block.width); + lhs_size.height = Common::DivCeil(lhs_size.height, lhs_block.height); } if (is_rhs_compressed) { - rhs_size.width /= rhs_block.width; - rhs_size.height /= rhs_block.height; + rhs_size.width = Common::DivCeil(rhs_size.width, rhs_block.width); + rhs_size.height = Common::DivCeil(rhs_size.height, rhs_block.height); } const Extent3D copy_size{ .width = std::min(lhs_size.width, rhs_size.width), -- cgit v1.2.3 From ada09778d97d39d83353ca54d0d6c9abd5eefc60 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 15 Apr 2022 12:29:49 +0200 Subject: Vulkan Texture Cache: Limit render area to the max width/height of the targets. --- src/video_core/texture_cache/render_targets.h | 1 + src/video_core/texture_cache/texture_cache.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/render_targets.h b/src/video_core/texture_cache/render_targets.h index da8ffe9ec..1efbd6507 100644 --- a/src/video_core/texture_cache/render_targets.h +++ b/src/video_core/texture_cache/render_targets.h @@ -26,6 +26,7 @@ struct RenderTargets { ImageViewId depth_buffer_id{}; std::array draw_buffers{}; Extent2D size{}; + bool is_rescaled{}; }; } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 66de41f04..9a835cefc 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -357,6 +357,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { (maxwell3d->regs.render_area.width * up_scale) >> down_shift, (maxwell3d->regs.render_area.height * up_scale) >> down_shift, }; + render_targets.is_rescaled = is_rescaling; flags[Dirty::DepthBiasGlobal] = true; } @@ -1962,6 +1963,7 @@ std::pair TextureCache

::RenderTargetFromImage( .color_buffer_ids = {color_view_id}, .depth_buffer_id = depth_view_id, .size = {extent.width >> samples_x, extent.height >> samples_y}, + .is_rescaled = is_rescaled, }); return {framebuffer_id, view_id}; } -- cgit v1.2.3 From fedd983f96bcbcc0c39f651db1cca0503d582fd9 Mon Sep 17 00:00:00 2001 From: Morph Date: Wed, 29 Jun 2022 19:27:49 -0400 Subject: general: Format licenses as per SPDX guidelines --- src/video_core/texture_cache/texture_cache.cpp | 5 ++--- src/video_core/texture_cache/texture_cache.h | 6 ++---- src/video_core/texture_cache/texture_cache_base.h | 6 ++---- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index bc905a1a4..8a9a32f44 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -1,6 +1,5 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv3 or any later version -// Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #include "video_core/control/channel_state_cache.inc" #include "video_core/texture_cache/texture_cache_base.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9a835cefc..eaf4a1c95 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -1,7 +1,5 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 2f4db5047..2fa8445eb 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -1,7 +1,5 @@ -// SPDX-FileCopyrightText: 2021 yuzu emulator team -// (https://github.com/skyline-emu/) -// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 -// or any later version Refer to the license.txt file included. +// SPDX-FileCopyrightText: 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #pragma once -- cgit v1.2.3