From 0a2536a0df1f4aea406f2132d3edda0430acc9d1 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 25 Dec 2023 07:32:16 +0100 Subject: SMMU: Initial adaptation to video_core. --- src/video_core/texture_cache/texture_cache.h | 75 ++++++++++++----------- src/video_core/texture_cache/texture_cache_base.h | 28 +++++---- src/video_core/texture_cache/util.cpp | 4 +- 3 files changed, 56 insertions(+), 51 deletions(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 0d5a1709f..7398ed2ec 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,10 +8,11 @@ #include "common/alignment.h" #include "common/settings.h" -#include "core/memory.h" #include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" +#include "video_core/guest_memory.h" +#include "video_core/host1x/gpu_device_memory_manager.h" #include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -27,8 +28,8 @@ using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template -TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_) - : runtime{runtime_}, rasterizer{rasterizer_} { +TextureCache

::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_) + : runtime{runtime_}, device_memory{device_memory_} { // Configure null sampler TSCEntry sampler_descriptor{}; sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); @@ -49,19 +50,19 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_samplers.insert(runtime, sampler_descriptor)); if constexpr (HAS_DEVICE_MEMORY_INFO) { - const s64 device_memory = static_cast(runtime.GetDeviceLocalMemory()); - const s64 min_spacing_expected = device_memory - 1_GiB; - const s64 min_spacing_critical = device_memory - 512_MiB; - const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD); + const s64 device_local_memory = static_cast(runtime.GetDeviceLocalMemory()); + const s64 min_spacing_expected = device_local_memory - 1_GiB; + const s64 min_spacing_critical = device_local_memory - 512_MiB; + const s64 mem_threshold = std::min(device_local_memory, TARGET_THRESHOLD); const s64 min_vacancy_expected = (6 * mem_threshold) / 10; const s64 min_vacancy_critical = (3 * mem_threshold) / 10; expected_memory = static_cast( - std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected), + std::max(std::min(device_local_memory - min_vacancy_expected, min_spacing_expected), DEFAULT_EXPECTED_MEMORY)); critical_memory = static_cast( - std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical), + std::max(std::min(device_local_memory - min_vacancy_critical, min_spacing_critical), DEFAULT_CRITICAL_MEMORY)); - minimum_memory = static_cast((device_memory - mem_threshold) / 2); + minimum_memory = static_cast((device_local_memory - mem_threshold) / 2); } else { expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; @@ -513,7 +514,7 @@ FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { } template -void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { +void TextureCache

::WriteMemory(DAddr cpu_addr, size_t size) { ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { if (True(image.flags & ImageFlagBits::CpuModified)) { return; @@ -526,7 +527,7 @@ void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { } template -void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { +void TextureCache

::DownloadMemory(DAddr cpu_addr, size_t size) { boost::container::small_vector images; ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { if (!image.IsSafeDownload()) { @@ -553,7 +554,7 @@ void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { } template -std::optional TextureCache

::GetFlushArea(VAddr cpu_addr, +std::optional TextureCache

::GetFlushArea(DAddr cpu_addr, u64 size) { std::optional area{}; ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) { @@ -579,7 +580,7 @@ std::optional TextureCache

::GetFlushArea(V } template -void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { +void TextureCache

::UnmapMemory(DAddr cpu_addr, size_t size) { boost::container::small_vector deleted_images; ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); for (const ImageId id : deleted_images) { @@ -713,7 +714,7 @@ bool TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, template typename P::ImageView* TextureCache

::TryFindFramebufferImageView( - const Tegra::FramebufferConfig& config, VAddr cpu_addr) { + const Tegra::FramebufferConfig& config, DAddr cpu_addr) { // TODO: Properly implement this const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); if (it == page_table.end()) { @@ -940,7 +941,7 @@ bool TextureCache

::IsRescaling(const ImageViewBase& image_view) const noexcep } template -bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { +bool TextureCache

::IsRegionGpuModified(DAddr addr, size_t size) { bool is_modified = false; ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { if (False(image.flags & ImageFlagBits::GpuModified)) { @@ -1059,7 +1060,7 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) return; } - Core::Memory::GpuGuestMemory swizzle_data( + Tegra::Memory::GpuGuestMemory swizzle_data( *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); if (True(image.flags & ImageFlagBits::Converted)) { @@ -1124,7 +1125,7 @@ ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); if (!cpu_addr) { @@ -1265,7 +1266,7 @@ void TextureCache

::QueueAsyncDecode(Image& image, ImageId image_id) { static Common::ScratchBuffer local_unswizzle_data_buffer; local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); - Core::Memory::GpuGuestMemory swizzle_data( + Tegra::Memory::GpuGuestMemory swizzle_data( *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, @@ -1339,14 +1340,14 @@ bool TextureCache

::ScaleDown(Image& image) { template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { const auto size = CalculateGuestSizeInBytes(info); cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size); if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + const DAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional(fake_addr); + cpu_addr = std::optional(fake_addr); } } ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); @@ -1362,7 +1363,7 @@ ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, } template -ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { +ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) { ImageInfo new_info = info; const size_t size_bytes = CalculateGuestSizeInBytes(new_info); const bool broken_views = runtime.HasBrokenTextureViewFormats(); @@ -1650,7 +1651,7 @@ std::optional::BlitImages> TextureCache

::GetBlitImag template ImageId TextureCache

::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { - std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); if (!cpu_addr) { cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); if (!cpu_addr) { @@ -1780,7 +1781,7 @@ ImageViewId TextureCache

::FindRenderTargetView(const ImageInfo& info, GPUVAdd template template -void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { +void TextureCache

::ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func) { using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; @@ -1924,11 +1925,11 @@ void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, template template void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result::type; + using FuncReturn = typename std::invoke_result::type; static constexpr bool RETURNS_BOOL = std::is_same_v; const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); for (const auto& [gpu_addr, size] : segments) { - std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); + std::optional cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); if constexpr (RETURNS_BOOL) { if (func(gpu_addr, *cpu_addr, size)) { @@ -1980,7 +1981,7 @@ void TextureCache

::RegisterImage(ImageId image_id) { } boost::container::small_vector sparse_maps; ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); ForEachCPUPage(cpu_addr, size, [this, map_id](u64 page) { page_table[page].push_back(map_id); }); @@ -2048,7 +2049,7 @@ void TextureCache

::UnregisterImage(ImageId image_id) { auto& sparse_maps = it->second; for (auto& map_view_id : sparse_maps) { const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; + const DAddr cpu_addr = map_range.cpu_addr; const std::size_t size = map_range.size; ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { const auto page_it = page_table.find(page); @@ -2080,7 +2081,7 @@ void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); return; } if (True(image.flags & ImageFlagBits::Registered)) { @@ -2089,15 +2090,15 @@ void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { auto& sparse_maps = it->second; for (auto& map_view_id : sparse_maps) { const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; + const DAddr cpu_addr = map.cpu_addr; const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); } return; } ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + [this]([[maybe_unused]] GPUVAddr gpu_addr, DAddr cpu_addr, size_t size) { + device_memory.UpdatePagesCachedCount(cpu_addr, size, 1); }); } @@ -2106,7 +2107,7 @@ void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + device_memory.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); return; } ASSERT(True(image.flags & ImageFlagBits::Registered)); @@ -2115,9 +2116,9 @@ void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { auto& sparse_maps = it->second; for (auto& map_view_id : sparse_maps) { const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; + const DAddr cpu_addr = map.cpu_addr; const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + device_memory.UpdatePagesCachedCount(cpu_addr, size, -1); } } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 6caf75b46..8699d40d4 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -36,9 +36,11 @@ #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" -namespace Tegra::Control { +namespace Tegra { +namespace Control { struct ChannelState; } +} // namespace Tegra namespace VideoCommon { @@ -126,7 +128,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches GetFlushArea(VAddr cpu_addr, u64 size); + std::optional GetFlushArea(DAddr cpu_addr, u64 size); /// Remove images in a region - void UnmapMemory(VAddr cpu_addr, size_t size); + void UnmapMemory(DAddr cpu_addr, size_t size); /// Remove images in a region void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size); @@ -210,7 +212,7 @@ public: /// Try to find a cached image view in the given CPU address [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, - VAddr cpu_addr); + DAddr cpu_addr); /// Return true when there are uncommitted images to be downloaded [[nodiscard]] bool HasUncommittedFlushes() const noexcept; @@ -235,7 +237,7 @@ public: GPUVAddr address = 0, size_t size = 0); /// Return true when a CPU region is modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + [[nodiscard]] bool IsRegionGpuModified(DAddr addr, size_t size); [[nodiscard]] bool IsRescaling() const noexcept; @@ -252,7 +254,7 @@ public: private: /// Iterate over all page indices in a range template - static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static void ForEachCPUPage(DAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS; for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) { @@ -326,7 +328,7 @@ private: /// Create a new image and join perfectly matching existing images /// Remove joined images from the cache - [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr); [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); @@ -349,7 +351,7 @@ private: /// Iterates over all the images in a region calling func template - void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + void ForEachImageInRegion(DAddr cpu_addr, size_t size, Func&& func); template void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); @@ -421,7 +423,7 @@ private: Runtime& runtime; - VideoCore::RasterizerInterface& rasterizer; + Tegra::MaxwellDeviceMemoryManager& device_memory; std::deque gpu_page_table_storage; RenderTargets render_targets; @@ -432,7 +434,7 @@ private: std::unordered_map, Common::IdentityHash> sparse_page_table; std::unordered_map> sparse_views; - VAddr virtual_invalid_space{}; + DAddr virtual_invalid_space{}; bool has_deleted_images = false; bool is_rescaling = false; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index fcf70068e..96f04b6c8 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -23,6 +23,7 @@ #include "core/memory.h" #include "video_core/compatible_formats.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/guest_memory.h" #include "video_core/memory_manager.h" #include "video_core/surface.h" #include "video_core/texture_cache/decode_bc.h" @@ -552,7 +553,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span src = input.subspan(host_offset); { - Core::Memory::GpuGuestMemoryScoped + Tegra::Memory::GpuGuestMemoryScoped dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, -- cgit v1.2.3 From 23430e67724d803184b6a861e4bcb3cac0e38cb0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 7 Jan 2024 05:33:43 +0100 Subject: Core: Eliminate core/memory dependancies. --- src/video_core/texture_cache/util.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/video_core/texture_cache') diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 96f04b6c8..1a6f0d1ad 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -20,7 +20,6 @@ #include "common/div_ceil.h" #include "common/scratch_buffer.h" #include "common/settings.h" -#include "core/memory.h" #include "video_core/compatible_formats.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/guest_memory.h" -- cgit v1.2.3