diff options
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/format_lookup_table.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_base.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_base.h | 13 | ||||
| -rw-r--r-- | src/video_core/texture_cache/surface_params.cpp | 19 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 245 |
5 files changed, 163 insertions, 124 deletions
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 7032e0059..f476f03b0 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp | |||
| @@ -41,7 +41,7 @@ struct Table { | |||
| 41 | ComponentType alpha_component; | 41 | ComponentType alpha_component; |
| 42 | bool is_srgb; | 42 | bool is_srgb; |
| 43 | }; | 43 | }; |
| 44 | constexpr std::array<Table, 77> DefinitionTable = {{ | 44 | constexpr std::array<Table, 78> DefinitionTable = {{ |
| 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, | 45 | {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U}, |
| 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, | 46 | {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S}, |
| 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, | 47 | {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI}, |
| @@ -98,6 +98,7 @@ constexpr std::array<Table, 77> DefinitionTable = {{ | |||
| 98 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, | 98 | {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, |
| 99 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, | 99 | {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, |
| 100 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | 100 | {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, |
| 101 | {TextureFormat::G24R8, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, | ||
| 101 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, | 102 | {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, |
| 102 | 103 | ||
| 103 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, | 104 | {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, |
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 715f39d0d..94d3a6ae5 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp | |||
| @@ -248,12 +248,11 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, | |||
| 248 | 248 | ||
| 249 | // Use an extra temporal buffer | 249 | // Use an extra temporal buffer |
| 250 | auto& tmp_buffer = staging_cache.GetBuffer(1); | 250 | auto& tmp_buffer = staging_cache.GetBuffer(1); |
| 251 | // Special case for 3D Texture Segments | ||
| 252 | const bool must_read_current_data = | ||
| 253 | params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; | ||
| 254 | tmp_buffer.resize(guest_memory_size); | 251 | tmp_buffer.resize(guest_memory_size); |
| 255 | host_ptr = tmp_buffer.data(); | 252 | host_ptr = tmp_buffer.data(); |
| 256 | if (must_read_current_data) { | 253 | |
| 254 | if (params.target == SurfaceTarget::Texture3D) { | ||
| 255 | // Special case for 3D texture segments | ||
| 257 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); | 256 | memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); |
| 258 | } | 257 | } |
| 259 | 258 | ||
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index 79e10ffbb..173f2edba 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h | |||
| @@ -217,8 +217,8 @@ public: | |||
| 217 | } | 217 | } |
| 218 | 218 | ||
| 219 | bool IsProtected() const { | 219 | bool IsProtected() const { |
| 220 | // Only 3D Slices are to be protected | 220 | // Only 3D slices are to be protected |
| 221 | return is_target && params.block_depth > 0; | 221 | return is_target && params.target == SurfaceTarget::Texture3D; |
| 222 | } | 222 | } |
| 223 | 223 | ||
| 224 | bool IsRenderTarget() const { | 224 | bool IsRenderTarget() const { |
| @@ -250,6 +250,11 @@ public: | |||
| 250 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); | 250 | return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); |
| 251 | } | 251 | } |
| 252 | 252 | ||
| 253 | TView Emplace3DView(u32 slice, u32 depth, u32 base_level, u32 num_levels) { | ||
| 254 | return GetView(ViewParams(VideoCore::Surface::SurfaceTarget::Texture3D, slice, depth, | ||
| 255 | base_level, num_levels)); | ||
| 256 | } | ||
| 257 | |||
| 253 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, | 258 | std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params, |
| 254 | const GPUVAddr view_addr, | 259 | const GPUVAddr view_addr, |
| 255 | const std::size_t candidate_size, const u32 mipmap, | 260 | const std::size_t candidate_size, const u32 mipmap, |
| @@ -272,8 +277,8 @@ public: | |||
| 272 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, | 277 | std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr, |
| 273 | const std::size_t candidate_size) { | 278 | const std::size_t candidate_size) { |
| 274 | if (params.target == SurfaceTarget::Texture3D || | 279 | if (params.target == SurfaceTarget::Texture3D || |
| 275 | (params.num_levels == 1 && !params.is_layered) || | 280 | view_params.target == SurfaceTarget::Texture3D || |
| 276 | view_params.target == SurfaceTarget::Texture3D) { | 281 | (params.num_levels == 1 && !params.is_layered)) { |
| 277 | return {}; | 282 | return {}; |
| 278 | } | 283 | } |
| 279 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; | 284 | const auto layer_mipmap{GetLayerMipmap(view_addr)}; |
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 884fabffe..0b2b2b8c4 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp | |||
| @@ -215,10 +215,19 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz | |||
| 215 | params.num_levels = 1; | 215 | params.num_levels = 1; |
| 216 | params.emulated_levels = 1; | 216 | params.emulated_levels = 1; |
| 217 | 217 | ||
| 218 | const bool is_layered = config.layers > 1 && params.block_depth == 0; | 218 | if (config.memory_layout.is_3d != 0) { |
| 219 | params.is_layered = is_layered; | 219 | params.depth = config.layers.Value(); |
| 220 | params.depth = is_layered ? config.layers.Value() : 1; | 220 | params.is_layered = false; |
| 221 | params.target = is_layered ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D; | 221 | params.target = SurfaceTarget::Texture3D; |
| 222 | } else if (config.layers > 1) { | ||
| 223 | params.depth = config.layers.Value(); | ||
| 224 | params.is_layered = true; | ||
| 225 | params.target = SurfaceTarget::Texture2DArray; | ||
| 226 | } else { | ||
| 227 | params.depth = 1; | ||
| 228 | params.is_layered = false; | ||
| 229 | params.target = SurfaceTarget::Texture2D; | ||
| 230 | } | ||
| 222 | return params; | 231 | return params; |
| 223 | } | 232 | } |
| 224 | 233 | ||
| @@ -237,7 +246,7 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface( | |||
| 237 | params.width = config.width; | 246 | params.width = config.width; |
| 238 | params.height = config.height; | 247 | params.height = config.height; |
| 239 | params.pitch = config.pitch; | 248 | params.pitch = config.pitch; |
| 240 | // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters | 249 | // TODO(Rodrigo): Try to guess texture arrays from parameters |
| 241 | params.target = SurfaceTarget::Texture2D; | 250 | params.target = SurfaceTarget::Texture2D; |
| 242 | params.depth = 1; | 251 | params.depth = 1; |
| 243 | params.num_levels = 1; | 252 | params.num_levels = 1; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..85075e868 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <vector> | 15 | #include <vector> |
| 16 | 16 | ||
| 17 | #include <boost/container/small_vector.hpp> | ||
| 17 | #include <boost/icl/interval_map.hpp> | 18 | #include <boost/icl/interval_map.hpp> |
| 18 | #include <boost/range/iterator_range.hpp> | 19 | #include <boost/range/iterator_range.hpp> |
| 19 | 20 | ||
| @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 53 | 54 | ||
| 54 | template <typename TSurface, typename TView> | 55 | template <typename TSurface, typename TView> |
| 55 | class TextureCache { | 56 | class TextureCache { |
| 57 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | ||
| 56 | 58 | ||
| 57 | public: | 59 | public: |
| 58 | void InvalidateRegion(VAddr addr, std::size_t size) { | 60 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| @@ -296,30 +298,30 @@ public: | |||
| 296 | const GPUVAddr src_gpu_addr = src_config.Address(); | 298 | const GPUVAddr src_gpu_addr = src_config.Address(); |
| 297 | const GPUVAddr dst_gpu_addr = dst_config.Address(); | 299 | const GPUVAddr dst_gpu_addr = dst_config.Address(); |
| 298 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); | 300 | DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); |
| 299 | const std::optional<VAddr> dst_cpu_addr = | 301 | |
| 300 | system.GPU().MemoryManager().GpuToCpuAddress(dst_gpu_addr); | 302 | const auto& memory_manager = system.GPU().MemoryManager(); |
| 301 | const std::optional<VAddr> src_cpu_addr = | 303 | const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); |
| 302 | system.GPU().MemoryManager().GpuToCpuAddress(src_gpu_addr); | 304 | const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); |
| 303 | std::pair<TSurface, TView> dst_surface = | 305 | std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); |
| 304 | GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); | 306 | TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; |
| 305 | std::pair<TSurface, TView> src_surface = | 307 | ImageBlit(src_surface, dst_surface.second, copy_config); |
| 306 | GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false); | ||
| 307 | ImageBlit(src_surface.second, dst_surface.second, copy_config); | ||
| 308 | dst_surface.first->MarkAsModified(true, Tick()); | 308 | dst_surface.first->MarkAsModified(true, Tick()); |
| 309 | } | 309 | } |
| 310 | 310 | ||
| 311 | TSurface TryFindFramebufferSurface(VAddr addr) { | 311 | TSurface TryFindFramebufferSurface(VAddr addr) const { |
| 312 | if (!addr) { | 312 | if (!addr) { |
| 313 | return nullptr; | 313 | return nullptr; |
| 314 | } | 314 | } |
| 315 | const VAddr page = addr >> registry_page_bits; | 315 | const VAddr page = addr >> registry_page_bits; |
| 316 | std::vector<TSurface>& list = registry[page]; | 316 | const auto it = registry.find(page); |
| 317 | for (auto& surface : list) { | 317 | if (it == registry.end()) { |
| 318 | if (surface->GetCpuAddr() == addr) { | 318 | return nullptr; |
| 319 | return surface; | ||
| 320 | } | ||
| 321 | } | 319 | } |
| 322 | return nullptr; | 320 | const auto& list = it->second; |
| 321 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 322 | return surface->GetCpuAddr() == addr; | ||
| 323 | }); | ||
| 324 | return found != list.end() ? *found : nullptr; | ||
| 323 | } | 325 | } |
| 324 | 326 | ||
| 325 | u64 Tick() { | 327 | u64 Tick() { |
| @@ -498,18 +500,18 @@ private: | |||
| 498 | * @param untopological Indicates to the recycler that the texture has no way | 500 | * @param untopological Indicates to the recycler that the texture has no way |
| 499 | * to match the overlaps due to topological reasons. | 501 | * to match the overlaps due to topological reasons. |
| 500 | **/ | 502 | **/ |
| 501 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 503 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, |
| 502 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 504 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| 503 | if (Settings::IsGPULevelExtreme()) { | 505 | if (Settings::IsGPULevelExtreme()) { |
| 504 | return RecycleStrategy::Flush; | 506 | return RecycleStrategy::Flush; |
| 505 | } | 507 | } |
| 506 | // 3D Textures decision | 508 | // 3D Textures decision |
| 507 | if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { | 509 | if (params.target == SurfaceTarget::Texture3D) { |
| 508 | return RecycleStrategy::Flush; | 510 | return RecycleStrategy::Flush; |
| 509 | } | 511 | } |
| 510 | for (const auto& s : overlaps) { | 512 | for (const auto& s : overlaps) { |
| 511 | const auto& s_params = s->GetSurfaceParams(); | 513 | const auto& s_params = s->GetSurfaceParams(); |
| 512 | if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) { | 514 | if (s_params.target == SurfaceTarget::Texture3D) { |
| 513 | return RecycleStrategy::Flush; | 515 | return RecycleStrategy::Flush; |
| 514 | } | 516 | } |
| 515 | } | 517 | } |
| @@ -538,9 +540,8 @@ private: | |||
| 538 | * @param untopological Indicates to the recycler that the texture has no way to match the | 540 | * @param untopological Indicates to the recycler that the texture has no way to match the |
| 539 | * overlaps due to topological reasons. | 541 | * overlaps due to topological reasons. |
| 540 | **/ | 542 | **/ |
| 541 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | 543 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, |
| 542 | const SurfaceParams& params, const GPUVAddr gpu_addr, | 544 | const GPUVAddr gpu_addr, const bool preserve_contents, |
| 543 | const bool preserve_contents, | ||
| 544 | const MatchTopologyResult untopological) { | 545 | const MatchTopologyResult untopological) { |
| 545 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | 546 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); |
| 546 | for (auto& surface : overlaps) { | 547 | for (auto& surface : overlaps) { |
| @@ -650,47 +651,65 @@ private: | |||
| 650 | * @param params The parameters on the new surface. | 651 | * @param params The parameters on the new surface. |
| 651 | * @param gpu_addr The starting address of the new surface. | 652 | * @param gpu_addr The starting address of the new surface. |
| 652 | **/ | 653 | **/ |
| 653 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | 654 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, |
| 654 | const SurfaceParams& params, | 655 | const SurfaceParams& params, |
| 655 | const GPUVAddr gpu_addr) { | 656 | GPUVAddr gpu_addr) { |
| 656 | if (params.target == SurfaceTarget::Texture3D) { | 657 | if (params.target == SurfaceTarget::Texture3D) { |
| 657 | return {}; | 658 | return std::nullopt; |
| 658 | } | 659 | } |
| 659 | bool modified = false; | 660 | const auto test_modified = [](TSurface& surface) { return surface->IsModified(); }; |
| 660 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | 661 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); |
| 661 | u32 passed_tests = 0; | 662 | |
| 663 | if (std::none_of(overlaps.begin(), overlaps.end(), test_modified)) { | ||
| 664 | LoadSurface(new_surface); | ||
| 665 | for (const auto& surface : overlaps) { | ||
| 666 | Unregister(surface); | ||
| 667 | } | ||
| 668 | Register(new_surface); | ||
| 669 | return {{new_surface, new_surface->GetMainView()}}; | ||
| 670 | } | ||
| 671 | |||
| 672 | std::size_t passed_tests = 0; | ||
| 662 | for (auto& surface : overlaps) { | 673 | for (auto& surface : overlaps) { |
| 663 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | 674 | const SurfaceParams& src_params = surface->GetSurfaceParams(); |
| 664 | if (src_params.is_layered || src_params.num_levels > 1) { | 675 | const auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; |
| 665 | // We send this cases to recycle as they are more complex to handle | ||
| 666 | return {}; | ||
| 667 | } | ||
| 668 | const std::size_t candidate_size = surface->GetSizeInBytes(); | ||
| 669 | auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; | ||
| 670 | if (!mipmap_layer) { | 676 | if (!mipmap_layer) { |
| 671 | continue; | 677 | continue; |
| 672 | } | 678 | } |
| 673 | const auto [layer, mipmap] = *mipmap_layer; | 679 | const auto [base_layer, base_mipmap] = *mipmap_layer; |
| 674 | if (new_surface->GetMipmapSize(mipmap) != candidate_size) { | 680 | if (new_surface->GetMipmapSize(base_mipmap) != surface->GetMipmapSize(0)) { |
| 675 | continue; | 681 | continue; |
| 676 | } | 682 | } |
| 677 | modified |= surface->IsModified(); | 683 | ++passed_tests; |
| 678 | // Now we got all the data set up | 684 | |
| 679 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); | 685 | // Copy all mipmaps and layers |
| 680 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | 686 | const u32 block_width = params.GetDefaultBlockWidth(); |
| 681 | const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1); | 687 | const u32 block_height = params.GetDefaultBlockHeight(); |
| 682 | passed_tests++; | 688 | for (u32 mipmap = base_mipmap; mipmap < base_mipmap + src_params.num_levels; ++mipmap) { |
| 683 | ImageCopy(surface, new_surface, copy_params); | 689 | const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap); |
| 690 | const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap); | ||
| 691 | if (width < block_width || height < block_height) { | ||
| 692 | // Current APIs forbid copying small compressed textures, avoid errors | ||
| 693 | break; | ||
| 694 | } | ||
| 695 | const CopyParams copy_params(0, 0, 0, 0, 0, base_layer, 0, mipmap, width, height, | ||
| 696 | src_params.depth); | ||
| 697 | ImageCopy(surface, new_surface, copy_params); | ||
| 698 | } | ||
| 684 | } | 699 | } |
| 685 | if (passed_tests == 0) { | 700 | if (passed_tests == 0) { |
| 686 | return {}; | 701 | return std::nullopt; |
| 702 | } | ||
| 703 | if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | ||
| 687 | // In Accurate GPU all tests should pass, else we recycle | 704 | // In Accurate GPU all tests should pass, else we recycle |
| 688 | } else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) { | 705 | return std::nullopt; |
| 689 | return {}; | ||
| 690 | } | 706 | } |
| 707 | |||
| 708 | const bool modified = std::any_of(overlaps.begin(), overlaps.end(), test_modified); | ||
| 691 | for (const auto& surface : overlaps) { | 709 | for (const auto& surface : overlaps) { |
| 692 | Unregister(surface); | 710 | Unregister(surface); |
| 693 | } | 711 | } |
| 712 | |||
| 694 | new_surface->MarkAsModified(modified, Tick()); | 713 | new_surface->MarkAsModified(modified, Tick()); |
| 695 | Register(new_surface); | 714 | Register(new_surface); |
| 696 | return {{new_surface, new_surface->GetMainView()}}; | 715 | return {{new_surface, new_surface->GetMainView()}}; |
| @@ -708,53 +727,11 @@ private: | |||
| 708 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | 727 | * @param preserve_contents Indicates that the new surface should be loaded from memory or |
| 709 | * left blank. | 728 | * left blank. |
| 710 | */ | 729 | */ |
| 711 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 730 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, |
| 712 | const SurfaceParams& params, | 731 | const SurfaceParams& params, |
| 713 | const GPUVAddr gpu_addr, | 732 | GPUVAddr gpu_addr, VAddr cpu_addr, |
| 714 | const VAddr cpu_addr, | ||
| 715 | bool preserve_contents) { | 733 | bool preserve_contents) { |
| 716 | if (params.target == SurfaceTarget::Texture3D) { | 734 | if (params.target != SurfaceTarget::Texture3D) { |
| 717 | bool failed = false; | ||
| 718 | if (params.num_levels > 1) { | ||
| 719 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 720 | return std::nullopt; | ||
| 721 | } | ||
| 722 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 723 | bool modified = false; | ||
| 724 | for (auto& surface : overlaps) { | ||
| 725 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 726 | if (src_params.target != SurfaceTarget::Texture2D) { | ||
| 727 | failed = true; | ||
| 728 | break; | ||
| 729 | } | ||
| 730 | if (src_params.height != params.height) { | ||
| 731 | failed = true; | ||
| 732 | break; | ||
| 733 | } | ||
| 734 | if (src_params.block_depth != params.block_depth || | ||
| 735 | src_params.block_height != params.block_height) { | ||
| 736 | failed = true; | ||
| 737 | break; | ||
| 738 | } | ||
| 739 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||
| 740 | const auto offsets = params.GetBlockOffsetXYZ(offset); | ||
| 741 | const auto z = std::get<2>(offsets); | ||
| 742 | modified |= surface->IsModified(); | ||
| 743 | const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, | ||
| 744 | 1); | ||
| 745 | ImageCopy(surface, new_surface, copy_params); | ||
| 746 | } | ||
| 747 | if (failed) { | ||
| 748 | return std::nullopt; | ||
| 749 | } | ||
| 750 | for (const auto& surface : overlaps) { | ||
| 751 | Unregister(surface); | ||
| 752 | } | ||
| 753 | new_surface->MarkAsModified(modified, Tick()); | ||
| 754 | Register(new_surface); | ||
| 755 | auto view = new_surface->GetMainView(); | ||
| 756 | return {{std::move(new_surface), view}}; | ||
| 757 | } else { | ||
| 758 | for (const auto& surface : overlaps) { | 735 | for (const auto& surface : overlaps) { |
| 759 | if (!surface->MatchTarget(params.target)) { | 736 | if (!surface->MatchTarget(params.target)) { |
| 760 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { | 737 | if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) { |
| @@ -770,11 +747,60 @@ private: | |||
| 770 | continue; | 747 | continue; |
| 771 | } | 748 | } |
| 772 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { | 749 | if (surface->MatchesStructure(params) == MatchStructureResult::FullMatch) { |
| 773 | return {{surface, surface->GetMainView()}}; | 750 | return std::make_pair(surface, surface->GetMainView()); |
| 774 | } | 751 | } |
| 775 | } | 752 | } |
| 776 | return InitializeSurface(gpu_addr, params, preserve_contents); | 753 | return InitializeSurface(gpu_addr, params, preserve_contents); |
| 777 | } | 754 | } |
| 755 | |||
| 756 | if (params.num_levels > 1) { | ||
| 757 | // We can't handle mipmaps in 3D textures yet, better fallback to LLE approach | ||
| 758 | return std::nullopt; | ||
| 759 | } | ||
| 760 | |||
| 761 | if (overlaps.size() == 1) { | ||
| 762 | const auto& surface = overlaps[0]; | ||
| 763 | const SurfaceParams& overlap_params = surface->GetSurfaceParams(); | ||
| 764 | // Don't attempt to render to textures with more than one level for now | ||
| 765 | // The texture has to be to the right or the sample address if we want to render to it | ||
| 766 | if (overlap_params.num_levels == 1 && cpu_addr >= surface->GetCpuAddr()) { | ||
| 767 | const u32 offset = static_cast<u32>(cpu_addr - surface->GetCpuAddr()); | ||
| 768 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | ||
| 769 | if (slice < overlap_params.depth) { | ||
| 770 | auto view = surface->Emplace3DView(slice, params.depth, 0, 1); | ||
| 771 | return std::make_pair(std::move(surface), std::move(view)); | ||
| 772 | } | ||
| 773 | } | ||
| 774 | } | ||
| 775 | |||
| 776 | TSurface new_surface = GetUncachedSurface(gpu_addr, params); | ||
| 777 | bool modified = false; | ||
| 778 | |||
| 779 | for (auto& surface : overlaps) { | ||
| 780 | const SurfaceParams& src_params = surface->GetSurfaceParams(); | ||
| 781 | if (src_params.target != SurfaceTarget::Texture2D || | ||
| 782 | src_params.height != params.height || | ||
| 783 | src_params.block_depth != params.block_depth || | ||
| 784 | src_params.block_height != params.block_height) { | ||
| 785 | return std::nullopt; | ||
| 786 | } | ||
| 787 | modified |= surface->IsModified(); | ||
| 788 | |||
| 789 | const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); | ||
| 790 | const u32 slice = std::get<2>(params.GetBlockOffsetXYZ(offset)); | ||
| 791 | const u32 width = params.width; | ||
| 792 | const u32 height = params.height; | ||
| 793 | const CopyParams copy_params(0, 0, 0, 0, 0, slice, 0, 0, width, height, 1); | ||
| 794 | ImageCopy(surface, new_surface, copy_params); | ||
| 795 | } | ||
| 796 | for (const auto& surface : overlaps) { | ||
| 797 | Unregister(surface); | ||
| 798 | } | ||
| 799 | new_surface->MarkAsModified(modified, Tick()); | ||
| 800 | Register(new_surface); | ||
| 801 | |||
| 802 | TView view = new_surface->GetMainView(); | ||
| 803 | return std::make_pair(std::move(new_surface), std::move(view)); | ||
| 778 | } | 804 | } |
| 779 | 805 | ||
| 780 | /** | 806 | /** |
| @@ -810,7 +836,7 @@ private: | |||
| 810 | TSurface& current_surface = iter->second; | 836 | TSurface& current_surface = iter->second; |
| 811 | const auto topological_result = current_surface->MatchesTopology(params); | 837 | const auto topological_result = current_surface->MatchesTopology(params); |
| 812 | if (topological_result != MatchTopologyResult::FullMatch) { | 838 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 813 | std::vector<TSurface> overlaps{current_surface}; | 839 | VectorSurface overlaps{current_surface}; |
| 814 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 840 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 815 | topological_result); | 841 | topological_result); |
| 816 | } | 842 | } |
| @@ -852,7 +878,7 @@ private: | |||
| 852 | } | 878 | } |
| 853 | } | 879 | } |
| 854 | 880 | ||
| 855 | // Check if it's a 3D texture | 881 | // Manage 3D textures |
| 856 | if (params.block_depth > 0) { | 882 | if (params.block_depth > 0) { |
| 857 | auto surface = | 883 | auto surface = |
| 858 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); | 884 | Manage3DSurfaces(overlaps, params, gpu_addr, cpu_addr, preserve_contents); |
| @@ -868,12 +894,9 @@ private: | |||
| 868 | // two things either the candidate surface is a supertexture of the overlap | 894 | // two things either the candidate surface is a supertexture of the overlap |
| 869 | // or they don't match in any known way. | 895 | // or they don't match in any known way. |
| 870 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { | 896 | if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { |
| 871 | if (current_surface->GetGpuAddr() == gpu_addr) { | 897 | const std::optional view = TryReconstructSurface(overlaps, params, gpu_addr); |
| 872 | std::optional<std::pair<TSurface, TView>> view = | 898 | if (view) { |
| 873 | TryReconstructSurface(overlaps, params, gpu_addr); | 899 | return *view; |
| 874 | if (view) { | ||
| 875 | return *view; | ||
| 876 | } | ||
| 877 | } | 900 | } |
| 878 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 901 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 879 | MatchTopologyResult::FullMatch); | 902 | MatchTopologyResult::FullMatch); |
| @@ -1030,7 +1053,7 @@ private: | |||
| 1030 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, | 1053 | void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params, |
| 1031 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { | 1054 | const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) { |
| 1032 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); | 1055 | auto deduced_src = DeduceSurface(src_gpu_addr, src_params); |
| 1033 | auto deduced_dst = DeduceSurface(src_gpu_addr, src_params); | 1056 | auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params); |
| 1034 | if (deduced_src.Failed() || deduced_dst.Failed()) { | 1057 | if (deduced_src.Failed() || deduced_dst.Failed()) { |
| 1035 | return; | 1058 | return; |
| 1036 | } | 1059 | } |
| @@ -1126,23 +1149,25 @@ private: | |||
| 1126 | } | 1149 | } |
| 1127 | } | 1150 | } |
| 1128 | 1151 | ||
| 1129 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1152 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1130 | if (size == 0) { | 1153 | if (size == 0) { |
| 1131 | return {}; | 1154 | return {}; |
| 1132 | } | 1155 | } |
| 1133 | const VAddr cpu_addr_end = cpu_addr + size; | 1156 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1134 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1135 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1157 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1136 | std::vector<TSurface> surfaces; | 1158 | VectorSurface surfaces; |
| 1137 | while (start <= end) { | 1159 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { |
| 1138 | std::vector<TSurface>& list = registry[start]; | 1160 | const auto it = registry.find(start); |
| 1139 | for (auto& surface : list) { | 1161 | if (it == registry.end()) { |
| 1140 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1162 | continue; |
| 1141 | surface->MarkAsPicked(true); | 1163 | } |
| 1142 | surfaces.push_back(surface); | 1164 | for (auto& surface : it->second) { |
| 1165 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | ||
| 1166 | continue; | ||
| 1143 | } | 1167 | } |
| 1168 | surface->MarkAsPicked(true); | ||
| 1169 | surfaces.push_back(surface); | ||
| 1144 | } | 1170 | } |
| 1145 | start++; | ||
| 1146 | } | 1171 | } |
| 1147 | for (auto& surface : surfaces) { | 1172 | for (auto& surface : surfaces) { |
| 1148 | surface->MarkAsPicked(false); | 1173 | surface->MarkAsPicked(false); |