diff options
| author | 2020-05-31 17:02:29 -0400 | |
|---|---|---|
| committer | 2020-05-31 17:02:29 -0400 | |
| commit | f7debcaa047b2216664fc13fa280c1d9b62214a8 (patch) | |
| tree | 343b58297e9d29091b50b08861d47752e4b6ff85 /src/video_core/texture_cache | |
| parent | Merge pull request #4025 from Morph1984/intel-proprietary-compute (diff) | |
| parent | texture_cache: Use unordered_map::find instead of operator[] on hot code (diff) | |
| download | yuzu-f7debcaa047b2216664fc13fa280c1d9b62214a8.tar.gz yuzu-f7debcaa047b2216664fc13fa280c1d9b62214a8.tar.xz yuzu-f7debcaa047b2216664fc13fa280c1d9b62214a8.zip | |
Merge pull request #3999 from ReinUsesLisp/opt-tex-cache
texture_cache: Optimize GetSurfacesInRegion
Diffstat (limited to 'src/video_core/texture_cache')
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 53 |
1 file changed, 29 insertions, 24 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8bfc541d4..45e3ddd2c 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <unordered_map> | 14 | #include <unordered_map> |
| 15 | #include <vector> | 15 | #include <vector> |
| 16 | 16 | ||
| 17 | #include <boost/container/small_vector.hpp> | ||
| 17 | #include <boost/icl/interval_map.hpp> | 18 | #include <boost/icl/interval_map.hpp> |
| 18 | #include <boost/range/iterator_range.hpp> | 19 | #include <boost/range/iterator_range.hpp> |
| 19 | 20 | ||
| @@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; | |||
| 53 | 54 | ||
| 54 | template <typename TSurface, typename TView> | 55 | template <typename TSurface, typename TView> |
| 55 | class TextureCache { | 56 | class TextureCache { |
| 57 | using VectorSurface = boost::container::small_vector<TSurface, 1>; | ||
| 56 | 58 | ||
| 57 | public: | 59 | public: |
| 58 | void InvalidateRegion(VAddr addr, std::size_t size) { | 60 | void InvalidateRegion(VAddr addr, std::size_t size) { |
| @@ -308,18 +310,20 @@ public: | |||
| 308 | dst_surface.first->MarkAsModified(true, Tick()); | 310 | dst_surface.first->MarkAsModified(true, Tick()); |
| 309 | } | 311 | } |
| 310 | 312 | ||
| 311 | TSurface TryFindFramebufferSurface(VAddr addr) { | 313 | TSurface TryFindFramebufferSurface(VAddr addr) const { |
| 312 | if (!addr) { | 314 | if (!addr) { |
| 313 | return nullptr; | 315 | return nullptr; |
| 314 | } | 316 | } |
| 315 | const VAddr page = addr >> registry_page_bits; | 317 | const VAddr page = addr >> registry_page_bits; |
| 316 | std::vector<TSurface>& list = registry[page]; | 318 | const auto it = registry.find(page); |
| 317 | for (auto& surface : list) { | 319 | if (it == registry.end()) { |
| 318 | if (surface->GetCpuAddr() == addr) { | 320 | return nullptr; |
| 319 | return surface; | ||
| 320 | } | ||
| 321 | } | 321 | } |
| 322 | return nullptr; | 322 | const auto& list = it->second; |
| 323 | const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) { | ||
| 324 | return surface->GetCpuAddr() == addr; | ||
| 325 | }); | ||
| 326 | return found != list.end() ? *found : nullptr; | ||
| 323 | } | 327 | } |
| 324 | 328 | ||
| 325 | u64 Tick() { | 329 | u64 Tick() { |
| @@ -498,7 +502,7 @@ private: | |||
| 498 | * @param untopological Indicates to the recycler that the texture has no way | 502 | * @param untopological Indicates to the recycler that the texture has no way |
| 499 | * to match the overlaps due to topological reasons. | 503 | * to match the overlaps due to topological reasons. |
| 500 | **/ | 504 | **/ |
| 501 | RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, | 505 | RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params, |
| 502 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { | 506 | const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { |
| 503 | if (Settings::IsGPULevelExtreme()) { | 507 | if (Settings::IsGPULevelExtreme()) { |
| 504 | return RecycleStrategy::Flush; | 508 | return RecycleStrategy::Flush; |
| @@ -538,9 +542,8 @@ private: | |||
| 538 | * @param untopological Indicates to the recycler that the texture has no way to match the | 542 | * @param untopological Indicates to the recycler that the texture has no way to match the |
| 539 | * overlaps due to topological reasons. | 543 | * overlaps due to topological reasons. |
| 540 | **/ | 544 | **/ |
| 541 | std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, | 545 | std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params, |
| 542 | const SurfaceParams& params, const GPUVAddr gpu_addr, | 546 | const GPUVAddr gpu_addr, const bool preserve_contents, |
| 543 | const bool preserve_contents, | ||
| 544 | const MatchTopologyResult untopological) { | 547 | const MatchTopologyResult untopological) { |
| 545 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); | 548 | const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); |
| 546 | for (auto& surface : overlaps) { | 549 | for (auto& surface : overlaps) { |
| @@ -650,7 +653,7 @@ private: | |||
| 650 | * @param params The parameters on the new surface. | 653 | * @param params The parameters on the new surface. |
| 651 | * @param gpu_addr The starting address of the new surface. | 654 | * @param gpu_addr The starting address of the new surface. |
| 652 | **/ | 655 | **/ |
| 653 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, | 656 | std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps, |
| 654 | const SurfaceParams& params, | 657 | const SurfaceParams& params, |
| 655 | const GPUVAddr gpu_addr) { | 658 | const GPUVAddr gpu_addr) { |
| 656 | if (params.target == SurfaceTarget::Texture3D) { | 659 | if (params.target == SurfaceTarget::Texture3D) { |
| @@ -708,7 +711,7 @@ private: | |||
| 708 | * @param preserve_contents Indicates that the new surface should be loaded from memory or | 711 | * @param preserve_contents Indicates that the new surface should be loaded from memory or |
| 709 | * left blank. | 712 | * left blank. |
| 710 | */ | 713 | */ |
| 711 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, | 714 | std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps, |
| 712 | const SurfaceParams& params, | 715 | const SurfaceParams& params, |
| 713 | const GPUVAddr gpu_addr, | 716 | const GPUVAddr gpu_addr, |
| 714 | const VAddr cpu_addr, | 717 | const VAddr cpu_addr, |
| @@ -810,7 +813,7 @@ private: | |||
| 810 | TSurface& current_surface = iter->second; | 813 | TSurface& current_surface = iter->second; |
| 811 | const auto topological_result = current_surface->MatchesTopology(params); | 814 | const auto topological_result = current_surface->MatchesTopology(params); |
| 812 | if (topological_result != MatchTopologyResult::FullMatch) { | 815 | if (topological_result != MatchTopologyResult::FullMatch) { |
| 813 | std::vector<TSurface> overlaps{current_surface}; | 816 | VectorSurface overlaps{current_surface}; |
| 814 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, | 817 | return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |
| 815 | topological_result); | 818 | topological_result); |
| 816 | } | 819 | } |
| @@ -1126,23 +1129,25 @@ private: | |||
| 1126 | } | 1129 | } |
| 1127 | } | 1130 | } |
| 1128 | 1131 | ||
| 1129 | std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { | 1132 | VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { |
| 1130 | if (size == 0) { | 1133 | if (size == 0) { |
| 1131 | return {}; | 1134 | return {}; |
| 1132 | } | 1135 | } |
| 1133 | const VAddr cpu_addr_end = cpu_addr + size; | 1136 | const VAddr cpu_addr_end = cpu_addr + size; |
| 1134 | VAddr start = cpu_addr >> registry_page_bits; | ||
| 1135 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; | 1137 | const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; |
| 1136 | std::vector<TSurface> surfaces; | 1138 | VectorSurface surfaces; |
| 1137 | while (start <= end) { | 1139 | for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) { |
| 1138 | std::vector<TSurface>& list = registry[start]; | 1140 | const auto it = registry.find(start); |
| 1139 | for (auto& surface : list) { | 1141 | if (it == registry.end()) { |
| 1140 | if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { | 1142 | continue; |
| 1141 | surface->MarkAsPicked(true); | 1143 | } |
| 1142 | surfaces.push_back(surface); | 1144 | for (auto& surface : it->second) { |
| 1145 | if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) { | ||
| 1146 | continue; | ||
| 1143 | } | 1147 | } |
| 1148 | surface->MarkAsPicked(true); | ||
| 1149 | surfaces.push_back(surface); | ||
| 1144 | } | 1150 | } |
| 1145 | start++; | ||
| 1146 | } | 1151 | } |
| 1147 | for (auto& surface : surfaces) { | 1152 | for (auto& surface : surfaces) { |
| 1148 | surface->MarkAsPicked(false); | 1153 | surface->MarkAsPicked(false); |