summary | refs | log | tree | commit | diff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
author: bunnei, 2020-05-31 17:02:29 -0400
committer: GitHub, 2020-05-31 17:02:29 -0400
commit: f7debcaa047b2216664fc13fa280c1d9b62214a8 (patch)
tree: 343b58297e9d29091b50b08861d47752e4b6ff85 /src/video_core/texture_cache
parent: Merge pull request #4025 from Morph1984/intel-proprietary-compute (diff)
parent: texture_cache: Use unordered_map::find instead of operator[] on hot code (diff)
download: yuzu-f7debcaa047b2216664fc13fa280c1d9b62214a8.tar.gz
yuzu-f7debcaa047b2216664fc13fa280c1d9b62214a8.tar.xz
yuzu-f7debcaa047b2216664fc13fa280c1d9b62214a8.zip
Merge pull request #3999 from ReinUsesLisp/opt-tex-cache
texture_cache: Optimize GetSurfacesInRegion
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r-- src/video_core/texture_cache/texture_cache.h | 53
1 file changed, 29 insertions, 24 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8bfc541d4..45e3ddd2c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -14,6 +14,7 @@
14#include <unordered_map> 14#include <unordered_map>
15#include <vector> 15#include <vector>
16 16
17#include <boost/container/small_vector.hpp>
17#include <boost/icl/interval_map.hpp> 18#include <boost/icl/interval_map.hpp>
18#include <boost/range/iterator_range.hpp> 19#include <boost/range/iterator_range.hpp>
19 20
@@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
53 54
54template <typename TSurface, typename TView> 55template <typename TSurface, typename TView>
55class TextureCache { 56class TextureCache {
57 using VectorSurface = boost::container::small_vector<TSurface, 1>;
56 58
57public: 59public:
58 void InvalidateRegion(VAddr addr, std::size_t size) { 60 void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -308,18 +310,20 @@ public:
308 dst_surface.first->MarkAsModified(true, Tick()); 310 dst_surface.first->MarkAsModified(true, Tick());
309 } 311 }
310 312
311 TSurface TryFindFramebufferSurface(VAddr addr) { 313 TSurface TryFindFramebufferSurface(VAddr addr) const {
312 if (!addr) { 314 if (!addr) {
313 return nullptr; 315 return nullptr;
314 } 316 }
315 const VAddr page = addr >> registry_page_bits; 317 const VAddr page = addr >> registry_page_bits;
316 std::vector<TSurface>& list = registry[page]; 318 const auto it = registry.find(page);
317 for (auto& surface : list) { 319 if (it == registry.end()) {
318 if (surface->GetCpuAddr() == addr) { 320 return nullptr;
319 return surface;
320 }
321 } 321 }
322 return nullptr; 322 const auto& list = it->second;
323 const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
324 return surface->GetCpuAddr() == addr;
325 });
326 return found != list.end() ? *found : nullptr;
323 } 327 }
324 328
325 u64 Tick() { 329 u64 Tick() {
@@ -498,7 +502,7 @@ private:
498 * @param untopological Indicates to the recycler that the texture has no way 502 * @param untopological Indicates to the recycler that the texture has no way
499 * to match the overlaps due to topological reasons. 503 * to match the overlaps due to topological reasons.
500 **/ 504 **/
501 RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, 505 RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
502 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { 506 const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
503 if (Settings::IsGPULevelExtreme()) { 507 if (Settings::IsGPULevelExtreme()) {
504 return RecycleStrategy::Flush; 508 return RecycleStrategy::Flush;
@@ -538,9 +542,8 @@ private:
538 * @param untopological Indicates to the recycler that the texture has no way to match the 542 * @param untopological Indicates to the recycler that the texture has no way to match the
539 * overlaps due to topological reasons. 543 * overlaps due to topological reasons.
540 **/ 544 **/
541 std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, 545 std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
542 const SurfaceParams& params, const GPUVAddr gpu_addr, 546 const GPUVAddr gpu_addr, const bool preserve_contents,
543 const bool preserve_contents,
544 const MatchTopologyResult untopological) { 547 const MatchTopologyResult untopological) {
545 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); 548 const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
546 for (auto& surface : overlaps) { 549 for (auto& surface : overlaps) {
@@ -650,7 +653,7 @@ private:
650 * @param params The parameters on the new surface. 653 * @param params The parameters on the new surface.
651 * @param gpu_addr The starting address of the new surface. 654 * @param gpu_addr The starting address of the new surface.
652 **/ 655 **/
653 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, 656 std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
654 const SurfaceParams& params, 657 const SurfaceParams& params,
655 const GPUVAddr gpu_addr) { 658 const GPUVAddr gpu_addr) {
656 if (params.target == SurfaceTarget::Texture3D) { 659 if (params.target == SurfaceTarget::Texture3D) {
@@ -708,7 +711,7 @@ private:
708 * @param preserve_contents Indicates that the new surface should be loaded from memory or 711 * @param preserve_contents Indicates that the new surface should be loaded from memory or
709 * left blank. 712 * left blank.
710 */ 713 */
711 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, 714 std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
712 const SurfaceParams& params, 715 const SurfaceParams& params,
713 const GPUVAddr gpu_addr, 716 const GPUVAddr gpu_addr,
714 const VAddr cpu_addr, 717 const VAddr cpu_addr,
@@ -810,7 +813,7 @@ private:
810 TSurface& current_surface = iter->second; 813 TSurface& current_surface = iter->second;
811 const auto topological_result = current_surface->MatchesTopology(params); 814 const auto topological_result = current_surface->MatchesTopology(params);
812 if (topological_result != MatchTopologyResult::FullMatch) { 815 if (topological_result != MatchTopologyResult::FullMatch) {
813 std::vector<TSurface> overlaps{current_surface}; 816 VectorSurface overlaps{current_surface};
814 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, 817 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
815 topological_result); 818 topological_result);
816 } 819 }
@@ -1126,23 +1129,25 @@ private:
1126 } 1129 }
1127 } 1130 }
1128 1131
1129 std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { 1132 VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
1130 if (size == 0) { 1133 if (size == 0) {
1131 return {}; 1134 return {};
1132 } 1135 }
1133 const VAddr cpu_addr_end = cpu_addr + size; 1136 const VAddr cpu_addr_end = cpu_addr + size;
1134 VAddr start = cpu_addr >> registry_page_bits;
1135 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; 1137 const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
1136 std::vector<TSurface> surfaces; 1138 VectorSurface surfaces;
1137 while (start <= end) { 1139 for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
1138 std::vector<TSurface>& list = registry[start]; 1140 const auto it = registry.find(start);
1139 for (auto& surface : list) { 1141 if (it == registry.end()) {
1140 if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { 1142 continue;
1141 surface->MarkAsPicked(true); 1143 }
1142 surfaces.push_back(surface); 1144 for (auto& surface : it->second) {
1145 if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
1146 continue;
1143 } 1147 }
1148 surface->MarkAsPicked(true);
1149 surfaces.push_back(surface);
1144 } 1150 }
1145 start++;
1146 } 1151 }
1147 for (auto& surface : surfaces) { 1152 for (auto& surface : surfaces) {
1148 surface->MarkAsPicked(false); 1153 surface->MarkAsPicked(false);