summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar bunnei2019-10-04 20:12:01 -0400
committerGravatar GitHub2019-10-04 20:12:01 -0400
commit0a662d009b1567bde5b0aa91e07365224858ca18 (patch)
treee8c61196b9f551b313ad1ad01776c4d9353cea6f
parentMerge pull request #2941 from FernandoS27/fix-master (diff)
parentTexture_Cache: Blit Deduction corrections and simplifications. (diff)
downloadyuzu-0a662d009b1567bde5b0aa91e07365224858ca18.tar.gz
yuzu-0a662d009b1567bde5b0aa91e07365224858ca18.tar.xz
yuzu-0a662d009b1567bde5b0aa91e07365224858ca18.zip
Merge pull request #2917 from FernandoS27/fermi-deduction-2
TextureCache: Add the ability to deduce if two textures are depth on blit.
-rw-r--r--src/video_core/texture_cache/texture_cache.h146
1 files changed, 144 insertions, 2 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 877c6635d..ca2da8f97 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -224,8 +224,13 @@ public:
224 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, 224 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
225 const Tegra::Engines::Fermi2D::Config& copy_config) { 225 const Tegra::Engines::Fermi2D::Config& copy_config) {
226 std::lock_guard lock{mutex}; 226 std::lock_guard lock{mutex};
227 std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config); 227 SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
228 std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config); 228 SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
229 const GPUVAddr src_gpu_addr = src_config.Address();
230 const GPUVAddr dst_gpu_addr = dst_config.Address();
231 DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
232 std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
233 std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
229 ImageBlit(src_surface.second, dst_surface.second, copy_config); 234 ImageBlit(src_surface.second, dst_surface.second, copy_config);
230 dst_surface.first->MarkAsModified(true, Tick()); 235 dst_surface.first->MarkAsModified(true, Tick());
231 } 236 }
@@ -357,6 +362,29 @@ private:
357 BufferCopy = 3, 362 BufferCopy = 3,
358 }; 363 };
359 364
365 enum class DeductionType : u32 {
366 DeductionComplete,
367 DeductionIncomplete,
368 DeductionFailed,
369 };
370
371 struct Deduction {
372 DeductionType type{DeductionType::DeductionFailed};
373 TSurface surface{};
374
375 bool Failed() const {
376 return type == DeductionType::DeductionFailed;
377 }
378
379 bool Incomplete() const {
380 return type == DeductionType::DeductionIncomplete;
381 }
382
383 bool IsDepth() const {
384 return surface->GetSurfaceParams().IsPixelFormatZeta();
385 }
386 };
387
360 /** 388 /**
361 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle. 389 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
362 * @param overlaps, the overlapping surfaces registered in the cache. 390 * @param overlaps, the overlapping surfaces registered in the cache.
@@ -691,6 +719,120 @@ private:
691 MatchTopologyResult::FullMatch); 719 MatchTopologyResult::FullMatch);
692 } 720 }
693 721
722 /**
723 * `DeduceSurface` gets the starting address and parameters of a candidate surface and tries
724 * to find a matching surface within the cache that's similar to it. If there are many textures
725 * or the texture found if entirely incompatible, it will fail. If no texture is found, the
726 * blit will be unsuccessful.
727 * @param gpu_addr, the starting address of the candidate surface.
728 * @param params, the paremeters on the candidate surface.
729 **/
730 Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
731 const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
732 const auto cache_addr{ToCacheAddr(host_ptr)};
733
734 if (!cache_addr) {
735 Deduction result{};
736 result.type = DeductionType::DeductionFailed;
737 return result;
738 }
739
740 if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
741 TSurface& current_surface = iter->second;
742 const auto topological_result = current_surface->MatchesTopology(params);
743 if (topological_result != MatchTopologyResult::FullMatch) {
744 Deduction result{};
745 result.type = DeductionType::DeductionFailed;
746 return result;
747 }
748 const auto struct_result = current_surface->MatchesStructure(params);
749 if (struct_result != MatchStructureResult::None &&
750 current_surface->MatchTarget(params.target)) {
751 Deduction result{};
752 result.type = DeductionType::DeductionComplete;
753 result.surface = current_surface;
754 return result;
755 }
756 }
757
758 const std::size_t candidate_size = params.GetGuestSizeInBytes();
759 auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
760
761 if (overlaps.empty()) {
762 Deduction result{};
763 result.type = DeductionType::DeductionIncomplete;
764 return result;
765 }
766
767 if (overlaps.size() > 1) {
768 Deduction result{};
769 result.type = DeductionType::DeductionFailed;
770 return result;
771 } else {
772 Deduction result{};
773 result.type = DeductionType::DeductionComplete;
774 result.surface = overlaps[0];
775 return result;
776 }
777 }
778
779 /**
780 * `DeduceBestBlit` gets the a source and destination starting address and parameters,
781 * and tries to deduce if they are supposed to be depth textures. If so, their
782 * parameters are modified and fixed into so.
783 * @param gpu_addr, the starting address of the candidate surface.
784 * @param params, the parameters on the candidate surface.
785 **/
786 void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
787 const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
788 auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
789 auto deduced_dst = DeduceSurface(src_gpu_addr, src_params);
790 if (deduced_src.Failed() || deduced_dst.Failed()) {
791 return;
792 }
793
794 const bool incomplete_src = deduced_src.Incomplete();
795 const bool incomplete_dst = deduced_dst.Incomplete();
796
797 if (incomplete_src && incomplete_dst) {
798 return;
799 }
800
801 const bool any_incomplete = incomplete_src || incomplete_dst;
802
803 if (!any_incomplete) {
804 if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
805 return;
806 }
807 } else {
808 if (incomplete_src && !(deduced_dst.IsDepth())) {
809 return;
810 }
811
812 if (incomplete_dst && !(deduced_src.IsDepth())) {
813 return;
814 }
815 }
816
817 const auto inherit_format = ([](SurfaceParams& to, TSurface from) {
818 const SurfaceParams& params = from->GetSurfaceParams();
819 to.pixel_format = params.pixel_format;
820 to.component_type = params.component_type;
821 to.type = params.type;
822 });
823 // Now we got the cases where one or both is Depth and the other is not known
824 if (!incomplete_src) {
825 inherit_format(src_params, deduced_src.surface);
826 } else {
827 inherit_format(src_params, deduced_dst.surface);
828 }
829 if (!incomplete_dst) {
830 inherit_format(dst_params, deduced_dst.surface);
831 } else {
832 inherit_format(dst_params, deduced_src.surface);
833 }
834 }
835
694 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params, 836 std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
695 bool preserve_contents) { 837 bool preserve_contents) {
696 auto new_surface{GetUncachedSurface(gpu_addr, params)}; 838 auto new_surface{GetUncachedSurface(gpu_addr, params)};