summaryrefslogtreecommitdiff
path: root/src/video_core/texture_cache
diff options
context:
space:
mode:
authorGravatar ReinUsesLisp2021-01-16 20:48:58 -0300
committerGravatar ReinUsesLisp2021-02-13 02:17:22 -0300
commit82c2601555b59a94d7160f2fd686cb63d32dd423 (patch)
treecd0ecd865945452fa589b572de614fc487f2f96a /src/video_core/texture_cache
parentvulkan_common: Expose interop and headless devices (diff)
downloadyuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.tar.gz
yuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.tar.xz
yuzu-82c2601555b59a94d7160f2fd686cb63d32dd423.zip
video_core: Reimplement the buffer cache
Reimplement the buffer cache using cached bindings and page-level granularity for modification tracking. This also drops the usage of shared pointers and virtual functions from the cache. - Bindings are cached, allowing work to be skipped when the game changes only a few bits between draws. - OpenGL Assembly shaders no longer copy when a region has been modified from the GPU to emulate constant buffers; instead, GL_EXT_memory_object is used to alias sub-buffers within the same allocation. - OpenGL Assembly shaders stream constant buffer data using glProgramBufferParametersIuivNV, from NV_parameter_buffer_object. In theory this should save one hash table resolve inside the driver compared to glBufferSubData. - A new OpenGL stream buffer based on fences is implemented for drivers other than Nvidia's proprietary one, due to their low performance on partial glBufferSubData calls synchronized with 3D rendering (which some games use a lot). - Most optimizations are now shared between APIs, allowing Vulkan to cache more bindings than before and skip unnecessary work. This commit adds the necessary infrastructure to use Vulkan objects from OpenGL. Overall, it improves performance and fixes some bugs present in the old cache. There are still some edge cases hit by some games that harm performance on some vendors; these are planned to be fixed in later commits.
Diffstat (limited to 'src/video_core/texture_cache')
-rw-r--r--src/video_core/texture_cache/texture_cache.h28
1 files changed, 10 insertions, 18 deletions
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d1080300f..f336b705f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -103,9 +103,6 @@ public:
103 /// Notify the cache that a new frame has been queued 103 /// Notify the cache that a new frame has been queued
104 void TickFrame(); 104 void TickFrame();
105 105
106 /// Return an unique mutually exclusive lock for the cache
107 [[nodiscard]] std::unique_lock<std::mutex> AcquireLock();
108
109 /// Return a constant reference to the given image view id 106 /// Return a constant reference to the given image view id
110 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; 107 [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
111 108
@@ -179,6 +176,8 @@ public:
179 /// Return true when a CPU region is modified from the GPU 176 /// Return true when a CPU region is modified from the GPU
180 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); 177 [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
181 178
179 std::mutex mutex;
180
182private: 181private:
183 /// Iterate over all page indices in a range 182 /// Iterate over all page indices in a range
184 template <typename Func> 183 template <typename Func>
@@ -212,8 +211,8 @@ private:
212 void RefreshContents(Image& image); 211 void RefreshContents(Image& image);
213 212
214 /// Upload data from guest to an image 213 /// Upload data from guest to an image
215 template <typename MapBuffer> 214 template <typename StagingBuffer>
216 void UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset); 215 void UploadImageContents(Image& image, StagingBuffer& staging_buffer, size_t buffer_offset);
217 216
218 /// Find or create an image view from a guest descriptor 217 /// Find or create an image view from a guest descriptor
219 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); 218 [[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
@@ -325,8 +324,6 @@ private:
325 324
326 RenderTargets render_targets; 325 RenderTargets render_targets;
327 326
328 std::mutex mutex;
329
330 std::unordered_map<TICEntry, ImageViewId> image_views; 327 std::unordered_map<TICEntry, ImageViewId> image_views;
331 std::unordered_map<TSCEntry, SamplerId> samplers; 328 std::unordered_map<TSCEntry, SamplerId> samplers;
332 std::unordered_map<RenderTargets, FramebufferId> framebuffers; 329 std::unordered_map<RenderTargets, FramebufferId> framebuffers;
@@ -386,11 +383,6 @@ void TextureCache<P>::TickFrame() {
386} 383}
387 384
388template <class P> 385template <class P>
389std::unique_lock<std::mutex> TextureCache<P>::AcquireLock() {
390 return std::unique_lock{mutex};
391}
392
393template <class P>
394const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { 386const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept {
395 return slot_image_views[id]; 387 return slot_image_views[id];
396} 388}
@@ -598,11 +590,11 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
598 }); 590 });
599 for (const ImageId image_id : images) { 591 for (const ImageId image_id : images) {
600 Image& image = slot_images[image_id]; 592 Image& image = slot_images[image_id];
601 auto map = runtime.MapDownloadBuffer(image.unswizzled_size_bytes); 593 auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
602 const auto copies = FullDownloadCopies(image.info); 594 const auto copies = FullDownloadCopies(image.info);
603 image.DownloadMemory(map, 0, copies); 595 image.DownloadMemory(map, 0, copies);
604 runtime.Finish(); 596 runtime.Finish();
605 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); 597 SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
606 } 598 }
607} 599}
608 600
@@ -757,7 +749,7 @@ void TextureCache<P>::PopAsyncFlushes() {
757 for (const ImageId image_id : download_ids) { 749 for (const ImageId image_id : download_ids) {
758 total_size_bytes += slot_images[image_id].unswizzled_size_bytes; 750 total_size_bytes += slot_images[image_id].unswizzled_size_bytes;
759 } 751 }
760 auto download_map = runtime.MapDownloadBuffer(total_size_bytes); 752 auto download_map = runtime.DownloadStagingBuffer(total_size_bytes);
761 size_t buffer_offset = 0; 753 size_t buffer_offset = 0;
762 for (const ImageId image_id : download_ids) { 754 for (const ImageId image_id : download_ids) {
763 Image& image = slot_images[image_id]; 755 Image& image = slot_images[image_id];
@@ -769,7 +761,7 @@ void TextureCache<P>::PopAsyncFlushes() {
769 runtime.Finish(); 761 runtime.Finish();
770 762
771 buffer_offset = 0; 763 buffer_offset = 0;
772 const std::span<u8> download_span = download_map.Span(); 764 const std::span<u8> download_span = download_map.mapped_span;
773 for (const ImageId image_id : download_ids) { 765 for (const ImageId image_id : download_ids) {
774 const ImageBase& image = slot_images[image_id]; 766 const ImageBase& image = slot_images[image_id];
775 const auto copies = FullDownloadCopies(image.info); 767 const auto copies = FullDownloadCopies(image.info);
@@ -806,7 +798,7 @@ void TextureCache<P>::RefreshContents(Image& image) {
806 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 798 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
807 return; 799 return;
808 } 800 }
809 auto map = runtime.MapUploadBuffer(MapSizeBytes(image)); 801 auto map = runtime.UploadStagingBuffer(MapSizeBytes(image));
810 UploadImageContents(image, map, 0); 802 UploadImageContents(image, map, 0);
811 runtime.InsertUploadMemoryBarrier(); 803 runtime.InsertUploadMemoryBarrier();
812} 804}
@@ -814,7 +806,7 @@ void TextureCache<P>::RefreshContents(Image& image) {
814template <class P> 806template <class P>
815template <typename MapBuffer> 807template <typename MapBuffer>
816void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) { 808void TextureCache<P>::UploadImageContents(Image& image, MapBuffer& map, size_t buffer_offset) {
817 const std::span<u8> mapped_span = map.Span().subspan(buffer_offset); 809 const std::span<u8> mapped_span = map.mapped_span.subspan(buffer_offset);
818 const GPUVAddr gpu_addr = image.gpu_addr; 810 const GPUVAddr gpu_addr = image.gpu_addr;
819 811
820 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { 812 if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {