diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 42 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 57 | ||||
| -rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 6 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 136 | ||||
| -rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 33 |
9 files changed, 212 insertions, 81 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0932fadc2..2f986097f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp | |||
| @@ -223,6 +223,9 @@ void Maxwell3D::ProcessMacro(u32 method, const u32* base_start, u32 amount, bool | |||
| 223 | } | 223 | } |
| 224 | 224 | ||
| 225 | void Maxwell3D::RefreshParametersImpl() { | 225 | void Maxwell3D::RefreshParametersImpl() { |
| 226 | if (!Settings::IsGPULevelHigh()) { | ||
| 227 | return; | ||
| 228 | } | ||
| 226 | size_t current_index = 0; | 229 | size_t current_index = 0; |
| 227 | for (auto& segment : macro_segments) { | 230 | for (auto& segment : macro_segments) { |
| 228 | if (segment.first == 0) { | 231 | if (segment.first == 0) { |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 90e35e307..4993d4709 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -1287,8 +1287,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 1287 | } | 1287 | } |
| 1288 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); | 1288 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); |
| 1289 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | 1289 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; |
| 1290 | const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing | 1290 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; |
| 1291 | : VideoCommon::ObtainBufferOperation::MarkAsWritten; | ||
| 1292 | const auto [buffer, offset] = | 1291 | const auto [buffer, offset] = |
| 1293 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); | 1292 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); |
| 1294 | 1293 | ||
| @@ -1299,7 +1298,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 1299 | if constexpr (IS_IMAGE_UPLOAD) { | 1298 | if constexpr (IS_IMAGE_UPLOAD) { |
| 1300 | image->UploadMemory(buffer->Handle(), offset, copy_span); | 1299 | image->UploadMemory(buffer->Handle(), offset, copy_span); |
| 1301 | } else { | 1300 | } else { |
| 1302 | image->DownloadMemory(buffer->Handle(), offset, copy_span); | 1301 | texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, |
| 1302 | buffer_operand.address, buffer_size); | ||
| 1303 | } | 1303 | } |
| 1304 | return true; | 1304 | return true; |
| 1305 | } | 1305 | } |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 0b9c4a904..032a8ebc5 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp | |||
| @@ -803,30 +803,40 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
| 803 | 803 | ||
| 804 | void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, | 804 | void Image::DownloadMemory(GLuint buffer_handle, size_t buffer_offset, |
| 805 | std::span<const VideoCommon::BufferImageCopy> copies) { | 805 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 806 | std::array buffer_handles{buffer_handle}; | ||
| 807 | std::array buffer_offsets{buffer_offset}; | ||
| 808 | DownloadMemory(buffer_handles, buffer_offsets, copies); | ||
| 809 | } | ||
| 810 | |||
| 811 | void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> buffer_offsets, | ||
| 812 | std::span<const VideoCommon::BufferImageCopy> copies) { | ||
| 806 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | 813 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); |
| 807 | if (is_rescaled) { | 814 | if (is_rescaled) { |
| 808 | ScaleDown(); | 815 | ScaleDown(); |
| 809 | } | 816 | } |
| 810 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API | 817 | glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API |
| 811 | glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); | 818 | for (size_t i = 0; i < buffer_handles.size(); i++) { |
| 812 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | 819 | auto& buffer_handle = buffer_handles[i]; |
| 820 | glBindBuffer(GL_PIXEL_PACK_BUFFER, buffer_handle); | ||
| 821 | glPixelStorei(GL_PACK_ALIGNMENT, 1); | ||
| 813 | 822 | ||
| 814 | u32 current_row_length = std::numeric_limits<u32>::max(); | 823 | u32 current_row_length = std::numeric_limits<u32>::max(); |
| 815 | u32 current_image_height = std::numeric_limits<u32>::max(); | 824 | u32 current_image_height = std::numeric_limits<u32>::max(); |
| 816 | 825 | ||
| 817 | for (const VideoCommon::BufferImageCopy& copy : copies) { | 826 | for (const VideoCommon::BufferImageCopy& copy : copies) { |
| 818 | if (copy.image_subresource.base_level >= gl_num_levels) { | 827 | if (copy.image_subresource.base_level >= gl_num_levels) { |
| 819 | continue; | 828 | continue; |
| 820 | } | 829 | } |
| 821 | if (current_row_length != copy.buffer_row_length) { | 830 | if (current_row_length != copy.buffer_row_length) { |
| 822 | current_row_length = copy.buffer_row_length; | 831 | current_row_length = copy.buffer_row_length; |
| 823 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); | 832 | glPixelStorei(GL_PACK_ROW_LENGTH, current_row_length); |
| 824 | } | 833 | } |
| 825 | if (current_image_height != copy.buffer_image_height) { | 834 | if (current_image_height != copy.buffer_image_height) { |
| 826 | current_image_height = copy.buffer_image_height; | 835 | current_image_height = copy.buffer_image_height; |
| 827 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); | 836 | glPixelStorei(GL_PACK_IMAGE_HEIGHT, current_image_height); |
| 837 | } | ||
| 838 | CopyImageToBuffer(copy, buffer_offsets[i]); | ||
| 828 | } | 839 | } |
| 829 | CopyImageToBuffer(copy, buffer_offset); | ||
| 830 | } | 840 | } |
| 831 | if (is_rescaled) { | 841 | if (is_rescaled) { |
| 832 | ScaleUp(true); | 842 | ScaleUp(true); |
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 911e4607a..0dd039ed2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h | |||
| @@ -215,6 +215,9 @@ public: | |||
| 215 | void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, | 215 | void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, |
| 216 | std::span<const VideoCommon::BufferImageCopy> copies); | 216 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 217 | 217 | ||
| 218 | void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, | ||
| 219 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 220 | |||
| 218 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | 221 | void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); |
| 219 | 222 | ||
| 220 | GLuint StorageHandle() noexcept; | 223 | GLuint StorageHandle() noexcept; |
| @@ -376,6 +379,7 @@ struct TextureCacheParams { | |||
| 376 | using Sampler = OpenGL::Sampler; | 379 | using Sampler = OpenGL::Sampler; |
| 377 | using Framebuffer = OpenGL::Framebuffer; | 380 | using Framebuffer = OpenGL::Framebuffer; |
| 378 | using AsyncBuffer = u32; | 381 | using AsyncBuffer = u32; |
| 382 | using BufferType = GLuint; | ||
| 379 | }; | 383 | }; |
| 380 | 384 | ||
| 381 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | 385 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 673ab478e..2559a3aa7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -781,8 +781,7 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 781 | } | 781 | } |
| 782 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); | 782 | const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); |
| 783 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; | 783 | static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; |
| 784 | const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing | 784 | const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; |
| 785 | : VideoCommon::ObtainBufferOperation::MarkAsWritten; | ||
| 786 | const auto [buffer, offset] = | 785 | const auto [buffer, offset] = |
| 787 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); | 786 | buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); |
| 788 | 787 | ||
| @@ -793,7 +792,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, | |||
| 793 | if constexpr (IS_IMAGE_UPLOAD) { | 792 | if constexpr (IS_IMAGE_UPLOAD) { |
| 794 | image->UploadMemory(buffer->Handle(), offset, copy_span); | 793 | image->UploadMemory(buffer->Handle(), offset, copy_span); |
| 795 | } else { | 794 | } else { |
| 796 | image->DownloadMemory(buffer->Handle(), offset, copy_span); | 795 | texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, |
| 796 | buffer_operand.address, buffer_size); | ||
| 797 | } | 797 | } |
| 798 | return true; | 798 | return true; |
| 799 | } | 799 | } |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index ae15f6976..d0a7d8f35 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -1,10 +1,11 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #include <algorithm> | 4 | #include <algorithm> |
| 5 | #include <array> | 5 | #include <array> |
| 6 | #include <span> | 6 | #include <span> |
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | #include <boost/container/small_vector.hpp> | ||
| 8 | 9 | ||
| 9 | #include "common/bit_cast.h" | 10 | #include "common/bit_cast.h" |
| 10 | #include "common/bit_util.h" | 11 | #include "common/bit_util.h" |
| @@ -1343,14 +1344,31 @@ void Image::UploadMemory(const StagingBufferRef& map, std::span<const BufferImag | |||
| 1343 | 1344 | ||
| 1344 | void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | 1345 | void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, |
| 1345 | std::span<const VideoCommon::BufferImageCopy> copies) { | 1346 | std::span<const VideoCommon::BufferImageCopy> copies) { |
| 1347 | std::array buffer_handles{ | ||
| 1348 | buffer, | ||
| 1349 | }; | ||
| 1350 | std::array buffer_offsets{ | ||
| 1351 | offset, | ||
| 1352 | }; | ||
| 1353 | DownloadMemory(buffer_handles, buffer_offsets, copies); | ||
| 1354 | } | ||
| 1355 | |||
| 1356 | void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span, | ||
| 1357 | std::span<const VideoCommon::BufferImageCopy> copies) { | ||
| 1346 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); | 1358 | const bool is_rescaled = True(flags & ImageFlagBits::Rescaled); |
| 1347 | if (is_rescaled) { | 1359 | if (is_rescaled) { |
| 1348 | ScaleDown(); | 1360 | ScaleDown(); |
| 1349 | } | 1361 | } |
| 1350 | std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); | 1362 | boost::container::small_vector<VkBuffer, 1> buffers_vector{}; |
| 1363 | boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; | ||
| 1364 | for (size_t index = 0; index < buffers_span.size(); index++) { | ||
| 1365 | buffers_vector.emplace_back(buffers_span[index]); | ||
| 1366 | vk_copies.emplace_back( | ||
| 1367 | TransformBufferImageCopies(copies, offsets_span[index], aspect_mask)); | ||
| 1368 | } | ||
| 1351 | scheduler->RequestOutsideRenderPassOperationContext(); | 1369 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 1352 | scheduler->Record([buffer, image = *original_image, aspect_mask = aspect_mask, | 1370 | scheduler->Record([buffers = std::move(buffers_vector), image = *original_image, |
| 1353 | vk_copies](vk::CommandBuffer cmdbuf) { | 1371 | aspect_mask = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { |
| 1354 | const VkImageMemoryBarrier read_barrier{ | 1372 | const VkImageMemoryBarrier read_barrier{ |
| 1355 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 1373 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 1356 | .pNext = nullptr, | 1374 | .pNext = nullptr, |
| @@ -1369,6 +1387,20 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1369 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 1387 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 1370 | }, | 1388 | }, |
| 1371 | }; | 1389 | }; |
| 1390 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 1391 | 0, read_barrier); | ||
| 1392 | |||
| 1393 | for (size_t index = 0; index < buffers.size(); index++) { | ||
| 1394 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index], | ||
| 1395 | vk_copies[index]); | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | const VkMemoryBarrier memory_write_barrier{ | ||
| 1399 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1400 | .pNext = nullptr, | ||
| 1401 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1402 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1403 | }; | ||
| 1372 | const VkImageMemoryBarrier image_write_barrier{ | 1404 | const VkImageMemoryBarrier image_write_barrier{ |
| 1373 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | 1405 | .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, |
| 1374 | .pNext = nullptr, | 1406 | .pNext = nullptr, |
| @@ -1387,15 +1419,6 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1387 | .layerCount = VK_REMAINING_ARRAY_LAYERS, | 1419 | .layerCount = VK_REMAINING_ARRAY_LAYERS, |
| 1388 | }, | 1420 | }, |
| 1389 | }; | 1421 | }; |
| 1390 | const VkMemoryBarrier memory_write_barrier{ | ||
| 1391 | .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||
| 1392 | .pNext = nullptr, | ||
| 1393 | .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1394 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||
| 1395 | }; | ||
| 1396 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||
| 1397 | 0, read_barrier); | ||
| 1398 | cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, vk_copies); | ||
| 1399 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | 1422 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, |
| 1400 | 0, memory_write_barrier, nullptr, image_write_barrier); | 1423 | 0, memory_write_barrier, nullptr, image_write_barrier); |
| 1401 | }); | 1424 | }); |
| @@ -1405,7 +1428,13 @@ void Image::DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1405 | } | 1428 | } |
| 1406 | 1429 | ||
| 1407 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { | 1430 | void Image::DownloadMemory(const StagingBufferRef& map, std::span<const BufferImageCopy> copies) { |
| 1408 | DownloadMemory(map.buffer, map.offset, copies); | 1431 | std::array buffers{ |
| 1432 | map.buffer, | ||
| 1433 | }; | ||
| 1434 | std::array offsets{ | ||
| 1435 | map.offset, | ||
| 1436 | }; | ||
| 1437 | DownloadMemory(buffers, offsets, copies); | ||
| 1409 | } | 1438 | } |
| 1410 | 1439 | ||
| 1411 | bool Image::IsRescaled() const noexcept { | 1440 | bool Image::IsRescaled() const noexcept { |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d5ee23f8d..c656c5386 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| @@ -141,6 +141,9 @@ public: | |||
| 141 | void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, | 141 | void DownloadMemory(VkBuffer buffer, VkDeviceSize offset, |
| 142 | std::span<const VideoCommon::BufferImageCopy> copies); | 142 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 143 | 143 | ||
| 144 | void DownloadMemory(std::span<VkBuffer> buffers, std::span<VkDeviceSize> offsets, | ||
| 145 | std::span<const VideoCommon::BufferImageCopy> copies); | ||
| 146 | |||
| 144 | void DownloadMemory(const StagingBufferRef& map, | 147 | void DownloadMemory(const StagingBufferRef& map, |
| 145 | std::span<const VideoCommon::BufferImageCopy> copies); | 148 | std::span<const VideoCommon::BufferImageCopy> copies); |
| 146 | 149 | ||
| @@ -371,6 +374,7 @@ struct TextureCacheParams { | |||
| 371 | using Sampler = Vulkan::Sampler; | 374 | using Sampler = Vulkan::Sampler; |
| 372 | using Framebuffer = Vulkan::Framebuffer; | 375 | using Framebuffer = Vulkan::Framebuffer; |
| 373 | using AsyncBuffer = Vulkan::StagingBufferRef; | 376 | using AsyncBuffer = Vulkan::StagingBufferRef; |
| 377 | using BufferType = VkBuffer; | ||
| 374 | }; | 378 | }; |
| 375 | 379 | ||
| 376 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; | 380 | using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index ed5c768d8..e601f8446 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -1,9 +1,10 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <unordered_set> | 6 | #include <unordered_set> |
| 7 | #include <boost/container/small_vector.hpp> | ||
| 7 | 8 | ||
| 8 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 9 | #include "common/settings.h" | 10 | #include "common/settings.h" |
| @@ -17,15 +18,10 @@ | |||
| 17 | 18 | ||
| 18 | namespace VideoCommon { | 19 | namespace VideoCommon { |
| 19 | 20 | ||
| 20 | using Tegra::Texture::SwizzleSource; | ||
| 21 | using Tegra::Texture::TextureType; | ||
| 22 | using Tegra::Texture::TICEntry; | 21 | using Tegra::Texture::TICEntry; |
| 23 | using Tegra::Texture::TSCEntry; | 22 | using Tegra::Texture::TSCEntry; |
| 24 | using VideoCore::Surface::GetFormatType; | 23 | using VideoCore::Surface::GetFormatType; |
| 25 | using VideoCore::Surface::IsCopyCompatible; | ||
| 26 | using VideoCore::Surface::PixelFormat; | 24 | using VideoCore::Surface::PixelFormat; |
| 27 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 28 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 29 | using VideoCore::Surface::SurfaceType; | 25 | using VideoCore::Surface::SurfaceType; |
| 30 | using namespace Common::Literals; | 26 | using namespace Common::Literals; |
| 31 | 27 | ||
| @@ -143,6 +139,13 @@ void TextureCache<P>::TickFrame() { | |||
| 143 | runtime.TickFrame(); | 139 | runtime.TickFrame(); |
| 144 | critical_gc = 0; | 140 | critical_gc = 0; |
| 145 | ++frame_tick; | 141 | ++frame_tick; |
| 142 | |||
| 143 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 144 | for (auto& buffer : async_buffers_death_ring) { | ||
| 145 | runtime.FreeDeferredStagingBuffer(buffer); | ||
| 146 | } | ||
| 147 | async_buffers_death_ring.clear(); | ||
| 148 | } | ||
| 146 | } | 149 | } |
| 147 | 150 | ||
| 148 | template <class P> | 151 | template <class P> |
| @@ -661,25 +664,39 @@ template <class P> | |||
| 661 | void TextureCache<P>::CommitAsyncFlushes() { | 664 | void TextureCache<P>::CommitAsyncFlushes() { |
| 662 | // This is intentionally passing the value by copy | 665 | // This is intentionally passing the value by copy |
| 663 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 666 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 664 | const std::span<const ImageId> download_ids = uncommitted_downloads; | 667 | auto& download_ids = uncommitted_downloads; |
| 665 | if (download_ids.empty()) { | 668 | if (download_ids.empty()) { |
| 666 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 669 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 667 | uncommitted_downloads.clear(); | 670 | uncommitted_downloads.clear(); |
| 668 | async_buffers.emplace_back(std::optional<AsyncBuffer>{}); | 671 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 672 | uncommitted_async_buffers.clear(); | ||
| 669 | return; | 673 | return; |
| 670 | } | 674 | } |
| 671 | size_t total_size_bytes = 0; | 675 | size_t total_size_bytes = 0; |
| 672 | for (const ImageId image_id : download_ids) { | 676 | size_t last_async_buffer_id = uncommitted_async_buffers.size(); |
| 673 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 677 | bool any_none_dma = false; |
| 678 | for (PendingDownload& download_info : download_ids) { | ||
| 679 | if (download_info.is_swizzle) { | ||
| 680 | total_size_bytes += | ||
| 681 | Common::AlignUp(slot_images[download_info.object_id].unswizzled_size_bytes, 64); | ||
| 682 | any_none_dma = true; | ||
| 683 | download_info.async_buffer_id = last_async_buffer_id; | ||
| 684 | } | ||
| 674 | } | 685 | } |
| 675 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); | 686 | if (any_none_dma) { |
| 676 | for (const ImageId image_id : download_ids) { | 687 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); |
| 677 | Image& image = slot_images[image_id]; | 688 | for (const PendingDownload& download_info : download_ids) { |
| 678 | const auto copies = FullDownloadCopies(image.info); | 689 | if (download_info.is_swizzle) { |
| 679 | image.DownloadMemory(download_map, copies); | 690 | Image& image = slot_images[download_info.object_id]; |
| 680 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | 691 | const auto copies = FullDownloadCopies(image.info); |
| 692 | image.DownloadMemory(download_map, copies); | ||
| 693 | download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); | ||
| 694 | } | ||
| 695 | } | ||
| 696 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 681 | } | 697 | } |
| 682 | async_buffers.emplace_back(download_map); | 698 | async_buffers.emplace_back(std::move(uncommitted_async_buffers)); |
| 699 | uncommitted_async_buffers.clear(); | ||
| 683 | } | 700 | } |
| 684 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); | 701 | committed_downloads.emplace_back(std::move(uncommitted_downloads)); |
| 685 | uncommitted_downloads.clear(); | 702 | uncommitted_downloads.clear(); |
| @@ -691,39 +708,57 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 691 | return; | 708 | return; |
| 692 | } | 709 | } |
| 693 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | 710 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |
| 694 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 711 | const auto& download_ids = committed_downloads.front(); |
| 695 | if (download_ids.empty()) { | 712 | if (download_ids.empty()) { |
| 696 | committed_downloads.pop_front(); | 713 | committed_downloads.pop_front(); |
| 697 | async_buffers.pop_front(); | 714 | async_buffers.pop_front(); |
| 698 | return; | 715 | return; |
| 699 | } | 716 | } |
| 700 | auto download_map = *async_buffers.front(); | 717 | auto download_map = std::move(async_buffers.front()); |
| 701 | std::span<u8> download_span = download_map.mapped_span; | ||
| 702 | for (size_t i = download_ids.size(); i > 0; i--) { | 718 | for (size_t i = download_ids.size(); i > 0; i--) { |
| 703 | const ImageBase& image = slot_images[download_ids[i - 1]]; | 719 | auto& download_info = download_ids[i - 1]; |
| 704 | const auto copies = FullDownloadCopies(image.info); | 720 | auto& download_buffer = download_map[download_info.async_buffer_id]; |
| 705 | download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); | 721 | if (download_info.is_swizzle) { |
| 706 | std::span<u8> download_span_alt = download_span.subspan(download_map.offset); | 722 | const ImageBase& image = slot_images[download_info.object_id]; |
| 707 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, | 723 | const auto copies = FullDownloadCopies(image.info); |
| 708 | swizzle_data_buffer); | 724 | download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); |
| 725 | std::span<u8> download_span = | ||
| 726 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 727 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | ||
| 728 | swizzle_data_buffer); | ||
| 729 | } else { | ||
| 730 | const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id]; | ||
| 731 | std::span<u8> download_span = | ||
| 732 | download_buffer.mapped_span.subspan(download_buffer.offset); | ||
| 733 | gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(), | ||
| 734 | buffer_info.size); | ||
| 735 | slot_buffer_downloads.erase(download_info.object_id); | ||
| 736 | } | ||
| 737 | } | ||
| 738 | for (auto& download_buffer : download_map) { | ||
| 739 | async_buffers_death_ring.emplace_back(download_buffer); | ||
| 709 | } | 740 | } |
| 710 | runtime.FreeDeferredStagingBuffer(download_map); | ||
| 711 | committed_downloads.pop_front(); | 741 | committed_downloads.pop_front(); |
| 712 | async_buffers.pop_front(); | 742 | async_buffers.pop_front(); |
| 713 | } else { | 743 | } else { |
| 714 | const std::span<const ImageId> download_ids = committed_downloads.front(); | 744 | const auto& download_ids = committed_downloads.front(); |
| 715 | if (download_ids.empty()) { | 745 | if (download_ids.empty()) { |
| 716 | committed_downloads.pop_front(); | 746 | committed_downloads.pop_front(); |
| 717 | return; | 747 | return; |
| 718 | } | 748 | } |
| 719 | size_t total_size_bytes = 0; | 749 | size_t total_size_bytes = 0; |
| 720 | for (const ImageId image_id : download_ids) { | 750 | for (const PendingDownload& download_info : download_ids) { |
| 721 | total_size_bytes += slot_images[image_id].unswizzled_size_bytes; | 751 | if (download_info.is_swizzle) { |
| 752 | total_size_bytes += slot_images[download_info.object_id].unswizzled_size_bytes; | ||
| 753 | } | ||
| 722 | } | 754 | } |
| 723 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); | 755 | auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); |
| 724 | const size_t original_offset = download_map.offset; | 756 | const size_t original_offset = download_map.offset; |
| 725 | for (const ImageId image_id : download_ids) { | 757 | for (const PendingDownload& download_info : download_ids) { |
| 726 | Image& image = slot_images[image_id]; | 758 | if (!download_info.is_swizzle) { |
| 759 | continue; | ||
| 760 | } | ||
| 761 | Image& image = slot_images[download_info.object_id]; | ||
| 727 | const auto copies = FullDownloadCopies(image.info); | 762 | const auto copies = FullDownloadCopies(image.info); |
| 728 | image.DownloadMemory(download_map, copies); | 763 | image.DownloadMemory(download_map, copies); |
| 729 | download_map.offset += image.unswizzled_size_bytes; | 764 | download_map.offset += image.unswizzled_size_bytes; |
| @@ -732,8 +767,11 @@ void TextureCache<P>::PopAsyncFlushes() { | |||
| 732 | runtime.Finish(); | 767 | runtime.Finish(); |
| 733 | download_map.offset = original_offset; | 768 | download_map.offset = original_offset; |
| 734 | std::span<u8> download_span = download_map.mapped_span; | 769 | std::span<u8> download_span = download_map.mapped_span; |
| 735 | for (const ImageId image_id : download_ids) { | 770 | for (const PendingDownload& download_info : download_ids) { |
| 736 | const ImageBase& image = slot_images[image_id]; | 771 | if (!download_info.is_swizzle) { |
| 772 | continue; | ||
| 773 | } | ||
| 774 | const ImageBase& image = slot_images[download_info.object_id]; | ||
| 737 | const auto copies = FullDownloadCopies(image.info); | 775 | const auto copies = FullDownloadCopies(image.info); |
| 738 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, | 776 | SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, |
| 739 | swizzle_data_buffer); | 777 | swizzle_data_buffer); |
| @@ -834,6 +872,33 @@ std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::Dm | |||
| 834 | } | 872 | } |
| 835 | 873 | ||
| 836 | template <class P> | 874 | template <class P> |
| 875 | void TextureCache<P>::DownloadImageIntoBuffer(typename TextureCache<P>::Image* image, | ||
| 876 | typename TextureCache<P>::BufferType buffer, | ||
| 877 | size_t buffer_offset, | ||
| 878 | std::span<const VideoCommon::BufferImageCopy> copies, | ||
| 879 | GPUVAddr address, size_t size) { | ||
| 880 | if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||
| 881 | const BufferDownload new_buffer_download{address, size}; | ||
| 882 | auto slot = slot_buffer_downloads.insert(new_buffer_download); | ||
| 883 | const PendingDownload new_download{false, uncommitted_async_buffers.size(), slot}; | ||
| 884 | uncommitted_downloads.emplace_back(new_download); | ||
| 885 | auto download_map = runtime.DownloadStagingBuffer(size, true); | ||
| 886 | uncommitted_async_buffers.emplace_back(download_map); | ||
| 887 | std::array buffers{ | ||
| 888 | buffer, | ||
| 889 | download_map.buffer, | ||
| 890 | }; | ||
| 891 | std::array buffer_offsets{ | ||
| 892 | buffer_offset, | ||
| 893 | download_map.offset, | ||
| 894 | }; | ||
| 895 | image->DownloadMemory(buffers, buffer_offsets, copies); | ||
| 896 | } else { | ||
| 897 | image->DownloadMemory(buffer, buffer_offset, copies); | ||
| 898 | } | ||
| 899 | } | ||
| 900 | |||
| 901 | template <class P> | ||
| 837 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | 902 | void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { |
| 838 | if (False(image.flags & ImageFlagBits::CpuModified)) { | 903 | if (False(image.flags & ImageFlagBits::CpuModified)) { |
| 839 | // Only upload modified images | 904 | // Only upload modified images |
| @@ -2209,7 +2274,8 @@ void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) | |||
| 2209 | if (new_id) { | 2274 | if (new_id) { |
| 2210 | const ImageViewBase& old_view = slot_image_views[new_id]; | 2275 | const ImageViewBase& old_view = slot_image_views[new_id]; |
| 2211 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { | 2276 | if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { |
| 2212 | uncommitted_downloads.push_back(old_view.image_id); | 2277 | const PendingDownload new_download{true, 0, old_view.image_id}; |
| 2278 | uncommitted_downloads.emplace_back(new_download); | ||
| 2213 | } | 2279 | } |
| 2214 | } | 2280 | } |
| 2215 | *old_id = new_id; | 2281 | *old_id = new_id; |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 5a5b4179c..758b7e212 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | // SPDX-FileCopyrightText: 2021 yuzu Emulator Project | 1 | // SPDX-FileCopyrightText: 2023 yuzu Emulator Project |
| 2 | // SPDX-License-Identifier: GPL-3.0-or-later | 2 | // SPDX-License-Identifier: GPL-3.0-or-later |
| 3 | 3 | ||
| 4 | #pragma once | 4 | #pragma once |
| @@ -40,14 +40,9 @@ struct ChannelState; | |||
| 40 | 40 | ||
| 41 | namespace VideoCommon { | 41 | namespace VideoCommon { |
| 42 | 42 | ||
| 43 | using Tegra::Texture::SwizzleSource; | ||
| 44 | using Tegra::Texture::TICEntry; | 43 | using Tegra::Texture::TICEntry; |
| 45 | using Tegra::Texture::TSCEntry; | 44 | using Tegra::Texture::TSCEntry; |
| 46 | using VideoCore::Surface::GetFormatType; | ||
| 47 | using VideoCore::Surface::IsCopyCompatible; | ||
| 48 | using VideoCore::Surface::PixelFormat; | 45 | using VideoCore::Surface::PixelFormat; |
| 49 | using VideoCore::Surface::PixelFormatFromDepthFormat; | ||
| 50 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||
| 51 | using namespace Common::Literals; | 46 | using namespace Common::Literals; |
| 52 | 47 | ||
| 53 | struct ImageViewInOut { | 48 | struct ImageViewInOut { |
| @@ -119,6 +114,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI | |||
| 119 | using Sampler = typename P::Sampler; | 114 | using Sampler = typename P::Sampler; |
| 120 | using Framebuffer = typename P::Framebuffer; | 115 | using Framebuffer = typename P::Framebuffer; |
| 121 | using AsyncBuffer = typename P::AsyncBuffer; | 116 | using AsyncBuffer = typename P::AsyncBuffer; |
| 117 | using BufferType = typename P::BufferType; | ||
| 122 | 118 | ||
| 123 | struct BlitImages { | 119 | struct BlitImages { |
| 124 | ImageId dst_id; | 120 | ImageId dst_id; |
| @@ -215,6 +211,10 @@ public: | |||
| 215 | const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, | 211 | const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand, |
| 216 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); | 212 | const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image); |
| 217 | 213 | ||
/// Download an image into a buffer at the given offset, using the given buffer-image copies.
/// `address` and `size` are only read when the backend implements async downloads, where
/// they are stored with the queued download and `size` is also the staging buffer size requested.
| 214 | void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset, | ||
| 215 | std::span<const VideoCommon::BufferImageCopy> copies, | ||
| 216 | GPUVAddr address = 0, size_t size = 0); | ||
| 217 | |||
| 218 | /// Return true when a CPU region is modified from the GPU | 218 | /// Return true when a CPU region is modified from the GPU |
| 219 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | 219 | [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |
| 220 | 220 | ||
| @@ -424,17 +424,32 @@ private: | |||
| 424 | u64 critical_memory; | 424 | u64 critical_memory; |
| 425 | size_t critical_gc; | 425 | size_t critical_gc; |
| 426 | 426 | ||
// Address/size pair stored in slot_buffer_downloads for a download issued through
// DownloadImageIntoBuffer on the async path.
// NOTE(review): how the pair is consumed is not visible in this chunk.
| 427 | struct BufferDownload { | ||
| 428 | GPUVAddr address; | ||
| 429 | size_t size; | ||
| 430 | }; | ||
| 431 | |||
// Entry of uncommitted_downloads / committed_downloads.
// - is_swizzle: set to true by BindRenderTarget for preemptive image downloads and to false
//   by DownloadImageIntoBuffer for buffer downloads.
// - async_buffer_id: for buffer downloads, the size of uncommitted_async_buffers at the time
//   the entry was created; BindRenderTarget stores 0.
// - object_id: the image id when is_swizzle is true, otherwise the slot returned by
//   slot_buffer_downloads.insert.
| 432 | struct PendingDownload { | ||
| 433 | bool is_swizzle; | ||
| 434 | size_t async_buffer_id; | ||
| 435 | SlotId object_id; | ||
| 436 | }; | ||
| 437 | |||
| 427 | SlotVector<Image> slot_images; | 438 | SlotVector<Image> slot_images; |
| 428 | SlotVector<ImageMapView> slot_map_views; | 439 | SlotVector<ImageMapView> slot_map_views; |
| 429 | SlotVector<ImageView> slot_image_views; | 440 | SlotVector<ImageView> slot_image_views; |
| 430 | SlotVector<ImageAlloc> slot_image_allocs; | 441 | SlotVector<ImageAlloc> slot_image_allocs; |
| 431 | SlotVector<Sampler> slot_samplers; | 442 | SlotVector<Sampler> slot_samplers; |
| 432 | SlotVector<Framebuffer> slot_framebuffers; | 443 | SlotVector<Framebuffer> slot_framebuffers; |
| 444 | SlotVector<BufferDownload> slot_buffer_downloads; | ||
| 433 | 445 | ||
| 434 | // TODO: This data structure is not optimal and it should be reworked | 446 | // TODO: This data structure is not optimal and it should be reworked |
| 435 | std::vector<ImageId> uncommitted_downloads; | 447 | |
| 436 | std::deque<std::vector<ImageId>> committed_downloads; | 448 | std::vector<PendingDownload> uncommitted_downloads; |
| 437 | std::deque<std::optional<AsyncBuffer>> async_buffers; | 449 | std::deque<std::vector<PendingDownload>> committed_downloads; |
| 450 | std::vector<AsyncBuffer> uncommitted_async_buffers; | ||
| 451 | std::deque<std::vector<AsyncBuffer>> async_buffers; | ||
| 452 | std::deque<AsyncBuffer> async_buffers_death_ring; | ||
| 438 | 453 | ||
| 439 | struct LRUItemParams { | 454 | struct LRUItemParams { |
| 440 | using ObjectType = ImageId; | 455 | using ObjectType = ImageId; |