summary refs log tree commit diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/renderer_opengl/gl_texture_cache.cpp 17
-rw-r--r-- src/video_core/renderer_vulkan/vk_texture_cache.cpp 7
-rw-r--r-- src/video_core/texture_cache/image_base.h 3
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 70
-rw-r--r-- src/video_core/texture_cache/texture_cache_base.h 16
-rw-r--r-- src/video_core/textures/astc.cpp 4
6 files changed, 109 insertions, 8 deletions
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index eb6e43a08..b047e7b3d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -228,8 +228,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
228 228
229[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime, 229[[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
230 const VideoCommon::ImageInfo& info) { 230 const VideoCommon::ImageInfo& info) {
231 if (IsPixelFormatASTC(info.format)) { 231 if (IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC()) {
232 return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue(); 232 return Settings::values.accelerate_astc.GetValue() &&
233 !Settings::values.async_astc.GetValue();
233 } 234 }
234 // Disable other accelerated uploads for now as they don't implement swizzled uploads 235 // Disable other accelerated uploads for now as they don't implement swizzled uploads
235 return false; 236 return false;
@@ -258,6 +259,14 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
258 return format_info.compatibility_class == store_class; 259 return format_info.compatibility_class == store_class;
259} 260}
260 261
262[[nodiscard]] bool CanBeDecodedAsync(const TextureCacheRuntime& runtime,
263 const VideoCommon::ImageInfo& info) {
264 if (IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC()) {
265 return Settings::values.async_astc.GetValue();
266 }
267 return false;
268}
269
261[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset, 270[[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
262 VideoCommon::SubresourceLayers subresource, GLenum target) { 271 VideoCommon::SubresourceLayers subresource, GLenum target) {
263 switch (target) { 272 switch (target) {
@@ -721,7 +730,9 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
721Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, 730Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
722 VAddr cpu_addr_) 731 VAddr cpu_addr_)
723 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { 732 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
724 if (CanBeAccelerated(*runtime, info)) { 733 if (CanBeDecodedAsync(*runtime, info)) {
734 flags |= ImageFlagBits::AsynchronousDecode;
735 } else if (CanBeAccelerated(*runtime, info)) {
725 flags |= ImageFlagBits::AcceleratedUpload; 736 flags |= ImageFlagBits::AcceleratedUpload;
726 } 737 }
727 if (IsConverted(runtime->device, info.format, info.type)) { 738 if (IsConverted(runtime->device, info.format, info.type)) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 9b85dfb5e..80adb70eb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1256,11 +1256,12 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1256 commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), 1256 commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
1257 aspect_mask(ImageAspectMask(info.format)) { 1257 aspect_mask(ImageAspectMask(info.format)) {
1258 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1258 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
1259 if (Settings::values.accelerate_astc.GetValue()) { 1259 if (Settings::values.async_astc.GetValue()) {
1260 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
1261 } else if (Settings::values.accelerate_astc.GetValue()) {
1260 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 1262 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
1261 } else {
1262 flags |= VideoCommon::ImageFlagBits::Converted;
1263 } 1263 }
1264 flags |= VideoCommon::ImageFlagBits::Converted;
1264 flags |= VideoCommon::ImageFlagBits::CostlyLoad; 1265 flags |= VideoCommon::ImageFlagBits::CostlyLoad;
1265 } 1266 }
1266 if (runtime->device.HasDebuggingToolAttached()) { 1267 if (runtime->device.HasDebuggingToolAttached()) {
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 620565684..e8fa592d2 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -38,6 +38,9 @@ enum class ImageFlagBits : u32 {
38 Rescaled = 1 << 13, 38 Rescaled = 1 << 13,
39 CheckingRescalable = 1 << 14, 39 CheckingRescalable = 1 << 14,
40 IsRescalable = 1 << 15, 40 IsRescalable = 1 << 15,
41
42 AsynchronousDecode = 1 << 16,
43 IsDecoding = 1 << 17, ///< Is currently being decoded asynchronously.
41}; 44};
42DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) 45DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
43 46
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 3e2cbb0b0..9dd152fbe 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -85,6 +85,11 @@ void TextureCache<P>::RunGarbageCollector() {
85 } 85 }
86 --num_iterations; 86 --num_iterations;
87 auto& image = slot_images[image_id]; 87 auto& image = slot_images[image_id];
88 if (True(image.flags & ImageFlagBits::IsDecoding)) {
89 // This image is still being decoded, deleting it will invalidate the slot
90 // used by the async decoder thread.
91 return false;
92 }
88 const bool must_download = 93 const bool must_download =
89 image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap); 94 image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
90 if (!high_priority_mode && 95 if (!high_priority_mode &&
@@ -133,6 +138,8 @@ void TextureCache<P>::TickFrame() {
133 sentenced_images.Tick(); 138 sentenced_images.Tick();
134 sentenced_framebuffers.Tick(); 139 sentenced_framebuffers.Tick();
135 sentenced_image_view.Tick(); 140 sentenced_image_view.Tick();
141 TickAsyncDecode();
142
136 runtime.TickFrame(); 143 runtime.TickFrame();
137 critical_gc = 0; 144 critical_gc = 0;
138 ++frame_tick; 145 ++frame_tick;
@@ -777,6 +784,10 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
777 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); 784 LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
778 return; 785 return;
779 } 786 }
787 if (True(image.flags & ImageFlagBits::AsynchronousDecode)) {
788 QueueAsyncDecode(image, image_id);
789 return;
790 }
780 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); 791 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
781 UploadImageContents(image, staging); 792 UploadImageContents(image, staging);
782 runtime.InsertUploadMemoryBarrier(); 793 runtime.InsertUploadMemoryBarrier();
@@ -990,6 +1001,65 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
990} 1001}
991 1002
992template <class P> 1003template <class P>
1004void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
1005 UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
1006 LOG_INFO(HW_GPU, "Queuing async texture decode");
1007
1008 image.flags |= ImageFlagBits::IsDecoding;
1009 auto decode = std::make_unique<AsyncDecodeContext>();
1010 auto* decode_ptr = decode.get();
1011 decode->image_id = image_id;
1012 async_decodes.push_back(std::move(decode));
1013
1014 Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes);
1015 const size_t guest_size_bytes = image.guest_size_bytes;
1016 swizzle_data_buffer.resize_destructive(guest_size_bytes);
1017 gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
1018 auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
1019 local_unswizzle_data_buffer);
1020 const size_t out_size = MapSizeBytes(image);
1021
1022 auto func = [out_size, copies, info = image.info,
1023 input = std::move(local_unswizzle_data_buffer),
1024 async_decode = decode_ptr]() mutable {
1025 async_decode->decoded_data.resize_destructive(out_size);
1026 std::span copies_span{copies.data(), copies.size()};
1027 ConvertImage(input, info, async_decode->decoded_data, copies_span);
1028
1029 // TODO: Do we need this lock?
1030 std::unique_lock lock{async_decode->mutex};
1031 async_decode->copies = std::move(copies);
1032 async_decode->complete = true;
1033 };
1034 texture_decode_worker.QueueWork(std::move(func));
1035}
1036
1037template <class P>
1038void TextureCache<P>::TickAsyncDecode() {
1039 bool has_uploads{};
1040 auto i = async_decodes.begin();
1041 while (i != async_decodes.end()) {
1042 auto* async_decode = i->get();
1043 std::unique_lock lock{async_decode->mutex};
1044 if (!async_decode->complete) {
1045 ++i;
1046 continue;
1047 }
1048 Image& image = slot_images[async_decode->image_id];
1049 auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
1050 std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
1051 async_decode->decoded_data.size());
1052 image.UploadMemory(staging, async_decode->copies);
1053 image.flags &= ~ImageFlagBits::IsDecoding;
1054 has_uploads = true;
1055 i = async_decodes.erase(i);
1056 }
1057 if (has_uploads) {
1058 runtime.InsertUploadMemoryBarrier();
1059 }
1060}
1061
1062template <class P>
993bool TextureCache<P>::ScaleUp(Image& image) { 1063bool TextureCache<P>::ScaleUp(Image& image) {
994 const bool has_copy = image.HasScaled(); 1064 const bool has_copy = image.HasScaled();
995 const bool rescaled = image.ScaleUp(); 1065 const bool rescaled = image.ScaleUp();
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 485eaabaa..013836933 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,7 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <atomic>
6#include <deque> 7#include <deque>
7#include <limits> 8#include <limits>
8#include <mutex> 9#include <mutex>
@@ -18,6 +19,7 @@
18#include "common/lru_cache.h" 19#include "common/lru_cache.h"
19#include "common/polyfill_ranges.h" 20#include "common/polyfill_ranges.h"
20#include "common/scratch_buffer.h" 21#include "common/scratch_buffer.h"
22#include "common/thread_worker.h"
21#include "video_core/compatible_formats.h" 23#include "video_core/compatible_formats.h"
22#include "video_core/control/channel_state_cache.h" 24#include "video_core/control/channel_state_cache.h"
23#include "video_core/delayed_destruction_ring.h" 25#include "video_core/delayed_destruction_ring.h"
@@ -54,6 +56,14 @@ struct ImageViewInOut {
54 ImageViewId id{}; 56 ImageViewId id{};
55}; 57};
56 58
59struct AsyncDecodeContext {
60 ImageId image_id;
61 Common::ScratchBuffer<u8> decoded_data;
62 std::vector<BufferImageCopy> copies;
63 std::mutex mutex;
64 std::atomic_bool complete;
65};
66
57using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>; 67using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
58 68
59class TextureCacheChannelInfo : public ChannelInfo { 69class TextureCacheChannelInfo : public ChannelInfo {
@@ -377,6 +387,9 @@ private:
377 bool ScaleDown(Image& image); 387 bool ScaleDown(Image& image);
378 u64 GetScaledImageSizeBytes(const ImageBase& image); 388 u64 GetScaledImageSizeBytes(const ImageBase& image);
379 389
390 void QueueAsyncDecode(Image& image, ImageId image_id);
391 void TickAsyncDecode();
392
380 Runtime& runtime; 393 Runtime& runtime;
381 394
382 VideoCore::RasterizerInterface& rasterizer; 395 VideoCore::RasterizerInterface& rasterizer;
@@ -430,6 +443,9 @@ private:
430 443
431 u64 modification_tick = 0; 444 u64 modification_tick = 0;
432 u64 frame_tick = 0; 445 u64 frame_tick = 0;
446
447 Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
448 std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
433}; 449};
434 450
435} // namespace VideoCommon 451} // namespace VideoCommon
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index e8d7c7863..4381eed1d 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1656,8 +1656,8 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
1656 const u32 rows = Common::DivideUp(height, block_height); 1656 const u32 rows = Common::DivideUp(height, block_height);
1657 const u32 cols = Common::DivideUp(width, block_width); 1657 const u32 cols = Common::DivideUp(width, block_width);
1658 1658
1659 Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, 1659 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
1660 "ASTCDecompress"}; 1660 "ASTCDecompress"};
1661 1661
1662 for (u32 z = 0; z < depth; ++z) { 1662 for (u32 z = 0; z < depth; ++z) {
1663 const u32 depth_offset = z * height * width * 4; 1663 const u32 depth_offset = z * height * width * 4;