summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/common/settings.cpp2
-rw-r--r--src/common/settings.h9
-rw-r--r--src/core/hle/kernel/k_memory_block_manager.h8
-rw-r--r--src/video_core/CMakeLists.txt6
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp31
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h4
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp24
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp3
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp46
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h2
-rw-r--r--src/video_core/texture_cache/texture_cache.h36
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h3
-rw-r--r--src/video_core/texture_cache/util.cpp77
-rw-r--r--src/video_core/textures/astc.cpp5
-rw-r--r--src/video_core/textures/bcn.cpp87
-rw-r--r--src/video_core/textures/bcn.h17
-rw-r--r--src/video_core/textures/workers.cpp15
-rw-r--r--src/video_core/textures/workers.h12
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp8
-rw-r--r--src/yuzu/configuration/config.cpp5
-rw-r--r--src/yuzu/configuration/config.h1
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.cpp14
-rw-r--r--src/yuzu/configuration/configure_graphics_advanced.ui44
-rw-r--r--src/yuzu_cmd/config.cpp1
-rw-r--r--src/yuzu_cmd/default_ini.h4
28 files changed, 456 insertions, 37 deletions
diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index ba617aea1..ff53e80bb 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -61,6 +61,7 @@ void LogSettings() {
61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); 61 log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); 62 log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue()); 63 log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
64 log_setting("Renderer_AstcRecompression", values.astc_recompression.GetValue());
64 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue()); 65 log_setting("Renderer_UseVsync", values.vsync_mode.GetValue());
65 log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue()); 66 log_setting("Renderer_UseReactiveFlushing", values.use_reactive_flushing.GetValue());
66 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); 67 log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@@ -224,6 +225,7 @@ void RestoreGlobalState(bool is_powered_on) {
224 values.nvdec_emulation.SetGlobal(true); 225 values.nvdec_emulation.SetGlobal(true);
225 values.accelerate_astc.SetGlobal(true); 226 values.accelerate_astc.SetGlobal(true);
226 values.async_astc.SetGlobal(true); 227 values.async_astc.SetGlobal(true);
228 values.astc_recompression.SetGlobal(true);
227 values.use_reactive_flushing.SetGlobal(true); 229 values.use_reactive_flushing.SetGlobal(true);
228 values.shader_backend.SetGlobal(true); 230 values.shader_backend.SetGlobal(true);
229 values.use_asynchronous_shaders.SetGlobal(true); 231 values.use_asynchronous_shaders.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 36ffcd693..7f865b2a7 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -90,6 +90,12 @@ enum class AntiAliasing : u32 {
90 LastAA = Smaa, 90 LastAA = Smaa,
91}; 91};
92 92
93enum class AstcRecompression : u32 {
94 Uncompressed = 0,
95 Bc1 = 1,
96 Bc3 = 2,
97};
98
93struct ResolutionScalingInfo { 99struct ResolutionScalingInfo {
94 u32 up_scale{1}; 100 u32 up_scale{1};
95 u32 down_shift{0}; 101 u32 down_shift{0};
@@ -473,6 +479,9 @@ struct Values {
473 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true, 479 SwitchableSetting<bool> use_vulkan_driver_pipeline_cache{true,
474 "use_vulkan_driver_pipeline_cache"}; 480 "use_vulkan_driver_pipeline_cache"};
475 SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"}; 481 SwitchableSetting<bool> enable_compute_pipelines{false, "enable_compute_pipelines"};
482 SwitchableSetting<AstcRecompression, true> astc_recompression{
483 AstcRecompression::Uncompressed, AstcRecompression::Uncompressed, AstcRecompression::Bc3,
484 "astc_recompression"};
476 485
477 SwitchableSetting<u8> bg_red{0, "bg_red"}; 486 SwitchableSetting<u8> bg_red{0, "bg_red"};
478 SwitchableSetting<u8> bg_green{0, "bg_green"}; 487 SwitchableSetting<u8> bg_green{0, "bg_green"};
diff --git a/src/core/hle/kernel/k_memory_block_manager.h b/src/core/hle/kernel/k_memory_block_manager.h
index 7c0bd16f0..96496e990 100644
--- a/src/core/hle/kernel/k_memory_block_manager.h
+++ b/src/core/hle/kernel/k_memory_block_manager.h
@@ -144,14 +144,10 @@ private:
144 144
145class KScopedMemoryBlockManagerAuditor { 145class KScopedMemoryBlockManagerAuditor {
146public: 146public:
147 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager* m) : m_manager(m) { 147 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager* m) : m_manager(m) {}
148 ASSERT(m_manager->CheckState());
149 }
150 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager& m) 148 explicit KScopedMemoryBlockManagerAuditor(KMemoryBlockManager& m)
151 : KScopedMemoryBlockManagerAuditor(std::addressof(m)) {} 149 : KScopedMemoryBlockManagerAuditor(std::addressof(m)) {}
152 ~KScopedMemoryBlockManagerAuditor() { 150 ~KScopedMemoryBlockManagerAuditor() = default;
153 ASSERT(m_manager->CheckState());
154 }
155 151
156private: 152private:
157 KMemoryBlockManager* m_manager; 153 KMemoryBlockManager* m_manager;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index a0009a36f..308d013d6 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -246,10 +246,14 @@ add_library(video_core STATIC
246 texture_cache/util.h 246 texture_cache/util.h
247 textures/astc.h 247 textures/astc.h
248 textures/astc.cpp 248 textures/astc.cpp
249 textures/bcn.cpp
250 textures/bcn.h
249 textures/decoders.cpp 251 textures/decoders.cpp
250 textures/decoders.h 252 textures/decoders.h
251 textures/texture.cpp 253 textures/texture.cpp
252 textures/texture.h 254 textures/texture.h
255 textures/workers.cpp
256 textures/workers.h
253 transform_feedback.cpp 257 transform_feedback.cpp
254 transform_feedback.h 258 transform_feedback.h
255 video_core.cpp 259 video_core.cpp
@@ -275,7 +279,7 @@ add_library(video_core STATIC
275create_target_directory_groups(video_core) 279create_target_directory_groups(video_core)
276 280
277target_link_libraries(video_core PUBLIC common core) 281target_link_libraries(video_core PUBLIC common core)
278target_link_libraries(video_core PUBLIC glad shader_recompiler) 282target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
279 283
280if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32) 284if (YUZU_USE_BUNDLED_FFMPEG AND NOT WIN32)
281 add_dependencies(video_core ffmpeg-build) 285 add_dependencies(video_core ffmpeg-build)
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f3a6d5069..65494097b 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -1664,7 +1664,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
1664 // cbufs, which do not store the sizes adjacent to the addresses, so use the fully 1664 // cbufs, which do not store the sizes adjacent to the addresses, so use the fully
1665 // mapped buffer size for now. 1665 // mapped buffer size for now.
1666 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr)); 1666 const u32 memory_layout_size = static_cast<u32>(gpu_memory->GetMemoryLayoutSize(gpu_addr));
1667 return memory_layout_size; 1667 return std::min(memory_layout_size, static_cast<u32>(8_MiB));
1668 }(); 1668 }();
1669 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); 1669 const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
1670 if (!cpu_addr || size == 0) { 1670 if (!cpu_addr || size == 0) {
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 31118886f..1e0823836 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -233,6 +233,8 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
233 const VideoCommon::ImageInfo& info) { 233 const VideoCommon::ImageInfo& info) {
234 if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) { 234 if (IsPixelFormatASTC(info.format) && info.size.depth == 1 && !runtime.HasNativeASTC()) {
235 return Settings::values.accelerate_astc.GetValue() && 235 return Settings::values.accelerate_astc.GetValue() &&
236 Settings::values.astc_recompression.GetValue() ==
237 Settings::AstcRecompression::Uncompressed &&
236 !Settings::values.async_astc.GetValue(); 238 !Settings::values.async_astc.GetValue();
237 } 239 }
238 // Disable other accelerated uploads for now as they don't implement swizzled uploads 240 // Disable other accelerated uploads for now as they don't implement swizzled uploads
@@ -437,6 +439,19 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
437 return GL_R32UI; 439 return GL_R32UI;
438} 440}
439 441
442[[nodiscard]] GLenum SelectAstcFormat(PixelFormat format, bool is_srgb) {
443 switch (Settings::values.astc_recompression.GetValue()) {
444 case Settings::AstcRecompression::Bc1:
445 return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
446 break;
447 case Settings::AstcRecompression::Bc3:
448 return is_srgb ? GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT : GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
449 break;
450 default:
451 return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
452 }
453}
454
440} // Anonymous namespace 455} // Anonymous namespace
441 456
442ImageBufferMap::~ImageBufferMap() { 457ImageBufferMap::~ImageBufferMap() {
@@ -739,9 +754,16 @@ Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_,
739 if (IsConverted(runtime->device, info.format, info.type)) { 754 if (IsConverted(runtime->device, info.format, info.type)) {
740 flags |= ImageFlagBits::Converted; 755 flags |= ImageFlagBits::Converted;
741 flags |= ImageFlagBits::CostlyLoad; 756 flags |= ImageFlagBits::CostlyLoad;
742 gl_internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 757
758 const bool is_srgb = IsPixelFormatSRGB(info.format);
759 gl_internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
743 gl_format = GL_RGBA; 760 gl_format = GL_RGBA;
744 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; 761 gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
762
763 if (IsPixelFormatASTC(info.format)) {
764 gl_internal_format = SelectAstcFormat(info.format, is_srgb);
765 gl_format = GL_NONE;
766 }
745 } else { 767 } else {
746 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); 768 const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
747 gl_internal_format = tuple.internal_format; 769 gl_internal_format = tuple.internal_format;
@@ -1130,7 +1152,12 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
1130 views{runtime.null_image_views} { 1152 views{runtime.null_image_views} {
1131 const Device& device = runtime.device; 1153 const Device& device = runtime.device;
1132 if (True(image.flags & ImageFlagBits::Converted)) { 1154 if (True(image.flags & ImageFlagBits::Converted)) {
1133 internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; 1155 const bool is_srgb = IsPixelFormatSRGB(info.format);
1156 internal_format = is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
1157
1158 if (IsPixelFormatASTC(info.format)) {
1159 internal_format = SelectAstcFormat(info.format, is_srgb);
1160 }
1134 } else { 1161 } else {
1135 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; 1162 internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
1136 } 1163 }
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 1190999a8..3e9b3302b 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -144,6 +144,10 @@ public:
144 return state_tracker; 144 return state_tracker;
145 } 145 }
146 146
147 void BarrierFeedbackLoop() const noexcept {
148 // OpenGL does not require a barrier for attachment feedback loops.
149 }
150
147private: 151private:
148 struct StagingBuffers { 152 struct StagingBuffers {
149 explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_); 153 explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 8853cf0f7..b75d7220d 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -6,6 +6,7 @@
6#include "common/assert.h" 6#include "common/assert.h"
7#include "common/common_types.h" 7#include "common/common_types.h"
8#include "common/logging/log.h" 8#include "common/logging/log.h"
9#include "common/settings.h"
9#include "video_core/engines/maxwell_3d.h" 10#include "video_core/engines/maxwell_3d.h"
10#include "video_core/renderer_vulkan/maxwell_to_vk.h" 11#include "video_core/renderer_vulkan/maxwell_to_vk.h"
11#include "video_core/surface.h" 12#include "video_core/surface.h"
@@ -237,14 +238,25 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
237 PixelFormat pixel_format) { 238 PixelFormat pixel_format) {
238 ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples)); 239 ASSERT(static_cast<size_t>(pixel_format) < std::size(tex_format_tuples));
239 FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)]; 240 FormatTuple tuple = tex_format_tuples[static_cast<size_t>(pixel_format)];
240 // Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively 241 // Transcode on hardware that doesn't support ASTC natively
241 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) { 242 if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
242 const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format); 243 const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
243 if (is_srgb) { 244
244 tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; 245 switch (Settings::values.astc_recompression.GetValue()) {
245 } else { 246 case Settings::AstcRecompression::Uncompressed:
246 tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; 247 if (is_srgb) {
247 tuple.usage |= Storage; 248 tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
249 } else {
250 tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
251 tuple.usage |= Storage;
252 }
253 break;
254 case Settings::AstcRecompression::Bc1:
255 tuple.format = is_srgb ? VK_FORMAT_BC1_RGBA_SRGB_BLOCK : VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
256 break;
257 case Settings::AstcRecompression::Bc3:
258 tuple.format = is_srgb ? VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK;
259 break;
248 } 260 }
249 } 261 }
250 const bool attachable = (tuple.usage & Attachable) != 0; 262 const bool attachable = (tuple.usage & Attachable) != 0;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index f1bcd5cd6..506b78f08 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -481,12 +481,13 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
481 if constexpr (Spec::enabled_stages[4]) { 481 if constexpr (Spec::enabled_stages[4]) {
482 prepare_stage(4); 482 prepare_stage(4);
483 } 483 }
484 texture_cache.UpdateRenderTargets(false);
485 texture_cache.CheckFeedbackLoop(views);
484 ConfigureDraw(rescaling, render_area); 486 ConfigureDraw(rescaling, render_area);
485} 487}
486 488
487void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling, 489void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
488 const RenderAreaPushConstant& render_area) { 490 const RenderAreaPushConstant& render_area) {
489 texture_cache.UpdateRenderTargets(false);
490 scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); 491 scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
491 492
492 if (!is_built.load(std::memory_order::relaxed)) { 493 if (!is_built.load(std::memory_order::relaxed)) {
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 47c74e4d8..8b65aeaeb 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -10,11 +10,16 @@
10 10
11namespace Vulkan { 11namespace Vulkan {
12 12
13constexpr u64 FENCE_RESERVE_SIZE = 8;
14
13MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) { 15MasterSemaphore::MasterSemaphore(const Device& device_) : device(device_) {
14 if (!device.HasTimelineSemaphore()) { 16 if (!device.HasTimelineSemaphore()) {
15 static constexpr VkFenceCreateInfo fence_ci{ 17 static constexpr VkFenceCreateInfo fence_ci{
16 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0}; 18 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
17 fence = device.GetLogical().CreateFence(fence_ci); 19 free_queue.resize(FENCE_RESERVE_SIZE);
20 std::ranges::generate(free_queue,
21 [&] { return device.GetLogical().CreateFence(fence_ci); });
22 wait_thread = std::jthread([this](std::stop_token token) { WaitThread(token); });
18 return; 23 return;
19 } 24 }
20 25
@@ -167,16 +172,53 @@ VkResult MasterSemaphore::SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphor
167 .pSignalSemaphores = &signal_semaphore, 172 .pSignalSemaphores = &signal_semaphore,
168 }; 173 };
169 174
175 auto fence = GetFreeFence();
170 auto result = device.GetGraphicsQueue().Submit(submit_info, *fence); 176 auto result = device.GetGraphicsQueue().Submit(submit_info, *fence);
171 177
172 if (result == VK_SUCCESS) { 178 if (result == VK_SUCCESS) {
179 std::scoped_lock lock{wait_mutex};
180 wait_queue.emplace(host_tick, std::move(fence));
181 wait_cv.notify_one();
182 }
183
184 return result;
185}
186
187void MasterSemaphore::WaitThread(std::stop_token token) {
188 while (!token.stop_requested()) {
189 u64 host_tick;
190 vk::Fence fence;
191 {
192 std::unique_lock lock{wait_mutex};
193 Common::CondvarWait(wait_cv, lock, token, [this] { return !wait_queue.empty(); });
194 if (token.stop_requested()) {
195 return;
196 }
197 std::tie(host_tick, fence) = std::move(wait_queue.front());
198 wait_queue.pop();
199 }
200
173 fence.Wait(); 201 fence.Wait();
174 fence.Reset(); 202 fence.Reset();
175 gpu_tick.store(host_tick); 203 gpu_tick.store(host_tick);
176 gpu_tick.notify_all(); 204 gpu_tick.notify_all();
205
206 std::scoped_lock lock{free_mutex};
207 free_queue.push_front(std::move(fence));
177 } 208 }
209}
178 210
179 return result; 211vk::Fence MasterSemaphore::GetFreeFence() {
212 std::scoped_lock lock{free_mutex};
213 if (free_queue.empty()) {
214 static constexpr VkFenceCreateInfo fence_ci{
215 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .pNext = nullptr, .flags = 0};
216 return device.GetLogical().CreateFence(fence_ci);
217 }
218
219 auto fence = std::move(free_queue.back());
220 free_queue.pop_back();
221 return fence;
180} 222}
181 223
182} // namespace Vulkan 224} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index f2f61f781..1e7c90215 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -5,8 +5,10 @@
5 5
6#include <atomic> 6#include <atomic>
7#include <condition_variable> 7#include <condition_variable>
8#include <deque>
8#include <mutex> 9#include <mutex>
9#include <thread> 10#include <thread>
11#include <queue>
10 12
11#include "common/common_types.h" 13#include "common/common_types.h"
12#include "common/polyfill_thread.h" 14#include "common/polyfill_thread.h"
@@ -17,6 +19,8 @@ namespace Vulkan {
17class Device; 19class Device;
18 20
19class MasterSemaphore { 21class MasterSemaphore {
22 using Waitable = std::pair<u64, vk::Fence>;
23
20public: 24public:
21 explicit MasterSemaphore(const Device& device); 25 explicit MasterSemaphore(const Device& device);
22 ~MasterSemaphore(); 26 ~MasterSemaphore();
@@ -57,13 +61,22 @@ private:
57 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore, 61 VkResult SubmitQueueFence(vk::CommandBuffer& cmdbuf, VkSemaphore signal_semaphore,
58 VkSemaphore wait_semaphore, u64 host_tick); 62 VkSemaphore wait_semaphore, u64 host_tick);
59 63
64 void WaitThread(std::stop_token token);
65
66 vk::Fence GetFreeFence();
67
60private: 68private:
61 const Device& device; ///< Device. 69 const Device& device; ///< Device.
62 vk::Fence fence; ///< Fence.
63 vk::Semaphore semaphore; ///< Timeline semaphore. 70 vk::Semaphore semaphore; ///< Timeline semaphore.
64 std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. 71 std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
65 std::atomic<u64> current_tick{1}; ///< Current logical tick. 72 std::atomic<u64> current_tick{1}; ///< Current logical tick.
73 std::mutex wait_mutex;
74 std::mutex free_mutex;
75 std::condition_variable_any wait_cv;
76 std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread.
77 std::deque<vk::Fence> free_queue; ///< Holds available fences for submission.
66 std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs. 78 std::jthread debug_thread; ///< Debug thread to workaround validation layer bugs.
79 std::jthread wait_thread; ///< Helper thread that waits for submitted fences.
67}; 80};
68 81
69} // namespace Vulkan 82} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 4d0481f2a..8711e2a87 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -861,6 +861,10 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
861 return *buffers[level]; 861 return *buffers[level];
862} 862}
863 863
864void TextureCacheRuntime::BarrierFeedbackLoop() {
865 scheduler.RequestOutsideRenderPassOperationContext();
866}
867
864void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, 868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
865 std::span<const VideoCommon::ImageCopy> copies) { 869 std::span<const VideoCommon::ImageCopy> copies) {
866 std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); 870 std::vector<VkBufferImageCopy> vk_in_copies(copies.size());
@@ -1268,7 +1272,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1268 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1272 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
1269 if (Settings::values.async_astc.GetValue()) { 1273 if (Settings::values.async_astc.GetValue()) {
1270 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode; 1274 flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
1271 } else if (Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) { 1275 } else if (Settings::values.astc_recompression.GetValue() ==
1276 Settings::AstcRecompression::Uncompressed &&
1277 Settings::values.accelerate_astc.GetValue() && info.size.depth == 1) {
1272 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; 1278 flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
1273 } 1279 }
1274 flags |= VideoCommon::ImageFlagBits::Converted; 1280 flags |= VideoCommon::ImageFlagBits::Converted;
@@ -1283,7 +1289,9 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1283 .usage = VK_IMAGE_USAGE_STORAGE_BIT, 1289 .usage = VK_IMAGE_USAGE_STORAGE_BIT,
1284 }; 1290 };
1285 current_image = *original_image; 1291 current_image = *original_image;
1286 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1292 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() &&
1293 Settings::values.astc_recompression.GetValue() ==
1294 Settings::AstcRecompression::Uncompressed) {
1287 const auto& device = runtime->device.GetLogical(); 1295 const auto& device = runtime->device.GetLogical();
1288 storage_image_views.reserve(info.resources.levels); 1296 storage_image_views.reserve(info.resources.levels);
1289 for (s32 level = 0; level < info.resources.levels; ++level) { 1297 for (s32 level = 0; level < info.resources.levels; ++level) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 4166b3d20..0f7a5ffd4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -103,6 +103,8 @@ public:
103 103
104 [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size); 104 [[nodiscard]] VkBuffer GetTemporaryBuffer(size_t needed_size);
105 105
106 void BarrierFeedbackLoop();
107
106 const Device& device; 108 const Device& device;
107 Scheduler& scheduler; 109 Scheduler& scheduler;
108 MemoryAllocator& memory_allocator; 110 MemoryAllocator& memory_allocator;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 6abafa877..9790949f5 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,42 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
186} 186}
187 187
188template <class P> 188template <class P>
189void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
190 const bool requires_barrier = [&] {
191 for (const auto& view : views) {
192 if (!view.id) {
193 continue;
194 }
195 auto& image_view = slot_image_views[view.id];
196
197 // Check color targets
198 for (const auto& ct_view_id : render_targets.color_buffer_ids) {
199 if (ct_view_id) {
200 auto& ct_view = slot_image_views[ct_view_id];
201 if (image_view.image_id == ct_view.image_id) {
202 return true;
203 }
204 }
205 }
206
207 // Check zeta target
208 if (render_targets.depth_buffer_id) {
209 auto& zt_view = slot_image_views[render_targets.depth_buffer_id];
210 if (image_view.image_id == zt_view.image_id) {
211 return true;
212 }
213 }
214 }
215
216 return false;
217 }();
218
219 if (requires_barrier) {
220 runtime.BarrierFeedbackLoop();
221 }
222}
223
224template <class P>
189typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { 225typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
190 if (index > channel_state->graphics_sampler_table.Limit()) { 226 if (index > channel_state->graphics_sampler_table.Limit()) {
191 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); 227 LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 0720494e5..1a3308e2d 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -148,6 +148,9 @@ public:
148 /// Fill image_view_ids with the compute images in indices 148 /// Fill image_view_ids with the compute images in indices
149 void FillComputeImageViews(std::span<ImageViewInOut> views); 149 void FillComputeImageViews(std::span<ImageViewInOut> views);
150 150
151 /// Handle feedback loops during draws.
152 void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
153
151 /// Get the sampler from the graphics descriptor table in the specified index 154 /// Get the sampler from the graphics descriptor table in the specified index
152 Sampler* GetGraphicsSampler(u32 index); 155 Sampler* GetGraphicsSampler(u32 index);
153 156
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index f1071aa23..1463f157b 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -18,6 +18,8 @@
18#include "common/bit_util.h" 18#include "common/bit_util.h"
19#include "common/common_types.h" 19#include "common/common_types.h"
20#include "common/div_ceil.h" 20#include "common/div_ceil.h"
21#include "common/scratch_buffer.h"
22#include "common/settings.h"
21#include "video_core/compatible_formats.h" 23#include "video_core/compatible_formats.h"
22#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
23#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
@@ -28,6 +30,7 @@
28#include "video_core/texture_cache/samples_helper.h" 30#include "video_core/texture_cache/samples_helper.h"
29#include "video_core/texture_cache/util.h" 31#include "video_core/texture_cache/util.h"
30#include "video_core/textures/astc.h" 32#include "video_core/textures/astc.h"
33#include "video_core/textures/bcn.h"
31#include "video_core/textures/decoders.h" 34#include "video_core/textures/decoders.h"
32 35
33namespace VideoCommon { 36namespace VideoCommon {
@@ -585,6 +588,21 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
585 return info.size.width * BytesPerBlock(info.format); 588 return info.size.width * BytesPerBlock(info.format);
586 } 589 }
587 static constexpr Extent2D TILE_SIZE{1, 1}; 590 static constexpr Extent2D TILE_SIZE{1, 1};
591 if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() !=
592 Settings::AstcRecompression::Uncompressed) {
593 const u32 bpp_div =
594 Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2
595 : 1;
596 // NumBlocksPerLayer doesn't account for this correctly, so we have to do it manually.
597 u32 output_size = 0;
598 for (s32 i = 0; i < info.resources.levels; i++) {
599 const auto mip_size = AdjustMipSize(info.size, i);
600 const u32 plane_dim =
601 Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U);
602 output_size += (plane_dim * info.size.depth * info.resources.layers) / bpp_div;
603 }
604 return output_size;
605 }
588 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; 606 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK;
589} 607}
590 608
@@ -885,6 +903,7 @@ BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
885void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 903void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
886 std::span<BufferImageCopy> copies) { 904 std::span<BufferImageCopy> copies) {
887 u32 output_offset = 0; 905 u32 output_offset = 0;
906 Common::ScratchBuffer<u8> decode_scratch;
888 907
889 const Extent2D tile_size = DefaultBlockSize(info.format); 908 const Extent2D tile_size = DefaultBlockSize(info.format);
890 for (BufferImageCopy& copy : copies) { 909 for (BufferImageCopy& copy : copies) {
@@ -895,22 +914,58 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
895 ASSERT(copy.image_extent == mip_size); 914 ASSERT(copy.image_extent == mip_size);
896 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width)); 915 ASSERT(copy.buffer_row_length == Common::AlignUp(mip_size.width, tile_size.width));
897 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height)); 916 ASSERT(copy.buffer_image_height == Common::AlignUp(mip_size.height, tile_size.height));
898 if (IsPixelFormatASTC(info.format)) { 917
918 const auto input_offset = input.subspan(copy.buffer_offset);
919 copy.buffer_offset = output_offset;
920 copy.buffer_row_length = mip_size.width;
921 copy.buffer_image_height = mip_size.height;
922
923 const auto recompression_setting = Settings::values.astc_recompression.GetValue();
924 const bool astc = IsPixelFormatASTC(info.format);
925
926 if (astc && recompression_setting == Settings::AstcRecompression::Uncompressed) {
899 Tegra::Texture::ASTC::Decompress( 927 Tegra::Texture::ASTC::Decompress(
900 input.subspan(copy.buffer_offset), copy.image_extent.width, 928 input_offset, copy.image_extent.width, copy.image_extent.height,
901 copy.image_extent.height,
902 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width, 929 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
903 tile_size.height, output.subspan(output_offset)); 930 tile_size.height, output.subspan(output_offset));
931
932 output_offset += copy.image_extent.width * copy.image_extent.height *
933 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
934 } else if (astc) {
935 // BC1 uses 0.5 bytes per texel
936 // BC3 uses 1 byte per texel
937 const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
938 ? Tegra::Texture::BCN::CompressBC1
939 : Tegra::Texture::BCN::CompressBC3;
940 const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
941
942 const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
943 const u32 level_size = plane_dim * copy.image_extent.depth *
944 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
945 decode_scratch.resize_destructive(level_size);
946
947 Tegra::Texture::ASTC::Decompress(
948 input_offset, copy.image_extent.width, copy.image_extent.height,
949 copy.image_subresource.num_layers * copy.image_extent.depth, tile_size.width,
950 tile_size.height, decode_scratch);
951
952 compress(decode_scratch, copy.image_extent.width, copy.image_extent.height,
953 copy.image_subresource.num_layers * copy.image_extent.depth,
954 output.subspan(output_offset));
955
956 const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
957 Common::AlignUp(copy.image_extent.height, 4);
958
959 copy.buffer_size =
960 (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
961 bpp_div;
962 output_offset += static_cast<u32>(copy.buffer_size);
904 } else { 963 } else {
905 DecompressBC4(input.subspan(copy.buffer_offset), copy.image_extent, 964 DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset));
906 output.subspan(output_offset));
907 }
908 copy.buffer_offset = output_offset;
909 copy.buffer_row_length = mip_size.width;
910 copy.buffer_image_height = mip_size.height;
911 965
912 output_offset += copy.image_extent.width * copy.image_extent.height * 966 output_offset += copy.image_extent.width * copy.image_extent.height *
913 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; 967 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK;
968 }
914 } 969 }
915} 970}
916 971
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index a68bc0d77..fef0be31d 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -16,8 +16,8 @@
16#include "common/alignment.h" 16#include "common/alignment.h"
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "common/polyfill_ranges.h" 18#include "common/polyfill_ranges.h"
19#include "common/thread_worker.h"
20#include "video_core/textures/astc.h" 19#include "video_core/textures/astc.h"
20#include "video_core/textures/workers.h"
21 21
22class InputBitStream { 22class InputBitStream {
23public: 23public:
@@ -1656,8 +1656,7 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
1656 const u32 rows = Common::DivideUp(height, block_height); 1656 const u32 rows = Common::DivideUp(height, block_height);
1657 const u32 cols = Common::DivideUp(width, block_width); 1657 const u32 cols = Common::DivideUp(width, block_width);
1658 1658
1659 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, 1659 Common::ThreadWorker& workers{GetThreadWorkers()};
1660 "ASTCDecompress"};
1661 1660
1662 for (u32 z = 0; z < depth; ++z) { 1661 for (u32 z = 0; z < depth; ++z) {
1663 const u32 depth_offset = z * height * width * 4; 1662 const u32 depth_offset = z * height * width * 4;
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp
new file mode 100644
index 000000000..671212a49
--- /dev/null
+++ b/src/video_core/textures/bcn.cpp
@@ -0,0 +1,87 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <stb_dxt.h>
5#include <string.h>
6
7#include "common/alignment.h"
8#include "video_core/textures/bcn.h"
9#include "video_core/textures/workers.h"
10
11namespace Tegra::Texture::BCN {
12
13using BCNCompressor = void(u8* block_output, const u8* block_input, bool any_alpha);
14
15template <u32 BytesPerBlock, bool ThresholdAlpha = false>
16void CompressBCN(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
17 std::span<uint8_t> output, BCNCompressor f) {
18 constexpr u8 alpha_threshold = 128;
19 constexpr u32 bytes_per_px = 4;
20 const u32 plane_dim = width * height;
21
22 Common::ThreadWorker& workers{GetThreadWorkers()};
23
24 for (u32 z = 0; z < depth; z++) {
25 for (u32 y = 0; y < height; y += 4) {
26 auto compress_row = [z, y, width, height, plane_dim, f, data, output]() {
27 for (u32 x = 0; x < width; x += 4) {
28 // Gather 4x4 block of RGBA texels
29 u8 input_colors[4][4][4];
30 bool any_alpha = false;
31
32 for (u32 j = 0; j < 4; j++) {
33 for (u32 i = 0; i < 4; i++) {
34 const size_t coord =
35 (z * plane_dim + (y + j) * width + (x + i)) * bytes_per_px;
36
37 if ((x + i < width) && (y + j < height)) {
38 if constexpr (ThresholdAlpha) {
39 if (data[coord + 3] >= alpha_threshold) {
40 input_colors[j][i][0] = data[coord + 0];
41 input_colors[j][i][1] = data[coord + 1];
42 input_colors[j][i][2] = data[coord + 2];
43 input_colors[j][i][3] = 255;
44 } else {
45 any_alpha = true;
46 memset(input_colors[j][i], 0, bytes_per_px);
47 }
48 } else {
49 memcpy(input_colors[j][i], &data[coord], bytes_per_px);
50 }
51 } else {
52 memset(input_colors[j][i], 0, bytes_per_px);
53 }
54 }
55 }
56
57 const u32 bytes_per_row = BytesPerBlock * Common::DivideUp(width, 4U);
58 const u32 bytes_per_plane = bytes_per_row * Common::DivideUp(height, 4U);
59 f(output.data() + z * bytes_per_plane + (y / 4) * bytes_per_row +
60 (x / 4) * BytesPerBlock,
61 reinterpret_cast<u8*>(input_colors), any_alpha);
62 }
63 };
64 workers.QueueWork(std::move(compress_row));
65 }
66 workers.WaitForRequests();
67 }
68}
69
70void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
71 std::span<uint8_t> output) {
72 CompressBCN<8, true>(data, width, height, depth, output,
73 [](u8* block_output, const u8* block_input, bool any_alpha) {
74 stb_compress_bc1_block(block_output, block_input, any_alpha,
75 STB_DXT_NORMAL);
76 });
77}
78
79void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
80 std::span<uint8_t> output) {
81 CompressBCN<16, false>(data, width, height, depth, output,
82 [](u8* block_output, const u8* block_input, bool any_alpha) {
83 stb_compress_bc3_block(block_output, block_input, STB_DXT_NORMAL);
84 });
85}
86
87} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h
new file mode 100644
index 000000000..6464af885
--- /dev/null
+++ b/src/video_core/textures/bcn.h
@@ -0,0 +1,17 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include <span>
7#include <stdint.h>
8
9namespace Tegra::Texture::BCN {
10
11void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
12 std::span<uint8_t> output);
13
14void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth,
15 std::span<uint8_t> output);
16
17} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/textures/workers.cpp b/src/video_core/textures/workers.cpp
new file mode 100644
index 000000000..a71c305f4
--- /dev/null
+++ b/src/video_core/textures/workers.cpp
@@ -0,0 +1,15 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include "video_core/textures/workers.h"
5
6namespace Tegra::Texture {
7
8Common::ThreadWorker& GetThreadWorkers() {
9 static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
10 "ImageTranscode"};
11
12 return workers;
13}
14
15} // namespace Tegra::Texture
diff --git a/src/video_core/textures/workers.h b/src/video_core/textures/workers.h
new file mode 100644
index 000000000..008dd05b3
--- /dev/null
+++ b/src/video_core/textures/workers.h
@@ -0,0 +1,12 @@
1// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#pragma once
5
6#include "common/thread_worker.h"
7
8namespace Tegra::Texture {
9
10Common::ThreadWorker& GetThreadWorkers();
11
12}
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index f6e6f2736..3a7c2dedf 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -1001,6 +1001,11 @@ u64 Device::GetDeviceMemoryUsage() const {
1001} 1001}
1002 1002
1003void Device::CollectPhysicalMemoryInfo() { 1003void Device::CollectPhysicalMemoryInfo() {
1004 // Account for resolution scaling in memory limits
1005 const size_t normal_memory = 6_GiB;
1006 const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
1007
1008 // Calculate limits using memory budget
1004 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{}; 1009 VkPhysicalDeviceMemoryBudgetPropertiesEXT budget{};
1005 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT; 1010 budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;
1006 const auto mem_info = 1011 const auto mem_info =
@@ -1030,11 +1035,12 @@ void Device::CollectPhysicalMemoryInfo() {
1030 if (!is_integrated) { 1035 if (!is_integrated) {
1031 const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB); 1036 const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
1032 device_access_memory -= reserve_memory; 1037 device_access_memory -= reserve_memory;
1038 device_access_memory = std::min<u64>(device_access_memory, normal_memory + scaler_memory);
1033 return; 1039 return;
1034 } 1040 }
1035 const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage); 1041 const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
1036 device_access_memory = static_cast<u64>(std::max<s64>( 1042 device_access_memory = static_cast<u64>(std::max<s64>(
1037 std::min<s64>(available_memory - 8_GiB, 4_GiB), static_cast<s64>(local_memory))); 1043 std::min<s64>(available_memory - 8_GiB, 4_GiB), std::min<s64>(local_memory, 4_GiB)));
1038} 1044}
1039 1045
1040void Device::CollectToolingInfo() { 1046void Device::CollectToolingInfo() {
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 70737c54e..662651196 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -711,6 +711,7 @@ void Config::ReadRendererValues() {
711 ReadGlobalSetting(Settings::values.nvdec_emulation); 711 ReadGlobalSetting(Settings::values.nvdec_emulation);
712 ReadGlobalSetting(Settings::values.accelerate_astc); 712 ReadGlobalSetting(Settings::values.accelerate_astc);
713 ReadGlobalSetting(Settings::values.async_astc); 713 ReadGlobalSetting(Settings::values.async_astc);
714 ReadGlobalSetting(Settings::values.astc_recompression);
714 ReadGlobalSetting(Settings::values.use_reactive_flushing); 715 ReadGlobalSetting(Settings::values.use_reactive_flushing);
715 ReadGlobalSetting(Settings::values.shader_backend); 716 ReadGlobalSetting(Settings::values.shader_backend);
716 ReadGlobalSetting(Settings::values.use_asynchronous_shaders); 717 ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
@@ -1359,6 +1360,10 @@ void Config::SaveRendererValues() {
1359 Settings::values.nvdec_emulation.UsingGlobal()); 1360 Settings::values.nvdec_emulation.UsingGlobal());
1360 WriteGlobalSetting(Settings::values.accelerate_astc); 1361 WriteGlobalSetting(Settings::values.accelerate_astc);
1361 WriteGlobalSetting(Settings::values.async_astc); 1362 WriteGlobalSetting(Settings::values.async_astc);
1363 WriteSetting(QString::fromStdString(Settings::values.astc_recompression.GetLabel()),
1364 static_cast<u32>(Settings::values.astc_recompression.GetValue(global)),
1365 static_cast<u32>(Settings::values.astc_recompression.GetDefault()),
1366 Settings::values.astc_recompression.UsingGlobal());
1362 WriteGlobalSetting(Settings::values.use_reactive_flushing); 1367 WriteGlobalSetting(Settings::values.use_reactive_flushing);
1363 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), 1368 WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
1364 static_cast<u32>(Settings::values.shader_backend.GetValue(global)), 1369 static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h
index 7d26e9ab6..9cb9db6cf 100644
--- a/src/yuzu/configuration/config.h
+++ b/src/yuzu/configuration/config.h
@@ -208,3 +208,4 @@ Q_DECLARE_METATYPE(Settings::ScalingFilter);
208Q_DECLARE_METATYPE(Settings::AntiAliasing); 208Q_DECLARE_METATYPE(Settings::AntiAliasing);
209Q_DECLARE_METATYPE(Settings::RendererBackend); 209Q_DECLARE_METATYPE(Settings::RendererBackend);
210Q_DECLARE_METATYPE(Settings::ShaderBackend); 210Q_DECLARE_METATYPE(Settings::ShaderBackend);
211Q_DECLARE_METATYPE(Settings::AstcRecompression);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index 1f3e489d0..896863f87 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -27,6 +27,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
27 ui->async_present->setEnabled(runtime_lock); 27 ui->async_present->setEnabled(runtime_lock);
28 ui->renderer_force_max_clock->setEnabled(runtime_lock); 28 ui->renderer_force_max_clock->setEnabled(runtime_lock);
29 ui->async_astc->setEnabled(runtime_lock); 29 ui->async_astc->setEnabled(runtime_lock);
30 ui->astc_recompression_combobox->setEnabled(runtime_lock);
30 ui->use_asynchronous_shaders->setEnabled(runtime_lock); 31 ui->use_asynchronous_shaders->setEnabled(runtime_lock);
31 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock); 32 ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
32 ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock); 33 ui->enable_compute_pipelines_checkbox->setEnabled(runtime_lock);
@@ -47,14 +48,20 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
47 static_cast<int>(Settings::values.gpu_accuracy.GetValue())); 48 static_cast<int>(Settings::values.gpu_accuracy.GetValue()));
48 ui->anisotropic_filtering_combobox->setCurrentIndex( 49 ui->anisotropic_filtering_combobox->setCurrentIndex(
49 Settings::values.max_anisotropy.GetValue()); 50 Settings::values.max_anisotropy.GetValue());
51 ui->astc_recompression_combobox->setCurrentIndex(
52 static_cast<int>(Settings::values.astc_recompression.GetValue()));
50 } else { 53 } else {
51 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy); 54 ConfigurationShared::SetPerGameSetting(ui->gpu_accuracy, &Settings::values.gpu_accuracy);
52 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox, 55 ConfigurationShared::SetPerGameSetting(ui->anisotropic_filtering_combobox,
53 &Settings::values.max_anisotropy); 56 &Settings::values.max_anisotropy);
57 ConfigurationShared::SetPerGameSetting(ui->astc_recompression_combobox,
58 &Settings::values.astc_recompression);
54 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy, 59 ConfigurationShared::SetHighlight(ui->label_gpu_accuracy,
55 !Settings::values.gpu_accuracy.UsingGlobal()); 60 !Settings::values.gpu_accuracy.UsingGlobal());
56 ConfigurationShared::SetHighlight(ui->af_label, 61 ConfigurationShared::SetHighlight(ui->af_label,
57 !Settings::values.max_anisotropy.UsingGlobal()); 62 !Settings::values.max_anisotropy.UsingGlobal());
63 ConfigurationShared::SetHighlight(ui->label_astc_recompression,
64 !Settings::values.astc_recompression.UsingGlobal());
58 } 65 }
59} 66}
60 67
@@ -71,6 +78,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
71 ui->use_reactive_flushing, use_reactive_flushing); 78 ui->use_reactive_flushing, use_reactive_flushing);
72 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc, 79 ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
73 async_astc); 80 async_astc);
81 ConfigurationShared::ApplyPerGameSetting(&Settings::values.astc_recompression,
82 ui->astc_recompression_combobox);
74 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, 83 ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
75 ui->use_asynchronous_shaders, 84 ui->use_asynchronous_shaders,
76 use_asynchronous_shaders); 85 use_asynchronous_shaders);
@@ -105,6 +114,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
105 Settings::values.renderer_force_max_clock.UsingGlobal()); 114 Settings::values.renderer_force_max_clock.UsingGlobal());
106 ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal()); 115 ui->use_reactive_flushing->setEnabled(Settings::values.use_reactive_flushing.UsingGlobal());
107 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal()); 116 ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
117 ui->astc_recompression_combobox->setEnabled(
118 Settings::values.astc_recompression.UsingGlobal());
108 ui->use_asynchronous_shaders->setEnabled( 119 ui->use_asynchronous_shaders->setEnabled(
109 Settings::values.use_asynchronous_shaders.UsingGlobal()); 120 Settings::values.use_asynchronous_shaders.UsingGlobal());
110 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); 121 ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -144,6 +155,9 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
144 ConfigurationShared::SetColoredComboBox( 155 ConfigurationShared::SetColoredComboBox(
145 ui->anisotropic_filtering_combobox, ui->af_label, 156 ui->anisotropic_filtering_combobox, ui->af_label,
146 static_cast<int>(Settings::values.max_anisotropy.GetValue(true))); 157 static_cast<int>(Settings::values.max_anisotropy.GetValue(true)));
158 ConfigurationShared::SetColoredComboBox(
159 ui->astc_recompression_combobox, ui->label_astc_recompression,
160 static_cast<int>(Settings::values.astc_recompression.GetValue(true)));
147} 161}
148 162
149void ConfigureGraphicsAdvanced::ExposeComputeOption() { 163void ConfigureGraphicsAdvanced::ExposeComputeOption() {
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 9ef7c8e8f..37757a918 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -70,6 +70,50 @@
70 </widget> 70 </widget>
71 </item> 71 </item>
72 <item> 72 <item>
73 <widget class="QWidget" name="astc_recompression_layout" native="true">
74 <layout class="QHBoxLayout" name="horizontalLayout_3">
75 <property name="leftMargin">
76 <number>0</number>
77 </property>
78 <property name="topMargin">
79 <number>0</number>
80 </property>
81 <property name="rightMargin">
82 <number>0</number>
83 </property>
84 <property name="bottomMargin">
85 <number>0</number>
86 </property>
87 <item>
88 <widget class="QLabel" name="label_astc_recompression">
89 <property name="text">
90 <string>ASTC recompression:</string>
91 </property>
92 </widget>
93 </item>
94 <item>
95 <widget class="QComboBox" name="astc_recompression_combobox">
96 <item>
97 <property name="text">
98 <string>Uncompressed (Best quality)</string>
99 </property>
100 </item>
101 <item>
102 <property name="text">
103 <string>BC1 (Low quality)</string>
104 </property>
105 </item>
106 <item>
107 <property name="text">
108 <string>BC3 (Medium quality)</string>
109 </property>
110 </item>
111 </widget>
112 </item>
113 </layout>
114 </widget>
115 </item>
116 <item>
73 <widget class="QCheckBox" name="async_present"> 117 <widget class="QCheckBox" name="async_present">
74 <property name="text"> 118 <property name="text">
75 <string>Enable asynchronous presentation (Vulkan only)</string> 119 <string>Enable asynchronous presentation (Vulkan only)</string>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index dc9a3d68f..c5bc472ca 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -318,6 +318,7 @@ void Config::ReadValues() {
318 ReadSetting("Renderer", Settings::values.nvdec_emulation); 318 ReadSetting("Renderer", Settings::values.nvdec_emulation);
319 ReadSetting("Renderer", Settings::values.accelerate_astc); 319 ReadSetting("Renderer", Settings::values.accelerate_astc);
320 ReadSetting("Renderer", Settings::values.async_astc); 320 ReadSetting("Renderer", Settings::values.async_astc);
321 ReadSetting("Renderer", Settings::values.astc_recompression);
321 ReadSetting("Renderer", Settings::values.use_fast_gpu_time); 322 ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
322 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache); 323 ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
323 324
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 5e7c3ac04..644a30e59 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -360,6 +360,10 @@ accelerate_astc =
360# 0 (default): Off, 1: On 360# 0 (default): Off, 1: On
361async_astc = 361async_astc =
362 362
363# Recompress ASTC textures to a different format.
364# 0 (default): Uncompressed, 1: BC1 (Low quality), 2: BC3: (Medium quality)
365async_astc =
366
363# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value 367# Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
364# 0: Off, 1: On (default) 368# 0: Off, 1: On (default)
365use_speed_limit = 369use_speed_limit =