commit:    98685d48e3cb9f25f6919f004ec62cadf33afad2
tree:      9df2ce7f57370641589bfae7196c77b090bcbe0f /src/video_core
parent:    PR feedback + constification
parent:    Update translations (2023-07-01) (#10972)
author:    comex 2023-07-01 15:01:11 -0700
committer: comex 2023-07-01 15:01:11 -0700
Merge remote-tracking branch 'origin/master' into ssl
Diffstat (limited to 'src/video_core')
 src/video_core/CMakeLists.txt | 8
 src/video_core/buffer_cache/buffer_cache.h | 4
 src/video_core/buffer_cache/buffer_cache_base.h | 4
 src/video_core/cdma_pusher.h | 1
 src/video_core/dma_pusher.h | 8
 src/video_core/engines/maxwell_dma.cpp | 40
 src/video_core/gpu.cpp | 13
 src/video_core/host1x/codecs/h264.cpp | 4
 src/video_core/host_shaders/CMakeLists.txt | 1
 src/video_core/host_shaders/opengl_lmem_warmup.comp | 47
 src/video_core/memory_manager.cpp | 17
 src/video_core/memory_manager.h | 15
 src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 1
 src/video_core/renderer_opengl/gl_compute_pipeline.h | 5
 src/video_core/renderer_opengl/gl_device.cpp | 1
 src/video_core/renderer_opengl/gl_device.h | 5
 src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 1
 src/video_core/renderer_opengl/gl_graphics_pipeline.h | 5
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 6
 src/video_core/renderer_opengl/gl_shader_cache.cpp | 4
 src/video_core/renderer_opengl/gl_shader_manager.cpp | 13
 src/video_core/renderer_opengl/gl_shader_manager.h | 3
 src/video_core/renderer_vulkan/blit_image.cpp | 3
 src/video_core/renderer_vulkan/maxwell_to_vk.cpp | 20
 src/video_core/renderer_vulkan/renderer_vulkan.cpp | 34
 src/video_core/renderer_vulkan/renderer_vulkan.h | 5
 src/video_core/renderer_vulkan/vk_blit_screen.cpp | 18
 src/video_core/renderer_vulkan/vk_blit_screen.h | 3
 src/video_core/renderer_vulkan/vk_buffer_cache.cpp | 86
 src/video_core/renderer_vulkan/vk_buffer_cache.h | 2
 src/video_core/renderer_vulkan/vk_fsr.cpp | 4
 src/video_core/renderer_vulkan/vk_fsr.h | 1
 src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | 5
 src/video_core/renderer_vulkan/vk_master_semaphore.cpp | 22
 src/video_core/renderer_vulkan/vk_master_semaphore.h | 1
 src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 22
 src/video_core/renderer_vulkan/vk_present_manager.cpp | 4
 src/video_core/renderer_vulkan/vk_present_manager.h | 1
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 23
 src/video_core/renderer_vulkan/vk_smaa.cpp | 39
 src/video_core/renderer_vulkan/vk_smaa.h | 2
 src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 115
 src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 4
 src/video_core/renderer_vulkan/vk_texture_cache.cpp | 54
 src/video_core/renderer_vulkan/vk_texture_cache.h | 3
 src/video_core/renderer_vulkan/vk_turbo_mode.cpp | 10
 src/video_core/shader_cache.cpp | 4
 src/video_core/surface.cpp | 22
 src/video_core/surface.h | 2
 src/video_core/texture_cache/decode_bc.cpp | 129
 src/video_core/texture_cache/decode_bc.h (renamed from src/video_core/texture_cache/decode_bc4.h) | 6
 src/video_core/texture_cache/decode_bc4.cpp | 96
 src/video_core/texture_cache/image_base.h | 5
 src/video_core/texture_cache/texture_cache.h | 53
 src/video_core/texture_cache/texture_cache_base.h | 14
 src/video_core/texture_cache/util.cpp | 72
 src/video_core/texture_cache/util.h | 31
 src/video_core/textures/bcn.cpp | 1
 src/video_core/textures/bcn.h | 9
 src/video_core/transform_feedback.cpp | 8
 src/video_core/transform_feedback.h | 2
 src/video_core/vulkan_common/vulkan_debug_callback.cpp | 40
 src/video_core/vulkan_common/vulkan_debug_callback.h | 4
 src/video_core/vulkan_common/vulkan_device.cpp | 44
 src/video_core/vulkan_common/vulkan_device.h | 66
 src/video_core/vulkan_common/vulkan_instance.cpp | 58
 src/video_core/vulkan_common/vulkan_memory_allocator.cpp | 173
 src/video_core/vulkan_common/vulkan_memory_allocator.h | 28
 src/video_core/vulkan_common/vulkan_wrapper.cpp | 64
 src/video_core/vulkan_common/vulkan_wrapper.h | 170
 70 files changed, 1088 insertions(+), 700 deletions(-)
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index bf6439530..3b2fe01da 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -220,8 +220,8 @@ add_library(video_core STATIC
     surface.h
     texture_cache/accelerated_swizzle.cpp
     texture_cache/accelerated_swizzle.h
-    texture_cache/decode_bc4.cpp
-    texture_cache/decode_bc4.h
+    texture_cache/decode_bc.cpp
+    texture_cache/decode_bc.h
     texture_cache/descriptor_table.h
     texture_cache/formatter.cpp
     texture_cache/formatter.h
@@ -279,7 +279,7 @@ add_library(video_core STATIC
 create_target_directory_groups(video_core)

 target_link_libraries(video_core PUBLIC common core)
-target_link_libraries(video_core PUBLIC glad shader_recompiler stb)
+target_link_libraries(video_core PUBLIC glad shader_recompiler stb bc_decoder)

 if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID))
     add_dependencies(video_core ffmpeg-build)
@@ -291,7 +291,7 @@ target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})

 add_dependencies(video_core host_shaders)
 target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
-target_link_libraries(video_core PRIVATE sirit Vulkan::Headers)
+target_link_libraries(video_core PRIVATE sirit Vulkan::Headers vma)

 if (ENABLE_NSIGHT_AFTERMATH)
     if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 45977d578..58a45ab67 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
     if (has_new_downloads) {
         memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
     }
-    tmp_buffer.resize(amount);
+    tmp_buffer.resize_destructive(amount);
     cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
     cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
     return true;
@@ -1279,7 +1279,7 @@ template <class P>
 typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
                                                                        u32 wanted_size) {
     static constexpr int STREAM_LEAP_THRESHOLD = 16;
-    std::vector<BufferId> overlap_ids;
+    boost::container::small_vector<BufferId, 16> overlap_ids;
     VAddr begin = cpu_addr;
     VAddr end = cpu_addr + wanted_size;
     int stream_score = 0;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 63a120f7a..fe6068cfe 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
     using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;

     struct OverlapResult {
-        std::vector<BufferId> ids;
+        boost::container::small_vector<BufferId, 16> ids;
         VAddr begin;
         VAddr end;
         bool has_stream_leap = false;
@@ -582,7 +582,7 @@ private:
     BufferId inline_buffer_id;

     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
-    std::vector<u8> tmp_buffer;
+    Common::ScratchBuffer<u8> tmp_buffer;
 };

 } // namespace VideoCommon
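
NOTE: Several hunks in this merge replace one-shot std::vector<u8> temporaries with a long-lived Common::ScratchBuffer<u8>. The point of resize_destructive() is that it reallocates only when the request exceeds the current capacity and, unlike std::vector::resize(), neither preserves old contents nor value-initializes new ones, so hot paths skip a zero-fill on every call. A minimal sketch of the idea (hypothetical; yuzu's actual implementation lives in src/common/scratch_buffer.h):

    #include <cstddef>
    #include <memory>

    template <typename T>
    class ScratchBuffer {
    public:
        // Ensure room for `size` elements; previous contents are discarded.
        void resize_destructive(std::size_t size) {
            if (size > buffer_capacity) {
                // make_unique_for_overwrite skips value-initialization (C++20)
                buffer = std::make_unique_for_overwrite<T[]>(size);
                buffer_capacity = size;
            }
            last_requested_size = size;
        }
        T* data() noexcept { return buffer.get(); }
        std::size_t size() const noexcept { return last_requested_size; }

    private:
        std::unique_ptr<T[]> buffer;
        std::size_t buffer_capacity = 0;
        std::size_t last_requested_size = 0;
    };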
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 83112dfce..7d660af47 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -63,7 +63,6 @@ struct ChCommand {
 };

 using ChCommandHeaderList = std::vector<ChCommandHeader>;
-using ChCommandList = std::vector<ChCommand>;

 struct ThiRegisters {
     u32_le increment_syncpt{};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1cdb690ed..8a2784cdc 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -6,6 +6,7 @@
 #include <array>
 #include <span>
 #include <vector>
+#include <boost/container/small_vector.hpp>
 #include <queue>

 #include "common/bit_field.h"
@@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
 struct CommandList final {
     CommandList() = default;
     explicit CommandList(std::size_t size) : command_lists(size) {}
-    explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_)
+    explicit CommandList(
+        boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
         : prefetch_command_list{std::move(prefetch_command_list_)} {}

-    std::vector<CommandListHeader> command_lists;
-    std::vector<CommandHeader> prefetch_command_list;
+    boost::container::small_vector<CommandListHeader, 512> command_lists;
+    boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
 };

 /**
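
NOTE: boost::container::small_vector<T, N> keeps its first N elements in inline storage and only falls back to the heap past that, so command lists that usually stay under 512 entries cost no allocation per submission. In brief:

    #include <boost/container/small_vector.hpp>

    boost::container::small_vector<int, 4> v;
    v.push_back(1); // stored inline, no heap allocation yet
    v.push_back(2);
    for (int i = 0; i < 10; ++i) {
        v.push_back(i); // growing past the inline capacity (4 here) spills to
    }                   // the heap, with the same interface as std::vector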
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..a290d6ea7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
         if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
             ASSERT(regs.remap_const.component_size_minus_one == 3);
             accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
-            std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value);
+            read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
+            std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
+            std::ranges::fill(span, regs.remap_consta_value);
             memory_manager.WriteBlockUnsafe(regs.offset_out,
-                                            reinterpret_cast<u8*>(tmp_buffer.data()),
+                                            reinterpret_cast<u8*>(read_buffer.data()),
                                             regs.line_length_in * sizeof(u32));
         } else {
             memory_manager.FlushCaching();
@@ -126,32 +128,33 @@ void MaxwellDMA::Launch() {
             UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
             UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
             UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
-            std::vector<u8> tmp_buffer(16);
+            read_buffer.resize_destructive(16);
             for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
-                memory_manager.ReadBlockUnsafe(
-                    convert_linear_2_blocklinear_addr(regs.offset_in + offset),
-                    tmp_buffer.data(), tmp_buffer.size());
-                memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
-                                                tmp_buffer.size());
+                memory_manager.ReadBlock(
+                    convert_linear_2_blocklinear_addr(regs.offset_in + offset),
+                    read_buffer.data(), read_buffer.size());
+                memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
+                                                read_buffer.size());
             }
         } else if (is_src_pitch && !is_dst_pitch) {
             UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
             UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
             UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
-            std::vector<u8> tmp_buffer(16);
+            read_buffer.resize_destructive(16);
             for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
-                memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(),
-                                               tmp_buffer.size());
+                memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(),
+                                         read_buffer.size());
                 memory_manager.WriteBlockCached(
                     convert_linear_2_blocklinear_addr(regs.offset_out + offset),
-                    tmp_buffer.data(), tmp_buffer.size());
+                    read_buffer.data(), read_buffer.size());
             }
         } else {
             if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
-                std::vector<u8> tmp_buffer(regs.line_length_in);
-                memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
-                                               regs.line_length_in);
-                memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
+                read_buffer.resize_destructive(regs.line_length_in);
+                memory_manager.ReadBlock(regs.offset_in, read_buffer.data(),
+                                         regs.line_length_in,
+                                         VideoCommon::CacheType::NoBufferCache);
+                memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
                                                 regs.line_length_in);
             }
         }
@@ -171,7 +174,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     src_operand.address = regs.offset_in;

     DMA::BufferOperand dst_operand;
-    dst_operand.pitch = regs.pitch_out;
+    u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
+    dst_operand.pitch = abs_pitch_out;
     dst_operand.width = regs.line_length_in;
     dst_operand.height = regs.line_count;
     dst_operand.address = regs.offset_out;
@@ -218,7 +222,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
     const size_t src_size =
         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);

-    const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count;
+    const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
     read_buffer.resize_destructive(src_size);
     write_buffer.resize_destructive(dst_size);

@@ -227,7 +231,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {

     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
-                     regs.pitch_out);
+                     abs_pitch_out);

     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
 }
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 456f733cf..db385076d 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -193,18 +193,13 @@ struct GPU::Impl {
     }

     [[nodiscard]] u64 GetTicks() const {
-        // This values were reversed engineered by fincs from NVN
-        // The gpu clock is reported in units of 385/625 nanoseconds
-        constexpr u64 gpu_ticks_num = 384;
-        constexpr u64 gpu_ticks_den = 625;
-
-        u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count();
+        u64 gpu_tick = system.CoreTiming().GetGPUTicks();
+
         if (Settings::values.use_fast_gpu_time.GetValue()) {
-            nanoseconds /= 256;
+            gpu_tick /= 256;
         }
-        const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
-        const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
-        return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
+
+        return gpu_tick;
     }

     [[nodiscard]] bool IsAsync() const {
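
NOTE: For context, the removed conversion turned CPU-clock nanoseconds into GPU ticks at a 384/625 ratio (the removed comment's "385/625" disagrees with the 384 actually used by the constants), splitting the nanosecond count into quotient and remainder of the divisor so the intermediate multiply cannot overflow a u64; the replacement simply asks CoreTiming for GPU ticks directly. The removed math, restated as a standalone sketch:

    #include <cstdint>

    // ns * 384 / 625 without overflowing uint64_t for very large ns:
    // split ns into (ns / 625) and (ns % 625) before multiplying.
    uint64_t NanosecondsToGpuTicks(uint64_t ns) {
        constexpr uint64_t num = 384;
        constexpr uint64_t den = 625;
        const uint64_t quotient = ns / den;
        const uint64_t remainder = ns % den;
        return quotient * num + (remainder * num) / den;
    }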
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 6ce179167..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
 #include <array>
 #include <bit>

+#include "common/scratch_buffer.h"
 #include "common/settings.h"
 #include "video_core/host1x/codecs/h264.h"
 #include "video_core/host1x/host1x.h"
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
 }

 void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
-    std::vector<u8> scan(count);
+    static Common::ScratchBuffer<u8> scan{};
+    scan.resize_destructive(count);
     if (count == 16) {
         std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
     } else {
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
index 2442c3c29..e61d9af80 100644
--- a/src/video_core/host_shaders/CMakeLists.txt
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -33,6 +33,7 @@ set(SHADER_FILES
     opengl_fidelityfx_fsr.frag
     opengl_fidelityfx_fsr_easu.frag
     opengl_fidelityfx_fsr_rcas.frag
+    opengl_lmem_warmup.comp
     opengl_present.frag
     opengl_present.vert
     opengl_present_scaleforce.frag
diff --git a/src/video_core/host_shaders/opengl_lmem_warmup.comp b/src/video_core/host_shaders/opengl_lmem_warmup.comp
new file mode 100644
index 000000000..518268477
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_lmem_warmup.comp
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// This shader is a workaround for a quirk in NVIDIA OpenGL drivers
+// Shaders using local memory see a great performance benefit if a shader that was dispatched
+// before it had more local memory allocated.
+// This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that
+// subsequent shaders see the performance boost.
+
+// NOTE: This shader does no actual meaningful work and returns immediately,
+// it is simply a means to have the driver expect a shader using lots of local memory.
+
+#version 450
+
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+layout(location = 0) uniform uint uniform_data;
+
+layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image;
+
+#define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in Nvidia's GLSL compiler
+#define NUM_LMEM_CONSTANTS 1
+#define ARRAY_SIZE MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS
+
+uint lmem_0[ARRAY_SIZE];
+const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0));
+
+void main() {
+    const uint global_id = gl_GlobalInvocationID.x;
+    if (global_id <= 128) {
+        // Since the shader is called with a dispatch of 1x1x1
+        // This should always be the case, and this shader will not actually execute
+        return;
+    }
+    for (uint t = 0; t < uniform_data; t++) {
+        const uint offset = (t * uniform_data);
+        lmem_0[offset] = t;
+    }
+    const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x);
+    const uint value = lmem_0[offset];
+    const uint const_value = constant_values[offset / 4][offset % 4];
+    const uvec4 color = uvec4(value + const_value);
+
+    // A "side-effect" is needed so the variables don't get optimized out,
+    // but this should never execute so there should be no clobbering of previously bound state.
+    imageStore(dest_image, ivec3(gl_GlobalInvocationID), color);
+}
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 7b2cde7a7..45141e488 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
         [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
         SetEntry<false>(current_gpu_addr, entry_type);
         if (current_entry_type != entry_type) {
-            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size);
         }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
@@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr
         [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
         SetEntry<true>(current_gpu_addr, entry_type);
         if (current_entry_type != entry_type) {
-            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+            rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size);
         }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
@@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,

 void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
                               VideoCommon::CacheType which) {
-    std::vector<u8> tmp_buffer(size);
+    tmp_buffer.resize_destructive(size);
     ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);

     // The output block must be flushed in case it has data modified from the GPU.
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     return result;
 }

-std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
-    GPUVAddr gpu_addr, std::size_t size) const {
-    std::vector<std::pair<GPUVAddr, std::size_t>> result{};
+boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
+MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
+    boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
     GetSubmappedRangeImpl<true>(gpu_addr, size, result);
     return result;
 }
@@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
 template <bool is_gpu_address>
 void MemoryManager::GetSubmappedRangeImpl(
     GPUVAddr gpu_addr, std::size_t size,
-    std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
-        result) const {
+    boost::container::small_vector<
+        std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
+    const {
     std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
         last_segment{};
     std::optional<VAddr> old_page_addr{};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 794535122..4202c26ff 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,10 +8,12 @@
 #include <mutex>
 #include <optional>
 #include <vector>
+#include <boost/container/small_vector.hpp>

 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
 #include "common/range_map.h"
+#include "common/scratch_buffer.h"
 #include "common/virtual_buffer.h"
 #include "video_core/cache_types.h"
 #include "video_core/pte_kind.h"
@@ -107,8 +109,8 @@ public:
      * if the region is continuous, a single pair will be returned. If it's unmapped, an empty
      * vector will be returned;
      */
-    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
-                                                                    std::size_t size) const;
+    boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
+        GPUVAddr gpu_addr, std::size_t size) const;

     GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
                  PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -165,7 +167,8 @@ private:
     template <bool is_gpu_address>
     void GetSubmappedRangeImpl(
         GPUVAddr gpu_addr, std::size_t size,
-        std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>&
-            result) const;
+        boost::container::small_vector<
+            std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
+            result) const;

     Core::System& system;
@@ -215,8 +218,8 @@ private:
     Common::VirtualBuffer<u32> big_page_table_cpu;

     std::vector<u64> big_page_continuous;
-    std::vector<std::pair<VAddr, std::size_t>> page_stash{};
-    std::vector<std::pair<VAddr, std::size_t>> page_stash2{};
+    boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
+    boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};

     mutable std::mutex guard;

@@ -226,6 +229,8 @@ private:
     std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;

     static std::atomic<size_t> unique_identifier_generator;
+
+    Common::ScratchBuffer<u8> tmp_buffer;
 };

 } // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
index 3151c0db8..f9ca55c36 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp
@@ -63,6 +63,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
     writes_global_memory = !use_storage_buffers &&
                            std::ranges::any_of(info.storage_buffers_descriptors,
                                                [](const auto& desc) { return desc.is_written; });
+    uses_local_memory = info.uses_local_memory;
     if (force_context_flush) {
         std::scoped_lock lock{built_mutex};
         built_fence.Create();
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h
index 9bcc72b59..c26b4fa5e 100644
--- a/src/video_core/renderer_opengl/gl_compute_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h
@@ -59,6 +59,10 @@ public:
         return writes_global_memory;
     }

+    [[nodiscard]] bool UsesLocalMemory() const noexcept {
+        return uses_local_memory;
+    }
+
     void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_,
                    Tegra::MemoryManager* gpu_memory_) {
         kepler_compute = kepler_compute_;
@@ -84,6 +88,7 @@ private:

     bool use_storage_buffers{};
     bool writes_global_memory{};
+    bool uses_local_memory{};

     std::mutex built_mutex;
     std::condition_variable built_condvar;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 03d234f2f..33e63c17d 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -194,6 +194,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) {
             has_bool_ref_bug = true;
         }
     }
+    has_lmem_perf_bug = is_nvidia;

     strict_context_required = emu_window.StrictContextRequired();
     // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation.
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ad27264e5..a5a6bbbba 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -192,6 +192,10 @@ public:
         return supports_conditional_barriers;
     }

+    bool HasLmemPerfBug() const {
+        return has_lmem_perf_bug;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestPreciseBug();
@@ -238,6 +242,7 @@ private:
     bool can_report_memory{};
     bool strict_context_required{};
     bool supports_conditional_barriers{};
+    bool has_lmem_perf_bug{};

     std::string vendor_name;
 };
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
index c58f760b8..23a48c6fe 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp
@@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c

         writes_global_memory |= std::ranges::any_of(
             info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
+        uses_local_memory |= info.uses_local_memory;
     }
     ASSERT(num_textures <= MAX_TEXTURES);
     ASSERT(num_images <= MAX_IMAGES);
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
index 7bab3be0a..7b3d7eae8 100644
--- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h
+++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h
@@ -98,6 +98,10 @@ public:
         return writes_global_memory;
     }

+    [[nodiscard]] bool UsesLocalMemory() const noexcept {
+        return uses_local_memory;
+    }
+
     [[nodiscard]] bool IsBuilt() noexcept;

     template <typename Spec>
@@ -146,6 +150,7 @@ private:

     bool use_storage_buffers{};
     bool writes_global_memory{};
+    bool uses_local_memory{};

     static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
     GLsizei num_xfb_attribs{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index fc711c44a..edf527f2d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -222,6 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
     gpu.TickWork();

     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+    if (pipeline->UsesLocalMemory()) {
+        program_manager.LocalMemoryWarmup();
+    }
     pipeline->SetEngine(maxwell3d, gpu_memory);
     pipeline->Configure(is_indexed);

@@ -371,6 +374,9 @@ void RasterizerOpenGL::DispatchCompute() {
     if (!pipeline) {
         return;
     }
+    if (pipeline->UsesLocalMemory()) {
+        program_manager.LocalMemoryWarmup();
+    }
     pipeline->SetEngine(kepler_compute, gpu_memory);
     pipeline->Configure();
     const auto& qmd{kepler_compute->launch_description};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
     case Shader::Stage::VertexB:
     case Shader::Stage::Geometry:
         if (!use_assembly_shaders && key.xfb_enabled != 0) {
-            info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+            auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
+            info.xfb_varyings = varyings;
+            info.xfb_count = count;
         }
         break;
     case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 98841ae65..03d4b9d06 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -3,7 +3,9 @@

 #include <glad/glad.h>

+#include "video_core/host_shaders/opengl_lmem_warmup_comp.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_shader_util.h"

 namespace OpenGL {

@@ -17,6 +19,10 @@ ProgramManager::ProgramManager(const Device& device) {
     if (device.UseAssemblyShaders()) {
         glEnable(GL_COMPUTE_PROGRAM_NV);
     }
+    if (device.HasLmemPerfBug()) {
+        lmem_warmup_program =
+            CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER);
+    }
 }

 void ProgramManager::BindComputeProgram(GLuint program) {
@@ -98,6 +104,13 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU

 void ProgramManager::RestoreGuestCompute() {}

+void ProgramManager::LocalMemoryWarmup() {
+    if (lmem_warmup_program.handle != 0) {
+        BindComputeProgram(lmem_warmup_program.handle);
+        glDispatchCompute(1, 1, 1);
+    }
+}
+
 void ProgramManager::BindPipeline() {
     if (!is_pipeline_bound) {
         is_pipeline_bound = true;
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 07ffab77f..852d8c88e 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -30,6 +30,8 @@ public:

     void RestoreGuestCompute();

+    void LocalMemoryWarmup();
+
 private:
     void BindPipeline();

@@ -44,6 +46,7 @@ private:
     u32 current_stage_mask = 0;
     std::array<GLuint, NUM_STAGES> current_programs{};
     GLuint current_assembly_compute_program = 0;
+    OGLProgram lmem_warmup_program;
 };

 } // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index cf2964a3f..28d4b15a0 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -495,6 +495,9 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
                                    const Region2D& dst_region, const Region2D& src_region,
                                    Tegra::Engines::Fermi2D::Filter filter,
                                    Tegra::Engines::Fermi2D::Operation operation) {
+    if (!device.IsExtShaderStencilExportSupported()) {
+        return;
+    }
     ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
     ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
     const BlitImagePipelineKey key{
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 9a0b10568..a8540339d 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -259,6 +259,26 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with
             break;
         }
     }
+    // Transcode on hardware that doesn't support BCn natively
+    if (!device.IsOptimalBcnSupported() && VideoCore::Surface::IsPixelFormatBCn(pixel_format)) {
+        const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
+        if (pixel_format == PixelFormat::BC4_SNORM) {
+            tuple.format = VK_FORMAT_R8_SNORM;
+        } else if (pixel_format == PixelFormat::BC4_UNORM) {
+            tuple.format = VK_FORMAT_R8_UNORM;
+        } else if (pixel_format == PixelFormat::BC5_SNORM) {
+            tuple.format = VK_FORMAT_R8G8_SNORM;
+        } else if (pixel_format == PixelFormat::BC5_UNORM) {
+            tuple.format = VK_FORMAT_R8G8_UNORM;
+        } else if (pixel_format == PixelFormat::BC6H_SFLOAT ||
+                   pixel_format == PixelFormat::BC6H_UFLOAT) {
+            tuple.format = VK_FORMAT_R16G16B16A16_SFLOAT;
+        } else if (is_srgb) {
+            tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32;
+        } else {
+            tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+        }
+    }
     const bool attachable = (tuple.usage & Attachable) != 0;
     const bool storage = (tuple.usage & Storage) != 0;

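
NOTE: Worth keeping in mind about this fallback: decoding BCn on the CPU inflates texture memory. BC1/BC4 pack a 4x4 texel tile into 8 bytes and BC2/BC3/BC5/BC6H/BC7 into 16, while the RGBA8 fallback costs 4 bytes per texel (64 per tile), so BC1-class data grows 8x and BC7-class 4x on GPUs without native BCn support. Rough arithmetic (helper names are illustrative, not yuzu API):

    #include <cstdint>

    // Bytes for a width x height mip level, 4x4 blocks rounded up.
    uint64_t Bc1Bytes(uint32_t w, uint32_t h) {
        return uint64_t((w + 3) / 4) * ((h + 3) / 4) * 8; // 8-byte BC1 blocks
    }
    uint64_t Rgba8Bytes(uint32_t w, uint32_t h) {
        return uint64_t(w) * h * 4; // 4 bytes per texel
    }
    // Example: 1024x1024 -> Bc1Bytes = 512 KiB, Rgba8Bytes = 4 MiB (8x larger).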
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 77128c6e2..454bb66a4 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -12,6 +12,7 @@
 #include <fmt/format.h>

 #include "common/logging/log.h"
+#include "common/polyfill_ranges.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
 #include "common/telemetry.h"
@@ -65,6 +66,21 @@ std::string BuildCommaSeparatedExtensions(
     return fmt::format("{}", fmt::join(available_extensions, ","));
 }

+DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) {
+    if (!Settings::values.renderer_debug) {
+        return DebugCallback{};
+    }
+    const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
+    const auto it = std::ranges::find_if(*properties, [](const auto& prop) {
+        return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0;
+    });
+    if (it != properties->end()) {
+        return CreateDebugUtilsCallback(instance);
+    } else {
+        return CreateDebugReportCallback(instance);
+    }
+}
+
 } // Anonymous namespace

 Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
@@ -87,10 +103,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
       cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())),
       instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
                               Settings::values.renderer_debug.GetValue())),
-      debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
+      debug_callback(MakeDebugCallback(instance, dld)),
       surface(CreateSurface(instance, render_window.GetWindowInfo())),
-      device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
-      state_tracker(), scheduler(device, state_tracker),
+      device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
+      scheduler(device, state_tracker),
       swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
                 render_window.GetFramebufferLayout().height, false),
       present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
@@ -173,7 +189,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
         return;
     }
     const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
-    vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
+    vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .pNext = nullptr,
         .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -196,7 +212,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
         .pQueueFamilyIndices = nullptr,
         .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
     });
-    const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);

     const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -234,8 +249,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
         .queueFamilyIndexCount = 0,
         .pQueueFamilyIndices = nullptr,
     };
-    const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
-    MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
+    const vk::Buffer dst_buffer =
+        memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download);

     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([&](vk::CommandBuffer cmdbuf) {
@@ -309,8 +324,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
     scheduler.Finish();

     // Copy backing image data to the QImage screenshot buffer
-    const auto dst_memory_map = dst_buffer_memory.Map();
-    std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
+    dst_buffer.Invalidate();
+    std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
+                dst_buffer.Mapped().size());
     renderer_settings.screenshot_complete_callback(false);
     renderer_settings.screenshot_requested = false;
 }
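
NOTE: This hunk and the vk_blit_screen/vk_buffer_cache hunks below all follow the same commit-wide migration to the Vulkan Memory Allocator (hence the new vma link dependency in CMakeLists.txt): instead of creating a VkBuffer/VkImage on the device and pairing it with a separate MemoryCommit, memory_allocator.CreateBuffer/CreateImage return an object whose memory is already allocated, bound and, for host-visible usages, persistently mapped, which is why the screenshot path can call dst_buffer.Invalidate() and dst_buffer.Mapped() with no explicit Map(). A sketch of the equivalent raw VMA calls, assuming an already-created VmaAllocator (yuzu wraps this in its own vk:: RAII types):

    #include <vk_mem_alloc.h>

    // Host-visible download buffer with memory bound and persistently mapped,
    // roughly what memory_allocator.CreateBuffer(ci, MemoryUsage::Download) does.
    VkBuffer CreateDownloadBuffer(VmaAllocator allocator, const VkBufferCreateInfo& ci,
                                  VmaAllocation& out_allocation, void*& out_mapped) {
        VmaAllocationCreateInfo alloc_ci{};
        alloc_ci.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
                         VMA_ALLOCATION_CREATE_MAPPED_BIT; // mapped for the allocation's lifetime
        alloc_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;

        VkBuffer buffer = VK_NULL_HANDLE;
        VmaAllocationInfo info{};
        vmaCreateBuffer(allocator, &ci, &alloc_ci, &buffer, &out_allocation, &info);
        out_mapped = info.pMappedData; // valid until vmaDestroyBuffer
        return buffer;
    }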
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index b2e8cbd1b..ca22c0baa 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -5,6 +5,7 @@

 #include <memory>
 #include <string>
+#include <variant>

 #include "common/dynamic_library.h"
 #include "video_core/renderer_base.h"
@@ -33,6 +34,8 @@ class GPU;

 namespace Vulkan {

+using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>;
+
 Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld,
                     VkSurfaceKHR surface);

@@ -71,7 +74,7 @@ private:
     vk::InstanceDispatch dld;

     vk::Instance instance;
-    vk::DebugUtilsMessenger debug_callback;
+    DebugCallback debug_callback;
     vk::SurfaceKHR surface;

     ScreenInfo screen_info;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index acb143fc7..ad3b29f0e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -162,7 +162,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
     SetUniformData(data, layout);
     SetVertexData(data, framebuffer, layout);

-    const std::span<u8> mapped_span = buffer_commit.Map();
+    const std::span<u8> mapped_span = buffer.Mapped();
     std::memcpy(mapped_span.data(), &data, sizeof(data));

     if (!use_accelerated) {
@@ -1071,14 +1071,9 @@ void BlitScreen::ReleaseRawImages() {
         scheduler.Wait(tick);
     }
     raw_images.clear();
-    raw_buffer_commits.clear();
-
     aa_image_view.reset();
     aa_image.reset();
-    aa_commit = MemoryCommit{};
-
     buffer.reset();
-    buffer_commit = MemoryCommit{};
 }

 void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@@ -1094,20 +1089,18 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer
         .pQueueFamilyIndices = nullptr,
     };

-    buffer = device.GetLogical().CreateBuffer(ci);
-    buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
+    buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload);
 }

 void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
     raw_images.resize(image_count);
     raw_image_views.resize(image_count);
-    raw_buffer_commits.resize(image_count);

     const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
                                   u32 down_shift = 0) {
         u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
                                                : VK_IMAGE_USAGE_TRANSFER_DST_BIT;
-        return device.GetLogical().CreateImage(VkImageCreateInfo{
+        return memory_allocator.CreateImage(VkImageCreateInfo{
             .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
             .pNext = nullptr,
             .flags = 0,
@@ -1130,9 +1123,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
             .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
         });
     };
-    const auto create_commit = [&](vk::Image& image) {
-        return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
-    };
     const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) {
         return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@@ -1161,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {

     for (size_t i = 0; i < image_count; ++i) {
         raw_images[i] = create_image();
-        raw_buffer_commits[i] = create_commit(raw_images[i]);
         raw_image_views[i] = create_image_view(raw_images[i]);
     }

@@ -1169,7 +1158,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
     const u32 up_scale = Settings::values.resolution_info.up_scale;
     const u32 down_shift = Settings::values.resolution_info.down_shift;
     aa_image = create_image(true, up_scale, down_shift);
-    aa_commit = create_commit(aa_image);
     aa_image_view = create_image_view(aa_image, true);
     VkExtent2D size{
         .width = (up_scale * framebuffer.width) >> down_shift,
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 68ec20253..8365b5668 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -142,13 +142,11 @@ private:
     vk::Sampler sampler;

     vk::Buffer buffer;
-    MemoryCommit buffer_commit;

     std::vector<u64> resource_ticks;

     std::vector<vk::Image> raw_images;
     std::vector<vk::ImageView> raw_image_views;
-    std::vector<MemoryCommit> raw_buffer_commits;

     vk::DescriptorPool aa_descriptor_pool;
     vk::DescriptorSetLayout aa_descriptor_set_layout;
@@ -159,7 +157,6 @@ private:
159 vk::DescriptorSets aa_descriptor_sets; 157 vk::DescriptorSets aa_descriptor_sets;
160 vk::Image aa_image; 158 vk::Image aa_image;
161 vk::ImageView aa_image_view; 159 vk::ImageView aa_image_view;
162 MemoryCommit aa_commit;
163 160
164 u32 raw_width = 0; 161 u32 raw_width = 0;
165 u32 raw_height = 0; 162 u32 raw_height = 0;
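
The MemoryCommit members removed above are the visible half of this change: vk::Image and vk::Buffer handles returned by MemoryAllocator::CreateImage/CreateBuffer now own their backing memory, so the create-then-Commit two-step disappears. A minimal sketch of what such a combined helper has to do under the hood, in raw Vulkan; FindMemoryType is a hypothetical stand-in for the allocator's type selection, and a real allocator sub-allocates rather than calling vkAllocateMemory per resource:

    #include <vulkan/vulkan.h>

    // Sketch only: create an image and bind freshly allocated device-local
    // memory in one step, approximating what a CreateImage helper wraps.
    VkImage CreateBoundImage(VkDevice device, VkPhysicalDevice physical,
                             const VkImageCreateInfo& ci, VkDeviceMemory& out_memory) {
        VkImage image = VK_NULL_HANDLE;
        vkCreateImage(device, &ci, nullptr, &image);

        VkMemoryRequirements reqs{};
        vkGetImageMemoryRequirements(device, image, &reqs);

        const VkMemoryAllocateInfo alloc_info{
            .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
            .pNext = nullptr,
            .allocationSize = reqs.size,
            .memoryTypeIndex = FindMemoryType(physical, reqs.memoryTypeBits,
                                              VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
        };
        vkAllocateMemory(device, &alloc_info, nullptr, &out_memory);
        vkBindImageMemory(device, image, out_memory, 0);
        return image;
    }
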
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index e30fcb1ed..b72f95235 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -50,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
50 } 50 }
51} 51}
52 52
53vk::Buffer CreateBuffer(const Device& device, u64 size) { 53vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) {
54 VkBufferUsageFlags flags = 54 VkBufferUsageFlags flags =
55 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 55 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
56 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | 56 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
@@ -60,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
60 if (device.IsExtTransformFeedbackSupported()) { 60 if (device.IsExtTransformFeedbackSupported()) {
61 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; 61 flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
62 } 62 }
63 return device.GetLogical().CreateBuffer({ 63 const VkBufferCreateInfo buffer_ci = {
64 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 64 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
65 .pNext = nullptr, 65 .pNext = nullptr,
66 .flags = 0, 66 .flags = 0,
@@ -69,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
69 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 69 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
70 .queueFamilyIndexCount = 0, 70 .queueFamilyIndexCount = 0,
71 .pQueueFamilyIndices = nullptr, 71 .pQueueFamilyIndices = nullptr,
72 }); 72 };
73 return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
73} 74}
74} // Anonymous namespace 75} // Anonymous namespace
75 76
@@ -79,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
79Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, 80Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
80 VAddr cpu_addr_, u64 size_bytes_) 81 VAddr cpu_addr_, u64 size_bytes_)
81 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), 82 : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
82 device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, 83 device{&runtime.device}, buffer{
83 commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { 84 CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
84 if (runtime.device.HasDebuggingToolAttached()) { 85 if (runtime.device.HasDebuggingToolAttached()) {
85 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); 86 buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
86 } 87 }
@@ -138,7 +139,7 @@ public:
138 const u32 num_first_offset_copies = 4; 139 const u32 num_first_offset_copies = 4;
139 const size_t bytes_per_index = BytesPerIndex(index_type); 140 const size_t bytes_per_index = BytesPerIndex(index_type);
140 const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; 141 const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
141 buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 142 const VkBufferCreateInfo buffer_ci = {
142 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 143 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
143 .pNext = nullptr, 144 .pNext = nullptr,
144 .flags = 0, 145 .flags = 0,
@@ -147,14 +148,21 @@ public:
147 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 148 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
148 .queueFamilyIndexCount = 0, 149 .queueFamilyIndexCount = 0,
149 .pQueueFamilyIndices = nullptr, 150 .pQueueFamilyIndices = nullptr,
150 }); 151 };
152 buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
151 if (device.HasDebuggingToolAttached()) { 153 if (device.HasDebuggingToolAttached()) {
152 buffer.SetObjectNameEXT("Quad LUT"); 154 buffer.SetObjectNameEXT("Quad LUT");
153 } 155 }
154 memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
155 156
156 const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); 157 const bool host_visible = buffer.IsHostVisible();
157 u8* staging_data = staging.mapped_span.data(); 158 const StagingBufferRef staging = [&] {
159 if (host_visible) {
160 return StagingBufferRef{};
161 }
162 return staging_pool.Request(size_bytes, MemoryUsage::Upload);
163 }();
164
165 u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
158 const size_t quad_size = bytes_per_index * 6; 166 const size_t quad_size = bytes_per_index * 6;
159 167
160 for (u32 first = 0; first < num_first_offset_copies; ++first) { 168 for (u32 first = 0; first < num_first_offset_copies; ++first) {
@@ -164,29 +172,33 @@ public:
164 } 172 }
165 } 173 }
166 174
167 scheduler.RequestOutsideRenderPassOperationContext(); 175 if (!host_visible) {
168 scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, 176 scheduler.RequestOutsideRenderPassOperationContext();
169 dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { 177 scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
170 const VkBufferCopy copy{ 178 dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
171 .srcOffset = src_offset, 179 const VkBufferCopy copy{
172 .dstOffset = 0, 180 .srcOffset = src_offset,
173 .size = size_bytes, 181 .dstOffset = 0,
174 }; 182 .size = size_bytes,
175 const VkBufferMemoryBarrier write_barrier{ 183 };
176 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 184 const VkBufferMemoryBarrier write_barrier{
177 .pNext = nullptr, 185 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
178 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, 186 .pNext = nullptr,
179 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, 187 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
180 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 188 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
181 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, 189 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
182 .buffer = dst_buffer, 190 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
183 .offset = 0, 191 .buffer = dst_buffer,
184 .size = size_bytes, 192 .offset = 0,
185 }; 193 .size = size_bytes,
186 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); 194 };
187 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, 195 cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
188 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); 196 cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
189 }); 197 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
198 });
199 } else {
200 buffer.Flush();
201 }
190 } 202 }
191 203
192 void BindBuffer(u32 first) { 204 void BindBuffer(u32 first) {
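
The quad LUT upload in the hunk above gains a fast path: a MemoryUsage::DeviceLocal allocation can land in memory that is also host visible (integrated GPUs, resizable BAR), in which case the staging copy is pure overhead. A condensed sketch of the branch, assuming the vk::Buffer wrapper from this change with IsHostVisible(), Mapped() and Flush(); FillIndices and RecordCopy are hypothetical stand-ins for the fill loop and scheduler recording shown above:

    if (buffer.IsHostVisible()) {
        // Write straight into the buffer; Flush() is required in case the
        // selected memory type is not HOST_COHERENT.
        FillIndices(buffer.Mapped());
        buffer.Flush();
    } else {
        // Classic path: fill a host-visible staging buffer, then record a GPU
        // copy followed by a transfer-to-index-read barrier.
        const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
        FillIndices(staging.mapped_span);
        RecordCopy(staging, *buffer, size_bytes);
    }
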
@@ -361,7 +373,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
361 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, 373 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
362 }; 374 };
363 // Measuring a popular game, this number never exceeds the specified size once data is warmed up 375 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
364 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); 376 boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
365 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); 377 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
366 scheduler.RequestOutsideRenderPassOperationContext(); 378 scheduler.RequestOutsideRenderPassOperationContext();
367 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { 379 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
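
Raising the inline capacity from 3 to 8 keeps the measured worst case out of the allocator entirely: boost::container::small_vector stores up to N elements in the object itself and only touches the heap once the count exceeds the template parameter. A self-contained illustration:

    #include <boost/container/small_vector.hpp>

    boost::container::small_vector<int, 8> v; // storage for 8 ints lives inline
    for (int i = 0; i < 8; ++i) {
        v.push_back(i); // no heap allocation
    }
    v.push_back(8);     // ninth element spills to the heap transparently
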
@@ -578,7 +590,8 @@ void BufferCacheRuntime::ReserveNullBuffer() {
578 .pNext = nullptr, 590 .pNext = nullptr,
579 .flags = 0, 591 .flags = 0,
580 .size = 4, 592 .size = 4,
581 .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 593 .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
594 VK_BUFFER_USAGE_TRANSFER_DST_BIT,
582 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 595 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
583 .queueFamilyIndexCount = 0, 596 .queueFamilyIndexCount = 0,
584 .pQueueFamilyIndices = nullptr, 597 .pQueueFamilyIndices = nullptr,
@@ -587,11 +600,10 @@ void BufferCacheRuntime::ReserveNullBuffer() {
587 create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; 600 create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
588 } 601 }
589 create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; 602 create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
590 null_buffer = device.GetLogical().CreateBuffer(create_info); 603 null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
591 if (device.HasDebuggingToolAttached()) { 604 if (device.HasDebuggingToolAttached()) {
592 null_buffer.SetObjectNameEXT("Null buffer"); 605 null_buffer.SetObjectNameEXT("Null buffer");
593 } 606 }
594 null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
595 607
596 scheduler.RequestOutsideRenderPassOperationContext(); 608 scheduler.RequestOutsideRenderPassOperationContext();
597 scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { 609 scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index cdeef8846..95446c732 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -48,7 +48,6 @@ private:
48 48
49 const Device* device{}; 49 const Device* device{};
50 vk::Buffer buffer; 50 vk::Buffer buffer;
51 MemoryCommit commit;
52 std::vector<BufferView> views; 51 std::vector<BufferView> views;
53}; 52};
54 53
@@ -142,7 +141,6 @@ private:
142 std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; 141 std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
143 142
144 vk::Buffer null_buffer; 143 vk::Buffer null_buffer;
145 MemoryCommit null_buffer_commit;
146 144
147 std::unique_ptr<Uint8Pass> uint8_pass; 145 std::unique_ptr<Uint8Pass> uint8_pass;
148 QuadIndexedPass quad_index_pass; 146 QuadIndexedPass quad_index_pass;
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp
index df972cd54..9bcdca2fb 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.cpp
+++ b/src/video_core/renderer_vulkan/vk_fsr.cpp
@@ -205,10 +205,9 @@ void FSR::CreateDescriptorSets() {
205void FSR::CreateImages() { 205void FSR::CreateImages() {
206 images.resize(image_count * 2); 206 images.resize(image_count * 2);
207 image_views.resize(image_count * 2); 207 image_views.resize(image_count * 2);
208 buffer_commits.resize(image_count * 2);
209 208
210 for (size_t i = 0; i < image_count * 2; ++i) { 209 for (size_t i = 0; i < image_count * 2; ++i) {
211 images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ 210 images[i] = memory_allocator.CreateImage(VkImageCreateInfo{
212 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 211 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
213 .pNext = nullptr, 212 .pNext = nullptr,
214 .flags = 0, 213 .flags = 0,
@@ -231,7 +230,6 @@ void FSR::CreateImages() {
231 .pQueueFamilyIndices = nullptr, 230 .pQueueFamilyIndices = nullptr,
232 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 231 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
233 }); 232 });
234 buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
235 image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ 233 image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
236 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 234 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
237 .pNext = nullptr, 235 .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h
index 5d872861f..8bb9fc23a 100644
--- a/src/video_core/renderer_vulkan/vk_fsr.h
+++ b/src/video_core/renderer_vulkan/vk_fsr.h
@@ -47,7 +47,6 @@ private:
47 vk::Sampler sampler; 47 vk::Sampler sampler;
48 std::vector<vk::Image> images; 48 std::vector<vk::Image> images;
49 std::vector<vk::ImageView> image_views; 49 std::vector<vk::ImageView> image_views;
50 std::vector<MemoryCommit> buffer_commits;
51}; 50};
52 51
53} // namespace Vulkan 52} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index c1595642e..ad35cacac 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -652,13 +652,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
652 .pNext = nullptr, 652 .pNext = nullptr,
653 .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE, 653 .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE,
654 }; 654 };
655 const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
655 VkPipelineViewportStateCreateInfo viewport_ci{ 656 VkPipelineViewportStateCreateInfo viewport_ci{
656 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, 657 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
657 .pNext = nullptr, 658 .pNext = nullptr,
658 .flags = 0, 659 .flags = 0,
659 .viewportCount = Maxwell::NumViewports, 660 .viewportCount = num_viewports,
660 .pViewports = nullptr, 661 .pViewports = nullptr,
661 .scissorCount = Maxwell::NumViewports, 662 .scissorCount = num_viewports,
662 .pScissors = nullptr, 663 .pScissors = nullptr,
663 }; 664 };
664 if (device.IsNvViewportSwizzleSupported()) { 665 if (device.IsNvViewportSwizzleSupported()) {
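
Hard-coding Maxwell::NumViewports (16) as viewportCount is invalid on devices whose VkPhysicalDeviceLimits::maxViewports is lower; the spec only guarantees 16 when the multiViewport feature is supported, and 1 otherwise. A sketch of the query a helper like GetMaxViewports() presumably wraps:

    VkPhysicalDeviceProperties props{};
    vkGetPhysicalDeviceProperties(physical_device, &props);
    const uint32_t max_viewports = props.limits.maxViewports; // may be 1 without multiViewport
    const uint32_t num_viewports =
        std::min<uint32_t>(max_viewports, Maxwell::NumViewports);
    // Both viewportCount and scissorCount (and any dynamic-state recording)
    // must stay within max_viewports or the validation layers will complain.
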
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
index 5eeda08d2..6b288b994 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -75,15 +75,9 @@ void MasterSemaphore::Refresh() {
75 75
76void MasterSemaphore::Wait(u64 tick) { 76void MasterSemaphore::Wait(u64 tick) {
77 if (!semaphore) { 77 if (!semaphore) {
78 // If we don't support timeline semaphores, use an atomic wait 78 // If we don't support timeline semaphores, wait for the value normally
79 while (true) { 79 std::unique_lock lk{free_mutex};
80 u64 current_value = gpu_tick.load(std::memory_order_relaxed); 80 free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; });
81 if (current_value >= tick) {
82 return;
83 }
84 gpu_tick.wait(current_value);
85 }
86
87 return; 81 return;
88 } 82 }
89 83
@@ -198,11 +192,13 @@ void MasterSemaphore::WaitThread(std::stop_token token) {
198 192
199 fence.Wait(); 193 fence.Wait();
200 fence.Reset(); 194 fence.Reset();
201 gpu_tick.store(host_tick);
202 gpu_tick.notify_all();
203 195
204 std::scoped_lock lock{free_mutex}; 196 {
205 free_queue.push_front(std::move(fence)); 197 std::scoped_lock lock{free_mutex};
198 free_queue.push_front(std::move(fence));
199 gpu_tick.store(host_tick);
200 }
201 free_cv.notify_one();
206 } 202 }
207} 203}
208 204
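
The rewrite above replaces C++20 atomic wait/notify with a condition variable tied to free_mutex, which the fence-recycling code already takes. The ordering matters: gpu_tick is stored while the lock is held and the condition variable is notified after, so a waiter can never check the predicate, miss the store, and then sleep through the notify. A self-contained sketch of the pairing:

    #include <atomic>
    #include <condition_variable>
    #include <cstdint>
    #include <mutex>

    std::mutex free_mutex;
    std::condition_variable free_cv;
    std::atomic<std::uint64_t> gpu_tick{0};

    void Publish(std::uint64_t host_tick) {
        {
            std::scoped_lock lock{free_mutex};
            gpu_tick.store(host_tick); // published under the lock
        }
        free_cv.notify_one(); // any waiter re-checks the predicate
    }

    void WaitFor(std::uint64_t tick) {
        std::unique_lock lk{free_mutex};
        free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; });
    }
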
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
index 1e7c90215..3f599d7bd 100644
--- a/src/video_core/renderer_vulkan/vk_master_semaphore.h
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -72,6 +72,7 @@ private:
72 std::atomic<u64> current_tick{1}; ///< Current logical tick. 72 std::atomic<u64> current_tick{1}; ///< Current logical tick.
73 std::mutex wait_mutex; 73 std::mutex wait_mutex;
74 std::mutex free_mutex; 74 std::mutex free_mutex;
75 std::condition_variable free_cv;
75 std::condition_variable_any wait_cv; 76 std::condition_variable_any wait_cv;
76 std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread. 77 std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread.
77 std::deque<vk::Fence> free_queue; ///< Holds available fences for submission. 78 std::deque<vk::Fence> free_queue; ///< Holds available fences for submission.
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 18e040a1b..d600c4e61 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
167 info.fixed_state_point_size = point_size; 167 info.fixed_state_point_size = point_size;
168 } 168 }
169 if (key.state.xfb_enabled) { 169 if (key.state.xfb_enabled) {
170 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); 170 auto [varyings, count] =
171 VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
172 info.xfb_varyings = varyings;
173 info.xfb_count = count;
171 } 174 }
172 info.convert_depth_mode = gl_ndc; 175 info.convert_depth_mode = gl_ndc;
173 } 176 }
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
214 info.fixed_state_point_size = point_size; 217 info.fixed_state_point_size = point_size;
215 } 218 }
216 if (key.state.xfb_enabled != 0) { 219 if (key.state.xfb_enabled != 0) {
217 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); 220 auto [varyings, count] =
221 VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
222 info.xfb_varyings = varyings;
223 info.xfb_count = count;
218 } 224 }
219 info.convert_depth_mode = gl_ndc; 225 info.convert_depth_mode = gl_ndc;
220 break; 226 break;
@@ -303,7 +309,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
303 .support_int16 = device.IsShaderInt16Supported(), 309 .support_int16 = device.IsShaderInt16Supported(),
304 .support_int64 = device.IsShaderInt64Supported(), 310 .support_int64 = device.IsShaderInt64Supported(),
305 .support_vertex_instance_id = false, 311 .support_vertex_instance_id = false,
306 .support_float_controls = true, 312 .support_float_controls = device.IsKhrShaderFloatControlsSupported(),
307 .support_separate_denorm_behavior = 313 .support_separate_denorm_behavior =
308 float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, 314 float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
309 .support_separate_rounding_mode = 315 .support_separate_rounding_mode =
@@ -319,12 +325,13 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
319 .support_fp64_signed_zero_nan_preserve = 325 .support_fp64_signed_zero_nan_preserve =
320 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, 326 float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
321 .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), 327 .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
322 .support_vote = true, 328 .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
323 .support_viewport_index_layer_non_geometry = 329 .support_viewport_index_layer_non_geometry =
324 device.IsExtShaderViewportIndexLayerSupported(), 330 device.IsExtShaderViewportIndexLayerSupported(),
325 .support_viewport_mask = device.IsNvViewportArray2Supported(), 331 .support_viewport_mask = device.IsNvViewportArray2Supported(),
326 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), 332 .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(),
327 .support_demote_to_helper_invocation = true, 333 .support_demote_to_helper_invocation =
334 device.IsExtShaderDemoteToHelperInvocationSupported(),
328 .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), 335 .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(),
329 .support_derivative_control = true, 336 .support_derivative_control = true,
330 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), 337 .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
@@ -705,10 +712,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
705std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( 712std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
706 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, 713 ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env,
707 PipelineStatistics* statistics, bool build_in_parallel) try { 714 PipelineStatistics* statistics, bool build_in_parallel) try {
708 // TODO: Remove this when Intel fixes their shader compiler. 715 if (device.HasBrokenCompute()) {
709 // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159
710 if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS &&
711 !Settings::values.enable_compute_pipelines.GetValue()) {
712 LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); 716 LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash());
713 return nullptr; 717 return nullptr;
714 } 718 }
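
Several capabilities that used to be hard-coded to true (float controls, subgroup vote, demote-to-helper-invocation) are now real device queries, which matters once the renderer runs on mobile drivers. As one example, subgroup vote support comes from core Vulkan 1.1 subgroup properties; a sketch of the query a helper like IsSubgroupFeatureSupported can wrap:

    VkPhysicalDeviceSubgroupProperties subgroup{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
        .pNext = nullptr,
    };
    VkPhysicalDeviceProperties2 props2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
        .pNext = &subgroup,
    };
    vkGetPhysicalDeviceProperties2(physical_device, &props2);
    const bool support_vote =
        (subgroup.supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) != 0;
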
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index 10ace0420..d681bd22a 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -181,7 +181,7 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
181 frame->height = height; 181 frame->height = height;
182 frame->is_srgb = is_srgb; 182 frame->is_srgb = is_srgb;
183 183
184 frame->image = dld.CreateImage({ 184 frame->image = memory_allocator.CreateImage({
185 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 185 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
186 .pNext = nullptr, 186 .pNext = nullptr,
187 .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, 187 .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@@ -204,8 +204,6 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
204 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 204 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
205 }); 205 });
206 206
207 frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
208
209 frame->image_view = dld.CreateImageView({ 207 frame->image_view = dld.CreateImageView({
210 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 208 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
211 .pNext = nullptr, 209 .pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h
index 4ac2e2395..83e859416 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.h
+++ b/src/video_core/renderer_vulkan/vk_present_manager.h
@@ -29,7 +29,6 @@ struct Frame {
29 vk::Image image; 29 vk::Image image;
30 vk::ImageView image_view; 30 vk::ImageView image_view;
31 vk::Framebuffer framebuffer; 31 vk::Framebuffer framebuffer;
32 MemoryCommit image_commit;
33 vk::CommandBuffer cmdbuf; 32 vk::CommandBuffer cmdbuf;
34 vk::Semaphore render_ready; 33 vk::Semaphore render_ready;
35 vk::Fence present_done; 34 vk::Fence present_done;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 84e3a30cc..f7c0d939a 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -315,7 +315,14 @@ void RasterizerVulkan::Clear(u32 layer_count) {
315 FlushWork(); 315 FlushWork();
316 gpu_memory->FlushCaching(); 316 gpu_memory->FlushCaching();
317 317
318#if ANDROID
319 if (Settings::IsGPULevelHigh()) {
320 // This is problematic on Android, disable on GPU Normal.
321 query_cache.UpdateCounters();
322 }
323#else
318 query_cache.UpdateCounters(); 324 query_cache.UpdateCounters();
325#endif
319 326
320 auto& regs = maxwell3d->regs; 327 auto& regs = maxwell3d->regs;
321 const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || 328 const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
@@ -925,7 +932,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
925 } 932 }
926 const bool is_rescaling{texture_cache.IsRescaling()}; 933 const bool is_rescaling{texture_cache.IsRescaling()};
927 const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; 934 const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
928 const std::array viewports{ 935 const std::array viewport_list{
929 GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), 936 GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale),
930 GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), 937 GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale),
931 GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), 938 GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale),
@@ -935,7 +942,11 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg
935 GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), 942 GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
936 GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), 943 GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
937 }; 944 };
938 scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); 945 scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) {
946 const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
947 const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports);
948 cmdbuf.SetViewport(0, viewports);
949 });
939} 950}
940 951
941void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { 952void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -948,7 +959,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
948 up_scale = Settings::values.resolution_info.up_scale; 959 up_scale = Settings::values.resolution_info.up_scale;
949 down_shift = Settings::values.resolution_info.down_shift; 960 down_shift = Settings::values.resolution_info.down_shift;
950 } 961 }
951 const std::array scissors{ 962 const std::array scissor_list{
952 GetScissorState(regs, 0, up_scale, down_shift), 963 GetScissorState(regs, 0, up_scale, down_shift),
953 GetScissorState(regs, 1, up_scale, down_shift), 964 GetScissorState(regs, 1, up_scale, down_shift),
954 GetScissorState(regs, 2, up_scale, down_shift), 965 GetScissorState(regs, 2, up_scale, down_shift),
@@ -966,7 +977,11 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
966 GetScissorState(regs, 14, up_scale, down_shift), 977 GetScissorState(regs, 14, up_scale, down_shift),
967 GetScissorState(regs, 15, up_scale, down_shift), 978 GetScissorState(regs, 15, up_scale, down_shift),
968 }; 979 };
969 scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); 980 scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) {
981 const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
982 const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors);
983 cmdbuf.SetScissor(0, scissors);
984 });
970} 985}
971 986
972void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { 987void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
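
The same maxViewports clamp is applied at record time for the dynamic viewport and scissor state; the lambdas now capture this so they can reach device when they later execute on the scheduler thread. In raw Vulkan terms the recorded calls reduce to the sketch below, where num_viewports is the clamped count:

    // count must not exceed VkPhysicalDeviceLimits::maxViewports, and the
    // arrays must hold at least count entries (here they always hold all 16).
    vkCmdSetViewport(command_buffer, /*firstViewport=*/0, num_viewports, viewport_list.data());
    vkCmdSetScissor(command_buffer, /*firstScissor=*/0, num_viewports, scissor_list.data());
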
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp
index f8735189d..5efd7d66e 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.cpp
+++ b/src/video_core/renderer_vulkan/vk_smaa.cpp
@@ -25,9 +25,7 @@ namespace {
25 25
26#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) 26#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0])))
27 27
28std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, 28vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) {
29 MemoryAllocator& allocator,
30 VkExtent2D dimensions, VkFormat format) {
31 const VkImageCreateInfo image_ci{ 29 const VkImageCreateInfo image_ci{
32 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, 30 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
33 .pNext = nullptr, 31 .pNext = nullptr,
@@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
46 .pQueueFamilyIndices = nullptr, 44 .pQueueFamilyIndices = nullptr,
47 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, 45 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
48 }; 46 };
49 47 return allocator.CreateImage(image_ci);
50 auto image = device.GetLogical().CreateImage(image_ci);
51 auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal);
52
53 return std::make_pair(std::move(image), std::move(commit));
54} 48}
55 49
56void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, 50void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
@@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
82void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, 76void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
83 vk::Image& image, VkExtent2D dimensions, VkFormat format, 77 vk::Image& image, VkExtent2D dimensions, VkFormat format,
84 std::span<const u8> initial_contents = {}) { 78 std::span<const u8> initial_contents = {}) {
85 auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ 79 const VkBufferCreateInfo upload_ci = {
86 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 80 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
87 .pNext = nullptr, 81 .pNext = nullptr,
88 .flags = 0, 82 .flags = 0,
@@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
91 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 85 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
92 .queueFamilyIndexCount = 0, 86 .queueFamilyIndexCount = 0,
93 .pQueueFamilyIndices = nullptr, 87 .pQueueFamilyIndices = nullptr,
94 }); 88 };
95 auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload); 89 auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload);
96 std::ranges::copy(initial_contents, upload_commit.Map().begin()); 90 std::ranges::copy(initial_contents, upload_buffer.Mapped().begin());
91 upload_buffer.Flush();
97 92
98 const std::array<VkBufferImageCopy, 1> regions{{{ 93 const std::array<VkBufferImageCopy, 1> regions{{{
99 .bufferOffset = 0, 94 .bufferOffset = 0,
@@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
117 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); 112 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
118 }); 113 });
119 scheduler.Finish(); 114 scheduler.Finish();
120
121 // This should go out of scope before the commit
122 auto upload_buffer2 = std::move(upload_buffer);
123} 115}
124 116
125vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { 117vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) {
@@ -531,10 +523,8 @@ void SMAA::CreateImages() {
531 static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; 523 static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
532 static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; 524 static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
533 525
534 std::tie(m_static_images[Area], m_static_buffer_commits[Area]) = 526 m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
535 CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); 527 m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
536 std::tie(m_static_images[Search], m_static_buffer_commits[Search]) =
537 CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM);
538 528
539 m_static_image_views[Area] = 529 m_static_image_views[Area] =
540 CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); 530 CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
@@ -544,12 +534,11 @@ void SMAA::CreateImages() {
544 for (u32 i = 0; i < m_image_count; i++) { 534 for (u32 i = 0; i < m_image_count; i++) {
545 Images& images = m_dynamic_images.emplace_back(); 535 Images& images = m_dynamic_images.emplace_back();
546 536
547 std::tie(images.images[Blend], images.buffer_commits[Blend]) = 537 images.images[Blend] =
548 CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); 538 CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
549 std::tie(images.images[Edges], images.buffer_commits[Edges]) = 539 images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
550 CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); 540 images.images[Output] =
551 std::tie(images.images[Output], images.buffer_commits[Output]) = 541 CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
552 CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
553 542
554 images.image_views[Blend] = 543 images.image_views[Blend] =
555 CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); 544 CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);
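
UploadImage now creates its staging buffer through the allocator with MemoryUsage::Upload, writes the texture bytes through the persistent mapping, flushes, and records a buffer-to-image copy; the old trick of moving the buffer into an inner scope so it died before its MemoryCommit disappears because the buffer owns its memory. A sketch of the copy it records, assuming the image was already transitioned to TRANSFER_DST_OPTIMAL and that dimensions come from the caller:

    const VkBufferImageCopy region{
        .bufferOffset = 0,
        .bufferRowLength = 0,   // 0 = tightly packed rows
        .bufferImageHeight = 0,
        .imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
        .imageOffset = {0, 0, 0},
        .imageExtent = {dimensions.width, dimensions.height, 1},
    };
    vkCmdCopyBufferToImage(cmdbuf, *upload_buffer, *image,
                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
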
diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h
index 99a369148..0e214258a 100644
--- a/src/video_core/renderer_vulkan/vk_smaa.h
+++ b/src/video_core/renderer_vulkan/vk_smaa.h
@@ -66,13 +66,11 @@ private:
66 std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{}; 66 std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{};
67 std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{}; 67 std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{};
68 68
69 std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits;
70 std::array<vk::Image, MaxStaticImage> m_static_images{}; 69 std::array<vk::Image, MaxStaticImage> m_static_images{};
71 std::array<vk::ImageView, MaxStaticImage> m_static_image_views{}; 70 std::array<vk::ImageView, MaxStaticImage> m_static_image_views{};
72 71
73 struct Images { 72 struct Images {
74 vk::DescriptorSets descriptor_sets{}; 73 vk::DescriptorSets descriptor_sets{};
75 std::array<MemoryCommit, MaxDynamicImage> buffer_commits;
76 std::array<vk::Image, MaxDynamicImage> images{}; 74 std::array<vk::Image, MaxDynamicImage> images{};
77 std::array<vk::ImageView, MaxDynamicImage> image_views{}; 75 std::array<vk::ImageView, MaxDynamicImage> image_views{};
78 std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{}; 76 std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{};
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 74ca77216..ce92f66ab 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
30constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; 30constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
31constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; 31constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
32 32
33constexpr VkMemoryPropertyFlags HOST_FLAGS =
34 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
35constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
36
37bool IsStreamHeap(VkMemoryHeap heap) noexcept {
38 return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
39}
40
41std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
42 VkMemoryPropertyFlags flags) noexcept {
43 for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
44 if (((type_mask >> type_index) & 1) == 0) {
45 // Memory type is incompatible
46 continue;
47 }
48 const VkMemoryType& memory_type = props.memoryTypes[type_index];
49 if ((memory_type.propertyFlags & flags) != flags) {
50 // Memory type doesn't have the flags we want
51 continue;
52 }
53 if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
54 // Memory heap is not suitable for streaming
55 continue;
56 }
57 // Success!
58 return type_index;
59 }
60 return std::nullopt;
61}
62
63u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
64 bool try_device_local) {
65 std::optional<u32> type;
66 if (try_device_local) {
67 // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
68 type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
69 if (type) {
70 return *type;
71 }
72 }
73 // Otherwise try without the DEVICE_LOCAL_BIT
74 type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
75 if (type) {
76 return *type;
77 }
78 // This should never happen, and in case it does, signal it as an out of memory situation
79 throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
80}
81
82size_t Region(size_t iterator) noexcept { 33size_t Region(size_t iterator) noexcept {
83 return iterator / REGION_SIZE; 34 return iterator / REGION_SIZE;
84} 35}
@@ -87,58 +38,26 @@ size_t Region(size_t iterator) noexcept {
87StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, 38StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
88 Scheduler& scheduler_) 39 Scheduler& scheduler_)
89 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { 40 : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
90 const vk::Device& dev = device.GetLogical(); 41 VkBufferCreateInfo stream_ci = {
91 stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
92 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 42 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
93 .pNext = nullptr, 43 .pNext = nullptr,
94 .flags = 0, 44 .flags = 0,
95 .size = STREAM_BUFFER_SIZE, 45 .size = STREAM_BUFFER_SIZE,
96 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | 46 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
97 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | 47 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
98 VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
99 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 48 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
100 .queueFamilyIndexCount = 0, 49 .queueFamilyIndexCount = 0,
101 .pQueueFamilyIndices = nullptr, 50 .pQueueFamilyIndices = nullptr,
102 });
103 if (device.HasDebuggingToolAttached()) {
104 stream_buffer.SetObjectNameEXT("Stream Buffer");
105 }
106 VkMemoryDedicatedRequirements dedicated_reqs{
107 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
108 .pNext = nullptr,
109 .prefersDedicatedAllocation = VK_FALSE,
110 .requiresDedicatedAllocation = VK_FALSE,
111 };
112 const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
113 const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
114 dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
115 const VkMemoryDedicatedAllocateInfo dedicated_info{
116 .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
117 .pNext = nullptr,
118 .image = nullptr,
119 .buffer = *stream_buffer,
120 }; 51 };
121 const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; 52 if (device.IsExtTransformFeedbackSupported()) {
122 VkMemoryAllocateInfo stream_memory_info{ 53 stream_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
123 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
124 .pNext = make_dedicated ? &dedicated_info : nullptr,
125 .allocationSize = requirements.size,
126 .memoryTypeIndex =
127 FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
128 };
129 stream_memory = dev.TryAllocateMemory(stream_memory_info);
130 if (!stream_memory) {
131 LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
132 stream_memory_info.memoryTypeIndex =
133 FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
134 stream_memory = dev.AllocateMemory(stream_memory_info);
135 } 54 }
136 55 stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream);
137 if (device.HasDebuggingToolAttached()) { 56 if (device.HasDebuggingToolAttached()) {
138 stream_memory.SetObjectNameEXT("Stream Buffer Memory"); 57 stream_buffer.SetObjectNameEXT("Stream Buffer");
139 } 58 }
140 stream_buffer.BindMemory(*stream_memory, 0); 59 stream_pointer = stream_buffer.Mapped();
141 stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); 60 ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
142} 61}
143 62
144StagingBufferPool::~StagingBufferPool() = default; 63StagingBufferPool::~StagingBufferPool() = default;
@@ -199,7 +118,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
199 return StagingBufferRef{ 118 return StagingBufferRef{
200 .buffer = *stream_buffer, 119 .buffer = *stream_buffer,
201 .offset = static_cast<VkDeviceSize>(offset), 120 .offset = static_cast<VkDeviceSize>(offset),
202 .mapped_span = std::span<u8>(stream_pointer + offset, size), 121 .mapped_span = stream_pointer.subspan(offset, size),
203 .usage{}, 122 .usage{},
204 .log2_level{}, 123 .log2_level{},
205 .index{}, 124 .index{},
@@ -247,29 +166,29 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
247StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, 166StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
248 bool deferred) { 167 bool deferred) {
249 const u32 log2 = Common::Log2Ceil64(size); 168 const u32 log2 = Common::Log2Ceil64(size);
250 vk::Buffer buffer = device.GetLogical().CreateBuffer({ 169 VkBufferCreateInfo buffer_ci = {
251 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 170 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
252 .pNext = nullptr, 171 .pNext = nullptr,
253 .flags = 0, 172 .flags = 0,
254 .size = 1ULL << log2, 173 .size = 1ULL << log2,
255 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 174 .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
256 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | 175 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
257 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | 176 VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
258 VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT,
259 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 177 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
260 .queueFamilyIndexCount = 0, 178 .queueFamilyIndexCount = 0,
261 .pQueueFamilyIndices = nullptr, 179 .pQueueFamilyIndices = nullptr,
262 }); 180 };
181 if (device.IsExtTransformFeedbackSupported()) {
182 buffer_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
183 }
184 vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage);
263 if (device.HasDebuggingToolAttached()) { 185 if (device.HasDebuggingToolAttached()) {
264 ++buffer_index; 186 ++buffer_index;
265 buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); 187 buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
266 } 188 }
267 MemoryCommit commit = memory_allocator.Commit(buffer, usage); 189 const std::span<u8> mapped_span = buffer.Mapped();
268 const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
269
270 StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{ 190 StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
271 .buffer = std::move(buffer), 191 .buffer = std::move(buffer),
272 .commit = std::move(commit),
273 .mapped_span = mapped_span, 192 .mapped_span = mapped_span,
274 .usage = usage, 193 .usage = usage,
275 .log2_level = log2, 194 .log2_level = log2,
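
The roughly eighty deleted lines of hand-rolled memory-type selection are the point of this file's change: MemoryUsage::Stream now encodes that policy inside the allocator. Condensed from the removed code, the policy is: prefer a type that is DEVICE_LOCAL and host visible/coherent (the dedicated BAR heap on NVIDIA and AMD), otherwise fall back to plain host-visible memory:

    #include <cstdint>
    #include <optional>
    #include <vulkan/vulkan.h>

    std::optional<uint32_t> FindType(const VkPhysicalDeviceMemoryProperties& props,
                                     uint32_t type_mask, VkMemoryPropertyFlags wanted) {
        for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
            const bool compatible = ((type_mask >> i) & 1) != 0;
            const bool has_flags = (props.memoryTypes[i].propertyFlags & wanted) == wanted;
            if (compatible && has_flags) {
                return i;
            }
        }
        return std::nullopt;
    }

    constexpr VkMemoryPropertyFlags HOST_FLAGS =
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    // Prefer the DEVICE_LOCAL + host-visible heap when present:
    auto type = FindType(props, mask, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS);
    if (!type) {
        type = FindType(props, mask, HOST_FLAGS); // plain system memory fallback
    }
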
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 4fd15f11a..5f69f08b1 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -46,7 +46,6 @@ private:
46 46
47 struct StagingBuffer { 47 struct StagingBuffer {
48 vk::Buffer buffer; 48 vk::Buffer buffer;
49 MemoryCommit commit;
50 std::span<u8> mapped_span; 49 std::span<u8> mapped_span;
51 MemoryUsage usage; 50 MemoryUsage usage;
52 u32 log2_level; 51 u32 log2_level;
@@ -97,8 +96,7 @@ private:
97 Scheduler& scheduler; 96 Scheduler& scheduler;
98 97
99 vk::Buffer stream_buffer; 98 vk::Buffer stream_buffer;
100 vk::DeviceMemory stream_memory; 99 std::span<u8> stream_pointer;
101 u8* stream_pointer = nullptr;
102 100
103 size_t iterator = 0; 101 size_t iterator = 0;
104 size_t used_iterator = 0; 102 size_t used_iterator = 0;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f025f618b..8385b5509 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -15,7 +15,6 @@
15#include "video_core/renderer_vulkan/blit_image.h" 15#include "video_core/renderer_vulkan/blit_image.h"
16#include "video_core/renderer_vulkan/maxwell_to_vk.h" 16#include "video_core/renderer_vulkan/maxwell_to_vk.h"
17#include "video_core/renderer_vulkan/vk_compute_pass.h" 17#include "video_core/renderer_vulkan/vk_compute_pass.h"
18#include "video_core/renderer_vulkan/vk_rasterizer.h"
19#include "video_core/renderer_vulkan/vk_render_pass_cache.h" 18#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
20#include "video_core/renderer_vulkan/vk_scheduler.h" 19#include "video_core/renderer_vulkan/vk_scheduler.h"
21#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" 20#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -163,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
163 }; 162 };
164} 163}
165 164
166[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { 165[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
166 const ImageInfo& info) {
167 if (info.type == ImageType::Buffer) { 167 if (info.type == ImageType::Buffer) {
168 return vk::Image{}; 168 return vk::Image{};
169 } 169 }
170 return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); 170 return allocator.CreateImage(MakeImageCreateInfo(device, info));
171} 171}
172 172
173[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { 173[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
330 }; 330 };
331} 331}
332 332
333[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 333[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
334 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 334TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
335 std::vector<VkBufferCopy> result(copies.size()); 335 boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
336 std::ranges::transform( 336 std::ranges::transform(
337 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { 337 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
338 return VkBufferCopy{ 338 return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
344 return result; 344 return result;
345} 345}
346 346
347[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( 347[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
348 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { 348 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
349 struct Maker { 349 struct Maker {
350 VkBufferImageCopy operator()(const BufferImageCopy& copy) const { 350 VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
377 VkImageAspectFlags aspect_mask; 377 VkImageAspectFlags aspect_mask;
378 }; 378 };
379 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { 379 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
380 std::vector<VkBufferImageCopy> result(copies.size() * 2); 380 boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
381 std::ranges::transform(copies, result.begin(), 381 std::ranges::transform(copies, result.begin(),
382 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); 382 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
383 std::ranges::transform(copies, result.begin() + copies.size(), 383 std::ranges::transform(copies, result.begin() + copies.size(),
384 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); 384 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
385 return result; 385 return result;
386 } else { 386 } else {
387 std::vector<VkBufferImageCopy> result(copies.size()); 387 boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
388 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); 388 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
389 return result; 389 return result;
390 } 390 }
@@ -839,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
839 839
840VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { 840VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
841 const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); 841 const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
842 if (buffer_commits[level]) { 842 if (buffers[level]) {
843 return *buffers[level]; 843 return *buffers[level];
844 } 844 }
845 const auto new_size = Common::NextPow2(needed_size); 845 const auto new_size = Common::NextPow2(needed_size);
846 static constexpr VkBufferUsageFlags flags = 846 static constexpr VkBufferUsageFlags flags =
847 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | 847 VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
848 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; 848 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
849 buffers[level] = device.GetLogical().CreateBuffer({ 849 const VkBufferCreateInfo temp_ci = {
850 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 850 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
851 .pNext = nullptr, 851 .pNext = nullptr,
852 .flags = 0, 852 .flags = 0,
@@ -855,9 +855,8 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
855 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 855 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
856 .queueFamilyIndexCount = 0, 856 .queueFamilyIndexCount = 0,
857 .pQueueFamilyIndices = nullptr, 857 .pQueueFamilyIndices = nullptr,
858 }); 858 };
859 buffer_commits[level] = std::make_unique<MemoryCommit>( 859 buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal);
860 memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
861 return *buffers[level]; 860 return *buffers[level];
862} 861}
863 862
@@ -867,8 +866,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
867 866
868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, 867void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
869 std::span<const VideoCommon::ImageCopy> copies) { 868 std::span<const VideoCommon::ImageCopy> copies) {
870 std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); 869 boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
871 std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); 870 boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
872 const VkImageAspectFlags src_aspect_mask = src.AspectMask(); 871 const VkImageAspectFlags src_aspect_mask = src.AspectMask();
873 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); 872 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
874 873
@@ -1157,7 +1156,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
1157 1156
1158void TextureCacheRuntime::CopyImage(Image& dst, Image& src, 1157void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
1159 std::span<const VideoCommon::ImageCopy> copies) { 1158 std::span<const VideoCommon::ImageCopy> copies) {
1160 std::vector<VkImageCopy> vk_copies(copies.size()); 1159 boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
1161 const VkImageAspectFlags aspect_mask = dst.AspectMask(); 1160 const VkImageAspectFlags aspect_mask = dst.AspectMask();
1162 ASSERT(aspect_mask == src.AspectMask()); 1161 ASSERT(aspect_mask == src.AspectMask());
1163 1162
@@ -1266,8 +1265,8 @@ void TextureCacheRuntime::TickFrame() {}
1266Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, 1265Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
1267 VAddr cpu_addr_) 1266 VAddr cpu_addr_)
1268 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, 1267 : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
1269 runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), 1268 runtime{&runtime_},
1270 commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), 1269 original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)),
1271 aspect_mask(ImageAspectMask(info.format)) { 1270 aspect_mask(ImageAspectMask(info.format)) {
1272 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { 1271 if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
1273 if (Settings::values.async_astc.GetValue()) { 1272 if (Settings::values.async_astc.GetValue()) {
@@ -1280,6 +1279,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
1280 flags |= VideoCommon::ImageFlagBits::Converted; 1279 flags |= VideoCommon::ImageFlagBits::Converted;
1281 flags |= VideoCommon::ImageFlagBits::CostlyLoad; 1280 flags |= VideoCommon::ImageFlagBits::CostlyLoad;
1282 } 1281 }
1282 if (IsPixelFormatBCn(info.format) && !runtime->device.IsOptimalBcnSupported()) {
1283 flags |= VideoCommon::ImageFlagBits::Converted;
1284 flags |= VideoCommon::ImageFlagBits::CostlyLoad;
1285 }
1283 if (runtime->device.HasDebuggingToolAttached()) { 1286 if (runtime->device.HasDebuggingToolAttached()) {
1284 original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); 1287 original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str());
1285 } 1288 }
@@ -1332,7 +1335,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
1332 ScaleDown(true); 1335 ScaleDown(true);
1333 } 1336 }
1334 scheduler->RequestOutsideRenderPassOperationContext(); 1337 scheduler->RequestOutsideRenderPassOperationContext();
1335 std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); 1338 auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
1336 const VkBuffer src_buffer = buffer; 1339 const VkBuffer src_buffer = buffer;
1337 const VkImage vk_image = *original_image; 1340 const VkImage vk_image = *original_image;
1338 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 1341 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1370,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
1367 if (is_rescaled) { 1370 if (is_rescaled) {
1368 ScaleDown(); 1371 ScaleDown();
1369 } 1372 }
1370 boost::container::small_vector<VkBuffer, 1> buffers_vector{}; 1373 boost::container::small_vector<VkBuffer, 8> buffers_vector{};
1371 boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; 1374 boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
1375 vk_copies;
1372 for (size_t index = 0; index < buffers_span.size(); index++) { 1376 for (size_t index = 0; index < buffers_span.size(); index++) {
1373 buffers_vector.emplace_back(buffers_span[index]); 1377 buffers_vector.emplace_back(buffers_span[index]);
1374 vk_copies.emplace_back( 1378 vk_copies.emplace_back(
@@ -1467,9 +1471,7 @@ bool Image::ScaleUp(bool ignore) {
1467 auto scaled_info = info; 1471 auto scaled_info = info;
1468 scaled_info.size.width = scaled_width; 1472 scaled_info.size.width = scaled_width;
1469 scaled_info.size.height = scaled_height; 1473 scaled_info.size.height = scaled_height;
1470 scaled_image = MakeImage(runtime->device, scaled_info); 1474 scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info);
1471 auto& allocator = runtime->memory_allocator;
1472 scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
1473 ignore = false; 1475 ignore = false;
1474 } 1476 }
1475 current_image = *scaled_image; 1477 current_image = *scaled_image;
@@ -1858,7 +1860,7 @@ Framebuffer::~Framebuffer() = default;
1858void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, 1860void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
1859 std::span<ImageView*, NUM_RT> color_buffers, 1861 std::span<ImageView*, NUM_RT> color_buffers,
1860 ImageView* depth_buffer, bool is_rescaled) { 1862 ImageView* depth_buffer, bool is_rescaled) {
1861 std::vector<VkImageView> attachments; 1863 boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
1862 RenderPassKey renderpass_key{}; 1864 RenderPassKey renderpass_key{};
1863 s32 num_layers = 1; 1865 s32 num_layers = 1;
1864 1866
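
The recurring pattern in the hunks above: image and buffer creation now goes through the memory allocator, which binds device-local memory internally, so the separate MemoryCommit handles disappear. A minimal sketch, assuming a CreateImage counterpart to the CreateBuffer call shown above (the exact signatures live in vulkan_common/vulkan_memory_allocator.h), with image_ci/buffer_ci built as in the hunks:

    // Before: two steps, and the MemoryCommit had to be stored alongside the handle.
    //   vk::Image image  = device.GetLogical().CreateImage(image_ci);
    //   MemoryCommit mem = memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
    // After: one call; the allocation is owned by the returned handle, which is
    // why the commit/scaled_commit/buffer_commits members are removed below.
    vk::Image image = memory_allocator.CreateImage(image_ci);
    vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
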
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index f14525dcb..220943116 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -116,7 +116,6 @@ public:
116 116
117 static constexpr size_t indexing_slots = 8 * sizeof(size_t); 117 static constexpr size_t indexing_slots = 8 * sizeof(size_t);
118 std::array<vk::Buffer, indexing_slots> buffers{}; 118 std::array<vk::Buffer, indexing_slots> buffers{};
119 std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
120}; 119};
121 120
122class Image : public VideoCommon::ImageBase { 121class Image : public VideoCommon::ImageBase {
@@ -180,12 +179,10 @@ private:
180 TextureCacheRuntime* runtime{}; 179 TextureCacheRuntime* runtime{};
181 180
182 vk::Image original_image; 181 vk::Image original_image;
183 MemoryCommit commit;
184 std::vector<vk::ImageView> storage_image_views; 182 std::vector<vk::ImageView> storage_image_views;
185 VkImageAspectFlags aspect_mask = 0; 183 VkImageAspectFlags aspect_mask = 0;
186 bool initialized = false; 184 bool initialized = false;
187 vk::Image scaled_image{}; 185 vk::Image scaled_image{};
188 MemoryCommit scaled_commit{};
189 VkImage current_image{}; 186 VkImage current_image{};
190 187
191 std::unique_ptr<Framebuffer> scale_framebuffer; 188 std::unique_ptr<Framebuffer> scale_framebuffer;
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
index a802d3c49..460d8d59d 100644
--- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
+++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp
@@ -18,7 +18,7 @@ using namespace Common::Literals;
18 18
19TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) 19TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
20#ifndef ANDROID 20#ifndef ANDROID
21 : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} 21 : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device}
22#endif 22#endif
23{ 23{
24 { 24 {
@@ -41,7 +41,7 @@ void TurboMode::Run(std::stop_token stop_token) {
41 auto& dld = m_device.GetLogical(); 41 auto& dld = m_device.GetLogical();
42 42
43 // Allocate buffer. 2MiB should be sufficient. 43 // Allocate buffer. 2MiB should be sufficient.
44 auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ 44 const VkBufferCreateInfo buffer_ci = {
45 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, 45 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
46 .pNext = nullptr, 46 .pNext = nullptr,
47 .flags = 0, 47 .flags = 0,
@@ -50,10 +50,8 @@ void TurboMode::Run(std::stop_token stop_token) {
50 .sharingMode = VK_SHARING_MODE_EXCLUSIVE, 50 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
51 .queueFamilyIndexCount = 0, 51 .queueFamilyIndexCount = 0,
52 .pQueueFamilyIndices = nullptr, 52 .pQueueFamilyIndices = nullptr,
53 }); 53 };
54 54 vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
55 // Commit some device local memory for the buffer.
56 auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
57 55
58 // Create the descriptor pool to contain our descriptor. 56 // Create the descriptor pool to contain our descriptor.
59 static constexpr VkDescriptorPoolSize pool_size{ 57 static constexpr VkDescriptorPoolSize pool_size{
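
The same allocation change in isolation (buffer_ci as defined just above):

    // CreateBuffer creates the VkBuffer and binds device-local memory in one
    // call; there is no standalone commit object left to keep alive.
    vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
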
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index c5213875b..4db948b6d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
151 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), 151 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
152 marked_for_removal.end()); 152 marked_for_removal.end());
153 153
154 std::vector<ShaderInfo*> removed_shaders; 154 boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
155 removed_shaders.reserve(marked_for_removal.size());
156 155
157 std::scoped_lock lock{lookup_mutex}; 156 std::scoped_lock lock{lookup_mutex};
158
159 for (Entry* const entry : marked_for_removal) { 157 for (Entry* const entry : marked_for_removal) {
160 removed_shaders.push_back(entry->data); 158 removed_shaders.push_back(entry->data);
161 159
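
Several containers in this merge move from std::vector to boost::container::small_vector, which keeps a fixed number of elements inline before touching the heap. A self-contained sketch of the behavior being relied on:

    #include <cassert>
    #include <boost/container/small_vector.hpp>

    int main() {
        // Up to 16 elements live inside the object itself; the 17th push_back
        // is the first heap allocation. The interface matches std::vector.
        boost::container::small_vector<int, 16> v;
        for (int i = 0; i < 17; ++i) {
            v.push_back(i);
        }
        assert(v.size() == 17);
        return 0;
    }
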
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index cb51529e4..e16cd5e73 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -269,6 +269,28 @@ bool IsPixelFormatASTC(PixelFormat format) {
269 } 269 }
270} 270}
271 271
272bool IsPixelFormatBCn(PixelFormat format) {
273 switch (format) {
274 case PixelFormat::BC1_RGBA_UNORM:
275 case PixelFormat::BC2_UNORM:
276 case PixelFormat::BC3_UNORM:
277 case PixelFormat::BC4_UNORM:
278 case PixelFormat::BC4_SNORM:
279 case PixelFormat::BC5_UNORM:
280 case PixelFormat::BC5_SNORM:
281 case PixelFormat::BC1_RGBA_SRGB:
282 case PixelFormat::BC2_SRGB:
283 case PixelFormat::BC3_SRGB:
284 case PixelFormat::BC7_UNORM:
285 case PixelFormat::BC6H_UFLOAT:
286 case PixelFormat::BC6H_SFLOAT:
287 case PixelFormat::BC7_SRGB:
288 return true;
289 default:
290 return false;
291 }
292}
293
272bool IsPixelFormatSRGB(PixelFormat format) { 294bool IsPixelFormatSRGB(PixelFormat format) {
273 switch (format) { 295 switch (format) {
274 case PixelFormat::A8B8G8R8_SRGB: 296 case PixelFormat::A8B8G8R8_SRGB:
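
IsPixelFormatBCn is consumed the same way as the existing ASTC predicate; this mirrors the Image constructor hunk in vk_texture_cache.cpp above, with IsOptimalBcnSupported being the device query added alongside it:

    // Devices without native BCn sampling fall back to CPU decode: marking the
    // image Converted routes its uploads through DecompressBCn.
    if (IsPixelFormatBCn(info.format) && !runtime->device.IsOptimalBcnSupported()) {
        flags |= VideoCommon::ImageFlagBits::Converted;
        flags |= VideoCommon::ImageFlagBits::CostlyLoad;
    }
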
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 0225d3287..9b9c4d9bc 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -501,6 +501,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format);
501 501
502bool IsPixelFormatASTC(PixelFormat format); 502bool IsPixelFormatASTC(PixelFormat format);
503 503
504bool IsPixelFormatBCn(PixelFormat format);
505
504bool IsPixelFormatSRGB(PixelFormat format); 506bool IsPixelFormatSRGB(PixelFormat format);
505 507
506bool IsPixelFormatInteger(PixelFormat format); 508bool IsPixelFormatInteger(PixelFormat format);
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp
new file mode 100644
index 000000000..3e26474a3
--- /dev/null
+++ b/src/video_core/texture_cache/decode_bc.cpp
@@ -0,0 +1,129 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <algorithm>
5#include <array>
6#include <span>
7#include <bc_decoder.h>
8
9#include "common/common_types.h"
10#include "video_core/texture_cache/decode_bc.h"
11
12namespace VideoCommon {
13
14namespace {
15constexpr u32 BLOCK_SIZE = 4;
16
17using VideoCore::Surface::PixelFormat;
18
19constexpr bool IsSigned(PixelFormat pixel_format) {
20 switch (pixel_format) {
21 case PixelFormat::BC4_SNORM:
22 case PixelFormat::BC4_UNORM:
23 case PixelFormat::BC5_SNORM:
24 case PixelFormat::BC5_UNORM:
25 case PixelFormat::BC6H_SFLOAT:
26 case PixelFormat::BC6H_UFLOAT:
27 return true;
28 default:
29 return false;
30 }
31}
32
33constexpr u32 BlockSize(PixelFormat pixel_format) {
34 switch (pixel_format) {
35 case PixelFormat::BC1_RGBA_SRGB:
36 case PixelFormat::BC1_RGBA_UNORM:
37 case PixelFormat::BC4_SNORM:
38 case PixelFormat::BC4_UNORM:
39 return 8;
40 default:
41 return 16;
42 }
43}
44} // Anonymous namespace
45
46u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
47 switch (pixel_format) {
48 case PixelFormat::BC4_SNORM:
49 case PixelFormat::BC4_UNORM:
50 return 1;
51 case PixelFormat::BC5_SNORM:
52 case PixelFormat::BC5_UNORM:
53 return 2;
54 case PixelFormat::BC6H_SFLOAT:
55 case PixelFormat::BC6H_UFLOAT:
56 return 8;
57 default:
58 return 4;
59 }
60}
61
62template <auto decompress, PixelFormat pixel_format>
63void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent,
64 bool is_signed = false) {
65 const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
66 const u32 block_width = std::min(extent.width, BLOCK_SIZE);
67 const u32 block_height = std::min(extent.height, BLOCK_SIZE);
68 const u32 pitch = extent.width * out_bpp;
69 size_t input_offset = 0;
70 size_t output_offset = 0;
71 for (u32 slice = 0; slice < extent.depth; ++slice) {
72 for (u32 y = 0; y < extent.height; y += block_height) {
73 size_t row_offset = 0;
74 for (u32 x = 0; x < extent.width;
75 x += block_width, row_offset += block_width * out_bpp) {
76 const u8* src = input.data() + input_offset;
77 u8* const dst = output.data() + output_offset + row_offset;
78 if constexpr (IsSigned(pixel_format)) {
79 decompress(src, dst, x, y, extent.width, extent.height, is_signed);
80 } else {
81 decompress(src, dst, x, y, extent.width, extent.height);
82 }
83 input_offset += BlockSize(pixel_format);
84 }
85 output_offset += block_height * pitch;
86 }
87 }
88}
89
90void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
91 VideoCore::Surface::PixelFormat pixel_format) {
92 switch (pixel_format) {
93 case PixelFormat::BC1_RGBA_UNORM:
94 case PixelFormat::BC1_RGBA_SRGB:
95 DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent);
96 break;
97 case PixelFormat::BC2_UNORM:
98 case PixelFormat::BC2_SRGB:
99 DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent);
100 break;
101 case PixelFormat::BC3_UNORM:
102 case PixelFormat::BC3_SRGB:
103 DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent);
104 break;
105 case PixelFormat::BC4_SNORM:
106 case PixelFormat::BC4_UNORM:
107 DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
108 input, output, extent, pixel_format == PixelFormat::BC4_SNORM);
109 break;
110 case PixelFormat::BC5_SNORM:
111 case PixelFormat::BC5_UNORM:
112 DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
113 input, output, extent, pixel_format == PixelFormat::BC5_SNORM);
114 break;
115 case PixelFormat::BC6H_SFLOAT:
116 case PixelFormat::BC6H_UFLOAT:
117 DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
118 input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT);
119 break;
120 case PixelFormat::BC7_SRGB:
121 case PixelFormat::BC7_UNORM:
122 DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent);
123 break;
124 default:
125 LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
126 }
127}
128
129} // namespace VideoCommon
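
To make the size bookkeeping concrete, a sketch of decoding a 64x64 BC1 surface through the new entry point (assuming Extent3D's fields initialize in width/height/depth order, as declared in texture_cache/types.h):

    // input  = (64/4)*(64/4) blocks * BlockSize(BC1)      = 256 * 8  = 2048 bytes
    // output = 64*64 texels * ConvertedBytesPerBlock(BC1) = 4096 * 4 = 16384 bytes
    const Extent3D extent{.width = 64, .height = 64, .depth = 1};
    std::vector<u8> compressed(2048); // filled from guest memory in practice
    std::vector<u8> decoded(16384);   // RGBA8 output
    VideoCommon::DecompressBCn(compressed, decoded, extent,
                               PixelFormat::BC1_RGBA_UNORM);
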
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc.h
index ab2f735be..41d1ec0a3 100644
--- a/src/video_core/texture_cache/decode_bc4.h
+++ b/src/video_core/texture_cache/decode_bc.h
@@ -6,10 +6,14 @@
6#include <span> 6#include <span>
7 7
8#include "common/common_types.h" 8#include "common/common_types.h"
9#include "video_core/surface.h"
9#include "video_core/texture_cache/types.h" 10#include "video_core/texture_cache/types.h"
10 11
11namespace VideoCommon { 12namespace VideoCommon {
12 13
13void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); 14[[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format);
15
16void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent,
17 VideoCore::Surface::PixelFormat pixel_format);
14 18
15} // namespace VideoCommon 19} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp
deleted file mode 100644
index ef98afdca..000000000
--- a/src/video_core/texture_cache/decode_bc4.cpp
+++ /dev/null
@@ -1,96 +0,0 @@
1// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
2// SPDX-License-Identifier: GPL-2.0-or-later
3
4#include <algorithm>
5#include <array>
6#include <span>
7
8#include "common/assert.h"
9#include "common/common_types.h"
10#include "video_core/texture_cache/decode_bc4.h"
11#include "video_core/texture_cache/types.h"
12
13namespace VideoCommon {
14
15// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
16[[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) {
17 const u32 code_offset = 16 + 3 * (4 * y + x);
18 const u32 code = (bits >> code_offset) & 7;
19 const u32 red0 = (bits >> 0) & 0xff;
20 const u32 red1 = (bits >> 8) & 0xff;
21 if (red0 > red1) {
22 switch (code) {
23 case 0:
24 return red0;
25 case 1:
26 return red1;
27 case 2:
28 return (6 * red0 + 1 * red1) / 7;
29 case 3:
30 return (5 * red0 + 2 * red1) / 7;
31 case 4:
32 return (4 * red0 + 3 * red1) / 7;
33 case 5:
34 return (3 * red0 + 4 * red1) / 7;
35 case 6:
36 return (2 * red0 + 5 * red1) / 7;
37 case 7:
38 return (1 * red0 + 6 * red1) / 7;
39 }
40 } else {
41 switch (code) {
42 case 0:
43 return red0;
44 case 1:
45 return red1;
46 case 2:
47 return (4 * red0 + 1 * red1) / 5;
48 case 3:
49 return (3 * red0 + 2 * red1) / 5;
50 case 4:
51 return (2 * red0 + 3 * red1) / 5;
52 case 5:
53 return (1 * red0 + 4 * red1) / 5;
54 case 6:
55 return 0;
56 case 7:
57 return 0xff;
58 }
59 }
60 return 0;
61}
62
63void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) {
64 UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width);
65 UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height);
66 static constexpr u32 BLOCK_SIZE = 4;
67 size_t input_offset = 0;
68 for (u32 slice = 0; slice < extent.depth; ++slice) {
69 for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) {
70 for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) {
71 u64 bits;
72 std::memcpy(&bits, &input[input_offset], sizeof(bits));
73 input_offset += sizeof(bits);
74
75 for (u32 y = 0; y < BLOCK_SIZE; ++y) {
76 for (u32 x = 0; x < BLOCK_SIZE; ++x) {
77 const u32 linear_z = slice;
78 const u32 linear_y = block_y * BLOCK_SIZE + y;
79 const u32 linear_x = block_x * BLOCK_SIZE + x;
80 const u32 offset_z = linear_z * extent.width * extent.height;
81 const u32 offset_y = linear_y * extent.width;
82 const u32 offset_x = linear_x;
83 const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL;
84 const u32 color = DecompressBlock(bits, x, y);
85 output[output_offset + 0] = static_cast<u8>(color);
86 output[output_offset + 1] = 0;
87 output[output_offset + 2] = 0;
88 output[output_offset + 3] = 0xff;
89 }
90 }
91 }
92 }
93 }
94}
95
96} // namespace VideoCommon
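
The deleted BC4-only path maps onto the new dispatcher as sketched below. Note that, per ConvertedBytesPerBlock, the generic decoder emits 1 byte per texel (R8) for BC4, where the removed code expanded each texel to RGBA8 with opaque alpha; the size calculations in util.cpp change to match:

    // Removed:  DecompressBC4(input, extent, output);   // 4 bytes/texel (RGBA8)
    // Replacement, dispatching on the pixel format:
    VideoCommon::DecompressBCn(input, output, extent,
                               PixelFormat::BC4_UNORM);  // 1 byte/texel (R8)
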
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9 10
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
108 std::vector<ImageViewInfo> image_view_infos; 109 std::vector<ImageViewInfo> image_view_infos;
109 std::vector<ImageViewId> image_view_ids; 110 std::vector<ImageViewId> image_view_ids;
110 111
111 std::vector<u32> slice_offsets; 112 boost::container::small_vector<u32, 16> slice_offsets;
112 std::vector<SubresourceBase> slice_subresources; 113 boost::container::small_vector<SubresourceBase, 16> slice_subresources;
113 114
114 std::vector<AliasedImage> aliased_images; 115 std::vector<AliasedImage> aliased_images;
115 std::vector<ImageId> overlapping_images; 116 std::vector<ImageId> overlapping_images;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 4027d860b..8190f3ba1 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
186 186
187template <class P> 187template <class P>
188void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { 188void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
189 if (!Settings::values.barrier_feedback_loops.GetValue()) {
190 return;
191 }
192
189 const bool requires_barrier = [&] { 193 const bool requires_barrier = [&] {
190 for (const auto& view : views) { 194 for (const auto& view : views) {
191 if (!view.id) { 195 if (!view.id) {
@@ -300,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() {
300} 304}
301 305
302template <class P> 306template <class P>
303bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { 307bool TextureCache<P>::RescaleRenderTargets() {
304 auto& flags = maxwell3d->dirty.flags; 308 auto& flags = maxwell3d->dirty.flags;
305 u32 scale_rating = 0; 309 u32 scale_rating = 0;
306 bool rescaled = false; 310 bool rescaled = false;
@@ -338,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
338 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; 342 ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
339 if (flags[Dirty::ColorBuffer0 + index] || force) { 343 if (flags[Dirty::ColorBuffer0 + index] || force) {
340 flags[Dirty::ColorBuffer0 + index] = false; 344 flags[Dirty::ColorBuffer0 + index] = false;
341 BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); 345 BindRenderTarget(&color_buffer_id, FindColorBuffer(index));
342 } 346 }
343 check_rescale(color_buffer_id, tmp_color_images[index]); 347 check_rescale(color_buffer_id, tmp_color_images[index]);
344 } 348 }
345 if (flags[Dirty::ZetaBuffer] || force) { 349 if (flags[Dirty::ZetaBuffer] || force) {
346 flags[Dirty::ZetaBuffer] = false; 350 flags[Dirty::ZetaBuffer] = false;
347 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); 351 BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer());
348 } 352 }
349 check_rescale(render_targets.depth_buffer_id, tmp_depth_image); 353 check_rescale(render_targets.depth_buffer_id, tmp_depth_image);
350 354
@@ -409,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
409 return; 413 return;
410 } 414 }
411 415
412 const bool rescaled = RescaleRenderTargets(is_clear); 416 const bool rescaled = RescaleRenderTargets();
413 if (is_rescaling != rescaled) { 417 if (is_rescaling != rescaled) {
414 flags[Dirty::RescaleViewports] = true; 418 flags[Dirty::RescaleViewports] = true;
415 flags[Dirty::RescaleScissors] = true; 419 flags[Dirty::RescaleScissors] = true;
@@ -522,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
522 526
523template <class P> 527template <class P>
524void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { 528void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
525 std::vector<ImageId> images; 529 boost::container::small_vector<ImageId, 16> images;
526 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { 530 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
527 if (!image.IsSafeDownload()) { 531 if (!image.IsSafeDownload()) {
528 return; 532 return;
@@ -575,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
575 579
576template <class P> 580template <class P>
577void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { 581void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
578 std::vector<ImageId> deleted_images; 582 boost::container::small_vector<ImageId, 16> deleted_images;
579 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); 583 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
580 for (const ImageId id : deleted_images) { 584 for (const ImageId id : deleted_images) {
581 Image& image = slot_images[id]; 585 Image& image = slot_images[id];
@@ -589,19 +593,11 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
589 593
590template <class P> 594template <class P>
591void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { 595void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
592 std::vector<ImageId> deleted_images; 596 boost::container::small_vector<ImageId, 16> deleted_images;
593 ForEachImageInRegionGPU(as_id, gpu_addr, size, 597 ForEachImageInRegionGPU(as_id, gpu_addr, size,
594 [&](ImageId id, Image&) { deleted_images.push_back(id); }); 598 [&](ImageId id, Image&) { deleted_images.push_back(id); });
595 for (const ImageId id : deleted_images) { 599 for (const ImageId id : deleted_images) {
596 Image& image = slot_images[id]; 600 Image& image = slot_images[id];
597 if (True(image.flags & ImageFlagBits::CpuModified)) {
598 return;
599 }
600 image.flags |= ImageFlagBits::CpuModified;
601 if (True(image.flags & ImageFlagBits::Tracked)) {
602 UntrackImage(image, id);
603 }
604 /*
605 if (True(image.flags & ImageFlagBits::Remapped)) { 601 if (True(image.flags & ImageFlagBits::Remapped)) {
606 continue; 602 continue;
607 } 603 }
@@ -609,7 +605,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz
609 if (True(image.flags & ImageFlagBits::Tracked)) { 605 if (True(image.flags & ImageFlagBits::Tracked)) {
610 UntrackImage(image, id); 606 UntrackImage(image, id);
611 } 607 }
612 */
613 } 608 }
614} 609}
615 610
@@ -875,6 +870,10 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo
875 return NULL_IMAGE_ID; 870 return NULL_IMAGE_ID;
876 } 871 }
877 auto& image = slot_images[image_id]; 872 auto& image = slot_images[image_id];
873 if (image.info.type == ImageType::e3D) {
874 // Don't accelerate 3D images.
875 return NULL_IMAGE_ID;
876 }
878 if (!is_upload && !image.info.dma_downloaded) { 877 if (!is_upload && !image.info.dma_downloaded) {
879 // Force a full sync. 878 // Force a full sync.
880 image.info.dma_downloaded = true; 879 image.info.dma_downloaded = true;
@@ -1097,7 +1096,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1097 const bool native_bgr = runtime.HasNativeBgr(); 1096 const bool native_bgr = runtime.HasNativeBgr();
1098 const bool flexible_formats = True(options & RelaxedOptions::Format); 1097 const bool flexible_formats = True(options & RelaxedOptions::Format);
1099 ImageId image_id{}; 1098 ImageId image_id{};
1100 boost::container::small_vector<ImageId, 1> image_ids; 1099 boost::container::small_vector<ImageId, 8> image_ids;
1101 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1100 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1102 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 1101 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1103 return false; 1102 return false;
@@ -1618,7 +1617,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
1618 } 1617 }
1619 } 1618 }
1620 ImageId image_id{}; 1619 ImageId image_id{};
1621 boost::container::small_vector<ImageId, 1> image_ids; 1620 boost::container::small_vector<ImageId, 8> image_ids;
1622 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1621 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1623 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 1622 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1624 return false; 1623 return false;
@@ -1678,7 +1677,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
1678} 1677}
1679 1678
1680template <class P> 1679template <class P>
1681ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { 1680ImageViewId TextureCache<P>::FindColorBuffer(size_t index) {
1682 const auto& regs = maxwell3d->regs; 1681 const auto& regs = maxwell3d->regs;
1683 if (index >= regs.rt_control.count) { 1682 if (index >= regs.rt_control.count) {
1684 return ImageViewId{}; 1683 return ImageViewId{};
@@ -1692,11 +1691,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
1692 return ImageViewId{}; 1691 return ImageViewId{};
1693 } 1692 }
1694 const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); 1693 const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode);
1695 return FindRenderTargetView(info, gpu_addr, is_clear); 1694 return FindRenderTargetView(info, gpu_addr);
1696} 1695}
1697 1696
1698template <class P> 1697template <class P>
1699ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { 1698ImageViewId TextureCache<P>::FindDepthBuffer() {
1700 const auto& regs = maxwell3d->regs; 1699 const auto& regs = maxwell3d->regs;
1701 if (!regs.zeta_enable) { 1700 if (!regs.zeta_enable) {
1702 return ImageViewId{}; 1701 return ImageViewId{};
@@ -1706,18 +1705,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
1706 return ImageViewId{}; 1705 return ImageViewId{};
1707 } 1706 }
1708 const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); 1707 const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode);
1709 return FindRenderTargetView(info, gpu_addr, is_clear); 1708 return FindRenderTargetView(info, gpu_addr);
1710} 1709}
1711 1710
1712template <class P> 1711template <class P>
1713ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, 1712ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) {
1714 bool is_clear) {
1715 const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{};
1716 ImageId image_id{}; 1713 ImageId image_id{};
1717 bool delete_state = has_deleted_images; 1714 bool delete_state = has_deleted_images;
1718 do { 1715 do {
1719 has_deleted_images = false; 1716 has_deleted_images = false;
1720 image_id = FindOrInsertImage(info, gpu_addr, options); 1717 image_id = FindOrInsertImage(info, gpu_addr);
1721 delete_state |= has_deleted_images; 1718 delete_state |= has_deleted_images;
1722 } while (has_deleted_images); 1719 } while (has_deleted_images);
1723 has_deleted_images = delete_state; 1720 has_deleted_images = delete_state;
@@ -1940,7 +1937,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1940 image.map_view_id = map_id; 1937 image.map_view_id = map_id;
1941 return; 1938 return;
1942 } 1939 }
1943 std::vector<ImageViewId> sparse_maps{}; 1940 boost::container::small_vector<ImageViewId, 16> sparse_maps;
1944 ForEachSparseSegment( 1941 ForEachSparseSegment(
1945 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { 1942 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1946 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); 1943 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2215,7 +2212,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
2215 2212
2216template <class P> 2213template <class P>
2217void TextureCache<P>::SynchronizeAliases(ImageId image_id) { 2214void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
2218 boost::container::small_vector<const AliasedImage*, 1> aliased_images; 2215 boost::container::small_vector<const AliasedImage*, 8> aliased_images;
2219 Image& image = slot_images[image_id]; 2216 Image& image = slot_images[image_id];
2220 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); 2217 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
2221 bool any_modified = True(image.flags & ImageFlagBits::GpuModified); 2218 bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index d96ddea9d..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
56struct AsyncDecodeContext { 56struct AsyncDecodeContext {
57 ImageId image_id; 57 ImageId image_id;
58 Common::ScratchBuffer<u8> decoded_data; 58 Common::ScratchBuffer<u8> decoded_data;
59 std::vector<BufferImageCopy> copies; 59 boost::container::small_vector<BufferImageCopy, 16> copies;
60 std::mutex mutex; 60 std::mutex mutex;
61 std::atomic_bool complete; 61 std::atomic_bool complete;
62}; 62};
@@ -178,9 +178,8 @@ public:
178 void SynchronizeComputeDescriptors(); 178 void SynchronizeComputeDescriptors();
179 179
180 /// Updates the Render Targets if they can be rescaled 180 /// Updates the Render Targets if they can be rescaled
181 /// @param is_clear True when the render targets are being used for clears
182 /// @retval True if the Render Targets have been rescaled. 181 /// @retval True if the Render Targets have been rescaled.
183 bool RescaleRenderTargets(bool is_clear); 182 bool RescaleRenderTargets();
184 183
185 /// Update bound render targets and upload memory if necessary 184 /// Update bound render targets and upload memory if necessary
186 /// @param is_clear True when the render targets are being used for clears 185 /// @param is_clear True when the render targets are being used for clears
@@ -336,14 +335,13 @@ private:
336 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); 335 [[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
337 336
338 /// Find or create an image view for the given color buffer index 337 /// Find or create an image view for the given color buffer index
339 [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); 338 [[nodiscard]] ImageViewId FindColorBuffer(size_t index);
340 339
341 /// Find or create an image view for the depth buffer 340 /// Find or create an image view for the depth buffer
342 [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); 341 [[nodiscard]] ImageViewId FindDepthBuffer();
343 342
344 /// Find or create a view for a render target with the given image parameters 343 /// Find or create a view for a render target with the given image parameters
345 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, 344 [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
346 bool is_clear);
347 345
348 /// Iterates over all the images in a region calling func 346 /// Iterates over all the images in a region calling func
349 template <typename Func> 347 template <typename Func>
@@ -431,7 +429,7 @@ private:
431 429
432 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; 430 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
433 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; 431 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
434 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; 432 std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
435 433
436 VAddr virtual_invalid_space{}; 434 VAddr virtual_invalid_space{};
437 435
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..9a618a57a 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -24,7 +24,7 @@
24#include "video_core/engines/maxwell_3d.h" 24#include "video_core/engines/maxwell_3d.h"
25#include "video_core/memory_manager.h" 25#include "video_core/memory_manager.h"
26#include "video_core/surface.h" 26#include "video_core/surface.h"
27#include "video_core/texture_cache/decode_bc4.h" 27#include "video_core/texture_cache/decode_bc.h"
28#include "video_core/texture_cache/format_lookup_table.h" 28#include "video_core/texture_cache/format_lookup_table.h"
29#include "video_core/texture_cache/formatter.h" 29#include "video_core/texture_cache/formatter.h"
30#include "video_core/texture_cache/samples_helper.h" 30#include "video_core/texture_cache/samples_helper.h"
@@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat;
61using VideoCore::Surface::PixelFormatFromRenderTargetFormat; 61using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
62using VideoCore::Surface::SurfaceType; 62using VideoCore::Surface::SurfaceType;
63 63
64constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
65
66struct LevelInfo { 64struct LevelInfo {
67 Extent3D size; 65 Extent3D size;
68 Extent3D block; 66 Extent3D block;
@@ -329,13 +327,13 @@ template <u32 GOB_EXTENT>
329 327
330[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( 328[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
331 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { 329 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
332 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); 330 const auto slice_offsets = CalculateSliceOffsets(new_info);
333 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); 331 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
334 const auto it = std::ranges::find(slice_offsets, diff); 332 const auto it = std::ranges::find(slice_offsets, diff);
335 if (it == slice_offsets.end()) { 333 if (it == slice_offsets.end()) {
336 return std::nullopt; 334 return std::nullopt;
337 } 335 }
338 const std::vector subresources = CalculateSliceSubresources(new_info); 336 const auto subresources = CalculateSliceSubresources(new_info);
339 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; 337 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
340 const ImageInfo& info = overlap.info; 338 const ImageInfo& info = overlap.info;
341 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { 339 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept {
612 } 610 }
613 return output_size; 611 return output_size;
614 } 612 }
615 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; 613 return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers *
614 ConvertedBytesPerBlock(info.format);
616} 615}
617 616
618u32 CalculateLayerStride(const ImageInfo& info) noexcept { 617u32 CalculateLayerStride(const ImageInfo& info) noexcept {
@@ -655,9 +654,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
655 return sizes; 654 return sizes;
656} 655}
657 656
658std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { 657boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
659 ASSERT(info.type == ImageType::e3D); 658 ASSERT(info.type == ImageType::e3D);
660 std::vector<u32> offsets; 659 boost::container::small_vector<u32, 16> offsets;
661 offsets.reserve(NumSlices(info)); 660 offsets.reserve(NumSlices(info));
662 661
663 const LevelInfo level_info = MakeLevelInfo(info); 662 const LevelInfo level_info = MakeLevelInfo(info);
@@ -679,9 +678,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
679 return offsets; 678 return offsets;
680} 679}
681 680
682std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { 681boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
682 const ImageInfo& info) {
683 ASSERT(info.type == ImageType::e3D); 683 ASSERT(info.type == ImageType::e3D);
684 std::vector<SubresourceBase> subresources; 684 boost::container::small_vector<SubresourceBase, 16> subresources;
685 subresources.reserve(NumSlices(info)); 685 subresources.reserve(NumSlices(info));
686 for (s32 level = 0; level < info.resources.levels; ++level) { 686 for (s32 level = 0; level < info.resources.levels; ++level) {
687 const s32 depth = AdjustMipSize(info.size.depth, level); 687 const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -723,8 +723,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
723 } 723 }
724} 724}
725 725
726std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, 726boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
727 SubresourceBase base, u32 up_scale, u32 down_shift) { 727 const ImageInfo& src,
728 SubresourceBase base,
729 u32 up_scale, u32 down_shift) {
728 ASSERT(dst.resources.levels >= src.resources.levels); 730 ASSERT(dst.resources.levels >= src.resources.levels);
729 731
730 const bool is_dst_3d = dst.type == ImageType::e3D; 732 const bool is_dst_3d = dst.type == ImageType::e3D;
@@ -733,7 +735,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
733 ASSERT(src.resources.levels == 1); 735 ASSERT(src.resources.levels == 1);
734 } 736 }
735 const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; 737 const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
736 std::vector<ImageCopy> copies; 738 boost::container::small_vector<ImageCopy, 16> copies;
737 copies.reserve(src.resources.levels); 739 copies.reserve(src.resources.levels);
738 for (s32 level = 0; level < src.resources.levels; ++level) { 740 for (s32 level = 0; level < src.resources.levels; ++level) {
739 ImageCopy& copy = copies.emplace_back(); 741 ImageCopy& copy = copies.emplace_back();
@@ -770,9 +772,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
770 return copies; 772 return copies;
771} 773}
772 774
773std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, 775boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
774 u32 down_shift) { 776 u32 up_scale,
775 std::vector<ImageCopy> copies; 777 u32 down_shift) {
778 boost::container::small_vector<ImageCopy, 16> copies;
776 copies.reserve(src.resources.levels); 779 copies.reserve(src.resources.levels);
777 const bool is_3d = src.type == ImageType::e3D; 780 const bool is_3d = src.type == ImageType::e3D;
778 for (s32 level = 0; level < src.resources.levels; ++level) { 781 for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -824,9 +827,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
824 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); 827 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
825} 828}
826 829
827std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 830boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
828 const ImageInfo& info, std::span<const u8> input, 831 GPUVAddr gpu_addr,
829 std::span<u8> output) { 832 const ImageInfo& info,
833 std::span<const u8> input,
834 std::span<u8> output) {
830 const size_t guest_size_bytes = input.size_bytes(); 835 const size_t guest_size_bytes = input.size_bytes();
831 const u32 bpp_log2 = BytesPerBlockLog2(info.format); 836 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
832 const Extent3D size = info.size; 837 const Extent3D size = info.size;
@@ -861,7 +866,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
861 info.tile_width_spacing); 866 info.tile_width_spacing);
862 size_t guest_offset = 0; 867 size_t guest_offset = 0;
863 u32 host_offset = 0; 868 u32 host_offset = 0;
864 std::vector<BufferImageCopy> copies(num_levels); 869 boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
865 870
866 for (s32 level = 0; level < num_levels; ++level) { 871 for (s32 level = 0; level < num_levels; ++level) {
867 const Extent3D level_size = AdjustMipSize(size, level); 872 const Extent3D level_size = AdjustMipSize(size, level);
@@ -939,7 +944,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
939 tile_size.height, output.subspan(output_offset)); 944 tile_size.height, output.subspan(output_offset));
940 945
941 output_offset += copy.image_extent.width * copy.image_extent.height * 946 output_offset += copy.image_extent.width * copy.image_extent.height *
942 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; 947 copy.image_subresource.num_layers *
948 BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
943 } else if (astc) { 949 } else if (astc) {
944 // BC1 uses 0.5 bytes per texel 950 // BC1 uses 0.5 bytes per texel
945 // BC3 uses 1 byte per texel 951 // BC3 uses 1 byte per texel
@@ -950,7 +956,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
950 956
951 const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; 957 const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
952 const u32 level_size = plane_dim * copy.image_extent.depth * 958 const u32 level_size = plane_dim * copy.image_extent.depth *
953 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; 959 copy.image_subresource.num_layers *
960 BytesPerBlock(PixelFormat::A8B8G8R8_UNORM);
954 decode_scratch.resize_destructive(level_size); 961 decode_scratch.resize_destructive(level_size);
955 962
956 Tegra::Texture::ASTC::Decompress( 963 Tegra::Texture::ASTC::Decompress(
@@ -970,15 +977,20 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
970 bpp_div; 977 bpp_div;
971 output_offset += static_cast<u32>(copy.buffer_size); 978 output_offset += static_cast<u32>(copy.buffer_size);
972 } else { 979 } else {
973 DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset)); 980 const Extent3D image_extent{
974 981 .width = copy.image_extent.width,
982 .height = copy.image_extent.height * copy.image_subresource.num_layers,
983 .depth = copy.image_extent.depth,
984 };
985 DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
975 output_offset += copy.image_extent.width * copy.image_extent.height * 986 output_offset += copy.image_extent.width * copy.image_extent.height *
976 copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; 987 copy.image_subresource.num_layers *
988 ConvertedBytesPerBlock(info.format);
977 } 989 }
978 } 990 }
979} 991}
980 992
981std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { 993boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
982 const Extent3D size = info.size; 994 const Extent3D size = info.size;
983 const u32 bytes_per_block = BytesPerBlock(info.format); 995 const u32 bytes_per_block = BytesPerBlock(info.format);
984 if (info.type == ImageType::Linear) { 996 if (info.type == ImageType::Linear) {
@@ -1006,7 +1018,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
1006 1018
1007 u32 host_offset = 0; 1019 u32 host_offset = 0;
1008 1020
1009 std::vector<BufferImageCopy> copies(num_levels); 1021 boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
1010 for (s32 level = 0; level < num_levels; ++level) { 1022 for (s32 level = 0; level < num_levels; ++level) {
1011 const Extent3D level_size = AdjustMipSize(size, level); 1023 const Extent3D level_size = AdjustMipSize(size, level);
1012 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); 1024 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -1042,10 +1054,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
1042 return AdjustMipBlockSize(num_tiles, level_info.block, level); 1054 return AdjustMipBlockSize(num_tiles, level_info.block, level);
1043} 1055}
1044 1056
1045std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { 1057boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
1046 const Extent2D tile_size = DefaultBlockSize(info.format); 1058 const Extent2D tile_size = DefaultBlockSize(info.format);
1047 if (info.type == ImageType::Linear) { 1059 if (info.type == ImageType::Linear) {
1048 return std::vector{SwizzleParameters{ 1060 return {SwizzleParameters{
1049 .num_tiles = AdjustTileSize(info.size, tile_size), 1061 .num_tiles = AdjustTileSize(info.size, tile_size),
1050 .block = {}, 1062 .block = {},
1051 .buffer_offset = 0, 1063 .buffer_offset = 0,
@@ -1057,7 +1069,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1057 const s32 num_levels = info.resources.levels; 1069 const s32 num_levels = info.resources.levels;
1058 1070
1059 u32 guest_offset = 0; 1071 u32 guest_offset = 0;
1060 std::vector<SwizzleParameters> params(num_levels); 1072 boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
1061 for (s32 level = 0; level < num_levels; ++level) { 1073 for (s32 level = 0; level < num_levels; ++level) {
1062 const Extent3D level_size = AdjustMipSize(size, level); 1074 const Extent3D level_size = AdjustMipSize(size, level);
1063 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); 1075 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
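
One detail of the ConvertImage hunk above worth spelling out: array layers are folded into the decode extent. Since each layer occupies width*height contiguous texels in the staging buffer, N layers decode exactly like a single image N times as tall, which is what the height multiplication achieves:

    // As in ConvertImage above: fold num_layers into height so one DecompressBCn
    // call walks every layer of the level back-to-back.
    const Extent3D image_extent{
        .width = copy.image_extent.width,
        .height = copy.image_extent.height * copy.image_subresource.num_layers,
        .depth = copy.image_extent.depth,
    };
    DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format);
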
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -5,6 +5,7 @@
5 5
6#include <optional> 6#include <optional>
7#include <span> 7#include <span>
8#include <boost/container/small_vector.hpp>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "common/scratch_buffer.h" 11#include "common/scratch_buffer.h"
@@ -40,9 +41,10 @@ struct OverlapResult {
40 41
41[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; 42[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
42 43
43[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); 44[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
44 45
45[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); 46[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
47 const ImageInfo& info);
46 48
47[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); 49[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
48 50
@@ -51,21 +53,18 @@ struct OverlapResult {
51 53
52[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; 54[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
53 55
54[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, 56[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
55 const ImageInfo& src, 57 const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
56 SubresourceBase base, u32 up_scale = 1, 58 u32 down_shift = 0);
57 u32 down_shift = 0);
58 59
59[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, 60[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
60 u32 up_scale = 1, 61 const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
61 u32 down_shift = 0);
62 62
63[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); 63[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
64 64
65[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, 65[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
66 GPUVAddr gpu_addr, const ImageInfo& info, 66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
67 std::span<const u8> input, 67 std::span<const u8> input, std::span<u8> output);
68 std::span<u8> output);
69 68
70[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 69[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
71 const ImageBase& image, std::span<u8> output); 70 const ImageBase& image, std::span<u8> output);
@@ -73,13 +72,15 @@ struct OverlapResult {
73void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 72void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
74 std::span<BufferImageCopy> copies); 73 std::span<BufferImageCopy> copies);
75 74
76[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); 75[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
76 const ImageInfo& info);
77 77
78[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); 78[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
79 79
80[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); 80[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
81 81
82[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); 82[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
83 const ImageInfo& info);
83 84
84void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 85void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
85 std::span<const BufferImageCopy> copies, std::span<const u8> memory, 86 std::span<const BufferImageCopy> copies, std::span<const u8> memory,
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp
index 671212a49..16ddbe320 100644
--- a/src/video_core/textures/bcn.cpp
+++ b/src/video_core/textures/bcn.cpp
@@ -3,7 +3,6 @@
3 3
4#include <stb_dxt.h> 4#include <stb_dxt.h>
5#include <string.h> 5#include <string.h>
6
7#include "common/alignment.h" 6#include "common/alignment.h"
8#include "video_core/textures/bcn.h" 7#include "video_core/textures/bcn.h"
9#include "video_core/textures/workers.h" 8#include "video_core/textures/workers.h"
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h
index 6464af885..d5d2a16c9 100644
--- a/src/video_core/textures/bcn.h
+++ b/src/video_core/textures/bcn.h
@@ -4,14 +4,13 @@
4#pragma once 4#pragma once
5 5
6#include <span> 6#include <span>
7#include <stdint.h> 7
8#include "common/common_types.h"
8 9
9namespace Tegra::Texture::BCN { 10namespace Tegra::Texture::BCN {
10 11
11void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, 12void CompressBC1(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output);
12 std::span<uint8_t> output);
13 13
14void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, 14void CompressBC3(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output);
15 std::span<uint8_t> output);
16 15
17} // namespace Tegra::Texture::BCN 16} // namespace Tegra::Texture::BCN
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index 155599316..1f353d2df 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -13,7 +13,7 @@
13 13
14namespace VideoCommon { 14namespace VideoCommon {
15 15
16std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 16std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
17 const TransformFeedbackState& state) { 17 const TransformFeedbackState& state) {
18 static constexpr std::array VECTORS{ 18 static constexpr std::array VECTORS{
19 28U, // gl_Position 19 28U, // gl_Position
@@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
62 216U, // gl_TexCoord[6] 62 216U, // gl_TexCoord[6]
63 220U, // gl_TexCoord[7] 63 220U, // gl_TexCoord[7]
64 }; 64 };
65 std::vector<Shader::TransformFeedbackVarying> xfb(256); 65 std::array<Shader::TransformFeedbackVarying, 256> xfb{};
66 u32 count{0};
66 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { 67 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
67 const auto& locations = state.varyings[buffer]; 68 const auto& locations = state.varyings[buffer];
68 const auto& layout = state.layouts[buffer]; 69 const auto& layout = state.layouts[buffer];
@@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
103 } 104 }
104 } 105 }
105 xfb[attribute] = varying; 106 xfb[attribute] = varying;
107 count = std::max(count, attribute);
106 highest = std::max(highest, (base_offset + varying.components) * 4); 108 highest = std::max(highest, (base_offset + varying.components) * 4);
107 } 109 }
108 UNIMPLEMENTED_IF(highest != layout.stride); 110 UNIMPLEMENTED_IF(highest != layout.stride);
109 } 111 }
110 return xfb; 112 return {xfb, count + 1};
111} 113}
112 114
113} // namespace VideoCommon 115} // namespace VideoCommon
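
A usage sketch for the new return shape; the caller names here are hypothetical:

    // The array is fixed at 256 entries (the attribute space) to avoid a heap
    // allocation per pipeline; only the first `count` entries are meaningful.
    const auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(state);
    for (u32 i = 0; i < count; ++i) {
        ConsumeVarying(varyings[i]); // hypothetical consumer
    }
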
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
index d13eb16c3..401b1352a 100644
--- a/src/video_core/transform_feedback.h
+++ b/src/video_core/transform_feedback.h
@@ -24,7 +24,7 @@ struct TransformFeedbackState {
         varyings;
 };
 
-std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
+std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
     const TransformFeedbackState& state);
 
 } // namespace VideoCommon
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
index 9de484c29..67e8065a4 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp
@@ -7,10 +7,10 @@
 
 namespace Vulkan {
 namespace {
-VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
-                  VkDebugUtilsMessageTypeFlagsEXT type,
-                  const VkDebugUtilsMessengerCallbackDataEXT* data,
-                  [[maybe_unused]] void* user_data) {
+VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
+                           VkDebugUtilsMessageTypeFlagsEXT type,
+                           const VkDebugUtilsMessengerCallbackDataEXT* data,
+                           [[maybe_unused]] void* user_data) {
     // Skip logging known false-positive validation errors
     switch (static_cast<u32>(data->messageIdNumber)) {
 #ifdef ANDROID
@@ -62,9 +62,26 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
     }
     return VK_FALSE;
 }
+
+VkBool32 DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType,
+                             uint64_t object, size_t location, int32_t messageCode,
+                             const char* pLayerPrefix, const char* pMessage, void* pUserData) {
+    const VkDebugReportFlagBitsEXT severity = static_cast<VkDebugReportFlagBitsEXT>(flags);
+    const std::string_view message{pMessage};
+    if (severity & VK_DEBUG_REPORT_ERROR_BIT_EXT) {
+        LOG_CRITICAL(Render_Vulkan, "{}", message);
+    } else if (severity & VK_DEBUG_REPORT_WARNING_BIT_EXT) {
+        LOG_WARNING(Render_Vulkan, "{}", message);
+    } else if (severity & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) {
+        LOG_INFO(Render_Vulkan, "{}", message);
+    } else if (severity & VK_DEBUG_REPORT_DEBUG_BIT_EXT) {
+        LOG_DEBUG(Render_Vulkan, "{}", message);
+    }
+    return VK_FALSE;
+}
 } // Anonymous namespace
 
-vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) {
+vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) {
     return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{
         .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
         .pNext = nullptr,
@@ -76,7 +93,18 @@ vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) {
         .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT |
                        VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                        VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
-        .pfnUserCallback = Callback,
+        .pfnUserCallback = DebugUtilCallback,
+        .pUserData = nullptr,
+    });
+}
+
+vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance) {
+    return instance.CreateDebugReportCallback({
+        .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
+        .pNext = nullptr,
+        .flags = VK_DEBUG_REPORT_DEBUG_BIT_EXT | VK_DEBUG_REPORT_INFORMATION_BIT_EXT |
+                 VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT,
+        .pfnCallback = DebugReportCallback,
         .pUserData = nullptr,
     });
 }
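
For reference, the severity fan-out added above maps VK_EXT_debug_report flag bits onto log levels, highest severity first. A standalone mirror of that mapping (the enum values match VkDebugReportFlagBitsEXT):

    #include <cstdint>
    #include <string_view>

    enum ReportFlag : std::uint32_t {
        ReportInformation = 0x01,        // VK_DEBUG_REPORT_INFORMATION_BIT_EXT
        ReportWarning = 0x02,            // VK_DEBUG_REPORT_WARNING_BIT_EXT
        ReportPerformanceWarning = 0x04, // VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT
        ReportError = 0x08,              // VK_DEBUG_REPORT_ERROR_BIT_EXT
        ReportDebug = 0x10,              // VK_DEBUG_REPORT_DEBUG_BIT_EXT
    };

    // Highest-severity bit wins, as in the callback above.
    constexpr std::string_view LevelFor(std::uint32_t flags) {
        if (flags & ReportError) {
            return "critical";
        }
        if (flags & ReportWarning) {
            return "warning";
        }
        if (flags & ReportInformation) {
            return "info";
        }
        return "debug";
    }

    static_assert(LevelFor(ReportError | ReportWarning) == "critical");
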
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h
index 71b1f69ec..a8af7b406 100644
--- a/src/video_core/vulkan_common/vulkan_debug_callback.h
+++ b/src/video_core/vulkan_common/vulkan_debug_callback.h
@@ -7,6 +7,8 @@
 
 namespace Vulkan {
 
-vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance);
+vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance);
+
+vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance);
 
 } // namespace Vulkan
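
Together with the instance changes further down, the intended selection is: create a debug-utils messenger when VK_EXT_debug_utils was enabled, otherwise fall back to the older debug-report callback. A sketch with stand-in types (only the two Create* names come from the header above):

    #include <optional>

    struct Instance {};            // stand-ins; the real types live in
    struct DebugUtilsMessenger {}; // vulkan_wrapper.h
    struct DebugReportCallback {};

    DebugUtilsMessenger CreateDebugUtilsCallback(const Instance&) { return {}; }
    DebugReportCallback CreateDebugReportCallback(const Instance&) { return {}; }

    struct DebugHandles {
        std::optional<DebugUtilsMessenger> messenger;
        std::optional<DebugReportCallback> report;
    };

    // Exactly one handle is created, matching whichever extension was enabled.
    DebugHandles MakeDebugHandles(const Instance& instance, bool has_debug_utils) {
        DebugHandles handles{};
        if (has_debug_utils) {
            handles.messenger = CreateDebugUtilsCallback(instance);
        } else {
            handles.report = CreateDebugReportCallback(instance);
        }
        return handles;
    }
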
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index dcedf4425..421e71e5a 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -22,6 +22,8 @@
 #include <adrenotools/bcenabler.h>
 #endif
 
+#include <vk_mem_alloc.h>
+
 namespace Vulkan {
 using namespace Common::Literals;
 namespace {
@@ -316,6 +318,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
 std::vector<const char*> ExtensionListForVulkan(
     const std::set<std::string, std::less<>>& extensions) {
     std::vector<const char*> output;
+    output.reserve(extensions.size());
     for (const auto& extension : extensions) {
         output.push_back(extension.c_str());
     }
@@ -346,7 +349,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     const bool is_s8gen2 = device_id == 0x43050a01;
     const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY;
 
-    if ((is_mvk || is_qualcomm || is_turnip) && !is_suitable) {
+    if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) {
         LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway");
     } else if (!is_suitable) {
         throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER);
@@ -525,6 +528,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     }
 
     sets_per_pool = 64;
+    if (extensions.extended_dynamic_state3 && is_amd_driver &&
+        properties.properties.driverVersion >= VK_MAKE_API_VERSION(0, 2, 0, 270)) {
+        LOG_WARNING(Render_Vulkan,
+                    "AMD drivers after 23.5.2 have broken extendedDynamicState3ColorBlendEquation");
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false;
+        features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false;
+        dynamic_state3_blending = false;
+    }
     if (is_amd_driver) {
         // AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2.
         sets_per_pool = 96;
@@ -562,6 +573,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
         LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits");
         cant_blit_msaa = true;
     }
+    has_broken_compute =
+        CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) &&
+        !Settings::values.enable_compute_pipelines.GetValue();
     if (is_intel_anv || (is_qualcomm && !is_s8gen2)) {
         LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format");
         must_emulate_bgr565 = true;
@@ -592,9 +606,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
 
     graphics_queue = logical.GetQueue(graphics_family);
     present_queue = logical.GetQueue(present_family);
+
+    VmaVulkanFunctions functions{};
+    functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr;
+    functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr;
+
+    const VmaAllocatorCreateInfo allocator_info = {
+        .flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
+        .physicalDevice = physical,
+        .device = *logical,
+        .preferredLargeHeapBlockSize = 0,
+        .pAllocationCallbacks = nullptr,
+        .pDeviceMemoryCallbacks = nullptr,
+        .pHeapSizeLimit = nullptr,
+        .pVulkanFunctions = &functions,
+        .instance = instance,
+        .vulkanApiVersion = VK_API_VERSION_1_1,
+        .pTypeExternalMemoryHandleTypes = nullptr,
+    };
+
+    vk::Check(vmaCreateAllocator(&allocator_info, &allocator));
 }
 
-Device::~Device() = default;
+Device::~Device() {
+    vmaDestroyAllocator(allocator);
+}
 
 VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
                                     FormatType format_type) const {
@@ -877,6 +913,10 @@ bool Device::GetSuitability(bool requires_swapchain) {
     properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES;
     SetNext(next, properties.driver);
 
+    // Retrieve subgroup properties.
+    properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
+    SetNext(next, properties.subgroup_properties);
+
     // Retrieve relevant extension properties.
     if (extensions.shader_float_controls) {
         properties.float_controls.sType =
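
Two additions above hinge on decoding packed Vulkan driver versions: the AMD extendedDynamicState3 gate compares against VK_MAKE_API_VERSION(0, 2, 0, 270), and CheckBrokenCompute (in the header below) decodes major.minor.patch. A self-contained mirror of that arithmetic, using the standard Vulkan bit layout:

    #include <cstdint>

    // Same packing as VK_MAKE_API_VERSION / VK_API_VERSION_{MAJOR,MINOR,PATCH}.
    constexpr std::uint32_t MakeVersion(std::uint32_t variant, std::uint32_t major,
                                        std::uint32_t minor, std::uint32_t patch) {
        return (variant << 29) | (major << 22) | (minor << 12) | patch;
    }
    constexpr std::uint32_t Major(std::uint32_t v) { return (v >> 22) & 0x7FU; }
    constexpr std::uint32_t Minor(std::uint32_t v) { return (v >> 12) & 0x3FFU; }
    constexpr std::uint32_t Patch(std::uint32_t v) { return v & 0xFFFU; }

    // Mirrors the Intel range in CheckBrokenCompute: 0.405.0 <= v < 0.405.286.
    constexpr bool IntelBrokenCompute(std::uint32_t v) {
        return Major(v) == 0 && Minor(v) == 405 && Patch(v) < 286;
    }

    static_assert(IntelBrokenCompute(MakeVersion(0, 0, 405, 285)));
    static_assert(!IntelBrokenCompute(MakeVersion(0, 0, 405, 286)));
    static_assert(MakeVersion(0, 2, 0, 270) > MakeVersion(0, 2, 0, 269));
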
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index 8c7e44fcb..1f17265d5 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -10,9 +10,12 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "common/settings.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+VK_DEFINE_HANDLE(VmaAllocator)
+
 // Define all features which may be used by the implementation here.
 // Vulkan version in the macro describes the minimum version required for feature availability.
 // If the Vulkan version is lower than the required version, the named extension is required.
@@ -198,6 +201,11 @@ public:
         return dld;
     }
 
+    /// Returns the VMA allocator.
+    VmaAllocator GetAllocator() const {
+        return allocator;
+    }
+
     /// Returns the logical device.
     const vk::Device& GetLogical() const {
         return logical;
@@ -285,6 +293,11 @@ public:
         return features.features.textureCompressionASTC_LDR;
     }
 
+    /// Returns true if BCn is natively supported.
+    bool IsOptimalBcnSupported() const {
+        return features.features.textureCompressionBC;
+    }
+
     /// Returns true if descriptor aliasing is natively supported.
     bool IsDescriptorAliasingSupported() const {
         return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
@@ -315,6 +328,11 @@ public:
         return properties.subgroup_size_control.requiredSubgroupSizeStages & stage;
     }
 
+    /// Returns true if the device supports the provided subgroup feature.
+    bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature) const {
+        return properties.subgroup_properties.supportedOperations & feature;
+    }
+
     /// Returns the maximum number of push descriptors.
     u32 MaxPushDescriptors() const {
         return properties.push_descriptor.maxPushDescriptors;
@@ -380,6 +398,11 @@ public:
         return extensions.swapchain_mutable_format;
     }
 
+    /// Returns true if VK_KHR_shader_float_controls is enabled.
+    bool IsKhrShaderFloatControlsSupported() const {
+        return extensions.shader_float_controls;
+    }
+
     /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
     bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
         return extensions.workgroup_memory_explicit_layout;
@@ -405,6 +428,11 @@ public:
         return extensions.sampler_filter_minmax;
     }
 
+    /// Returns true if the device supports VK_EXT_shader_stencil_export.
+    bool IsExtShaderStencilExportSupported() const {
+        return extensions.shader_stencil_export;
+    }
+
     /// Returns true if the device supports VK_EXT_depth_range_unrestricted.
     bool IsExtDepthRangeUnrestrictedSupported() const {
         return extensions.depth_range_unrestricted;
@@ -474,9 +502,9 @@ public:
474 return extensions.vertex_input_dynamic_state; 502 return extensions.vertex_input_dynamic_state;
475 } 503 }
476 504
477 /// Returns true if the device supports VK_EXT_shader_stencil_export. 505 /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation
478 bool IsExtShaderStencilExportSupported() const { 506 bool IsExtShaderDemoteToHelperInvocationSupported() const {
479 return extensions.shader_stencil_export; 507 return extensions.shader_demote_to_helper_invocation;
480 } 508 }
481 509
482 /// Returns true if the device supports VK_EXT_conservative_rasterization. 510 /// Returns true if the device supports VK_EXT_conservative_rasterization.
@@ -510,12 +538,17 @@ public:
         if (extensions.spirv_1_4) {
             return 0x00010400U;
         }
-        return 0x00010000U;
+        return 0x00010300U;
     }
 
     /// Returns true when a known debugging tool is attached.
     bool HasDebuggingToolAttached() const {
-        return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue();
+        return has_renderdoc || has_nsight_graphics;
+    }
+
+    /// @returns True if compute pipelines can cause crashing.
+    bool HasBrokenCompute() const {
+        return has_broken_compute;
     }
 
     /// Returns true when the device does not properly support cube compatibility.
@@ -575,10 +608,30 @@ public:
         return properties.properties.limits.maxVertexInputBindings;
     }
 
+    u32 GetMaxViewports() const {
+        return properties.properties.limits.maxViewports;
+    }
+
     bool SupportsConditionalBarriers() const {
         return supports_conditional_barriers;
     }
 
+    [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id,
+                                                           u32 driver_version) {
+        if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) {
+            const u32 major = VK_API_VERSION_MAJOR(driver_version);
+            const u32 minor = VK_API_VERSION_MINOR(driver_version);
+            const u32 patch = VK_API_VERSION_PATCH(driver_version);
+            if (major == 0 && minor == 405 && patch < 286) {
+                LOG_WARNING(
+                    Render_Vulkan,
+                    "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute");
+                return true;
+            }
+        }
+        return false;
+    }
+
 private:
     /// Checks if the physical device is suitable and configures the object state
     /// with all necessary info about its properties.
@@ -608,6 +661,7 @@ private:
 
 private:
     VkInstance instance;         ///< Vulkan instance.
+    VmaAllocator allocator;      ///< VMA allocator.
     vk::DeviceDispatch dld;      ///< Device function pointers.
     vk::PhysicalDevice physical; ///< Physical device.
     vk::Device logical;          ///< Logical device.
@@ -650,6 +704,7 @@ private:
 
     struct Properties {
         VkPhysicalDeviceDriverProperties driver{};
+        VkPhysicalDeviceSubgroupProperties subgroup_properties{};
         VkPhysicalDeviceFloatControlsProperties float_controls{};
         VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{};
         VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{};
@@ -672,6 +727,7 @@ private:
     bool is_integrated{};                 ///< Is GPU an iGPU.
     bool is_virtual{};                    ///< Is GPU a virtual GPU.
     bool is_non_gpu{};                    ///< Is SoftwareRasterizer, FPGA, non-GPU device.
+    bool has_broken_compute{};            ///< Compute shaders can cause crashes
     bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit
     bool has_renderdoc{};                 ///< Has RenderDoc attached
     bool has_nsight_graphics{};           ///< Has Nsight Graphics attached
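
The new IsSubgroupFeatureSupported accessor is a plain mask test on VkPhysicalDeviceSubgroupProperties::supportedOperations. A standalone sketch of the same test (the bit values match VkSubgroupFeatureFlagBits):

    #include <cstdint>

    enum SubgroupFeature : std::uint32_t {
        SubgroupBasic = 0x01,      // VK_SUBGROUP_FEATURE_BASIC_BIT
        SubgroupVote = 0x02,       // VK_SUBGROUP_FEATURE_VOTE_BIT
        SubgroupArithmetic = 0x04, // VK_SUBGROUP_FEATURE_ARITHMETIC_BIT
        SubgroupBallot = 0x08,     // VK_SUBGROUP_FEATURE_BALLOT_BIT
        SubgroupShuffle = 0x10,    // VK_SUBGROUP_FEATURE_SHUFFLE_BIT
    };

    struct SubgroupProperties {
        std::uint32_t supported_operations{};
    };

    // Mirrors Device::IsSubgroupFeatureSupported above.
    constexpr bool IsSubgroupFeatureSupported(SubgroupProperties props, SubgroupFeature feature) {
        return (props.supported_operations & feature) != 0;
    }

    static_assert(IsSubgroupFeatureSupported({SubgroupBasic | SubgroupBallot}, SubgroupBallot));
    static_assert(!IsSubgroupFeatureSupported({SubgroupBasic}, SubgroupShuffle));
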
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp
index b6d83e446..7624a9b32 100644
--- a/src/video_core/vulkan_common/vulkan_instance.cpp
+++ b/src/video_core/vulkan_common/vulkan_instance.cpp
@@ -31,10 +31,34 @@
 
 namespace Vulkan {
 namespace {
+
+[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld,
+                                          std::span<const char* const> extensions) {
+    const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
+    if (!properties) {
+        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
+        return false;
+    }
+    for (const char* extension : extensions) {
+        const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) {
+            return std::strcmp(extension, prop.extensionName) == 0;
+        });
+        if (it == properties->end()) {
+            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
+            return false;
+        }
+    }
+    return true;
+}
+
 [[nodiscard]] std::vector<const char*> RequiredExtensions(
-    Core::Frontend::WindowSystemType window_type, bool enable_validation) {
+    const vk::InstanceDispatch& dld, Core::Frontend::WindowSystemType window_type,
+    bool enable_validation) {
     std::vector<const char*> extensions;
     extensions.reserve(6);
+#ifdef __APPLE__
+    extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
+#endif
     switch (window_type) {
     case Core::Frontend::WindowSystemType::Headless:
         break;
@@ -66,35 +90,14 @@ namespace {
         extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
     }
     if (enable_validation) {
-        extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
+        const bool debug_utils =
+            AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME});
+        extensions.push_back(debug_utils ? VK_EXT_DEBUG_UTILS_EXTENSION_NAME
+                                         : VK_EXT_DEBUG_REPORT_EXTENSION_NAME);
     }
-    extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
-
-#ifdef __APPLE__
-    extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME);
-#endif
     return extensions;
 }
 
-[[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld,
-                                          std::span<const char* const> extensions) {
-    const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld);
-    if (!properties) {
-        LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
-        return false;
-    }
-    for (const char* extension : extensions) {
-        const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) {
-            return std::strcmp(extension, prop.extensionName) == 0;
-        });
-        if (it == properties->end()) {
-            LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
-            return false;
-        }
-    }
-    return true;
-}
-
 [[nodiscard]] std::vector<const char*> Layers(bool enable_validation) {
     std::vector<const char*> layers;
     if (enable_validation) {
@@ -138,7 +141,8 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD
         LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers");
         throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED);
     }
-    const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation);
+    const std::vector<const char*> extensions =
+        RequiredExtensions(dld, window_type, enable_validation);
     if (!AreExtensionsSupported(dld, extensions)) {
         throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT);
     }
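
The reordering above exists so RequiredExtensions can call AreExtensionsSupported before choosing between VK_EXT_debug_utils and the VK_EXT_debug_report fallback. The probe itself is just a name lookup; a self-contained sketch of the same pattern:

    #include <algorithm>
    #include <array>
    #include <cstring>
    #include <span>
    #include <string>

    bool AreSupported(std::span<const std::string> available,
                      std::span<const char* const> wanted) {
        return std::ranges::all_of(wanted, [&](const char* name) {
            return std::ranges::any_of(available, [&](const std::string& ext) {
                return std::strcmp(ext.c_str(), name) == 0;
            });
        });
    }

    // Prefer debug_utils; fall back to debug_report, as in the hunk above.
    const char* PickValidationExtension(std::span<const std::string> available) {
        static constexpr std::array wanted{"VK_EXT_debug_utils"};
        return AreSupported(available, wanted) ? "VK_EXT_debug_utils"
                                               : "VK_EXT_debug_report";
    }
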
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index e28a556f8..a2ef0efa4 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -6,8 +6,6 @@
 #include <optional>
 #include <vector>
 
-#include <glad/glad.h>
-
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
@@ -17,6 +15,8 @@
17#include "video_core/vulkan_common/vulkan_memory_allocator.h" 15#include "video_core/vulkan_common/vulkan_memory_allocator.h"
18#include "video_core/vulkan_common/vulkan_wrapper.h" 16#include "video_core/vulkan_common/vulkan_wrapper.h"
19 17
18#include <vk_mem_alloc.h>
19
20namespace Vulkan { 20namespace Vulkan {
21namespace { 21namespace {
22struct Range { 22struct Range {
@@ -49,22 +49,45 @@ struct Range {
     case MemoryUsage::Download:
         return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+    case MemoryUsage::Stream:
+        return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
     }
     ASSERT_MSG(false, "Invalid memory usage={}", usage);
     return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
 }
 
-constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
-    .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
-    .pNext = nullptr,
-#ifdef _WIN32
-    .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
-#elif __unix__
-    .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
-#else
-    .handleTypes = 0,
-#endif
-};
+[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferedVmaFlags(MemoryUsage usage) {
+    return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+                                             : VkMemoryPropertyFlagBits{};
+}
+
+[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
+    switch (usage) {
+    case MemoryUsage::Upload:
+    case MemoryUsage::Stream:
+        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+    case MemoryUsage::Download:
+        return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+    case MemoryUsage::DeviceLocal:
+        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+               VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
+    }
+    return {};
+}
+
+[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
+    switch (usage) {
+    case MemoryUsage::DeviceLocal:
+    case MemoryUsage::Stream:
+        return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+    case MemoryUsage::Upload:
+    case MemoryUsage::Download:
+        return VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
+    }
+    return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+}
+
 } // Anonymous namespace
 
 class MemoryAllocation {
@@ -74,14 +97,6 @@ public:
         : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
           property_flags{properties}, shifted_memory_type{1U << type} {}
 
-#if defined(_WIN32) || defined(__unix__)
-    ~MemoryAllocation() {
-        if (owning_opengl_handle != 0) {
-            glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
-        }
-    }
-#endif
-
     MemoryAllocation& operator=(const MemoryAllocation&) = delete;
     MemoryAllocation(const MemoryAllocation&) = delete;
 
@@ -120,31 +135,6 @@ public:
         return memory_mapped_span;
     }
 
-#ifdef _WIN32
-    [[nodiscard]] u32 ExportOpenGLHandle() {
-        if (!owning_opengl_handle) {
-            glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
-            glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
-                                         GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
-                                         memory.GetMemoryWin32HandleKHR());
-        }
-        return owning_opengl_handle;
-    }
-#elif __unix__
-    [[nodiscard]] u32 ExportOpenGLHandle() {
-        if (!owning_opengl_handle) {
-            glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
-            glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
-                                memory.GetMemoryFdKHR());
-        }
-        return owning_opengl_handle;
-    }
-#else
-    [[nodiscard]] u32 ExportOpenGLHandle() {
-        return 0;
-    }
-#endif
-
     /// Returns whether this allocation is compatible with the arguments.
     [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
         return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
@@ -182,9 +172,6 @@ private:
     const u32 shifted_memory_type;    ///< Shifted Vulkan memory type.
     std::vector<Range> commits;       ///< All commit ranges done from this allocation.
     std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
-#if defined(_WIN32) || defined(__unix__)
-    u32 owning_opengl_handle{};       ///< Owning OpenGL memory object handle.
-#endif
 };
 
 MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@@ -216,24 +203,70 @@ std::span<u8> MemoryCommit::Map() {
     return span;
 }
 
-u32 MemoryCommit::ExportOpenGLHandle() const {
-    return allocation->ExportOpenGLHandle();
-}
-
 void MemoryCommit::Release() {
     if (allocation) {
         allocation->Free(begin);
     }
 }
 
-MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
-    : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
-      export_allocations{export_allocations_},
+MemoryAllocator::MemoryAllocator(const Device& device_)
+    : device{device_}, allocator{device.GetAllocator()},
+      properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
       buffer_image_granularity{
           device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
 
 MemoryAllocator::~MemoryAllocator() = default;
 
+vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
+    const VmaAllocationCreateInfo alloc_ci = {
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
+        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+        .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+        .preferredFlags = 0,
+        .memoryTypeBits = 0,
+        .pool = VK_NULL_HANDLE,
+        .pUserData = nullptr,
+        .priority = 0.f,
+    };
+
+    VkImage handle{};
+    VmaAllocation allocation{};
+
+    vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
+
+    return vk::Image(handle, *device.GetLogical(), allocator, allocation,
+                     device.GetDispatchLoader());
+}
+
+vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
+    const VmaAllocationCreateInfo alloc_ci = {
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT |
+                 MemoryUsageVmaFlags(usage),
+        .usage = MemoryUsageVma(usage),
+        .requiredFlags = 0,
+        .preferredFlags = MemoryUsagePreferedVmaFlags(usage),
+        .memoryTypeBits = 0,
+        .pool = VK_NULL_HANDLE,
+        .pUserData = nullptr,
+        .priority = 0.f,
+    };
+
+    VkBuffer handle{};
+    VmaAllocationInfo alloc_info{};
+    VmaAllocation allocation{};
+    VkMemoryPropertyFlags property_flags{};
+
+    vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
+    vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
+
+    u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
+    const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
+    const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
+                      device.GetDispatchLoader());
+}
+
 MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
     // Find the fastest memory flags we can afford with the current requirements
     const u32 type_mask = requirements.memoryTypeBits;
@@ -253,25 +286,11 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M
     return TryCommit(requirements, flags).value();
 }
 
-MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
-    auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
-    buffer.BindMemory(commit.Memory(), commit.Offset());
-    return commit;
-}
-
-MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
-    VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image);
-    requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity);
-    auto commit = Commit(requirements, usage);
-    image.BindMemory(commit.Memory(), commit.Offset());
-    return commit;
-}
-
 bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
     const u32 type = FindType(flags, type_mask).value();
     vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
         .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr,
+        .pNext = nullptr,
         .allocationSize = size,
         .memoryTypeIndex = type,
     });
@@ -342,16 +361,4 @@ std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty
     return std::nullopt;
 }
 
-bool IsHostVisible(MemoryUsage usage) noexcept {
-    switch (usage) {
-    case MemoryUsage::DeviceLocal:
-        return false;
-    case MemoryUsage::Upload:
-    case MemoryUsage::Download:
-        return true;
-    }
-    ASSERT_MSG(false, "Invalid memory usage={}", usage);
-    return false;
-}
-
 } // namespace Vulkan
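
End to end, the new allocation path replaces Commit-plus-BindMemory with a single CreateBuffer call that also maps the memory. A hedged sketch of a staging upload against the interfaces shown in these files (the helper name and buffer parameters are assumptions, not code from this change):

    #include <cstring>

    #include <vulkan/vulkan.h>

    #include "video_core/vulkan_common/vulkan_memory_allocator.h"
    #include "video_core/vulkan_common/vulkan_wrapper.h"

    namespace Vulkan {

    vk::Buffer MakeStagingBuffer(const MemoryAllocator& allocator, const void* src,
                                 VkDeviceSize size) {
        const VkBufferCreateInfo ci{
            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            .pNext = nullptr,
            .flags = 0,
            .size = size,
            .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
            .queueFamilyIndexCount = 0,
            .pQueueFamilyIndices = nullptr,
        };
        // Upload allocations are created mapped, so Mapped() is expected non-empty.
        vk::Buffer buffer = allocator.CreateBuffer(ci, MemoryUsage::Upload);
        std::memcpy(buffer.Mapped().data(), src, static_cast<std::size_t>(size));
        buffer.Flush(); // No-op when the chosen memory type is host coherent.
        return buffer;
    }

    } // namespace Vulkan
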
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h
index a5bff03fe..f449bc8d0 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.h
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h
@@ -9,6 +9,8 @@
9#include "common/common_types.h" 9#include "common/common_types.h"
10#include "video_core/vulkan_common/vulkan_wrapper.h" 10#include "video_core/vulkan_common/vulkan_wrapper.h"
11 11
12VK_DEFINE_HANDLE(VmaAllocator)
13
12namespace Vulkan { 14namespace Vulkan {
13 15
14class Device; 16class Device;
@@ -17,9 +19,11 @@ class MemoryAllocation;
 
 /// Hints and requirements for the backing memory type of a commit
 enum class MemoryUsage {
-    DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU
+    DeviceLocal, ///< Requests device local host visible buffer, falling back to device local
+                 ///< memory.
     Upload,      ///< Requires a host visible memory type optimized for CPU to GPU uploads
     Download,    ///< Requires a host visible memory type optimized for GPU to CPU readbacks
+    Stream,      ///< Requests device local host visible buffer, falling back to host memory.
 };
 
 /// Ownership handle of a memory commitment.
@@ -41,9 +45,6 @@ public:
     /// It will map the backing allocation if it hasn't been mapped before.
     std::span<u8> Map();
 
-    /// Returns an non-owning OpenGL handle, creating one if it doesn't exist.
-    u32 ExportOpenGLHandle() const;
-
     /// Returns the Vulkan memory handler.
     VkDeviceMemory Memory() const {
         return memory;
@@ -74,16 +75,19 @@ public:
      * Construct memory allocator
      *
      * @param device_             Device to allocate from
-     * @param export_allocations_ True when allocations have to be exported
      *
      * @throw vk::Exception on failure
      */
-    explicit MemoryAllocator(const Device& device_, bool export_allocations_);
+    explicit MemoryAllocator(const Device& device_);
     ~MemoryAllocator();
 
     MemoryAllocator& operator=(const MemoryAllocator&) = delete;
     MemoryAllocator(const MemoryAllocator&) = delete;
 
+    vk::Image CreateImage(const VkImageCreateInfo& ci) const;
+
+    vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const;
+
     /**
      * Commits a memory with the specified requirements.
      *
@@ -97,9 +101,6 @@ public:
     /// Commits memory required by the buffer and binds it.
     MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
 
-    /// Commits memory required by the image and binds it.
-    MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
-
 private:
     /// Tries to allocate a chunk of memory.
     bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
@@ -117,15 +118,12 @@ private:
     /// Returns index to the fastest memory type compatible with the passed requirements.
     std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
 
     const Device& device;                              ///< Device handle.
-    const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
-    const bool export_allocations; ///< True when memory allocations have to be exported.
+    VmaAllocator allocator;                            ///< Vma allocator.
+    const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
     std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
     VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
                                            // and optimal images
 };
 
-/// Returns true when a memory usage is guaranteed to be host visible.
-bool IsHostVisible(MemoryUsage usage) noexcept;
-
 } // namespace Vulkan
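
Since IsHostVisible is gone (under VMA even DeviceLocal may end up host visible), the enumerator comments above are the contract. A hypothetical chooser illustrating when each usage applies; real call sites name the usage directly:

    #include "video_core/vulkan_common/vulkan_memory_allocator.h"

    namespace Vulkan {

    MemoryUsage ChooseUsage(bool gpu_reads_back_to_cpu, bool rewritten_every_frame,
                            bool written_once_by_cpu) {
        if (gpu_reads_back_to_cpu) {
            return MemoryUsage::Download; // host cached, random CPU reads
        }
        if (rewritten_every_frame) {
            return MemoryUsage::Stream; // device local if available, else host
        }
        if (written_once_by_cpu) {
            return MemoryUsage::Upload; // sequential-write staging
        }
        return MemoryUsage::DeviceLocal; // GPU-first, may still be mappable
    }

    } // namespace Vulkan
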
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index 336f53700..2fa29793a 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -12,6 +12,8 @@
 
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 
+#include <vk_mem_alloc.h>
+
 namespace Vulkan::vk {
 
 namespace {
@@ -257,7 +259,9 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
     // These functions may fail to load depending on the enabled extensions.
     // Don't return a failure on these.
     X(vkCreateDebugUtilsMessengerEXT);
+    X(vkCreateDebugReportCallbackEXT);
     X(vkDestroyDebugUtilsMessengerEXT);
+    X(vkDestroyDebugReportCallbackEXT);
     X(vkDestroySurfaceKHR);
     X(vkGetPhysicalDeviceFeatures2);
     X(vkGetPhysicalDeviceProperties2);
@@ -479,6 +483,11 @@ void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle,
     dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr);
 }
 
+void Destroy(VkInstance instance, VkDebugReportCallbackEXT handle,
+             const InstanceDispatch& dld) noexcept {
+    dld.vkDestroyDebugReportCallbackEXT(instance, handle, nullptr);
+}
+
 void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept {
     dld.vkDestroySurfaceKHR(instance, handle, nullptr);
 }
@@ -547,24 +556,47 @@ DebugUtilsMessenger Instance::CreateDebugUtilsMessenger(
     return DebugUtilsMessenger(object, handle, *dld);
 }
 
-void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
-    Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
+DebugReportCallback Instance::CreateDebugReportCallback(
+    const VkDebugReportCallbackCreateInfoEXT& create_info) const {
+    VkDebugReportCallbackEXT object;
+    Check(dld->vkCreateDebugReportCallbackEXT(handle, &create_info, nullptr, &object));
+    return DebugReportCallback(object, handle, *dld);
 }
 
-void Buffer::SetObjectNameEXT(const char* name) const {
-    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
+void Image::SetObjectNameEXT(const char* name) const {
+    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
 }
 
-void BufferView::SetObjectNameEXT(const char* name) const {
-    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
+void Image::Release() const noexcept {
+    if (handle) {
+        vmaDestroyImage(allocator, handle, allocation);
+    }
 }
 
-void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
-    Check(dld->vkBindImageMemory(owner, handle, memory, offset));
+void Buffer::Flush() const {
+    if (!is_coherent) {
+        vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+    }
 }
 
-void Image::SetObjectNameEXT(const char* name) const {
-    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
+void Buffer::Invalidate() const {
+    if (!is_coherent) {
+        vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+    }
+}
+
+void Buffer::SetObjectNameEXT(const char* name) const {
+    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
+}
+
+void Buffer::Release() const noexcept {
+    if (handle) {
+        vmaDestroyBuffer(allocator, handle, allocation);
+    }
+}
+
+void BufferView::SetObjectNameEXT(const char* name) const {
+    SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
 }
 
 void ImageView::SetObjectNameEXT(const char* name) const {
@@ -701,24 +733,12 @@ Queue Device::GetQueue(u32 family_index) const noexcept {
     return Queue(queue, *dld);
 }
 
-Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
-    VkBuffer object;
-    Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object));
-    return Buffer(object, handle, *dld);
-}
-
 BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
     VkBufferView object;
     Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object));
     return BufferView(object, handle, *dld);
 }
 
-Image Device::CreateImage(const VkImageCreateInfo& ci) const {
-    VkImage object;
-    Check(dld->vkCreateImage(handle, &ci, nullptr, &object));
-    return Image(object, handle, *dld);
-}
-
 ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
     VkImageView object;
     Check(dld->vkCreateImageView(handle, &ci, nullptr, &object));
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 4ff328a21..b5e70fcd4 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -32,6 +32,9 @@
 #pragma warning(disable : 26812) // Disable prefer enum class over enum
 #endif
 
+VK_DEFINE_HANDLE(VmaAllocator)
+VK_DEFINE_HANDLE(VmaAllocation)
+
 namespace Vulkan::vk {
 
 /**
@@ -161,8 +164,10 @@ struct InstanceDispatch {
     PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{};
 
     PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{};
+    PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT{};
     PFN_vkCreateDevice vkCreateDevice{};
     PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{};
+    PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallbackEXT{};
     PFN_vkDestroyDevice vkDestroyDevice{};
     PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{};
     PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{};
@@ -363,6 +368,7 @@ void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept;
 void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept;
 void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept;
+void Destroy(VkInstance, VkDebugReportCallbackEXT, const InstanceDispatch&) noexcept;
 void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept;
 
 VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept;
@@ -578,6 +584,7 @@ private:
 };
 
 using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>;
+using DebugReportCallback = Handle<VkDebugReportCallbackEXT, VkInstance, InstanceDispatch>;
 using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>;
 using DescriptorUpdateTemplate = Handle<VkDescriptorUpdateTemplate, VkDevice, DeviceDispatch>;
 using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>;
@@ -610,12 +617,149 @@ public:
     DebugUtilsMessenger CreateDebugUtilsMessenger(
         const VkDebugUtilsMessengerCreateInfoEXT& create_info) const;
 
+    /// Creates a debug report callback.
+    /// @throw Exception on creation failure.
+    DebugReportCallback CreateDebugReportCallback(
+        const VkDebugReportCallbackCreateInfoEXT& create_info) const;
+
     /// Returns dispatch table.
     const InstanceDispatch& Dispatch() const noexcept {
         return *dld;
     }
 };
 
+class Image {
+public:
+    explicit Image(VkImage handle_, VkDevice owner_, VmaAllocator allocator_,
+                   VmaAllocation allocation_, const DeviceDispatch& dld_) noexcept
+        : handle{handle_}, owner{owner_}, allocator{allocator_},
+          allocation{allocation_}, dld{&dld_} {}
+    Image() = default;
+
+    Image(const Image&) = delete;
+    Image& operator=(const Image&) = delete;
+
+    Image(Image&& rhs) noexcept
+        : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator},
+          allocation{rhs.allocation}, dld{rhs.dld} {}
+
+    Image& operator=(Image&& rhs) noexcept {
+        Release();
+        handle = std::exchange(rhs.handle, nullptr);
+        owner = rhs.owner;
+        allocator = rhs.allocator;
+        allocation = rhs.allocation;
+        dld = rhs.dld;
+        return *this;
+    }
+
+    ~Image() noexcept {
+        Release();
+    }
+
+    VkImage operator*() const noexcept {
+        return handle;
+    }
+
+    void reset() noexcept {
+        Release();
+        handle = nullptr;
+    }
+
+    explicit operator bool() const noexcept {
+        return handle != nullptr;
+    }
+
+    void SetObjectNameEXT(const char* name) const;
+
+private:
+    void Release() const noexcept;
+
+    VkImage handle = nullptr;
+    VkDevice owner = nullptr;
+    VmaAllocator allocator = nullptr;
+    VmaAllocation allocation = nullptr;
+    const DeviceDispatch* dld = nullptr;
+};
+
+class Buffer {
+public:
+    explicit Buffer(VkBuffer handle_, VkDevice owner_, VmaAllocator allocator_,
+                    VmaAllocation allocation_, std::span<u8> mapped_, bool is_coherent_,
+                    const DeviceDispatch& dld_) noexcept
+        : handle{handle_}, owner{owner_}, allocator{allocator_},
+          allocation{allocation_}, mapped{mapped_}, is_coherent{is_coherent_}, dld{&dld_} {}
+    Buffer() = default;
+
+    Buffer(const Buffer&) = delete;
+    Buffer& operator=(const Buffer&) = delete;
+
+    Buffer(Buffer&& rhs) noexcept
+        : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator},
+          allocation{rhs.allocation}, mapped{rhs.mapped},
+          is_coherent{rhs.is_coherent}, dld{rhs.dld} {}
+
+    Buffer& operator=(Buffer&& rhs) noexcept {
+        Release();
+        handle = std::exchange(rhs.handle, nullptr);
+        owner = rhs.owner;
+        allocator = rhs.allocator;
+        allocation = rhs.allocation;
+        mapped = rhs.mapped;
+        is_coherent = rhs.is_coherent;
+        dld = rhs.dld;
+        return *this;
+    }
+
+    ~Buffer() noexcept {
+        Release();
+    }
+
+    VkBuffer operator*() const noexcept {
+        return handle;
+    }
+
+    void reset() noexcept {
+        Release();
+        handle = nullptr;
+    }
+
+    explicit operator bool() const noexcept {
+        return handle != nullptr;
+    }
+
+    /// Returns the host mapped memory, an empty span otherwise.
+    std::span<u8> Mapped() noexcept {
+        return mapped;
+    }
+
+    std::span<const u8> Mapped() const noexcept {
+        return mapped;
+    }
+
+    /// Returns true if the buffer is mapped to the host.
+    bool IsHostVisible() const noexcept {
+        return !mapped.empty();
+    }
+
+    void Flush() const;
+
+    void Invalidate() const;
+
+    void SetObjectNameEXT(const char* name) const;
+
+private:
+    void Release() const noexcept;
+
+    VkBuffer handle = nullptr;
+    VkDevice owner = nullptr;
+    VmaAllocator allocator = nullptr;
+    VmaAllocation allocation = nullptr;
+    std::span<u8> mapped = {};
+    bool is_coherent = false;
+    const DeviceDispatch* dld = nullptr;
+};
+
 class Queue {
 public:
     /// Construct an empty queue handle.
@@ -639,17 +783,6 @@ private:
     const DeviceDispatch* dld = nullptr;
 };
 
-class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
-    using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle;
-
-public:
-    /// Attaches a memory allocation.
-    void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
-
-    /// Set object name.
-    void SetObjectNameEXT(const char* name) const;
-};
-
 class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> {
     using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle;
 
@@ -658,17 +791,6 @@ public:
     void SetObjectNameEXT(const char* name) const;
 };
 
-class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
-    using Handle<VkImage, VkDevice, DeviceDispatch>::Handle;
-
-public:
-    /// Attaches a memory allocation.
-    void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
-
-    /// Set object name.
-    void SetObjectNameEXT(const char* name) const;
-};
-
 class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> {
     using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle;
 
@@ -840,12 +962,8 @@ public:
 
     Queue GetQueue(u32 family_index) const noexcept;
 
-    Buffer CreateBuffer(const VkBufferCreateInfo& ci) const;
-
     BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const;
 
-    Image CreateImage(const VkImageCreateInfo& ci) const;
-
     ImageView CreateImageView(const VkImageViewCreateInfo& ci) const;
 
     Semaphore CreateSemaphore() const;
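
The new vk::Image and vk::Buffer deliberately break from the generic Handle template: they carry the VMA allocator and allocation so destruction goes through vmaDestroyImage/vmaDestroyBuffer, and their move operations null the source so the allocation is freed exactly once. A self-contained model of that ownership pattern:

    #include <cstdio>
    #include <utility>

    class OwnedHandle {
    public:
        explicit OwnedHandle(int id) : id_{id} {}
        OwnedHandle() = default;

        OwnedHandle(const OwnedHandle&) = delete;
        OwnedHandle& operator=(const OwnedHandle&) = delete;

        OwnedHandle(OwnedHandle&& rhs) noexcept : id_{std::exchange(rhs.id_, 0)} {}

        OwnedHandle& operator=(OwnedHandle&& rhs) noexcept {
            Release(); // free the current resource before stealing rhs's
            id_ = std::exchange(rhs.id_, 0);
            return *this;
        }

        ~OwnedHandle() {
            Release();
        }

    private:
        void Release() noexcept {
            if (id_ != 0) {
                std::printf("released %d\n", id_); // stands in for vmaDestroyBuffer/Image
            }
        }

        int id_ = 0;
    };

    int main() {
        OwnedHandle a{1};
        OwnedHandle b = std::move(a); // `a` is nulled; "released 1" prints exactly once
        return 0;
    }
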