diff options
| author | 2023-07-01 15:01:11 -0700 | |
|---|---|---|
| committer | 2023-07-01 15:01:11 -0700 | |
| commit | 98685d48e3cb9f25f6919f004ec62cadf33afad2 (patch) | |
| tree | 9df2ce7f57370641589bfae7196c77b090bcbe0f /src/video_core | |
| parent | PR feedback + constification (diff) | |
| parent | Update translations (2023-07-01) (#10972) (diff) | |
| download | yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.gz yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.tar.xz yuzu-98685d48e3cb9f25f6919f004ec62cadf33afad2.zip | |
Merge remote-tracking branch 'origin/master' into ssl
Diffstat (limited to 'src/video_core')
70 files changed, 1088 insertions, 700 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index bf6439530..3b2fe01da 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt | |||
| @@ -220,8 +220,8 @@ add_library(video_core STATIC | |||
| 220 | surface.h | 220 | surface.h |
| 221 | texture_cache/accelerated_swizzle.cpp | 221 | texture_cache/accelerated_swizzle.cpp |
| 222 | texture_cache/accelerated_swizzle.h | 222 | texture_cache/accelerated_swizzle.h |
| 223 | texture_cache/decode_bc4.cpp | 223 | texture_cache/decode_bc.cpp |
| 224 | texture_cache/decode_bc4.h | 224 | texture_cache/decode_bc.h |
| 225 | texture_cache/descriptor_table.h | 225 | texture_cache/descriptor_table.h |
| 226 | texture_cache/formatter.cpp | 226 | texture_cache/formatter.cpp |
| 227 | texture_cache/formatter.h | 227 | texture_cache/formatter.h |
| @@ -279,7 +279,7 @@ add_library(video_core STATIC | |||
| 279 | create_target_directory_groups(video_core) | 279 | create_target_directory_groups(video_core) |
| 280 | 280 | ||
| 281 | target_link_libraries(video_core PUBLIC common core) | 281 | target_link_libraries(video_core PUBLIC common core) |
| 282 | target_link_libraries(video_core PUBLIC glad shader_recompiler stb) | 282 | target_link_libraries(video_core PUBLIC glad shader_recompiler stb bc_decoder) |
| 283 | 283 | ||
| 284 | if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID)) | 284 | if (YUZU_USE_BUNDLED_FFMPEG AND NOT (WIN32 OR ANDROID)) |
| 285 | add_dependencies(video_core ffmpeg-build) | 285 | add_dependencies(video_core ffmpeg-build) |
| @@ -291,7 +291,7 @@ target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) | |||
| 291 | 291 | ||
| 292 | add_dependencies(video_core host_shaders) | 292 | add_dependencies(video_core host_shaders) |
| 293 | target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) | 293 | target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) |
| 294 | target_link_libraries(video_core PRIVATE sirit Vulkan::Headers) | 294 | target_link_libraries(video_core PRIVATE sirit Vulkan::Headers vma) |
| 295 | 295 | ||
| 296 | if (ENABLE_NSIGHT_AFTERMATH) | 296 | if (ENABLE_NSIGHT_AFTERMATH) |
| 297 | if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) | 297 | if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK}) |
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 45977d578..58a45ab67 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 207 | if (has_new_downloads) { | 207 | if (has_new_downloads) { |
| 208 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 208 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 209 | } | 209 | } |
| 210 | tmp_buffer.resize(amount); | 210 | tmp_buffer.resize_destructive(amount); |
| 211 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | 211 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); |
| 212 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); | 212 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); |
| 213 | return true; | 213 | return true; |
| @@ -1279,7 +1279,7 @@ template <class P> | |||
| 1279 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | 1279 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, |
| 1280 | u32 wanted_size) { | 1280 | u32 wanted_size) { |
| 1281 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | 1281 | static constexpr int STREAM_LEAP_THRESHOLD = 16; |
| 1282 | std::vector<BufferId> overlap_ids; | 1282 | boost::container::small_vector<BufferId, 16> overlap_ids; |
| 1283 | VAddr begin = cpu_addr; | 1283 | VAddr begin = cpu_addr; |
| 1284 | VAddr end = cpu_addr + wanted_size; | 1284 | VAddr end = cpu_addr + wanted_size; |
| 1285 | int stream_score = 0; | 1285 | int stream_score = 0; |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 63a120f7a..fe6068cfe 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 229 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | 229 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; |
| 230 | 230 | ||
| 231 | struct OverlapResult { | 231 | struct OverlapResult { |
| 232 | std::vector<BufferId> ids; | 232 | boost::container::small_vector<BufferId, 16> ids; |
| 233 | VAddr begin; | 233 | VAddr begin; |
| 234 | VAddr end; | 234 | VAddr end; |
| 235 | bool has_stream_leap = false; | 235 | bool has_stream_leap = false; |
| @@ -582,7 +582,7 @@ private: | |||
| 582 | BufferId inline_buffer_id; | 582 | BufferId inline_buffer_id; |
| 583 | 583 | ||
| 584 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | 584 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; |
| 585 | std::vector<u8> tmp_buffer; | 585 | Common::ScratchBuffer<u8> tmp_buffer; |
| 586 | }; | 586 | }; |
| 587 | 587 | ||
| 588 | } // namespace VideoCommon | 588 | } // namespace VideoCommon |
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 83112dfce..7d660af47 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -63,7 +63,6 @@ struct ChCommand { | |||
| 63 | }; | 63 | }; |
| 64 | 64 | ||
| 65 | using ChCommandHeaderList = std::vector<ChCommandHeader>; | 65 | using ChCommandHeaderList = std::vector<ChCommandHeader>; |
| 66 | using ChCommandList = std::vector<ChCommand>; | ||
| 67 | 66 | ||
| 68 | struct ThiRegisters { | 67 | struct ThiRegisters { |
| 69 | u32_le increment_syncpt{}; | 68 | u32_le increment_syncpt{}; |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1cdb690ed..8a2784cdc 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include <boost/container/small_vector.hpp> | ||
| 9 | #include <queue> | 10 | #include <queue> |
| 10 | 11 | ||
| 11 | #include "common/bit_field.h" | 12 | #include "common/bit_field.h" |
| @@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub | |||
| 102 | struct CommandList final { | 103 | struct CommandList final { |
| 103 | CommandList() = default; | 104 | CommandList() = default; |
| 104 | explicit CommandList(std::size_t size) : command_lists(size) {} | 105 | explicit CommandList(std::size_t size) : command_lists(size) {} |
| 105 | explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) | 106 | explicit CommandList( |
| 107 | boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_) | ||
| 106 | : prefetch_command_list{std::move(prefetch_command_list_)} {} | 108 | : prefetch_command_list{std::move(prefetch_command_list_)} {} |
| 107 | 109 | ||
| 108 | std::vector<CommandListHeader> command_lists; | 110 | boost::container::small_vector<CommandListHeader, 512> command_lists; |
| 109 | std::vector<CommandHeader> prefetch_command_list; | 111 | boost::container::small_vector<CommandHeader, 512> prefetch_command_list; |
| 110 | }; | 112 | }; |
| 111 | 113 | ||
| 112 | /** | 114 | /** |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ebe5536de..a290d6ea7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -108,9 +108,11 @@ void MaxwellDMA::Launch() { | |||
| 108 | if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { | 108 | if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { |
| 109 | ASSERT(regs.remap_const.component_size_minus_one == 3); | 109 | ASSERT(regs.remap_const.component_size_minus_one == 3); |
| 110 | accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); | 110 | accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); |
| 111 | std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); | 111 | read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); |
| 112 | std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); | ||
| 113 | std::ranges::fill(span, regs.remap_consta_value); | ||
| 112 | memory_manager.WriteBlockUnsafe(regs.offset_out, | 114 | memory_manager.WriteBlockUnsafe(regs.offset_out, |
| 113 | reinterpret_cast<u8*>(tmp_buffer.data()), | 115 | reinterpret_cast<u8*>(read_buffer.data()), |
| 114 | regs.line_length_in * sizeof(u32)); | 116 | regs.line_length_in * sizeof(u32)); |
| 115 | } else { | 117 | } else { |
| 116 | memory_manager.FlushCaching(); | 118 | memory_manager.FlushCaching(); |
| @@ -126,32 +128,33 @@ void MaxwellDMA::Launch() { | |||
| 126 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 128 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| 127 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); | 129 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); |
| 128 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 130 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 129 | std::vector<u8> tmp_buffer(16); | 131 | read_buffer.resize_destructive(16); |
| 130 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 132 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 131 | memory_manager.ReadBlockUnsafe( | 133 | memory_manager.ReadBlock( |
| 132 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 134 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 133 | tmp_buffer.data(), tmp_buffer.size()); | 135 | read_buffer.data(), read_buffer.size()); |
| 134 | memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), | 136 | memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), |
| 135 | tmp_buffer.size()); | 137 | read_buffer.size()); |
| 136 | } | 138 | } |
| 137 | } else if (is_src_pitch && !is_dst_pitch) { | 139 | } else if (is_src_pitch && !is_dst_pitch) { |
| 138 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 140 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| 139 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); | 141 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); |
| 140 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 142 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 141 | std::vector<u8> tmp_buffer(16); | 143 | read_buffer.resize_destructive(16); |
| 142 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 144 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 143 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), | 145 | memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), |
| 144 | tmp_buffer.size()); | 146 | read_buffer.size()); |
| 145 | memory_manager.WriteBlockCached( | 147 | memory_manager.WriteBlockCached( |
| 146 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), | 148 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), |
| 147 | tmp_buffer.data(), tmp_buffer.size()); | 149 | read_buffer.data(), read_buffer.size()); |
| 148 | } | 150 | } |
| 149 | } else { | 151 | } else { |
| 150 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | 152 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |
| 151 | std::vector<u8> tmp_buffer(regs.line_length_in); | 153 | read_buffer.resize_destructive(regs.line_length_in); |
| 152 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), | 154 | memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), |
| 153 | regs.line_length_in); | 155 | regs.line_length_in, |
| 154 | memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), | 156 | VideoCommon::CacheType::NoBufferCache); |
| 157 | memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), | ||
| 155 | regs.line_length_in); | 158 | regs.line_length_in); |
| 156 | } | 159 | } |
| 157 | } | 160 | } |
| @@ -171,7 +174,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 171 | src_operand.address = regs.offset_in; | 174 | src_operand.address = regs.offset_in; |
| 172 | 175 | ||
| 173 | DMA::BufferOperand dst_operand; | 176 | DMA::BufferOperand dst_operand; |
| 174 | dst_operand.pitch = regs.pitch_out; | 177 | u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out)); |
| 178 | dst_operand.pitch = abs_pitch_out; | ||
| 175 | dst_operand.width = regs.line_length_in; | 179 | dst_operand.width = regs.line_length_in; |
| 176 | dst_operand.height = regs.line_count; | 180 | dst_operand.height = regs.line_count; |
| 177 | dst_operand.address = regs.offset_out; | 181 | dst_operand.address = regs.offset_out; |
| @@ -218,7 +222,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 218 | const size_t src_size = | 222 | const size_t src_size = |
| 219 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 223 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 220 | 224 | ||
| 221 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 225 | const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count; |
| 222 | read_buffer.resize_destructive(src_size); | 226 | read_buffer.resize_destructive(src_size); |
| 223 | write_buffer.resize_destructive(dst_size); | 227 | write_buffer.resize_destructive(dst_size); |
| 224 | 228 | ||
| @@ -227,7 +231,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 227 | 231 | ||
| 228 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | 232 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 229 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 233 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 230 | regs.pitch_out); | 234 | abs_pitch_out); |
| 231 | 235 | ||
| 232 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | 236 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 233 | } | 237 | } |
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 456f733cf..db385076d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp | |||
| @@ -193,18 +193,13 @@ struct GPU::Impl { | |||
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | [[nodiscard]] u64 GetTicks() const { | 195 | [[nodiscard]] u64 GetTicks() const { |
| 196 | // This values were reversed engineered by fincs from NVN | 196 | u64 gpu_tick = system.CoreTiming().GetGPUTicks(); |
| 197 | // The gpu clock is reported in units of 385/625 nanoseconds | ||
| 198 | constexpr u64 gpu_ticks_num = 384; | ||
| 199 | constexpr u64 gpu_ticks_den = 625; | ||
| 200 | 197 | ||
| 201 | u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); | ||
| 202 | if (Settings::values.use_fast_gpu_time.GetValue()) { | 198 | if (Settings::values.use_fast_gpu_time.GetValue()) { |
| 203 | nanoseconds /= 256; | 199 | gpu_tick /= 256; |
| 204 | } | 200 | } |
| 205 | const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; | 201 | |
| 206 | const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; | 202 | return gpu_tick; |
| 207 | return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; | ||
| 208 | } | 203 | } |
| 209 | 204 | ||
| 210 | [[nodiscard]] bool IsAsync() const { | 205 | [[nodiscard]] bool IsAsync() const { |
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 6ce179167..ce827eb6c 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <array> | 4 | #include <array> |
| 5 | #include <bit> | 5 | #include <bit> |
| 6 | 6 | ||
| 7 | #include "common/scratch_buffer.h" | ||
| 7 | #include "common/settings.h" | 8 | #include "common/settings.h" |
| 8 | #include "video_core/host1x/codecs/h264.h" | 9 | #include "video_core/host1x/codecs/h264.h" |
| 9 | #include "video_core/host1x/host1x.h" | 10 | #include "video_core/host1x/host1x.h" |
| @@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) { | |||
| 188 | } | 189 | } |
| 189 | 190 | ||
| 190 | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { | 191 | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { |
| 191 | std::vector<u8> scan(count); | 192 | static Common::ScratchBuffer<u8> scan{}; |
| 193 | scan.resize_destructive(count); | ||
| 192 | if (count == 16) { | 194 | if (count == 16) { |
| 193 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); | 195 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); |
| 194 | } else { | 196 | } else { |
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 2442c3c29..e61d9af80 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt | |||
| @@ -33,6 +33,7 @@ set(SHADER_FILES | |||
| 33 | opengl_fidelityfx_fsr.frag | 33 | opengl_fidelityfx_fsr.frag |
| 34 | opengl_fidelityfx_fsr_easu.frag | 34 | opengl_fidelityfx_fsr_easu.frag |
| 35 | opengl_fidelityfx_fsr_rcas.frag | 35 | opengl_fidelityfx_fsr_rcas.frag |
| 36 | opengl_lmem_warmup.comp | ||
| 36 | opengl_present.frag | 37 | opengl_present.frag |
| 37 | opengl_present.vert | 38 | opengl_present.vert |
| 38 | opengl_present_scaleforce.frag | 39 | opengl_present_scaleforce.frag |
diff --git a/src/video_core/host_shaders/opengl_lmem_warmup.comp b/src/video_core/host_shaders/opengl_lmem_warmup.comp new file mode 100644 index 000000000..518268477 --- /dev/null +++ b/src/video_core/host_shaders/opengl_lmem_warmup.comp | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | // This shader is a workaround for a quirk in NVIDIA OpenGL drivers | ||
| 5 | // Shaders using local memory see a great performance benefit if a shader that was dispatched | ||
| 6 | // before it had more local memory allocated. | ||
| 7 | // This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that | ||
| 8 | // subsequent shaders see the performance boost. | ||
| 9 | |||
| 10 | // NOTE: This shader does no actual meaningful work and returns immediately, | ||
| 11 | // it is simply a means to have the driver expect a shader using lots of local memory. | ||
| 12 | |||
| 13 | #version 450 | ||
| 14 | |||
| 15 | layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; | ||
| 16 | |||
| 17 | layout(location = 0) uniform uint uniform_data; | ||
| 18 | |||
| 19 | layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image; | ||
| 20 | |||
| 21 | #define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in Nvidia's GLSL compiler | ||
| 22 | #define NUM_LMEM_CONSTANTS 1 | ||
| 23 | #define ARRAY_SIZE MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS | ||
| 24 | |||
| 25 | uint lmem_0[ARRAY_SIZE]; | ||
| 26 | const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0)); | ||
| 27 | |||
| 28 | void main() { | ||
| 29 | const uint global_id = gl_GlobalInvocationID.x; | ||
| 30 | if (global_id <= 128) { | ||
| 31 | // Since the shader is called with a dispatch of 1x1x1 | ||
| 32 | // This should always be the case, and this shader will not actually execute | ||
| 33 | return; | ||
| 34 | } | ||
| 35 | for (uint t = 0; t < uniform_data; t++) { | ||
| 36 | const uint offset = (t * uniform_data); | ||
| 37 | lmem_0[offset] = t; | ||
| 38 | } | ||
| 39 | const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x); | ||
| 40 | const uint value = lmem_0[offset]; | ||
| 41 | const uint const_value = constant_values[offset / 4][offset % 4]; | ||
| 42 | const uvec4 color = uvec4(value + const_value); | ||
| 43 | |||
| 44 | // A "side-effect" is needed so the variables don't get optimized out, | ||
| 45 | // but this should never execute so there should be no clobbering of previously bound state. | ||
| 46 | imageStore(dest_image, ivec3(gl_GlobalInvocationID), color); | ||
| 47 | } | ||
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 7b2cde7a7..45141e488 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -111,7 +111,7 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp | |||
| 111 | [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); | 111 | [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr); |
| 112 | SetEntry<false>(current_gpu_addr, entry_type); | 112 | SetEntry<false>(current_gpu_addr, entry_type); |
| 113 | if (current_entry_type != entry_type) { | 113 | if (current_entry_type != entry_type) { |
| 114 | rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size); | 114 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, page_size); |
| 115 | } | 115 | } |
| 116 | if constexpr (entry_type == EntryType::Mapped) { | 116 | if constexpr (entry_type == EntryType::Mapped) { |
| 117 | const VAddr current_cpu_addr = cpu_addr + offset; | 117 | const VAddr current_cpu_addr = cpu_addr + offset; |
| @@ -134,7 +134,7 @@ GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr | |||
| 134 | [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); | 134 | [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr); |
| 135 | SetEntry<true>(current_gpu_addr, entry_type); | 135 | SetEntry<true>(current_gpu_addr, entry_type); |
| 136 | if (current_entry_type != entry_type) { | 136 | if (current_entry_type != entry_type) { |
| 137 | rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size); | 137 | rasterizer->ModifyGPUMemory(unique_identifier, current_gpu_addr, big_page_size); |
| 138 | } | 138 | } |
| 139 | if constexpr (entry_type == EntryType::Mapped) { | 139 | if constexpr (entry_type == EntryType::Mapped) { |
| 140 | const VAddr current_cpu_addr = cpu_addr + offset; | 140 | const VAddr current_cpu_addr = cpu_addr + offset; |
| @@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 587 | 587 | ||
| 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, |
| 589 | VideoCommon::CacheType which) { | 589 | VideoCommon::CacheType which) { |
| 590 | std::vector<u8> tmp_buffer(size); | 590 | tmp_buffer.resize_destructive(size); |
| 591 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); | 591 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); |
| 592 | 592 | ||
| 593 | // The output block must be flushed in case it has data modified from the GPU. | 593 | // The output block must be flushed in case it has data modified from the GPU. |
| @@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons | |||
| 670 | return result; | 670 | return result; |
| 671 | } | 671 | } |
| 672 | 672 | ||
| 673 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | 673 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> |
| 674 | GPUVAddr gpu_addr, std::size_t size) const { | 674 | MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const { |
| 675 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; | 675 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{}; |
| 676 | GetSubmappedRangeImpl<true>(gpu_addr, size, result); | 676 | GetSubmappedRangeImpl<true>(gpu_addr, size, result); |
| 677 | return result; | 677 | return result; |
| 678 | } | 678 | } |
| @@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 680 | template <bool is_gpu_address> | 680 | template <bool is_gpu_address> |
| 681 | void MemoryManager::GetSubmappedRangeImpl( | 681 | void MemoryManager::GetSubmappedRangeImpl( |
| 682 | GPUVAddr gpu_addr, std::size_t size, | 682 | GPUVAddr gpu_addr, std::size_t size, |
| 683 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | 683 | boost::container::small_vector< |
| 684 | result) const { | 684 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) |
| 685 | const { | ||
| 685 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | 686 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> |
| 686 | last_segment{}; | 687 | last_segment{}; |
| 687 | std::optional<VAddr> old_page_addr{}; | 688 | std::optional<VAddr> old_page_addr{}; |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 794535122..4202c26ff 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -8,10 +8,12 @@ | |||
| 8 | #include <mutex> | 8 | #include <mutex> |
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | #include <boost/container/small_vector.hpp> | ||
| 11 | 12 | ||
| 12 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 13 | #include "common/multi_level_page_table.h" | 14 | #include "common/multi_level_page_table.h" |
| 14 | #include "common/range_map.h" | 15 | #include "common/range_map.h" |
| 16 | #include "common/scratch_buffer.h" | ||
| 15 | #include "common/virtual_buffer.h" | 17 | #include "common/virtual_buffer.h" |
| 16 | #include "video_core/cache_types.h" | 18 | #include "video_core/cache_types.h" |
| 17 | #include "video_core/pte_kind.h" | 19 | #include "video_core/pte_kind.h" |
| @@ -107,8 +109,8 @@ public: | |||
| 107 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty | 109 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty |
| 108 | * vector will be returned; | 110 | * vector will be returned; |
| 109 | */ | 111 | */ |
| 110 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, | 112 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( |
| 111 | std::size_t size) const; | 113 | GPUVAddr gpu_addr, std::size_t size) const; |
| 112 | 114 | ||
| 113 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, | 115 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, |
| 114 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | 116 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); |
| @@ -165,7 +167,8 @@ private: | |||
| 165 | template <bool is_gpu_address> | 167 | template <bool is_gpu_address> |
| 166 | void GetSubmappedRangeImpl( | 168 | void GetSubmappedRangeImpl( |
| 167 | GPUVAddr gpu_addr, std::size_t size, | 169 | GPUVAddr gpu_addr, std::size_t size, |
| 168 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | 170 | boost::container::small_vector< |
| 171 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& | ||
| 169 | result) const; | 172 | result) const; |
| 170 | 173 | ||
| 171 | Core::System& system; | 174 | Core::System& system; |
| @@ -215,8 +218,8 @@ private: | |||
| 215 | Common::VirtualBuffer<u32> big_page_table_cpu; | 218 | Common::VirtualBuffer<u32> big_page_table_cpu; |
| 216 | 219 | ||
| 217 | std::vector<u64> big_page_continuous; | 220 | std::vector<u64> big_page_continuous; |
| 218 | std::vector<std::pair<VAddr, std::size_t>> page_stash{}; | 221 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; |
| 219 | std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; | 222 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; |
| 220 | 223 | ||
| 221 | mutable std::mutex guard; | 224 | mutable std::mutex guard; |
| 222 | 225 | ||
| @@ -226,6 +229,8 @@ private: | |||
| 226 | std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; | 229 | std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; |
| 227 | 230 | ||
| 228 | static std::atomic<size_t> unique_identifier_generator; | 231 | static std::atomic<size_t> unique_identifier_generator; |
| 232 | |||
| 233 | Common::ScratchBuffer<u8> tmp_buffer; | ||
| 229 | }; | 234 | }; |
| 230 | 235 | ||
| 231 | } // namespace Tegra | 236 | } // namespace Tegra |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 3151c0db8..f9ca55c36 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp | |||
| @@ -63,6 +63,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac | |||
| 63 | writes_global_memory = !use_storage_buffers && | 63 | writes_global_memory = !use_storage_buffers && |
| 64 | std::ranges::any_of(info.storage_buffers_descriptors, | 64 | std::ranges::any_of(info.storage_buffers_descriptors, |
| 65 | [](const auto& desc) { return desc.is_written; }); | 65 | [](const auto& desc) { return desc.is_written; }); |
| 66 | uses_local_memory = info.uses_local_memory; | ||
| 66 | if (force_context_flush) { | 67 | if (force_context_flush) { |
| 67 | std::scoped_lock lock{built_mutex}; | 68 | std::scoped_lock lock{built_mutex}; |
| 68 | built_fence.Create(); | 69 | built_fence.Create(); |
diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index 9bcc72b59..c26b4fa5e 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h | |||
| @@ -59,6 +59,10 @@ public: | |||
| 59 | return writes_global_memory; | 59 | return writes_global_memory; |
| 60 | } | 60 | } |
| 61 | 61 | ||
| 62 | [[nodiscard]] bool UsesLocalMemory() const noexcept { | ||
| 63 | return uses_local_memory; | ||
| 64 | } | ||
| 65 | |||
| 62 | void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_, | 66 | void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_, |
| 63 | Tegra::MemoryManager* gpu_memory_) { | 67 | Tegra::MemoryManager* gpu_memory_) { |
| 64 | kepler_compute = kepler_compute_; | 68 | kepler_compute = kepler_compute_; |
| @@ -84,6 +88,7 @@ private: | |||
| 84 | 88 | ||
| 85 | bool use_storage_buffers{}; | 89 | bool use_storage_buffers{}; |
| 86 | bool writes_global_memory{}; | 90 | bool writes_global_memory{}; |
| 91 | bool uses_local_memory{}; | ||
| 87 | 92 | ||
| 88 | std::mutex built_mutex; | 93 | std::mutex built_mutex; |
| 89 | std::condition_variable built_condvar; | 94 | std::condition_variable built_condvar; |
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 03d234f2f..33e63c17d 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp | |||
| @@ -194,6 +194,7 @@ Device::Device(Core::Frontend::EmuWindow& emu_window) { | |||
| 194 | has_bool_ref_bug = true; | 194 | has_bool_ref_bug = true; |
| 195 | } | 195 | } |
| 196 | } | 196 | } |
| 197 | has_lmem_perf_bug = is_nvidia; | ||
| 197 | 198 | ||
| 198 | strict_context_required = emu_window.StrictContextRequired(); | 199 | strict_context_required = emu_window.StrictContextRequired(); |
| 199 | // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. | 200 | // Blocks AMD and Intel OpenGL drivers on Windows from using asynchronous shader compilation. |
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ad27264e5..a5a6bbbba 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h | |||
| @@ -192,6 +192,10 @@ public: | |||
| 192 | return supports_conditional_barriers; | 192 | return supports_conditional_barriers; |
| 193 | } | 193 | } |
| 194 | 194 | ||
| 195 | bool HasLmemPerfBug() const { | ||
| 196 | return has_lmem_perf_bug; | ||
| 197 | } | ||
| 198 | |||
| 195 | private: | 199 | private: |
| 196 | static bool TestVariableAoffi(); | 200 | static bool TestVariableAoffi(); |
| 197 | static bool TestPreciseBug(); | 201 | static bool TestPreciseBug(); |
| @@ -238,6 +242,7 @@ private: | |||
| 238 | bool can_report_memory{}; | 242 | bool can_report_memory{}; |
| 239 | bool strict_context_required{}; | 243 | bool strict_context_required{}; |
| 240 | bool supports_conditional_barriers{}; | 244 | bool supports_conditional_barriers{}; |
| 245 | bool has_lmem_perf_bug{}; | ||
| 241 | 246 | ||
| 242 | std::string vendor_name; | 247 | std::string vendor_name; |
| 243 | }; | 248 | }; |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c58f760b8..23a48c6fe 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | |||
| @@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | |||
| 215 | 215 | ||
| 216 | writes_global_memory |= std::ranges::any_of( | 216 | writes_global_memory |= std::ranges::any_of( |
| 217 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); | 217 | info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); |
| 218 | uses_local_memory |= info.uses_local_memory; | ||
| 218 | } | 219 | } |
| 219 | ASSERT(num_textures <= MAX_TEXTURES); | 220 | ASSERT(num_textures <= MAX_TEXTURES); |
| 220 | ASSERT(num_images <= MAX_IMAGES); | 221 | ASSERT(num_images <= MAX_IMAGES); |
diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 7bab3be0a..7b3d7eae8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h | |||
| @@ -98,6 +98,10 @@ public: | |||
| 98 | return writes_global_memory; | 98 | return writes_global_memory; |
| 99 | } | 99 | } |
| 100 | 100 | ||
| 101 | [[nodiscard]] bool UsesLocalMemory() const noexcept { | ||
| 102 | return uses_local_memory; | ||
| 103 | } | ||
| 104 | |||
| 101 | [[nodiscard]] bool IsBuilt() noexcept; | 105 | [[nodiscard]] bool IsBuilt() noexcept; |
| 102 | 106 | ||
| 103 | template <typename Spec> | 107 | template <typename Spec> |
| @@ -146,6 +150,7 @@ private: | |||
| 146 | 150 | ||
| 147 | bool use_storage_buffers{}; | 151 | bool use_storage_buffers{}; |
| 148 | bool writes_global_memory{}; | 152 | bool writes_global_memory{}; |
| 153 | bool uses_local_memory{}; | ||
| 149 | 154 | ||
| 150 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; | 155 | static constexpr std::size_t XFB_ENTRY_STRIDE = 3; |
| 151 | GLsizei num_xfb_attribs{}; | 156 | GLsizei num_xfb_attribs{}; |
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index fc711c44a..edf527f2d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp | |||
| @@ -222,6 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||
| 222 | gpu.TickWork(); | 222 | gpu.TickWork(); |
| 223 | 223 | ||
| 224 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | 224 | std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; |
| 225 | if (pipeline->UsesLocalMemory()) { | ||
| 226 | program_manager.LocalMemoryWarmup(); | ||
| 227 | } | ||
| 225 | pipeline->SetEngine(maxwell3d, gpu_memory); | 228 | pipeline->SetEngine(maxwell3d, gpu_memory); |
| 226 | pipeline->Configure(is_indexed); | 229 | pipeline->Configure(is_indexed); |
| 227 | 230 | ||
| @@ -371,6 +374,9 @@ void RasterizerOpenGL::DispatchCompute() { | |||
| 371 | if (!pipeline) { | 374 | if (!pipeline) { |
| 372 | return; | 375 | return; |
| 373 | } | 376 | } |
| 377 | if (pipeline->UsesLocalMemory()) { | ||
| 378 | program_manager.LocalMemoryWarmup(); | ||
| 379 | } | ||
| 374 | pipeline->SetEngine(kepler_compute, gpu_memory); | 380 | pipeline->SetEngine(kepler_compute, gpu_memory); |
| 375 | pipeline->Configure(); | 381 | pipeline->Configure(); |
| 376 | const auto& qmd{kepler_compute->launch_description}; | 382 | const auto& qmd{kepler_compute->launch_description}; |
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3f077311e..0329ed820 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, | |||
| 85 | case Shader::Stage::VertexB: | 85 | case Shader::Stage::VertexB: |
| 86 | case Shader::Stage::Geometry: | 86 | case Shader::Stage::Geometry: |
| 87 | if (!use_assembly_shaders && key.xfb_enabled != 0) { | 87 | if (!use_assembly_shaders && key.xfb_enabled != 0) { |
| 88 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); | 88 | auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); |
| 89 | info.xfb_varyings = varyings; | ||
| 90 | info.xfb_count = count; | ||
| 89 | } | 91 | } |
| 90 | break; | 92 | break; |
| 91 | case Shader::Stage::TessellationEval: | 93 | case Shader::Stage::TessellationEval: |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 98841ae65..03d4b9d06 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp | |||
| @@ -3,7 +3,9 @@ | |||
| 3 | 3 | ||
| 4 | #include <glad/glad.h> | 4 | #include <glad/glad.h> |
| 5 | 5 | ||
| 6 | #include "video_core/host_shaders/opengl_lmem_warmup_comp.h" | ||
| 6 | #include "video_core/renderer_opengl/gl_shader_manager.h" | 7 | #include "video_core/renderer_opengl/gl_shader_manager.h" |
| 8 | #include "video_core/renderer_opengl/gl_shader_util.h" | ||
| 7 | 9 | ||
| 8 | namespace OpenGL { | 10 | namespace OpenGL { |
| 9 | 11 | ||
| @@ -17,6 +19,10 @@ ProgramManager::ProgramManager(const Device& device) { | |||
| 17 | if (device.UseAssemblyShaders()) { | 19 | if (device.UseAssemblyShaders()) { |
| 18 | glEnable(GL_COMPUTE_PROGRAM_NV); | 20 | glEnable(GL_COMPUTE_PROGRAM_NV); |
| 19 | } | 21 | } |
| 22 | if (device.HasLmemPerfBug()) { | ||
| 23 | lmem_warmup_program = | ||
| 24 | CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER); | ||
| 25 | } | ||
| 20 | } | 26 | } |
| 21 | 27 | ||
| 22 | void ProgramManager::BindComputeProgram(GLuint program) { | 28 | void ProgramManager::BindComputeProgram(GLuint program) { |
| @@ -98,6 +104,13 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU | |||
| 98 | 104 | ||
| 99 | void ProgramManager::RestoreGuestCompute() {} | 105 | void ProgramManager::RestoreGuestCompute() {} |
| 100 | 106 | ||
| 107 | void ProgramManager::LocalMemoryWarmup() { | ||
| 108 | if (lmem_warmup_program.handle != 0) { | ||
| 109 | BindComputeProgram(lmem_warmup_program.handle); | ||
| 110 | glDispatchCompute(1, 1, 1); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | |||
| 101 | void ProgramManager::BindPipeline() { | 114 | void ProgramManager::BindPipeline() { |
| 102 | if (!is_pipeline_bound) { | 115 | if (!is_pipeline_bound) { |
| 103 | is_pipeline_bound = true; | 116 | is_pipeline_bound = true; |
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 07ffab77f..852d8c88e 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h | |||
| @@ -30,6 +30,8 @@ public: | |||
| 30 | 30 | ||
| 31 | void RestoreGuestCompute(); | 31 | void RestoreGuestCompute(); |
| 32 | 32 | ||
| 33 | void LocalMemoryWarmup(); | ||
| 34 | |||
| 33 | private: | 35 | private: |
| 34 | void BindPipeline(); | 36 | void BindPipeline(); |
| 35 | 37 | ||
| @@ -44,6 +46,7 @@ private: | |||
| 44 | u32 current_stage_mask = 0; | 46 | u32 current_stage_mask = 0; |
| 45 | std::array<GLuint, NUM_STAGES> current_programs{}; | 47 | std::array<GLuint, NUM_STAGES> current_programs{}; |
| 46 | GLuint current_assembly_compute_program = 0; | 48 | GLuint current_assembly_compute_program = 0; |
| 49 | OGLProgram lmem_warmup_program; | ||
| 47 | }; | 50 | }; |
| 48 | 51 | ||
| 49 | } // namespace OpenGL | 52 | } // namespace OpenGL |
diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index cf2964a3f..28d4b15a0 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp | |||
| @@ -495,6 +495,9 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer, | |||
| 495 | const Region2D& dst_region, const Region2D& src_region, | 495 | const Region2D& dst_region, const Region2D& src_region, |
| 496 | Tegra::Engines::Fermi2D::Filter filter, | 496 | Tegra::Engines::Fermi2D::Filter filter, |
| 497 | Tegra::Engines::Fermi2D::Operation operation) { | 497 | Tegra::Engines::Fermi2D::Operation operation) { |
| 498 | if (!device.IsExtShaderStencilExportSupported()) { | ||
| 499 | return; | ||
| 500 | } | ||
| 498 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); | 501 | ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point); |
| 499 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); | 502 | ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy); |
| 500 | const BlitImagePipelineKey key{ | 503 | const BlitImagePipelineKey key{ |
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 9a0b10568..a8540339d 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp | |||
| @@ -259,6 +259,26 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with | |||
| 259 | break; | 259 | break; |
| 260 | } | 260 | } |
| 261 | } | 261 | } |
| 262 | // Transcode on hardware that doesn't support BCn natively | ||
| 263 | if (!device.IsOptimalBcnSupported() && VideoCore::Surface::IsPixelFormatBCn(pixel_format)) { | ||
| 264 | const bool is_srgb = with_srgb && VideoCore::Surface::IsPixelFormatSRGB(pixel_format); | ||
| 265 | if (pixel_format == PixelFormat::BC4_SNORM) { | ||
| 266 | tuple.format = VK_FORMAT_R8_SNORM; | ||
| 267 | } else if (pixel_format == PixelFormat::BC4_UNORM) { | ||
| 268 | tuple.format = VK_FORMAT_R8_UNORM; | ||
| 269 | } else if (pixel_format == PixelFormat::BC5_SNORM) { | ||
| 270 | tuple.format = VK_FORMAT_R8G8_SNORM; | ||
| 271 | } else if (pixel_format == PixelFormat::BC5_UNORM) { | ||
| 272 | tuple.format = VK_FORMAT_R8G8_UNORM; | ||
| 273 | } else if (pixel_format == PixelFormat::BC6H_SFLOAT || | ||
| 274 | pixel_format == PixelFormat::BC6H_UFLOAT) { | ||
| 275 | tuple.format = VK_FORMAT_R16G16B16A16_SFLOAT; | ||
| 276 | } else if (is_srgb) { | ||
| 277 | tuple.format = VK_FORMAT_A8B8G8R8_SRGB_PACK32; | ||
| 278 | } else { | ||
| 279 | tuple.format = VK_FORMAT_A8B8G8R8_UNORM_PACK32; | ||
| 280 | } | ||
| 281 | } | ||
| 262 | const bool attachable = (tuple.usage & Attachable) != 0; | 282 | const bool attachable = (tuple.usage & Attachable) != 0; |
| 263 | const bool storage = (tuple.usage & Storage) != 0; | 283 | const bool storage = (tuple.usage & Storage) != 0; |
| 264 | 284 | ||
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 77128c6e2..454bb66a4 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <fmt/format.h> | 12 | #include <fmt/format.h> |
| 13 | 13 | ||
| 14 | #include "common/logging/log.h" | 14 | #include "common/logging/log.h" |
| 15 | #include "common/polyfill_ranges.h" | ||
| 15 | #include "common/scope_exit.h" | 16 | #include "common/scope_exit.h" |
| 16 | #include "common/settings.h" | 17 | #include "common/settings.h" |
| 17 | #include "common/telemetry.h" | 18 | #include "common/telemetry.h" |
| @@ -65,6 +66,21 @@ std::string BuildCommaSeparatedExtensions( | |||
| 65 | return fmt::format("{}", fmt::join(available_extensions, ",")); | 66 | return fmt::format("{}", fmt::join(available_extensions, ",")); |
| 66 | } | 67 | } |
| 67 | 68 | ||
| 69 | DebugCallback MakeDebugCallback(const vk::Instance& instance, const vk::InstanceDispatch& dld) { | ||
| 70 | if (!Settings::values.renderer_debug) { | ||
| 71 | return DebugCallback{}; | ||
| 72 | } | ||
| 73 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 74 | const auto it = std::ranges::find_if(*properties, [](const auto& prop) { | ||
| 75 | return std::strcmp(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, prop.extensionName) == 0; | ||
| 76 | }); | ||
| 77 | if (it != properties->end()) { | ||
| 78 | return CreateDebugUtilsCallback(instance); | ||
| 79 | } else { | ||
| 80 | return CreateDebugReportCallback(instance); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | |||
| 68 | } // Anonymous namespace | 84 | } // Anonymous namespace |
| 69 | 85 | ||
| 70 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | 86 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, |
| @@ -87,10 +103,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, | |||
| 87 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), | 103 | cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary(context.get())), |
| 88 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, | 104 | instance(CreateInstance(*library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type, |
| 89 | Settings::values.renderer_debug.GetValue())), | 105 | Settings::values.renderer_debug.GetValue())), |
| 90 | debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr), | 106 | debug_callback(MakeDebugCallback(instance, dld)), |
| 91 | surface(CreateSurface(instance, render_window.GetWindowInfo())), | 107 | surface(CreateSurface(instance, render_window.GetWindowInfo())), |
| 92 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false), | 108 | device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(), |
| 93 | state_tracker(), scheduler(device, state_tracker), | 109 | scheduler(device, state_tracker), |
| 94 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, | 110 | swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width, |
| 95 | render_window.GetFramebufferLayout().height, false), | 111 | render_window.GetFramebufferLayout().height, false), |
| 96 | present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, | 112 | present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, |
| @@ -173,7 +189,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 173 | return; | 189 | return; |
| 174 | } | 190 | } |
| 175 | const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; | 191 | const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; |
| 176 | vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{ | 192 | vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ |
| 177 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 193 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 178 | .pNext = nullptr, | 194 | .pNext = nullptr, |
| 179 | .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, | 195 | .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, |
| @@ -196,7 +212,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 196 | .pQueueFamilyIndices = nullptr, | 212 | .pQueueFamilyIndices = nullptr, |
| 197 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 213 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 198 | }); | 214 | }); |
| 199 | const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal); | ||
| 200 | 215 | ||
| 201 | const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | 216 | const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ |
| 202 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 217 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| @@ -234,8 +249,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 234 | .queueFamilyIndexCount = 0, | 249 | .queueFamilyIndexCount = 0, |
| 235 | .pQueueFamilyIndices = nullptr, | 250 | .pQueueFamilyIndices = nullptr, |
| 236 | }; | 251 | }; |
| 237 | const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info); | 252 | const vk::Buffer dst_buffer = |
| 238 | MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download); | 253 | memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); |
| 239 | 254 | ||
| 240 | scheduler.RequestOutsideRenderPassOperationContext(); | 255 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 241 | scheduler.Record([&](vk::CommandBuffer cmdbuf) { | 256 | scheduler.Record([&](vk::CommandBuffer cmdbuf) { |
| @@ -309,8 +324,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr | |||
| 309 | scheduler.Finish(); | 324 | scheduler.Finish(); |
| 310 | 325 | ||
| 311 | // Copy backing image data to the QImage screenshot buffer | 326 | // Copy backing image data to the QImage screenshot buffer |
| 312 | const auto dst_memory_map = dst_buffer_memory.Map(); | 327 | dst_buffer.Invalidate(); |
| 313 | std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size()); | 328 | std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(), |
| 329 | dst_buffer.Mapped().size()); | ||
| 314 | renderer_settings.screenshot_complete_callback(false); | 330 | renderer_settings.screenshot_complete_callback(false); |
| 315 | renderer_settings.screenshot_requested = false; | 331 | renderer_settings.screenshot_requested = false; |
| 316 | } | 332 | } |
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index b2e8cbd1b..ca22c0baa 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <memory> | 6 | #include <memory> |
| 7 | #include <string> | 7 | #include <string> |
| 8 | #include <variant> | ||
| 8 | 9 | ||
| 9 | #include "common/dynamic_library.h" | 10 | #include "common/dynamic_library.h" |
| 10 | #include "video_core/renderer_base.h" | 11 | #include "video_core/renderer_base.h" |
| @@ -33,6 +34,8 @@ class GPU; | |||
| 33 | 34 | ||
| 34 | namespace Vulkan { | 35 | namespace Vulkan { |
| 35 | 36 | ||
| 37 | using DebugCallback = std::variant<vk::DebugUtilsMessenger, vk::DebugReportCallback>; | ||
| 38 | |||
| 36 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, | 39 | Device CreateDevice(const vk::Instance& instance, const vk::InstanceDispatch& dld, |
| 37 | VkSurfaceKHR surface); | 40 | VkSurfaceKHR surface); |
| 38 | 41 | ||
| @@ -71,7 +74,7 @@ private: | |||
| 71 | vk::InstanceDispatch dld; | 74 | vk::InstanceDispatch dld; |
| 72 | 75 | ||
| 73 | vk::Instance instance; | 76 | vk::Instance instance; |
| 74 | vk::DebugUtilsMessenger debug_callback; | 77 | DebugCallback debug_callback; |
| 75 | vk::SurfaceKHR surface; | 78 | vk::SurfaceKHR surface; |
| 76 | 79 | ||
| 77 | ScreenInfo screen_info; | 80 | ScreenInfo screen_info; |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index acb143fc7..ad3b29f0e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp | |||
| @@ -162,7 +162,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, | |||
| 162 | SetUniformData(data, layout); | 162 | SetUniformData(data, layout); |
| 163 | SetVertexData(data, framebuffer, layout); | 163 | SetVertexData(data, framebuffer, layout); |
| 164 | 164 | ||
| 165 | const std::span<u8> mapped_span = buffer_commit.Map(); | 165 | const std::span<u8> mapped_span = buffer.Mapped(); |
| 166 | std::memcpy(mapped_span.data(), &data, sizeof(data)); | 166 | std::memcpy(mapped_span.data(), &data, sizeof(data)); |
| 167 | 167 | ||
| 168 | if (!use_accelerated) { | 168 | if (!use_accelerated) { |
| @@ -1071,14 +1071,9 @@ void BlitScreen::ReleaseRawImages() { | |||
| 1071 | scheduler.Wait(tick); | 1071 | scheduler.Wait(tick); |
| 1072 | } | 1072 | } |
| 1073 | raw_images.clear(); | 1073 | raw_images.clear(); |
| 1074 | raw_buffer_commits.clear(); | ||
| 1075 | |||
| 1076 | aa_image_view.reset(); | 1074 | aa_image_view.reset(); |
| 1077 | aa_image.reset(); | 1075 | aa_image.reset(); |
| 1078 | aa_commit = MemoryCommit{}; | ||
| 1079 | |||
| 1080 | buffer.reset(); | 1076 | buffer.reset(); |
| 1081 | buffer_commit = MemoryCommit{}; | ||
| 1082 | } | 1077 | } |
| 1083 | 1078 | ||
| 1084 | void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { | 1079 | void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { |
| @@ -1094,20 +1089,18 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer | |||
| 1094 | .pQueueFamilyIndices = nullptr, | 1089 | .pQueueFamilyIndices = nullptr, |
| 1095 | }; | 1090 | }; |
| 1096 | 1091 | ||
| 1097 | buffer = device.GetLogical().CreateBuffer(ci); | 1092 | buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); |
| 1098 | buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload); | ||
| 1099 | } | 1093 | } |
| 1100 | 1094 | ||
| 1101 | void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | 1095 | void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { |
| 1102 | raw_images.resize(image_count); | 1096 | raw_images.resize(image_count); |
| 1103 | raw_image_views.resize(image_count); | 1097 | raw_image_views.resize(image_count); |
| 1104 | raw_buffer_commits.resize(image_count); | ||
| 1105 | 1098 | ||
| 1106 | const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, | 1099 | const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, |
| 1107 | u32 down_shift = 0) { | 1100 | u32 down_shift = 0) { |
| 1108 | u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | 1101 | u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
| 1109 | : VK_IMAGE_USAGE_TRANSFER_DST_BIT; | 1102 | : VK_IMAGE_USAGE_TRANSFER_DST_BIT; |
| 1110 | return device.GetLogical().CreateImage(VkImageCreateInfo{ | 1103 | return memory_allocator.CreateImage(VkImageCreateInfo{ |
| 1111 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 1104 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 1112 | .pNext = nullptr, | 1105 | .pNext = nullptr, |
| 1113 | .flags = 0, | 1106 | .flags = 0, |
| @@ -1130,9 +1123,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1130 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 1123 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 1131 | }); | 1124 | }); |
| 1132 | }; | 1125 | }; |
| 1133 | const auto create_commit = [&](vk::Image& image) { | ||
| 1134 | return memory_allocator.Commit(image, MemoryUsage::DeviceLocal); | ||
| 1135 | }; | ||
| 1136 | const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { | 1126 | const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { |
| 1137 | return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | 1127 | return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ |
| 1138 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 1128 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| @@ -1161,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1161 | 1151 | ||
| 1162 | for (size_t i = 0; i < image_count; ++i) { | 1152 | for (size_t i = 0; i < image_count; ++i) { |
| 1163 | raw_images[i] = create_image(); | 1153 | raw_images[i] = create_image(); |
| 1164 | raw_buffer_commits[i] = create_commit(raw_images[i]); | ||
| 1165 | raw_image_views[i] = create_image_view(raw_images[i]); | 1154 | raw_image_views[i] = create_image_view(raw_images[i]); |
| 1166 | } | 1155 | } |
| 1167 | 1156 | ||
| @@ -1169,7 +1158,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { | |||
| 1169 | const u32 up_scale = Settings::values.resolution_info.up_scale; | 1158 | const u32 up_scale = Settings::values.resolution_info.up_scale; |
| 1170 | const u32 down_shift = Settings::values.resolution_info.down_shift; | 1159 | const u32 down_shift = Settings::values.resolution_info.down_shift; |
| 1171 | aa_image = create_image(true, up_scale, down_shift); | 1160 | aa_image = create_image(true, up_scale, down_shift); |
| 1172 | aa_commit = create_commit(aa_image); | ||
| 1173 | aa_image_view = create_image_view(aa_image, true); | 1161 | aa_image_view = create_image_view(aa_image, true); |
| 1174 | VkExtent2D size{ | 1162 | VkExtent2D size{ |
| 1175 | .width = (up_scale * framebuffer.width) >> down_shift, | 1163 | .width = (up_scale * framebuffer.width) >> down_shift, |
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 68ec20253..8365b5668 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h | |||
| @@ -142,13 +142,11 @@ private: | |||
| 142 | vk::Sampler sampler; | 142 | vk::Sampler sampler; |
| 143 | 143 | ||
| 144 | vk::Buffer buffer; | 144 | vk::Buffer buffer; |
| 145 | MemoryCommit buffer_commit; | ||
| 146 | 145 | ||
| 147 | std::vector<u64> resource_ticks; | 146 | std::vector<u64> resource_ticks; |
| 148 | 147 | ||
| 149 | std::vector<vk::Image> raw_images; | 148 | std::vector<vk::Image> raw_images; |
| 150 | std::vector<vk::ImageView> raw_image_views; | 149 | std::vector<vk::ImageView> raw_image_views; |
| 151 | std::vector<MemoryCommit> raw_buffer_commits; | ||
| 152 | 150 | ||
| 153 | vk::DescriptorPool aa_descriptor_pool; | 151 | vk::DescriptorPool aa_descriptor_pool; |
| 154 | vk::DescriptorSetLayout aa_descriptor_set_layout; | 152 | vk::DescriptorSetLayout aa_descriptor_set_layout; |
| @@ -159,7 +157,6 @@ private: | |||
| 159 | vk::DescriptorSets aa_descriptor_sets; | 157 | vk::DescriptorSets aa_descriptor_sets; |
| 160 | vk::Image aa_image; | 158 | vk::Image aa_image; |
| 161 | vk::ImageView aa_image_view; | 159 | vk::ImageView aa_image_view; |
| 162 | MemoryCommit aa_commit; | ||
| 163 | 160 | ||
| 164 | u32 raw_width = 0; | 161 | u32 raw_width = 0; |
| 165 | u32 raw_height = 0; | 162 | u32 raw_height = 0; |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e30fcb1ed..b72f95235 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -50,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) { | |||
| 50 | } | 50 | } |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | vk::Buffer CreateBuffer(const Device& device, u64 size) { | 53 | vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) { |
| 54 | VkBufferUsageFlags flags = | 54 | VkBufferUsageFlags flags = |
| 55 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 55 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 56 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | | 56 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | |
| @@ -60,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { | |||
| 60 | if (device.IsExtTransformFeedbackSupported()) { | 60 | if (device.IsExtTransformFeedbackSupported()) { |
| 61 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | 61 | flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 62 | } | 62 | } |
| 63 | return device.GetLogical().CreateBuffer({ | 63 | const VkBufferCreateInfo buffer_ci = { |
| 64 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 64 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 65 | .pNext = nullptr, | 65 | .pNext = nullptr, |
| 66 | .flags = 0, | 66 | .flags = 0, |
| @@ -69,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) { | |||
| 69 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 69 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 70 | .queueFamilyIndexCount = 0, | 70 | .queueFamilyIndexCount = 0, |
| 71 | .pQueueFamilyIndices = nullptr, | 71 | .pQueueFamilyIndices = nullptr, |
| 72 | }); | 72 | }; |
| 73 | return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 73 | } | 74 | } |
| 74 | } // Anonymous namespace | 75 | } // Anonymous namespace |
| 75 | 76 | ||
| @@ -79,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params) | |||
| 79 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, | 80 | Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_, |
| 80 | VAddr cpu_addr_, u64 size_bytes_) | 81 | VAddr cpu_addr_, u64 size_bytes_) |
| 81 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), | 82 | : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_), |
| 82 | device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())}, | 83 | device{&runtime.device}, buffer{ |
| 83 | commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} { | 84 | CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} { |
| 84 | if (runtime.device.HasDebuggingToolAttached()) { | 85 | if (runtime.device.HasDebuggingToolAttached()) { |
| 85 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); | 86 | buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str()); |
| 86 | } | 87 | } |
| @@ -138,7 +139,7 @@ public: | |||
| 138 | const u32 num_first_offset_copies = 4; | 139 | const u32 num_first_offset_copies = 4; |
| 139 | const size_t bytes_per_index = BytesPerIndex(index_type); | 140 | const size_t bytes_per_index = BytesPerIndex(index_type); |
| 140 | const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | 141 | const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; |
| 141 | buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 142 | const VkBufferCreateInfo buffer_ci = { |
| 142 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 143 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 143 | .pNext = nullptr, | 144 | .pNext = nullptr, |
| 144 | .flags = 0, | 145 | .flags = 0, |
| @@ -147,14 +148,21 @@ public: | |||
| 147 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 148 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 148 | .queueFamilyIndexCount = 0, | 149 | .queueFamilyIndexCount = 0, |
| 149 | .pQueueFamilyIndices = nullptr, | 150 | .pQueueFamilyIndices = nullptr, |
| 150 | }); | 151 | }; |
| 152 | buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | ||
| 151 | if (device.HasDebuggingToolAttached()) { | 153 | if (device.HasDebuggingToolAttached()) { |
| 152 | buffer.SetObjectNameEXT("Quad LUT"); | 154 | buffer.SetObjectNameEXT("Quad LUT"); |
| 153 | } | 155 | } |
| 154 | memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 155 | 156 | ||
| 156 | const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | 157 | const bool host_visible = buffer.IsHostVisible(); |
| 157 | u8* staging_data = staging.mapped_span.data(); | 158 | const StagingBufferRef staging = [&] { |
| 159 | if (host_visible) { | ||
| 160 | return StagingBufferRef{}; | ||
| 161 | } | ||
| 162 | return staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||
| 163 | }(); | ||
| 164 | |||
| 165 | u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data(); | ||
| 158 | const size_t quad_size = bytes_per_index * 6; | 166 | const size_t quad_size = bytes_per_index * 6; |
| 159 | 167 | ||
| 160 | for (u32 first = 0; first < num_first_offset_copies; ++first) { | 168 | for (u32 first = 0; first < num_first_offset_copies; ++first) { |
| @@ -164,29 +172,33 @@ public: | |||
| 164 | } | 172 | } |
| 165 | } | 173 | } |
| 166 | 174 | ||
| 167 | scheduler.RequestOutsideRenderPassOperationContext(); | 175 | if (!host_visible) { |
| 168 | scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, | 176 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 169 | dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { | 177 | scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, |
| 170 | const VkBufferCopy copy{ | 178 | dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { |
| 171 | .srcOffset = src_offset, | 179 | const VkBufferCopy copy{ |
| 172 | .dstOffset = 0, | 180 | .srcOffset = src_offset, |
| 173 | .size = size_bytes, | 181 | .dstOffset = 0, |
| 174 | }; | 182 | .size = size_bytes, |
| 175 | const VkBufferMemoryBarrier write_barrier{ | 183 | }; |
| 176 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | 184 | const VkBufferMemoryBarrier write_barrier{ |
| 177 | .pNext = nullptr, | 185 | .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, |
| 178 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | 186 | .pNext = nullptr, |
| 179 | .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | 187 | .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, |
| 180 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 188 | .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, |
| 181 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | 189 | .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 182 | .buffer = dst_buffer, | 190 | .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, |
| 183 | .offset = 0, | 191 | .buffer = dst_buffer, |
| 184 | .size = size_bytes, | 192 | .offset = 0, |
| 185 | }; | 193 | .size = size_bytes, |
| 186 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | 194 | }; |
| 187 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | 195 | cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); |
| 188 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); | 196 | cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, |
| 189 | }); | 197 | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); |
| 198 | }); | ||
| 199 | } else { | ||
| 200 | buffer.Flush(); | ||
| 201 | } | ||
| 190 | } | 202 | } |
| 191 | 203 | ||
| 192 | void BindBuffer(u32 first) { | 204 | void BindBuffer(u32 first) { |
| @@ -361,7 +373,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | |||
| 361 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | 373 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, |
| 362 | }; | 374 | }; |
| 363 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up | 375 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up |
| 364 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); | 376 | boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); |
| 365 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); | 377 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 366 | scheduler.RequestOutsideRenderPassOperationContext(); | 378 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 367 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { | 379 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { |
| @@ -578,7 +590,8 @@ void BufferCacheRuntime::ReserveNullBuffer() { | |||
| 578 | .pNext = nullptr, | 590 | .pNext = nullptr, |
| 579 | .flags = 0, | 591 | .flags = 0, |
| 580 | .size = 4, | 592 | .size = 4, |
| 581 | .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | 593 | .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | |
| 594 | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||
| 582 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 595 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 583 | .queueFamilyIndexCount = 0, | 596 | .queueFamilyIndexCount = 0, |
| 584 | .pQueueFamilyIndices = nullptr, | 597 | .pQueueFamilyIndices = nullptr, |
| @@ -587,11 +600,10 @@ void BufferCacheRuntime::ReserveNullBuffer() { | |||
| 587 | create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | 600 | create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 588 | } | 601 | } |
| 589 | create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; | 602 | create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT; |
| 590 | null_buffer = device.GetLogical().CreateBuffer(create_info); | 603 | null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal); |
| 591 | if (device.HasDebuggingToolAttached()) { | 604 | if (device.HasDebuggingToolAttached()) { |
| 592 | null_buffer.SetObjectNameEXT("Null buffer"); | 605 | null_buffer.SetObjectNameEXT("Null buffer"); |
| 593 | } | 606 | } |
| 594 | null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal); | ||
| 595 | 607 | ||
| 596 | scheduler.RequestOutsideRenderPassOperationContext(); | 608 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 597 | scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { | 609 | scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) { |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index cdeef8846..95446c732 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h | |||
| @@ -48,7 +48,6 @@ private: | |||
| 48 | 48 | ||
| 49 | const Device* device{}; | 49 | const Device* device{}; |
| 50 | vk::Buffer buffer; | 50 | vk::Buffer buffer; |
| 51 | MemoryCommit commit; | ||
| 52 | std::vector<BufferView> views; | 51 | std::vector<BufferView> views; |
| 53 | }; | 52 | }; |
| 54 | 53 | ||
| @@ -142,7 +141,6 @@ private: | |||
| 142 | std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; | 141 | std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; |
| 143 | 142 | ||
| 144 | vk::Buffer null_buffer; | 143 | vk::Buffer null_buffer; |
| 145 | MemoryCommit null_buffer_commit; | ||
| 146 | 144 | ||
| 147 | std::unique_ptr<Uint8Pass> uint8_pass; | 145 | std::unique_ptr<Uint8Pass> uint8_pass; |
| 148 | QuadIndexedPass quad_index_pass; | 146 | QuadIndexedPass quad_index_pass; |
diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index df972cd54..9bcdca2fb 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp | |||
| @@ -205,10 +205,9 @@ void FSR::CreateDescriptorSets() { | |||
| 205 | void FSR::CreateImages() { | 205 | void FSR::CreateImages() { |
| 206 | images.resize(image_count * 2); | 206 | images.resize(image_count * 2); |
| 207 | image_views.resize(image_count * 2); | 207 | image_views.resize(image_count * 2); |
| 208 | buffer_commits.resize(image_count * 2); | ||
| 209 | 208 | ||
| 210 | for (size_t i = 0; i < image_count * 2; ++i) { | 209 | for (size_t i = 0; i < image_count * 2; ++i) { |
| 211 | images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{ | 210 | images[i] = memory_allocator.CreateImage(VkImageCreateInfo{ |
| 212 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 211 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 213 | .pNext = nullptr, | 212 | .pNext = nullptr, |
| 214 | .flags = 0, | 213 | .flags = 0, |
| @@ -231,7 +230,6 @@ void FSR::CreateImages() { | |||
| 231 | .pQueueFamilyIndices = nullptr, | 230 | .pQueueFamilyIndices = nullptr, |
| 232 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 231 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 233 | }); | 232 | }); |
| 234 | buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal); | ||
| 235 | image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ | 233 | image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ |
| 236 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 234 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 237 | .pNext = nullptr, | 235 | .pNext = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 5d872861f..8bb9fc23a 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h | |||
| @@ -47,7 +47,6 @@ private: | |||
| 47 | vk::Sampler sampler; | 47 | vk::Sampler sampler; |
| 48 | std::vector<vk::Image> images; | 48 | std::vector<vk::Image> images; |
| 49 | std::vector<vk::ImageView> image_views; | 49 | std::vector<vk::ImageView> image_views; |
| 50 | std::vector<MemoryCommit> buffer_commits; | ||
| 51 | }; | 50 | }; |
| 52 | 51 | ||
| 53 | } // namespace Vulkan | 52 | } // namespace Vulkan |
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index c1595642e..ad35cacac 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp | |||
| @@ -652,13 +652,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { | |||
| 652 | .pNext = nullptr, | 652 | .pNext = nullptr, |
| 653 | .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE, | 653 | .negativeOneToOne = key.state.ndc_minus_one_to_one.Value() != 0 ? VK_TRUE : VK_FALSE, |
| 654 | }; | 654 | }; |
| 655 | const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); | ||
| 655 | VkPipelineViewportStateCreateInfo viewport_ci{ | 656 | VkPipelineViewportStateCreateInfo viewport_ci{ |
| 656 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, | 657 | .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, |
| 657 | .pNext = nullptr, | 658 | .pNext = nullptr, |
| 658 | .flags = 0, | 659 | .flags = 0, |
| 659 | .viewportCount = Maxwell::NumViewports, | 660 | .viewportCount = num_viewports, |
| 660 | .pViewports = nullptr, | 661 | .pViewports = nullptr, |
| 661 | .scissorCount = Maxwell::NumViewports, | 662 | .scissorCount = num_viewports, |
| 662 | .pScissors = nullptr, | 663 | .pScissors = nullptr, |
| 663 | }; | 664 | }; |
| 664 | if (device.IsNvViewportSwizzleSupported()) { | 665 | if (device.IsNvViewportSwizzleSupported()) { |
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp index 5eeda08d2..6b288b994 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp | |||
| @@ -75,15 +75,9 @@ void MasterSemaphore::Refresh() { | |||
| 75 | 75 | ||
| 76 | void MasterSemaphore::Wait(u64 tick) { | 76 | void MasterSemaphore::Wait(u64 tick) { |
| 77 | if (!semaphore) { | 77 | if (!semaphore) { |
| 78 | // If we don't support timeline semaphores, use an atomic wait | 78 | // If we don't support timeline semaphores, wait for the value normally |
| 79 | while (true) { | 79 | std::unique_lock lk{free_mutex}; |
| 80 | u64 current_value = gpu_tick.load(std::memory_order_relaxed); | 80 | free_cv.wait(lk, [&] { return gpu_tick.load(std::memory_order_relaxed) >= tick; }); |
| 81 | if (current_value >= tick) { | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | gpu_tick.wait(current_value); | ||
| 85 | } | ||
| 86 | |||
| 87 | return; | 81 | return; |
| 88 | } | 82 | } |
| 89 | 83 | ||
| @@ -198,11 +192,13 @@ void MasterSemaphore::WaitThread(std::stop_token token) { | |||
| 198 | 192 | ||
| 199 | fence.Wait(); | 193 | fence.Wait(); |
| 200 | fence.Reset(); | 194 | fence.Reset(); |
| 201 | gpu_tick.store(host_tick); | ||
| 202 | gpu_tick.notify_all(); | ||
| 203 | 195 | ||
| 204 | std::scoped_lock lock{free_mutex}; | 196 | { |
| 205 | free_queue.push_front(std::move(fence)); | 197 | std::scoped_lock lock{free_mutex}; |
| 198 | free_queue.push_front(std::move(fence)); | ||
| 199 | gpu_tick.store(host_tick); | ||
| 200 | } | ||
| 201 | free_cv.notify_one(); | ||
| 206 | } | 202 | } |
| 207 | } | 203 | } |
| 208 | 204 | ||
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index 1e7c90215..3f599d7bd 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h | |||
| @@ -72,6 +72,7 @@ private: | |||
| 72 | std::atomic<u64> current_tick{1}; ///< Current logical tick. | 72 | std::atomic<u64> current_tick{1}; ///< Current logical tick. |
| 73 | std::mutex wait_mutex; | 73 | std::mutex wait_mutex; |
| 74 | std::mutex free_mutex; | 74 | std::mutex free_mutex; |
| 75 | std::condition_variable free_cv; | ||
| 75 | std::condition_variable_any wait_cv; | 76 | std::condition_variable_any wait_cv; |
| 76 | std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread. | 77 | std::queue<Waitable> wait_queue; ///< Queue for the fences to be waited on by the wait thread. |
| 77 | std::deque<vk::Fence> free_queue; ///< Holds available fences for submission. | 78 | std::deque<vk::Fence> free_queue; ///< Holds available fences for submission. |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 18e040a1b..d600c4e61 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program | |||
| 167 | info.fixed_state_point_size = point_size; | 167 | info.fixed_state_point_size = point_size; |
| 168 | } | 168 | } |
| 169 | if (key.state.xfb_enabled) { | 169 | if (key.state.xfb_enabled) { |
| 170 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | 170 | auto [varyings, count] = |
| 171 | VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 172 | info.xfb_varyings = varyings; | ||
| 173 | info.xfb_count = count; | ||
| 171 | } | 174 | } |
| 172 | info.convert_depth_mode = gl_ndc; | 175 | info.convert_depth_mode = gl_ndc; |
| 173 | } | 176 | } |
| @@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program | |||
| 214 | info.fixed_state_point_size = point_size; | 217 | info.fixed_state_point_size = point_size; |
| 215 | } | 218 | } |
| 216 | if (key.state.xfb_enabled != 0) { | 219 | if (key.state.xfb_enabled != 0) { |
| 217 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | 220 | auto [varyings, count] = |
| 221 | VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 222 | info.xfb_varyings = varyings; | ||
| 223 | info.xfb_count = count; | ||
| 218 | } | 224 | } |
| 219 | info.convert_depth_mode = gl_ndc; | 225 | info.convert_depth_mode = gl_ndc; |
| 220 | break; | 226 | break; |
| @@ -303,7 +309,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 303 | .support_int16 = device.IsShaderInt16Supported(), | 309 | .support_int16 = device.IsShaderInt16Supported(), |
| 304 | .support_int64 = device.IsShaderInt64Supported(), | 310 | .support_int64 = device.IsShaderInt64Supported(), |
| 305 | .support_vertex_instance_id = false, | 311 | .support_vertex_instance_id = false, |
| 306 | .support_float_controls = true, | 312 | .support_float_controls = device.IsKhrShaderFloatControlsSupported(), |
| 307 | .support_separate_denorm_behavior = | 313 | .support_separate_denorm_behavior = |
| 308 | float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, | 314 | float_control.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL, |
| 309 | .support_separate_rounding_mode = | 315 | .support_separate_rounding_mode = |
| @@ -319,12 +325,13 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||
| 319 | .support_fp64_signed_zero_nan_preserve = | 325 | .support_fp64_signed_zero_nan_preserve = |
| 320 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, | 326 | float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE, |
| 321 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), | 327 | .support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(), |
| 322 | .support_vote = true, | 328 | .support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT), |
| 323 | .support_viewport_index_layer_non_geometry = | 329 | .support_viewport_index_layer_non_geometry = |
| 324 | device.IsExtShaderViewportIndexLayerSupported(), | 330 | device.IsExtShaderViewportIndexLayerSupported(), |
| 325 | .support_viewport_mask = device.IsNvViewportArray2Supported(), | 331 | .support_viewport_mask = device.IsNvViewportArray2Supported(), |
| 326 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), | 332 | .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), |
| 327 | .support_demote_to_helper_invocation = true, | 333 | .support_demote_to_helper_invocation = |
| 334 | device.IsExtShaderDemoteToHelperInvocationSupported(), | ||
| 328 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), | 335 | .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), |
| 329 | .support_derivative_control = true, | 336 | .support_derivative_control = true, |
| 330 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), | 337 | .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), |
| @@ -705,10 +712,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | |||
| 705 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | 712 | std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( |
| 706 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, | 713 | ShaderPools& pools, const ComputePipelineCacheKey& key, Shader::Environment& env, |
| 707 | PipelineStatistics* statistics, bool build_in_parallel) try { | 714 | PipelineStatistics* statistics, bool build_in_parallel) try { |
| 708 | // TODO: Remove this when Intel fixes their shader compiler. | 715 | if (device.HasBrokenCompute()) { |
| 709 | // https://github.com/IGCIT/Intel-GPU-Community-Issue-Tracker-IGCIT/issues/159 | ||
| 710 | if (device.GetDriverID() == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS && | ||
| 711 | !Settings::values.enable_compute_pipelines.GetValue()) { | ||
| 712 | LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); | 716 | LOG_ERROR(Render_Vulkan, "Skipping 0x{:016x}", key.Hash()); |
| 713 | return nullptr; | 717 | return nullptr; |
| 714 | } | 718 | } |
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index 10ace0420..d681bd22a 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp | |||
| @@ -181,7 +181,7 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_ | |||
| 181 | frame->height = height; | 181 | frame->height = height; |
| 182 | frame->is_srgb = is_srgb; | 182 | frame->is_srgb = is_srgb; |
| 183 | 183 | ||
| 184 | frame->image = dld.CreateImage({ | 184 | frame->image = memory_allocator.CreateImage({ |
| 185 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 185 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 186 | .pNext = nullptr, | 186 | .pNext = nullptr, |
| 187 | .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, | 187 | .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, |
| @@ -204,8 +204,6 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_ | |||
| 204 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 204 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 205 | }); | 205 | }); |
| 206 | 206 | ||
| 207 | frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal); | ||
| 208 | |||
| 209 | frame->image_view = dld.CreateImageView({ | 207 | frame->image_view = dld.CreateImageView({ |
| 210 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, | 208 | .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
| 211 | .pNext = nullptr, | 209 | .pNext = nullptr, |
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 4ac2e2395..83e859416 100644 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h | |||
| @@ -29,7 +29,6 @@ struct Frame { | |||
| 29 | vk::Image image; | 29 | vk::Image image; |
| 30 | vk::ImageView image_view; | 30 | vk::ImageView image_view; |
| 31 | vk::Framebuffer framebuffer; | 31 | vk::Framebuffer framebuffer; |
| 32 | MemoryCommit image_commit; | ||
| 33 | vk::CommandBuffer cmdbuf; | 32 | vk::CommandBuffer cmdbuf; |
| 34 | vk::Semaphore render_ready; | 33 | vk::Semaphore render_ready; |
| 35 | vk::Fence present_done; | 34 | vk::Fence present_done; |
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 84e3a30cc..f7c0d939a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp | |||
| @@ -315,7 +315,14 @@ void RasterizerVulkan::Clear(u32 layer_count) { | |||
| 315 | FlushWork(); | 315 | FlushWork(); |
| 316 | gpu_memory->FlushCaching(); | 316 | gpu_memory->FlushCaching(); |
| 317 | 317 | ||
| 318 | #if ANDROID | ||
| 319 | if (Settings::IsGPULevelHigh()) { | ||
| 320 | // This is problematic on Android, disable on GPU Normal. | ||
| 321 | query_cache.UpdateCounters(); | ||
| 322 | } | ||
| 323 | #else | ||
| 318 | query_cache.UpdateCounters(); | 324 | query_cache.UpdateCounters(); |
| 325 | #endif | ||
| 319 | 326 | ||
| 320 | auto& regs = maxwell3d->regs; | 327 | auto& regs = maxwell3d->regs; |
| 321 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || | 328 | const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || |
| @@ -925,7 +932,7 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 925 | } | 932 | } |
| 926 | const bool is_rescaling{texture_cache.IsRescaling()}; | 933 | const bool is_rescaling{texture_cache.IsRescaling()}; |
| 927 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; | 934 | const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f; |
| 928 | const std::array viewports{ | 935 | const std::array viewport_list{ |
| 929 | GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), | 936 | GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale), |
| 930 | GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), | 937 | GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale), |
| 931 | GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), | 938 | GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale), |
| @@ -935,7 +942,11 @@ void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& reg | |||
| 935 | GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), | 942 | GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale), |
| 936 | GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), | 943 | GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale), |
| 937 | }; | 944 | }; |
| 938 | scheduler.Record([viewports](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewports); }); | 945 | scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) { |
| 946 | const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); | ||
| 947 | const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports); | ||
| 948 | cmdbuf.SetViewport(0, viewports); | ||
| 949 | }); | ||
| 939 | } | 950 | } |
| 940 | 951 | ||
| 941 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { | 952 | void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) { |
| @@ -948,7 +959,7 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 948 | up_scale = Settings::values.resolution_info.up_scale; | 959 | up_scale = Settings::values.resolution_info.up_scale; |
| 949 | down_shift = Settings::values.resolution_info.down_shift; | 960 | down_shift = Settings::values.resolution_info.down_shift; |
| 950 | } | 961 | } |
| 951 | const std::array scissors{ | 962 | const std::array scissor_list{ |
| 952 | GetScissorState(regs, 0, up_scale, down_shift), | 963 | GetScissorState(regs, 0, up_scale, down_shift), |
| 953 | GetScissorState(regs, 1, up_scale, down_shift), | 964 | GetScissorState(regs, 1, up_scale, down_shift), |
| 954 | GetScissorState(regs, 2, up_scale, down_shift), | 965 | GetScissorState(regs, 2, up_scale, down_shift), |
| @@ -966,7 +977,11 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs | |||
| 966 | GetScissorState(regs, 14, up_scale, down_shift), | 977 | GetScissorState(regs, 14, up_scale, down_shift), |
| 967 | GetScissorState(regs, 15, up_scale, down_shift), | 978 | GetScissorState(regs, 15, up_scale, down_shift), |
| 968 | }; | 979 | }; |
| 969 | scheduler.Record([scissors](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissors); }); | 980 | scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) { |
| 981 | const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports); | ||
| 982 | const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors); | ||
| 983 | cmdbuf.SetScissor(0, scissors); | ||
| 984 | }); | ||
| 970 | } | 985 | } |
| 971 | 986 | ||
| 972 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { | 987 | void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) { |
diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/vk_smaa.cpp index f8735189d..5efd7d66e 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/vk_smaa.cpp | |||
| @@ -25,9 +25,7 @@ namespace { | |||
| 25 | 25 | ||
| 26 | #define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) | 26 | #define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) |
| 27 | 27 | ||
| 28 | std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, | 28 | vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { |
| 29 | MemoryAllocator& allocator, | ||
| 30 | VkExtent2D dimensions, VkFormat format) { | ||
| 31 | const VkImageCreateInfo image_ci{ | 29 | const VkImageCreateInfo image_ci{ |
| 32 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, | 30 | .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, |
| 33 | .pNext = nullptr, | 31 | .pNext = nullptr, |
| @@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device, | |||
| 46 | .pQueueFamilyIndices = nullptr, | 44 | .pQueueFamilyIndices = nullptr, |
| 47 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, | 45 | .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, |
| 48 | }; | 46 | }; |
| 49 | 47 | return allocator.CreateImage(image_ci); | |
| 50 | auto image = device.GetLogical().CreateImage(image_ci); | ||
| 51 | auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal); | ||
| 52 | |||
| 53 | return std::make_pair(std::move(image), std::move(commit)); | ||
| 54 | } | 48 | } |
| 55 | 49 | ||
| 56 | void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, | 50 | void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, |
| @@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo | |||
| 82 | void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, | 76 | void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, |
| 83 | vk::Image& image, VkExtent2D dimensions, VkFormat format, | 77 | vk::Image& image, VkExtent2D dimensions, VkFormat format, |
| 84 | std::span<const u8> initial_contents = {}) { | 78 | std::span<const u8> initial_contents = {}) { |
| 85 | auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | 79 | const VkBufferCreateInfo upload_ci = { |
| 86 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 80 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 87 | .pNext = nullptr, | 81 | .pNext = nullptr, |
| 88 | .flags = 0, | 82 | .flags = 0, |
| @@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc | |||
| 91 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 85 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 92 | .queueFamilyIndexCount = 0, | 86 | .queueFamilyIndexCount = 0, |
| 93 | .pQueueFamilyIndices = nullptr, | 87 | .pQueueFamilyIndices = nullptr, |
| 94 | }); | 88 | }; |
| 95 | auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload); | 89 | auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload); |
| 96 | std::ranges::copy(initial_contents, upload_commit.Map().begin()); | 90 | std::ranges::copy(initial_contents, upload_buffer.Mapped().begin()); |
| 91 | upload_buffer.Flush(); | ||
| 97 | 92 | ||
| 98 | const std::array<VkBufferImageCopy, 1> regions{{{ | 93 | const std::array<VkBufferImageCopy, 1> regions{{{ |
| 99 | .bufferOffset = 0, | 94 | .bufferOffset = 0, |
| @@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc | |||
| 117 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); | 112 | VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); |
| 118 | }); | 113 | }); |
| 119 | scheduler.Finish(); | 114 | scheduler.Finish(); |
| 120 | |||
| 121 | // This should go out of scope before the commit | ||
| 122 | auto upload_buffer2 = std::move(upload_buffer); | ||
| 123 | } | 115 | } |
| 124 | 116 | ||
| 125 | vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { | 117 | vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { |
| @@ -531,10 +523,8 @@ void SMAA::CreateImages() { | |||
| 531 | static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; | 523 | static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; |
| 532 | static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; | 524 | static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; |
| 533 | 525 | ||
| 534 | std::tie(m_static_images[Area], m_static_buffer_commits[Area]) = | 526 | m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); |
| 535 | CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); | 527 | m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); |
| 536 | std::tie(m_static_images[Search], m_static_buffer_commits[Search]) = | ||
| 537 | CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM); | ||
| 538 | 528 | ||
| 539 | m_static_image_views[Area] = | 529 | m_static_image_views[Area] = |
| 540 | CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); | 530 | CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); |
| @@ -544,12 +534,11 @@ void SMAA::CreateImages() { | |||
| 544 | for (u32 i = 0; i < m_image_count; i++) { | 534 | for (u32 i = 0; i < m_image_count; i++) { |
| 545 | Images& images = m_dynamic_images.emplace_back(); | 535 | Images& images = m_dynamic_images.emplace_back(); |
| 546 | 536 | ||
| 547 | std::tie(images.images[Blend], images.buffer_commits[Blend]) = | 537 | images.images[Blend] = |
| 548 | CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); | 538 | CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); |
| 549 | std::tie(images.images[Edges], images.buffer_commits[Edges]) = | 539 | images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); |
| 550 | CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); | 540 | images.images[Output] = |
| 551 | std::tie(images.images[Output], images.buffer_commits[Output]) = | 541 | CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); |
| 552 | CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); | ||
| 553 | 542 | ||
| 554 | images.image_views[Blend] = | 543 | images.image_views[Blend] = |
| 555 | CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); | 544 | CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); |
diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/vk_smaa.h index 99a369148..0e214258a 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.h +++ b/src/video_core/renderer_vulkan/vk_smaa.h | |||
| @@ -66,13 +66,11 @@ private: | |||
| 66 | std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{}; | 66 | std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{}; |
| 67 | std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{}; | 67 | std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{}; |
| 68 | 68 | ||
| 69 | std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits; | ||
| 70 | std::array<vk::Image, MaxStaticImage> m_static_images{}; | 69 | std::array<vk::Image, MaxStaticImage> m_static_images{}; |
| 71 | std::array<vk::ImageView, MaxStaticImage> m_static_image_views{}; | 70 | std::array<vk::ImageView, MaxStaticImage> m_static_image_views{}; |
| 72 | 71 | ||
| 73 | struct Images { | 72 | struct Images { |
| 74 | vk::DescriptorSets descriptor_sets{}; | 73 | vk::DescriptorSets descriptor_sets{}; |
| 75 | std::array<MemoryCommit, MaxDynamicImage> buffer_commits; | ||
| 76 | std::array<vk::Image, MaxDynamicImage> images{}; | 74 | std::array<vk::Image, MaxDynamicImage> images{}; |
| 77 | std::array<vk::ImageView, MaxDynamicImage> image_views{}; | 75 | std::array<vk::ImageView, MaxDynamicImage> image_views{}; |
| 78 | std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{}; | 76 | std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{}; |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 74ca77216..ce92f66ab 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | |||
| @@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; | |||
| 30 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; | 30 | constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; |
| 31 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; | 31 | constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; |
| 32 | 32 | ||
| 33 | constexpr VkMemoryPropertyFlags HOST_FLAGS = | ||
| 34 | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 35 | constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; | ||
| 36 | |||
| 37 | bool IsStreamHeap(VkMemoryHeap heap) noexcept { | ||
| 38 | return STREAM_BUFFER_SIZE < (heap.size * 2) / 3; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||
| 42 | VkMemoryPropertyFlags flags) noexcept { | ||
| 43 | for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { | ||
| 44 | if (((type_mask >> type_index) & 1) == 0) { | ||
| 45 | // Memory type is incompatible | ||
| 46 | continue; | ||
| 47 | } | ||
| 48 | const VkMemoryType& memory_type = props.memoryTypes[type_index]; | ||
| 49 | if ((memory_type.propertyFlags & flags) != flags) { | ||
| 50 | // Memory type doesn't have the flags we want | ||
| 51 | continue; | ||
| 52 | } | ||
| 53 | if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) { | ||
| 54 | // Memory heap is not suitable for streaming | ||
| 55 | continue; | ||
| 56 | } | ||
| 57 | // Success! | ||
| 58 | return type_index; | ||
| 59 | } | ||
| 60 | return std::nullopt; | ||
| 61 | } | ||
| 62 | |||
| 63 | u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, | ||
| 64 | bool try_device_local) { | ||
| 65 | std::optional<u32> type; | ||
| 66 | if (try_device_local) { | ||
| 67 | // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this | ||
| 68 | type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS); | ||
| 69 | if (type) { | ||
| 70 | return *type; | ||
| 71 | } | ||
| 72 | } | ||
| 73 | // Otherwise try without the DEVICE_LOCAL_BIT | ||
| 74 | type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS); | ||
| 75 | if (type) { | ||
| 76 | return *type; | ||
| 77 | } | ||
| 78 | // This should never happen, and in case it does, signal it as an out of memory situation | ||
| 79 | throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); | ||
| 80 | } | ||
| 81 | |||
| 82 | size_t Region(size_t iterator) noexcept { | 33 | size_t Region(size_t iterator) noexcept { |
| 83 | return iterator / REGION_SIZE; | 34 | return iterator / REGION_SIZE; |
| 84 | } | 35 | } |
| @@ -87,58 +38,26 @@ size_t Region(size_t iterator) noexcept { | |||
| 87 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, | 38 | StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, |
| 88 | Scheduler& scheduler_) | 39 | Scheduler& scheduler_) |
| 89 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { | 40 | : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { |
| 90 | const vk::Device& dev = device.GetLogical(); | 41 | VkBufferCreateInfo stream_ci = { |
| 91 | stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ | ||
| 92 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 42 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 93 | .pNext = nullptr, | 43 | .pNext = nullptr, |
| 94 | .flags = 0, | 44 | .flags = 0, |
| 95 | .size = STREAM_BUFFER_SIZE, | 45 | .size = STREAM_BUFFER_SIZE, |
| 96 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | | 46 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |
| 97 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | 47 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, |
| 98 | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, | ||
| 99 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 48 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 100 | .queueFamilyIndexCount = 0, | 49 | .queueFamilyIndexCount = 0, |
| 101 | .pQueueFamilyIndices = nullptr, | 50 | .pQueueFamilyIndices = nullptr, |
| 102 | }); | ||
| 103 | if (device.HasDebuggingToolAttached()) { | ||
| 104 | stream_buffer.SetObjectNameEXT("Stream Buffer"); | ||
| 105 | } | ||
| 106 | VkMemoryDedicatedRequirements dedicated_reqs{ | ||
| 107 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, | ||
| 108 | .pNext = nullptr, | ||
| 109 | .prefersDedicatedAllocation = VK_FALSE, | ||
| 110 | .requiresDedicatedAllocation = VK_FALSE, | ||
| 111 | }; | ||
| 112 | const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs); | ||
| 113 | const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE || | ||
| 114 | dedicated_reqs.requiresDedicatedAllocation == VK_TRUE; | ||
| 115 | const VkMemoryDedicatedAllocateInfo dedicated_info{ | ||
| 116 | .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, | ||
| 117 | .pNext = nullptr, | ||
| 118 | .image = nullptr, | ||
| 119 | .buffer = *stream_buffer, | ||
| 120 | }; | 51 | }; |
| 121 | const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties; | 52 | if (device.IsExtTransformFeedbackSupported()) { |
| 122 | VkMemoryAllocateInfo stream_memory_info{ | 53 | stream_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; |
| 123 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | ||
| 124 | .pNext = make_dedicated ? &dedicated_info : nullptr, | ||
| 125 | .allocationSize = requirements.size, | ||
| 126 | .memoryTypeIndex = | ||
| 127 | FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true), | ||
| 128 | }; | ||
| 129 | stream_memory = dev.TryAllocateMemory(stream_memory_info); | ||
| 130 | if (!stream_memory) { | ||
| 131 | LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); | ||
| 132 | stream_memory_info.memoryTypeIndex = | ||
| 133 | FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false); | ||
| 134 | stream_memory = dev.AllocateMemory(stream_memory_info); | ||
| 135 | } | 54 | } |
| 136 | 55 | stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream); | |
| 137 | if (device.HasDebuggingToolAttached()) { | 56 | if (device.HasDebuggingToolAttached()) { |
| 138 | stream_memory.SetObjectNameEXT("Stream Buffer Memory"); | 57 | stream_buffer.SetObjectNameEXT("Stream Buffer"); |
| 139 | } | 58 | } |
| 140 | stream_buffer.BindMemory(*stream_memory, 0); | 59 | stream_pointer = stream_buffer.Mapped(); |
| 141 | stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE); | 60 | ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!"); |
| 142 | } | 61 | } |
| 143 | 62 | ||
| 144 | StagingBufferPool::~StagingBufferPool() = default; | 63 | StagingBufferPool::~StagingBufferPool() = default; |
| @@ -199,7 +118,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { | |||
| 199 | return StagingBufferRef{ | 118 | return StagingBufferRef{ |
| 200 | .buffer = *stream_buffer, | 119 | .buffer = *stream_buffer, |
| 201 | .offset = static_cast<VkDeviceSize>(offset), | 120 | .offset = static_cast<VkDeviceSize>(offset), |
| 202 | .mapped_span = std::span<u8>(stream_pointer + offset, size), | 121 | .mapped_span = stream_pointer.subspan(offset, size), |
| 203 | .usage{}, | 122 | .usage{}, |
| 204 | .log2_level{}, | 123 | .log2_level{}, |
| 205 | .index{}, | 124 | .index{}, |
| @@ -247,29 +166,29 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s | |||
| 247 | StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, | 166 | StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage, |
| 248 | bool deferred) { | 167 | bool deferred) { |
| 249 | const u32 log2 = Common::Log2Ceil64(size); | 168 | const u32 log2 = Common::Log2Ceil64(size); |
| 250 | vk::Buffer buffer = device.GetLogical().CreateBuffer({ | 169 | VkBufferCreateInfo buffer_ci = { |
| 251 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 170 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 252 | .pNext = nullptr, | 171 | .pNext = nullptr, |
| 253 | .flags = 0, | 172 | .flags = 0, |
| 254 | .size = 1ULL << log2, | 173 | .size = 1ULL << log2, |
| 255 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 174 | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 256 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | | 175 | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | |
| 257 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | | 176 | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |
| 258 | VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT, | ||
| 259 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 177 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 260 | .queueFamilyIndexCount = 0, | 178 | .queueFamilyIndexCount = 0, |
| 261 | .pQueueFamilyIndices = nullptr, | 179 | .pQueueFamilyIndices = nullptr, |
| 262 | }); | 180 | }; |
| 181 | if (device.IsExtTransformFeedbackSupported()) { | ||
| 182 | buffer_ci.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; | ||
| 183 | } | ||
| 184 | vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage); | ||
| 263 | if (device.HasDebuggingToolAttached()) { | 185 | if (device.HasDebuggingToolAttached()) { |
| 264 | ++buffer_index; | 186 | ++buffer_index; |
| 265 | buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); | 187 | buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str()); |
| 266 | } | 188 | } |
| 267 | MemoryCommit commit = memory_allocator.Commit(buffer, usage); | 189 | const std::span<u8> mapped_span = buffer.Mapped(); |
| 268 | const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{}; | ||
| 269 | |||
| 270 | StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{ | 190 | StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{ |
| 271 | .buffer = std::move(buffer), | 191 | .buffer = std::move(buffer), |
| 272 | .commit = std::move(commit), | ||
| 273 | .mapped_span = mapped_span, | 192 | .mapped_span = mapped_span, |
| 274 | .usage = usage, | 193 | .usage = usage, |
| 275 | .log2_level = log2, | 194 | .log2_level = log2, |
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 4fd15f11a..5f69f08b1 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | |||
| @@ -46,7 +46,6 @@ private: | |||
| 46 | 46 | ||
| 47 | struct StagingBuffer { | 47 | struct StagingBuffer { |
| 48 | vk::Buffer buffer; | 48 | vk::Buffer buffer; |
| 49 | MemoryCommit commit; | ||
| 50 | std::span<u8> mapped_span; | 49 | std::span<u8> mapped_span; |
| 51 | MemoryUsage usage; | 50 | MemoryUsage usage; |
| 52 | u32 log2_level; | 51 | u32 log2_level; |
| @@ -97,8 +96,7 @@ private: | |||
| 97 | Scheduler& scheduler; | 96 | Scheduler& scheduler; |
| 98 | 97 | ||
| 99 | vk::Buffer stream_buffer; | 98 | vk::Buffer stream_buffer; |
| 100 | vk::DeviceMemory stream_memory; | 99 | std::span<u8> stream_pointer; |
| 101 | u8* stream_pointer = nullptr; | ||
| 102 | 100 | ||
| 103 | size_t iterator = 0; | 101 | size_t iterator = 0; |
| 104 | size_t used_iterator = 0; | 102 | size_t used_iterator = 0; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f025f618b..8385b5509 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include "video_core/renderer_vulkan/blit_image.h" | 15 | #include "video_core/renderer_vulkan/blit_image.h" |
| 16 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" | 16 | #include "video_core/renderer_vulkan/maxwell_to_vk.h" |
| 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" | 17 | #include "video_core/renderer_vulkan/vk_compute_pass.h" |
| 18 | #include "video_core/renderer_vulkan/vk_rasterizer.h" | ||
| 19 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" | 18 | #include "video_core/renderer_vulkan/vk_render_pass_cache.h" |
| 20 | #include "video_core/renderer_vulkan/vk_scheduler.h" | 19 | #include "video_core/renderer_vulkan/vk_scheduler.h" |
| 21 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" | 20 | #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" |
| @@ -163,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 163 | }; | 162 | }; |
| 164 | } | 163 | } |
| 165 | 164 | ||
| 166 | [[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) { | 165 | [[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator, |
| 166 | const ImageInfo& info) { | ||
| 167 | if (info.type == ImageType::Buffer) { | 167 | if (info.type == ImageType::Buffer) { |
| 168 | return vk::Image{}; | 168 | return vk::Image{}; |
| 169 | } | 169 | } |
| 170 | return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info)); | 170 | return allocator.CreateImage(MakeImageCreateInfo(device, info)); |
| 171 | } | 171 | } |
| 172 | 172 | ||
| 173 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { | 173 | [[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) { |
| @@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 330 | }; | 330 | }; |
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( | 333 | [[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16> |
| 334 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { | 334 | TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 335 | std::vector<VkBufferCopy> result(copies.size()); | 335 | boost::container::small_vector<VkBufferCopy, 16> result(copies.size()); |
| 336 | std::ranges::transform( | 336 | std::ranges::transform( |
| 337 | copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { | 337 | copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { |
| 338 | return VkBufferCopy{ | 338 | return VkBufferCopy{ |
| @@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 344 | return result; | 344 | return result; |
| 345 | } | 345 | } |
| 346 | 346 | ||
| 347 | [[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( | 347 | [[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies( |
| 348 | std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { | 348 | std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { |
| 349 | struct Maker { | 349 | struct Maker { |
| 350 | VkBufferImageCopy operator()(const BufferImageCopy& copy) const { | 350 | VkBufferImageCopy operator()(const BufferImageCopy& copy) const { |
| @@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 377 | VkImageAspectFlags aspect_mask; | 377 | VkImageAspectFlags aspect_mask; |
| 378 | }; | 378 | }; |
| 379 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | 379 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| 380 | std::vector<VkBufferImageCopy> result(copies.size() * 2); | 380 | boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2); |
| 381 | std::ranges::transform(copies, result.begin(), | 381 | std::ranges::transform(copies, result.begin(), |
| 382 | Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); | 382 | Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); |
| 383 | std::ranges::transform(copies, result.begin() + copies.size(), | 383 | std::ranges::transform(copies, result.begin() + copies.size(), |
| 384 | Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); | 384 | Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); |
| 385 | return result; | 385 | return result; |
| 386 | } else { | 386 | } else { |
| 387 | std::vector<VkBufferImageCopy> result(copies.size()); | 387 | boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size()); |
| 388 | std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); | 388 | std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); |
| 389 | return result; | 389 | return result; |
| 390 | } | 390 | } |
| @@ -839,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { | |||
| 839 | 839 | ||
| 840 | VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { | 840 | VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { |
| 841 | const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); | 841 | const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL); |
| 842 | if (buffer_commits[level]) { | 842 | if (buffers[level]) { |
| 843 | return *buffers[level]; | 843 | return *buffers[level]; |
| 844 | } | 844 | } |
| 845 | const auto new_size = Common::NextPow2(needed_size); | 845 | const auto new_size = Common::NextPow2(needed_size); |
| 846 | static constexpr VkBufferUsageFlags flags = | 846 | static constexpr VkBufferUsageFlags flags = |
| 847 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | 847 | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |
| 848 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; | 848 | VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT; |
| 849 | buffers[level] = device.GetLogical().CreateBuffer({ | 849 | const VkBufferCreateInfo temp_ci = { |
| 850 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 850 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 851 | .pNext = nullptr, | 851 | .pNext = nullptr, |
| 852 | .flags = 0, | 852 | .flags = 0, |
| @@ -855,9 +855,8 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) { | |||
| 855 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 855 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 856 | .queueFamilyIndexCount = 0, | 856 | .queueFamilyIndexCount = 0, |
| 857 | .pQueueFamilyIndices = nullptr, | 857 | .pQueueFamilyIndices = nullptr, |
| 858 | }); | 858 | }; |
| 859 | buffer_commits[level] = std::make_unique<MemoryCommit>( | 859 | buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal); |
| 860 | memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal)); | ||
| 861 | return *buffers[level]; | 860 | return *buffers[level]; |
| 862 | } | 861 | } |
| 863 | 862 | ||
| @@ -867,8 +866,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() { | |||
| 867 | 866 | ||
| 868 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | 867 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, |
| 869 | std::span<const VideoCommon::ImageCopy> copies) { | 868 | std::span<const VideoCommon::ImageCopy> copies) { |
| 870 | std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); | 869 | boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size()); |
| 871 | std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); | 870 | boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size()); |
| 872 | const VkImageAspectFlags src_aspect_mask = src.AspectMask(); | 871 | const VkImageAspectFlags src_aspect_mask = src.AspectMask(); |
| 873 | const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); | 872 | const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); |
| 874 | 873 | ||
| @@ -1157,7 +1156,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im | |||
| 1157 | 1156 | ||
| 1158 | void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | 1157 | void TextureCacheRuntime::CopyImage(Image& dst, Image& src, |
| 1159 | std::span<const VideoCommon::ImageCopy> copies) { | 1158 | std::span<const VideoCommon::ImageCopy> copies) { |
| 1160 | std::vector<VkImageCopy> vk_copies(copies.size()); | 1159 | boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size()); |
| 1161 | const VkImageAspectFlags aspect_mask = dst.AspectMask(); | 1160 | const VkImageAspectFlags aspect_mask = dst.AspectMask(); |
| 1162 | ASSERT(aspect_mask == src.AspectMask()); | 1161 | ASSERT(aspect_mask == src.AspectMask()); |
| 1163 | 1162 | ||
| @@ -1266,8 +1265,8 @@ void TextureCacheRuntime::TickFrame() {} | |||
| 1266 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, | 1265 | Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_, |
| 1267 | VAddr cpu_addr_) | 1266 | VAddr cpu_addr_) |
| 1268 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, | 1267 | : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler}, |
| 1269 | runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)), | 1268 | runtime{&runtime_}, |
| 1270 | commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)), | 1269 | original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)), |
| 1271 | aspect_mask(ImageAspectMask(info.format)) { | 1270 | aspect_mask(ImageAspectMask(info.format)) { |
| 1272 | if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { | 1271 | if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { |
| 1273 | if (Settings::values.async_astc.GetValue()) { | 1272 | if (Settings::values.async_astc.GetValue()) { |
| @@ -1280,6 +1279,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu | |||
| 1280 | flags |= VideoCommon::ImageFlagBits::Converted; | 1279 | flags |= VideoCommon::ImageFlagBits::Converted; |
| 1281 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; | 1280 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; |
| 1282 | } | 1281 | } |
| 1282 | if (IsPixelFormatBCn(info.format) && !runtime->device.IsOptimalBcnSupported()) { | ||
| 1283 | flags |= VideoCommon::ImageFlagBits::Converted; | ||
| 1284 | flags |= VideoCommon::ImageFlagBits::CostlyLoad; | ||
| 1285 | } | ||
| 1283 | if (runtime->device.HasDebuggingToolAttached()) { | 1286 | if (runtime->device.HasDebuggingToolAttached()) { |
| 1284 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); | 1287 | original_image.SetObjectNameEXT(VideoCommon::Name(*this).c_str()); |
| 1285 | } | 1288 | } |
| @@ -1332,7 +1335,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1332 | ScaleDown(true); | 1335 | ScaleDown(true); |
| 1333 | } | 1336 | } |
| 1334 | scheduler->RequestOutsideRenderPassOperationContext(); | 1337 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 1335 | std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); | 1338 | auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); |
| 1336 | const VkBuffer src_buffer = buffer; | 1339 | const VkBuffer src_buffer = buffer; |
| 1337 | const VkImage vk_image = *original_image; | 1340 | const VkImage vk_image = *original_image; |
| 1338 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 1341 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| @@ -1367,8 +1370,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS | |||
| 1367 | if (is_rescaled) { | 1370 | if (is_rescaled) { |
| 1368 | ScaleDown(); | 1371 | ScaleDown(); |
| 1369 | } | 1372 | } |
| 1370 | boost::container::small_vector<VkBuffer, 1> buffers_vector{}; | 1373 | boost::container::small_vector<VkBuffer, 8> buffers_vector{}; |
| 1371 | boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; | 1374 | boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> |
| 1375 | vk_copies; | ||
| 1372 | for (size_t index = 0; index < buffers_span.size(); index++) { | 1376 | for (size_t index = 0; index < buffers_span.size(); index++) { |
| 1373 | buffers_vector.emplace_back(buffers_span[index]); | 1377 | buffers_vector.emplace_back(buffers_span[index]); |
| 1374 | vk_copies.emplace_back( | 1378 | vk_copies.emplace_back( |
| @@ -1467,9 +1471,7 @@ bool Image::ScaleUp(bool ignore) { | |||
| 1467 | auto scaled_info = info; | 1471 | auto scaled_info = info; |
| 1468 | scaled_info.size.width = scaled_width; | 1472 | scaled_info.size.width = scaled_width; |
| 1469 | scaled_info.size.height = scaled_height; | 1473 | scaled_info.size.height = scaled_height; |
| 1470 | scaled_image = MakeImage(runtime->device, scaled_info); | 1474 | scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info); |
| 1471 | auto& allocator = runtime->memory_allocator; | ||
| 1472 | scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal)); | ||
| 1473 | ignore = false; | 1475 | ignore = false; |
| 1474 | } | 1476 | } |
| 1475 | current_image = *scaled_image; | 1477 | current_image = *scaled_image; |
| @@ -1858,7 +1860,7 @@ Framebuffer::~Framebuffer() = default; | |||
| 1858 | void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, | 1860 | void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, |
| 1859 | std::span<ImageView*, NUM_RT> color_buffers, | 1861 | std::span<ImageView*, NUM_RT> color_buffers, |
| 1860 | ImageView* depth_buffer, bool is_rescaled) { | 1862 | ImageView* depth_buffer, bool is_rescaled) { |
| 1861 | std::vector<VkImageView> attachments; | 1863 | boost::container::small_vector<VkImageView, NUM_RT + 1> attachments; |
| 1862 | RenderPassKey renderpass_key{}; | 1864 | RenderPassKey renderpass_key{}; |
| 1863 | s32 num_layers = 1; | 1865 | s32 num_layers = 1; |
| 1864 | 1866 | ||
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index f14525dcb..220943116 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h | |||
| @@ -116,7 +116,6 @@ public: | |||
| 116 | 116 | ||
| 117 | static constexpr size_t indexing_slots = 8 * sizeof(size_t); | 117 | static constexpr size_t indexing_slots = 8 * sizeof(size_t); |
| 118 | std::array<vk::Buffer, indexing_slots> buffers{}; | 118 | std::array<vk::Buffer, indexing_slots> buffers{}; |
| 119 | std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{}; | ||
| 120 | }; | 119 | }; |
| 121 | 120 | ||
| 122 | class Image : public VideoCommon::ImageBase { | 121 | class Image : public VideoCommon::ImageBase { |
| @@ -180,12 +179,10 @@ private: | |||
| 180 | TextureCacheRuntime* runtime{}; | 179 | TextureCacheRuntime* runtime{}; |
| 181 | 180 | ||
| 182 | vk::Image original_image; | 181 | vk::Image original_image; |
| 183 | MemoryCommit commit; | ||
| 184 | std::vector<vk::ImageView> storage_image_views; | 182 | std::vector<vk::ImageView> storage_image_views; |
| 185 | VkImageAspectFlags aspect_mask = 0; | 183 | VkImageAspectFlags aspect_mask = 0; |
| 186 | bool initialized = false; | 184 | bool initialized = false; |
| 187 | vk::Image scaled_image{}; | 185 | vk::Image scaled_image{}; |
| 188 | MemoryCommit scaled_commit{}; | ||
| 189 | VkImage current_image{}; | 186 | VkImage current_image{}; |
| 190 | 187 | ||
| 191 | std::unique_ptr<Framebuffer> scale_framebuffer; | 188 | std::unique_ptr<Framebuffer> scale_framebuffer; |
diff --git a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp index a802d3c49..460d8d59d 100644 --- a/src/video_core/renderer_vulkan/vk_turbo_mode.cpp +++ b/src/video_core/renderer_vulkan/vk_turbo_mode.cpp | |||
| @@ -18,7 +18,7 @@ using namespace Common::Literals; | |||
| 18 | 18 | ||
| 19 | TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) | 19 | TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld) |
| 20 | #ifndef ANDROID | 20 | #ifndef ANDROID |
| 21 | : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false} | 21 | : m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device} |
| 22 | #endif | 22 | #endif |
| 23 | { | 23 | { |
| 24 | { | 24 | { |
| @@ -41,7 +41,7 @@ void TurboMode::Run(std::stop_token stop_token) { | |||
| 41 | auto& dld = m_device.GetLogical(); | 41 | auto& dld = m_device.GetLogical(); |
| 42 | 42 | ||
| 43 | // Allocate buffer. 2MiB should be sufficient. | 43 | // Allocate buffer. 2MiB should be sufficient. |
| 44 | auto buffer = dld.CreateBuffer(VkBufferCreateInfo{ | 44 | const VkBufferCreateInfo buffer_ci = { |
| 45 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | 45 | .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
| 46 | .pNext = nullptr, | 46 | .pNext = nullptr, |
| 47 | .flags = 0, | 47 | .flags = 0, |
| @@ -50,10 +50,8 @@ void TurboMode::Run(std::stop_token stop_token) { | |||
| 50 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | 50 | .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
| 51 | .queueFamilyIndexCount = 0, | 51 | .queueFamilyIndexCount = 0, |
| 52 | .pQueueFamilyIndices = nullptr, | 52 | .pQueueFamilyIndices = nullptr, |
| 53 | }); | 53 | }; |
| 54 | 54 | vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal); | |
| 55 | // Commit some device local memory for the buffer. | ||
| 56 | auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||
| 57 | 55 | ||
| 58 | // Create the descriptor pool to contain our descriptor. | 56 | // Create the descriptor pool to contain our descriptor. |
| 59 | static constexpr VkDescriptorPoolSize pool_size{ | 57 | static constexpr VkDescriptorPoolSize pool_size{ |
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index c5213875b..4db948b6d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() { | |||
| 151 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), | 151 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), |
| 152 | marked_for_removal.end()); | 152 | marked_for_removal.end()); |
| 153 | 153 | ||
| 154 | std::vector<ShaderInfo*> removed_shaders; | 154 | boost::container::small_vector<ShaderInfo*, 16> removed_shaders; |
| 155 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 156 | 155 | ||
| 157 | std::scoped_lock lock{lookup_mutex}; | 156 | std::scoped_lock lock{lookup_mutex}; |
| 158 | |||
| 159 | for (Entry* const entry : marked_for_removal) { | 157 | for (Entry* const entry : marked_for_removal) { |
| 160 | removed_shaders.push_back(entry->data); | 158 | removed_shaders.push_back(entry->data); |
| 161 | 159 | ||
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index cb51529e4..e16cd5e73 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp | |||
| @@ -269,6 +269,28 @@ bool IsPixelFormatASTC(PixelFormat format) { | |||
| 269 | } | 269 | } |
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | bool IsPixelFormatBCn(PixelFormat format) { | ||
| 273 | switch (format) { | ||
| 274 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 275 | case PixelFormat::BC2_UNORM: | ||
| 276 | case PixelFormat::BC3_UNORM: | ||
| 277 | case PixelFormat::BC4_UNORM: | ||
| 278 | case PixelFormat::BC4_SNORM: | ||
| 279 | case PixelFormat::BC5_UNORM: | ||
| 280 | case PixelFormat::BC5_SNORM: | ||
| 281 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 282 | case PixelFormat::BC2_SRGB: | ||
| 283 | case PixelFormat::BC3_SRGB: | ||
| 284 | case PixelFormat::BC7_UNORM: | ||
| 285 | case PixelFormat::BC6H_UFLOAT: | ||
| 286 | case PixelFormat::BC6H_SFLOAT: | ||
| 287 | case PixelFormat::BC7_SRGB: | ||
| 288 | return true; | ||
| 289 | default: | ||
| 290 | return false; | ||
| 291 | } | ||
| 292 | } | ||
| 293 | |||
| 272 | bool IsPixelFormatSRGB(PixelFormat format) { | 294 | bool IsPixelFormatSRGB(PixelFormat format) { |
| 273 | switch (format) { | 295 | switch (format) { |
| 274 | case PixelFormat::A8B8G8R8_SRGB: | 296 | case PixelFormat::A8B8G8R8_SRGB: |
diff --git a/src/video_core/surface.h b/src/video_core/surface.h index 0225d3287..9b9c4d9bc 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h | |||
| @@ -501,6 +501,8 @@ SurfaceType GetFormatType(PixelFormat pixel_format); | |||
| 501 | 501 | ||
| 502 | bool IsPixelFormatASTC(PixelFormat format); | 502 | bool IsPixelFormatASTC(PixelFormat format); |
| 503 | 503 | ||
| 504 | bool IsPixelFormatBCn(PixelFormat format); | ||
| 505 | |||
| 504 | bool IsPixelFormatSRGB(PixelFormat format); | 506 | bool IsPixelFormatSRGB(PixelFormat format); |
| 505 | 507 | ||
| 506 | bool IsPixelFormatInteger(PixelFormat format); | 508 | bool IsPixelFormatInteger(PixelFormat format); |
diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp new file mode 100644 index 000000000..3e26474a3 --- /dev/null +++ b/src/video_core/texture_cache/decode_bc.cpp | |||
| @@ -0,0 +1,129 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | #include <bc_decoder.h> | ||
| 8 | |||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/decode_bc.h" | ||
| 11 | |||
| 12 | namespace VideoCommon { | ||
| 13 | |||
| 14 | namespace { | ||
| 15 | constexpr u32 BLOCK_SIZE = 4; | ||
| 16 | |||
| 17 | using VideoCore::Surface::PixelFormat; | ||
| 18 | |||
| 19 | constexpr bool IsSigned(PixelFormat pixel_format) { | ||
| 20 | switch (pixel_format) { | ||
| 21 | case PixelFormat::BC4_SNORM: | ||
| 22 | case PixelFormat::BC4_UNORM: | ||
| 23 | case PixelFormat::BC5_SNORM: | ||
| 24 | case PixelFormat::BC5_UNORM: | ||
| 25 | case PixelFormat::BC6H_SFLOAT: | ||
| 26 | case PixelFormat::BC6H_UFLOAT: | ||
| 27 | return true; | ||
| 28 | default: | ||
| 29 | return false; | ||
| 30 | } | ||
| 31 | } | ||
| 32 | |||
| 33 | constexpr u32 BlockSize(PixelFormat pixel_format) { | ||
| 34 | switch (pixel_format) { | ||
| 35 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 36 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 37 | case PixelFormat::BC4_SNORM: | ||
| 38 | case PixelFormat::BC4_UNORM: | ||
| 39 | return 8; | ||
| 40 | default: | ||
| 41 | return 16; | ||
| 42 | } | ||
| 43 | } | ||
| 44 | } // Anonymous namespace | ||
| 45 | |||
| 46 | u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { | ||
| 47 | switch (pixel_format) { | ||
| 48 | case PixelFormat::BC4_SNORM: | ||
| 49 | case PixelFormat::BC4_UNORM: | ||
| 50 | return 1; | ||
| 51 | case PixelFormat::BC5_SNORM: | ||
| 52 | case PixelFormat::BC5_UNORM: | ||
| 53 | return 2; | ||
| 54 | case PixelFormat::BC6H_SFLOAT: | ||
| 55 | case PixelFormat::BC6H_UFLOAT: | ||
| 56 | return 8; | ||
| 57 | default: | ||
| 58 | return 4; | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | template <auto decompress, PixelFormat pixel_format> | ||
| 63 | void DecompressBlocks(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 64 | bool is_signed = false) { | ||
| 65 | const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); | ||
| 66 | const u32 block_width = std::min(extent.width, BLOCK_SIZE); | ||
| 67 | const u32 block_height = std::min(extent.height, BLOCK_SIZE); | ||
| 68 | const u32 pitch = extent.width * out_bpp; | ||
| 69 | size_t input_offset = 0; | ||
| 70 | size_t output_offset = 0; | ||
| 71 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 72 | for (u32 y = 0; y < extent.height; y += block_height) { | ||
| 73 | size_t row_offset = 0; | ||
| 74 | for (u32 x = 0; x < extent.width; | ||
| 75 | x += block_width, row_offset += block_width * out_bpp) { | ||
| 76 | const u8* src = input.data() + input_offset; | ||
| 77 | u8* const dst = output.data() + output_offset + row_offset; | ||
| 78 | if constexpr (IsSigned(pixel_format)) { | ||
| 79 | decompress(src, dst, x, y, extent.width, extent.height, is_signed); | ||
| 80 | } else { | ||
| 81 | decompress(src, dst, x, y, extent.width, extent.height); | ||
| 82 | } | ||
| 83 | input_offset += BlockSize(pixel_format); | ||
| 84 | } | ||
| 85 | output_offset += block_height * pitch; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | } | ||
| 89 | |||
| 90 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 91 | VideoCore::Surface::PixelFormat pixel_format) { | ||
| 92 | switch (pixel_format) { | ||
| 93 | case PixelFormat::BC1_RGBA_UNORM: | ||
| 94 | case PixelFormat::BC1_RGBA_SRGB: | ||
| 95 | DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, extent); | ||
| 96 | break; | ||
| 97 | case PixelFormat::BC2_UNORM: | ||
| 98 | case PixelFormat::BC2_SRGB: | ||
| 99 | DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, extent); | ||
| 100 | break; | ||
| 101 | case PixelFormat::BC3_UNORM: | ||
| 102 | case PixelFormat::BC3_SRGB: | ||
| 103 | DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, extent); | ||
| 104 | break; | ||
| 105 | case PixelFormat::BC4_SNORM: | ||
| 106 | case PixelFormat::BC4_UNORM: | ||
| 107 | DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>( | ||
| 108 | input, output, extent, pixel_format == PixelFormat::BC4_SNORM); | ||
| 109 | break; | ||
| 110 | case PixelFormat::BC5_SNORM: | ||
| 111 | case PixelFormat::BC5_UNORM: | ||
| 112 | DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>( | ||
| 113 | input, output, extent, pixel_format == PixelFormat::BC5_SNORM); | ||
| 114 | break; | ||
| 115 | case PixelFormat::BC6H_SFLOAT: | ||
| 116 | case PixelFormat::BC6H_UFLOAT: | ||
| 117 | DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>( | ||
| 118 | input, output, extent, pixel_format == PixelFormat::BC6H_SFLOAT); | ||
| 119 | break; | ||
| 120 | case PixelFormat::BC7_SRGB: | ||
| 121 | case PixelFormat::BC7_UNORM: | ||
| 122 | DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, extent); | ||
| 123 | break; | ||
| 124 | default: | ||
| 125 | LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/decode_bc4.h b/src/video_core/texture_cache/decode_bc.h index ab2f735be..41d1ec0a3 100644 --- a/src/video_core/texture_cache/decode_bc4.h +++ b/src/video_core/texture_cache/decode_bc.h | |||
| @@ -6,10 +6,14 @@ | |||
| 6 | #include <span> | 6 | #include <span> |
| 7 | 7 | ||
| 8 | #include "common/common_types.h" | 8 | #include "common/common_types.h" |
| 9 | #include "video_core/surface.h" | ||
| 9 | #include "video_core/texture_cache/types.h" | 10 | #include "video_core/texture_cache/types.h" |
| 10 | 11 | ||
| 11 | namespace VideoCommon { | 12 | namespace VideoCommon { |
| 12 | 13 | ||
| 13 | void DecompressBC4(std::span<const u8> data, Extent3D extent, std::span<u8> output); | 14 | [[nodiscard]] u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format); |
| 15 | |||
| 16 | void DecompressBCn(std::span<const u8> input, std::span<u8> output, Extent3D extent, | ||
| 17 | VideoCore::Surface::PixelFormat pixel_format); | ||
| 14 | 18 | ||
| 15 | } // namespace VideoCommon | 19 | } // namespace VideoCommon |
diff --git a/src/video_core/texture_cache/decode_bc4.cpp b/src/video_core/texture_cache/decode_bc4.cpp deleted file mode 100644 index ef98afdca..000000000 --- a/src/video_core/texture_cache/decode_bc4.cpp +++ /dev/null | |||
| @@ -1,96 +0,0 @@ | |||
| 1 | // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project | ||
| 2 | // SPDX-License-Identifier: GPL-2.0-or-later | ||
| 3 | |||
| 4 | #include <algorithm> | ||
| 5 | #include <array> | ||
| 6 | #include <span> | ||
| 7 | |||
| 8 | #include "common/assert.h" | ||
| 9 | #include "common/common_types.h" | ||
| 10 | #include "video_core/texture_cache/decode_bc4.h" | ||
| 11 | #include "video_core/texture_cache/types.h" | ||
| 12 | |||
| 13 | namespace VideoCommon { | ||
| 14 | |||
| 15 | // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt | ||
| 16 | [[nodiscard]] constexpr u32 DecompressBlock(u64 bits, u32 x, u32 y) { | ||
| 17 | const u32 code_offset = 16 + 3 * (4 * y + x); | ||
| 18 | const u32 code = (bits >> code_offset) & 7; | ||
| 19 | const u32 red0 = (bits >> 0) & 0xff; | ||
| 20 | const u32 red1 = (bits >> 8) & 0xff; | ||
| 21 | if (red0 > red1) { | ||
| 22 | switch (code) { | ||
| 23 | case 0: | ||
| 24 | return red0; | ||
| 25 | case 1: | ||
| 26 | return red1; | ||
| 27 | case 2: | ||
| 28 | return (6 * red0 + 1 * red1) / 7; | ||
| 29 | case 3: | ||
| 30 | return (5 * red0 + 2 * red1) / 7; | ||
| 31 | case 4: | ||
| 32 | return (4 * red0 + 3 * red1) / 7; | ||
| 33 | case 5: | ||
| 34 | return (3 * red0 + 4 * red1) / 7; | ||
| 35 | case 6: | ||
| 36 | return (2 * red0 + 5 * red1) / 7; | ||
| 37 | case 7: | ||
| 38 | return (1 * red0 + 6 * red1) / 7; | ||
| 39 | } | ||
| 40 | } else { | ||
| 41 | switch (code) { | ||
| 42 | case 0: | ||
| 43 | return red0; | ||
| 44 | case 1: | ||
| 45 | return red1; | ||
| 46 | case 2: | ||
| 47 | return (4 * red0 + 1 * red1) / 5; | ||
| 48 | case 3: | ||
| 49 | return (3 * red0 + 2 * red1) / 5; | ||
| 50 | case 4: | ||
| 51 | return (2 * red0 + 3 * red1) / 5; | ||
| 52 | case 5: | ||
| 53 | return (1 * red0 + 4 * red1) / 5; | ||
| 54 | case 6: | ||
| 55 | return 0; | ||
| 56 | case 7: | ||
| 57 | return 0xff; | ||
| 58 | } | ||
| 59 | } | ||
| 60 | return 0; | ||
| 61 | } | ||
| 62 | |||
| 63 | void DecompressBC4(std::span<const u8> input, Extent3D extent, std::span<u8> output) { | ||
| 64 | UNIMPLEMENTED_IF_MSG(extent.width % 4 != 0, "Unaligned width={}", extent.width); | ||
| 65 | UNIMPLEMENTED_IF_MSG(extent.height % 4 != 0, "Unaligned height={}", extent.height); | ||
| 66 | static constexpr u32 BLOCK_SIZE = 4; | ||
| 67 | size_t input_offset = 0; | ||
| 68 | for (u32 slice = 0; slice < extent.depth; ++slice) { | ||
| 69 | for (u32 block_y = 0; block_y < extent.height / 4; ++block_y) { | ||
| 70 | for (u32 block_x = 0; block_x < extent.width / 4; ++block_x) { | ||
| 71 | u64 bits; | ||
| 72 | std::memcpy(&bits, &input[input_offset], sizeof(bits)); | ||
| 73 | input_offset += sizeof(bits); | ||
| 74 | |||
| 75 | for (u32 y = 0; y < BLOCK_SIZE; ++y) { | ||
| 76 | for (u32 x = 0; x < BLOCK_SIZE; ++x) { | ||
| 77 | const u32 linear_z = slice; | ||
| 78 | const u32 linear_y = block_y * BLOCK_SIZE + y; | ||
| 79 | const u32 linear_x = block_x * BLOCK_SIZE + x; | ||
| 80 | const u32 offset_z = linear_z * extent.width * extent.height; | ||
| 81 | const u32 offset_y = linear_y * extent.width; | ||
| 82 | const u32 offset_x = linear_x; | ||
| 83 | const u32 output_offset = (offset_z + offset_y + offset_x) * 4ULL; | ||
| 84 | const u32 color = DecompressBlock(bits, x, y); | ||
| 85 | output[output_offset + 0] = static_cast<u8>(color); | ||
| 86 | output[output_offset + 1] = 0; | ||
| 87 | output[output_offset + 2] = 0; | ||
| 88 | output[output_offset + 3] = 0xff; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | } // namespace VideoCommon | ||
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include <boost/container/small_vector.hpp> | ||
| 9 | 10 | ||
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -108,8 +109,8 @@ struct ImageBase { | |||
| 108 | std::vector<ImageViewInfo> image_view_infos; | 109 | std::vector<ImageViewInfo> image_view_infos; |
| 109 | std::vector<ImageViewId> image_view_ids; | 110 | std::vector<ImageViewId> image_view_ids; |
| 110 | 111 | ||
| 111 | std::vector<u32> slice_offsets; | 112 | boost::container::small_vector<u32, 16> slice_offsets; |
| 112 | std::vector<SubresourceBase> slice_subresources; | 113 | boost::container::small_vector<SubresourceBase, 16> slice_subresources; |
| 113 | 114 | ||
| 114 | std::vector<AliasedImage> aliased_images; | 115 | std::vector<AliasedImage> aliased_images; |
| 115 | std::vector<ImageId> overlapping_images; | 116 | std::vector<ImageId> overlapping_images; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4027d860b..8190f3ba1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -186,6 +186,10 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) { | |||
| 186 | 186 | ||
| 187 | template <class P> | 187 | template <class P> |
| 188 | void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { | 188 | void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) { |
| 189 | if (!Settings::values.barrier_feedback_loops.GetValue()) { | ||
| 190 | return; | ||
| 191 | } | ||
| 192 | |||
| 189 | const bool requires_barrier = [&] { | 193 | const bool requires_barrier = [&] { |
| 190 | for (const auto& view : views) { | 194 | for (const auto& view : views) { |
| 191 | if (!view.id) { | 195 | if (!view.id) { |
| @@ -300,7 +304,7 @@ void TextureCache<P>::SynchronizeComputeDescriptors() { | |||
| 300 | } | 304 | } |
| 301 | 305 | ||
| 302 | template <class P> | 306 | template <class P> |
| 303 | bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | 307 | bool TextureCache<P>::RescaleRenderTargets() { |
| 304 | auto& flags = maxwell3d->dirty.flags; | 308 | auto& flags = maxwell3d->dirty.flags; |
| 305 | u32 scale_rating = 0; | 309 | u32 scale_rating = 0; |
| 306 | bool rescaled = false; | 310 | bool rescaled = false; |
| @@ -338,13 +342,13 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) { | |||
| 338 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; | 342 | ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; |
| 339 | if (flags[Dirty::ColorBuffer0 + index] || force) { | 343 | if (flags[Dirty::ColorBuffer0 + index] || force) { |
| 340 | flags[Dirty::ColorBuffer0 + index] = false; | 344 | flags[Dirty::ColorBuffer0 + index] = false; |
| 341 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); | 345 | BindRenderTarget(&color_buffer_id, FindColorBuffer(index)); |
| 342 | } | 346 | } |
| 343 | check_rescale(color_buffer_id, tmp_color_images[index]); | 347 | check_rescale(color_buffer_id, tmp_color_images[index]); |
| 344 | } | 348 | } |
| 345 | if (flags[Dirty::ZetaBuffer] || force) { | 349 | if (flags[Dirty::ZetaBuffer] || force) { |
| 346 | flags[Dirty::ZetaBuffer] = false; | 350 | flags[Dirty::ZetaBuffer] = false; |
| 347 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); | 351 | BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer()); |
| 348 | } | 352 | } |
| 349 | check_rescale(render_targets.depth_buffer_id, tmp_depth_image); | 353 | check_rescale(render_targets.depth_buffer_id, tmp_depth_image); |
| 350 | 354 | ||
| @@ -409,7 +413,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) { | |||
| 409 | return; | 413 | return; |
| 410 | } | 414 | } |
| 411 | 415 | ||
| 412 | const bool rescaled = RescaleRenderTargets(is_clear); | 416 | const bool rescaled = RescaleRenderTargets(); |
| 413 | if (is_rescaling != rescaled) { | 417 | if (is_rescaling != rescaled) { |
| 414 | flags[Dirty::RescaleViewports] = true; | 418 | flags[Dirty::RescaleViewports] = true; |
| 415 | flags[Dirty::RescaleScissors] = true; | 419 | flags[Dirty::RescaleScissors] = true; |
| @@ -522,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 522 | 526 | ||
| 523 | template <class P> | 527 | template <class P> |
| 524 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 528 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 525 | std::vector<ImageId> images; | 529 | boost::container::small_vector<ImageId, 16> images; |
| 526 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | 530 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |
| 527 | if (!image.IsSafeDownload()) { | 531 | if (!image.IsSafeDownload()) { |
| 528 | return; | 532 | return; |
| @@ -575,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| 575 | 579 | ||
| 576 | template <class P> | 580 | template <class P> |
| 577 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | 581 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 578 | std::vector<ImageId> deleted_images; | 582 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 579 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 583 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 580 | for (const ImageId id : deleted_images) { | 584 | for (const ImageId id : deleted_images) { |
| 581 | Image& image = slot_images[id]; | 585 | Image& image = slot_images[id]; |
| @@ -589,19 +593,11 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | |||
| 589 | 593 | ||
| 590 | template <class P> | 594 | template <class P> |
| 591 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { | 595 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { |
| 592 | std::vector<ImageId> deleted_images; | 596 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 593 | ForEachImageInRegionGPU(as_id, gpu_addr, size, | 597 | ForEachImageInRegionGPU(as_id, gpu_addr, size, |
| 594 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 598 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 595 | for (const ImageId id : deleted_images) { | 599 | for (const ImageId id : deleted_images) { |
| 596 | Image& image = slot_images[id]; | 600 | Image& image = slot_images[id]; |
| 597 | if (True(image.flags & ImageFlagBits::CpuModified)) { | ||
| 598 | return; | ||
| 599 | } | ||
| 600 | image.flags |= ImageFlagBits::CpuModified; | ||
| 601 | if (True(image.flags & ImageFlagBits::Tracked)) { | ||
| 602 | UntrackImage(image, id); | ||
| 603 | } | ||
| 604 | /* | ||
| 605 | if (True(image.flags & ImageFlagBits::Remapped)) { | 601 | if (True(image.flags & ImageFlagBits::Remapped)) { |
| 606 | continue; | 602 | continue; |
| 607 | } | 603 | } |
| @@ -609,7 +605,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t siz | |||
| 609 | if (True(image.flags & ImageFlagBits::Tracked)) { | 605 | if (True(image.flags & ImageFlagBits::Tracked)) { |
| 610 | UntrackImage(image, id); | 606 | UntrackImage(image, id); |
| 611 | } | 607 | } |
| 612 | */ | ||
| 613 | } | 608 | } |
| 614 | } | 609 | } |
| 615 | 610 | ||
| @@ -875,6 +870,10 @@ ImageId TextureCache<P>::DmaImageId(const Tegra::DMA::ImageOperand& operand, boo | |||
| 875 | return NULL_IMAGE_ID; | 870 | return NULL_IMAGE_ID; |
| 876 | } | 871 | } |
| 877 | auto& image = slot_images[image_id]; | 872 | auto& image = slot_images[image_id]; |
| 873 | if (image.info.type == ImageType::e3D) { | ||
| 874 | // Don't accelerate 3D images. | ||
| 875 | return NULL_IMAGE_ID; | ||
| 876 | } | ||
| 878 | if (!is_upload && !image.info.dma_downloaded) { | 877 | if (!is_upload && !image.info.dma_downloaded) { |
| 879 | // Force a full sync. | 878 | // Force a full sync. |
| 880 | image.info.dma_downloaded = true; | 879 | image.info.dma_downloaded = true; |
| @@ -1097,7 +1096,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1097 | const bool native_bgr = runtime.HasNativeBgr(); | 1096 | const bool native_bgr = runtime.HasNativeBgr(); |
| 1098 | const bool flexible_formats = True(options & RelaxedOptions::Format); | 1097 | const bool flexible_formats = True(options & RelaxedOptions::Format); |
| 1099 | ImageId image_id{}; | 1098 | ImageId image_id{}; |
| 1100 | boost::container::small_vector<ImageId, 1> image_ids; | 1099 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1101 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1100 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1102 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1101 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1103 | return false; | 1102 | return false; |
| @@ -1618,7 +1617,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) | |||
| 1618 | } | 1617 | } |
| 1619 | } | 1618 | } |
| 1620 | ImageId image_id{}; | 1619 | ImageId image_id{}; |
| 1621 | boost::container::small_vector<ImageId, 1> image_ids; | 1620 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1622 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1621 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1623 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1622 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1624 | return false; | 1623 | return false; |
| @@ -1678,7 +1677,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | |||
| 1678 | } | 1677 | } |
| 1679 | 1678 | ||
| 1680 | template <class P> | 1679 | template <class P> |
| 1681 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | 1680 | ImageViewId TextureCache<P>::FindColorBuffer(size_t index) { |
| 1682 | const auto& regs = maxwell3d->regs; | 1681 | const auto& regs = maxwell3d->regs; |
| 1683 | if (index >= regs.rt_control.count) { | 1682 | if (index >= regs.rt_control.count) { |
| 1684 | return ImageViewId{}; | 1683 | return ImageViewId{}; |
| @@ -1692,11 +1691,11 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { | |||
| 1692 | return ImageViewId{}; | 1691 | return ImageViewId{}; |
| 1693 | } | 1692 | } |
| 1694 | const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); | 1693 | const ImageInfo info(regs.rt[index], regs.anti_alias_samples_mode); |
| 1695 | return FindRenderTargetView(info, gpu_addr, is_clear); | 1694 | return FindRenderTargetView(info, gpu_addr); |
| 1696 | } | 1695 | } |
| 1697 | 1696 | ||
| 1698 | template <class P> | 1697 | template <class P> |
| 1699 | ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | 1698 | ImageViewId TextureCache<P>::FindDepthBuffer() { |
| 1700 | const auto& regs = maxwell3d->regs; | 1699 | const auto& regs = maxwell3d->regs; |
| 1701 | if (!regs.zeta_enable) { | 1700 | if (!regs.zeta_enable) { |
| 1702 | return ImageViewId{}; | 1701 | return ImageViewId{}; |
| @@ -1706,18 +1705,16 @@ ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { | |||
| 1706 | return ImageViewId{}; | 1705 | return ImageViewId{}; |
| 1707 | } | 1706 | } |
| 1708 | const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); | 1707 | const ImageInfo info(regs.zeta, regs.zeta_size, regs.anti_alias_samples_mode); |
| 1709 | return FindRenderTargetView(info, gpu_addr, is_clear); | 1708 | return FindRenderTargetView(info, gpu_addr); |
| 1710 | } | 1709 | } |
| 1711 | 1710 | ||
| 1712 | template <class P> | 1711 | template <class P> |
| 1713 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | 1712 | ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr) { |
| 1714 | bool is_clear) { | ||
| 1715 | const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; | ||
| 1716 | ImageId image_id{}; | 1713 | ImageId image_id{}; |
| 1717 | bool delete_state = has_deleted_images; | 1714 | bool delete_state = has_deleted_images; |
| 1718 | do { | 1715 | do { |
| 1719 | has_deleted_images = false; | 1716 | has_deleted_images = false; |
| 1720 | image_id = FindOrInsertImage(info, gpu_addr, options); | 1717 | image_id = FindOrInsertImage(info, gpu_addr); |
| 1721 | delete_state |= has_deleted_images; | 1718 | delete_state |= has_deleted_images; |
| 1722 | } while (has_deleted_images); | 1719 | } while (has_deleted_images); |
| 1723 | has_deleted_images = delete_state; | 1720 | has_deleted_images = delete_state; |
| @@ -1940,7 +1937,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1940 | image.map_view_id = map_id; | 1937 | image.map_view_id = map_id; |
| 1941 | return; | 1938 | return; |
| 1942 | } | 1939 | } |
| 1943 | std::vector<ImageViewId> sparse_maps{}; | 1940 | boost::container::small_vector<ImageViewId, 16> sparse_maps; |
| 1944 | ForEachSparseSegment( | 1941 | ForEachSparseSegment( |
| 1945 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1942 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 1946 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1943 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| @@ -2215,7 +2212,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | |||
| 2215 | 2212 | ||
| 2216 | template <class P> | 2213 | template <class P> |
| 2217 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | 2214 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 2218 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | 2215 | boost::container::small_vector<const AliasedImage*, 8> aliased_images; |
| 2219 | Image& image = slot_images[image_id]; | 2216 | Image& image = slot_images[image_id]; |
| 2220 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); | 2217 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); |
| 2221 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); | 2218 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index d96ddea9d..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -56,7 +56,7 @@ struct ImageViewInOut { | |||
| 56 | struct AsyncDecodeContext { | 56 | struct AsyncDecodeContext { |
| 57 | ImageId image_id; | 57 | ImageId image_id; |
| 58 | Common::ScratchBuffer<u8> decoded_data; | 58 | Common::ScratchBuffer<u8> decoded_data; |
| 59 | std::vector<BufferImageCopy> copies; | 59 | boost::container::small_vector<BufferImageCopy, 16> copies; |
| 60 | std::mutex mutex; | 60 | std::mutex mutex; |
| 61 | std::atomic_bool complete; | 61 | std::atomic_bool complete; |
| 62 | }; | 62 | }; |
| @@ -178,9 +178,8 @@ public: | |||
| 178 | void SynchronizeComputeDescriptors(); | 178 | void SynchronizeComputeDescriptors(); |
| 179 | 179 | ||
| 180 | /// Updates the Render Targets if they can be rescaled | 180 | /// Updates the Render Targets if they can be rescaled |
| 181 | /// @param is_clear True when the render targets are being used for clears | ||
| 182 | /// @retval True if the Render Targets have been rescaled. | 181 | /// @retval True if the Render Targets have been rescaled. |
| 183 | bool RescaleRenderTargets(bool is_clear); | 182 | bool RescaleRenderTargets(); |
| 184 | 183 | ||
| 185 | /// Update bound render targets and upload memory if necessary | 184 | /// Update bound render targets and upload memory if necessary |
| 186 | /// @param is_clear True when the render targets are being used for clears | 185 | /// @param is_clear True when the render targets are being used for clears |
| @@ -336,14 +335,13 @@ private: | |||
| 336 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); | 335 | [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); |
| 337 | 336 | ||
| 338 | /// Find or create an image view for the given color buffer index | 337 | /// Find or create an image view for the given color buffer index |
| 339 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); | 338 | [[nodiscard]] ImageViewId FindColorBuffer(size_t index); |
| 340 | 339 | ||
| 341 | /// Find or create an image view for the depth buffer | 340 | /// Find or create an image view for the depth buffer |
| 342 | [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); | 341 | [[nodiscard]] ImageViewId FindDepthBuffer(); |
| 343 | 342 | ||
| 344 | /// Find or create a view for a render target with the given image parameters | 343 | /// Find or create a view for a render target with the given image parameters |
| 345 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, | 344 | [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr); |
| 346 | bool is_clear); | ||
| 347 | 345 | ||
| 348 | /// Iterates over all the images in a region calling func | 346 | /// Iterates over all the images in a region calling func |
| 349 | template <typename Func> | 347 | template <typename Func> |
| @@ -431,7 +429,7 @@ private: | |||
| 431 | 429 | ||
| 432 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; | 430 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; |
| 433 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | 431 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |
| 434 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 432 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |
| 435 | 433 | ||
| 436 | VAddr virtual_invalid_space{}; | 434 | VAddr virtual_invalid_space{}; |
| 437 | 435 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..9a618a57a 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -24,7 +24,7 @@ | |||
| 24 | #include "video_core/engines/maxwell_3d.h" | 24 | #include "video_core/engines/maxwell_3d.h" |
| 25 | #include "video_core/memory_manager.h" | 25 | #include "video_core/memory_manager.h" |
| 26 | #include "video_core/surface.h" | 26 | #include "video_core/surface.h" |
| 27 | #include "video_core/texture_cache/decode_bc4.h" | 27 | #include "video_core/texture_cache/decode_bc.h" |
| 28 | #include "video_core/texture_cache/format_lookup_table.h" | 28 | #include "video_core/texture_cache/format_lookup_table.h" |
| 29 | #include "video_core/texture_cache/formatter.h" | 29 | #include "video_core/texture_cache/formatter.h" |
| 30 | #include "video_core/texture_cache/samples_helper.h" | 30 | #include "video_core/texture_cache/samples_helper.h" |
| @@ -61,8 +61,6 @@ using VideoCore::Surface::PixelFormatFromDepthFormat; | |||
| 61 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | 61 | using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
| 62 | using VideoCore::Surface::SurfaceType; | 62 | using VideoCore::Surface::SurfaceType; |
| 63 | 63 | ||
| 64 | constexpr u32 CONVERTED_BYTES_PER_BLOCK = BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 65 | |||
| 66 | struct LevelInfo { | 64 | struct LevelInfo { |
| 67 | Extent3D size; | 65 | Extent3D size; |
| 68 | Extent3D block; | 66 | Extent3D block; |
| @@ -329,13 +327,13 @@ template <u32 GOB_EXTENT> | |||
| 329 | 327 | ||
| 330 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | 328 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( |
| 331 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | 329 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { |
| 332 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | 330 | const auto slice_offsets = CalculateSliceOffsets(new_info); |
| 333 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | 331 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); |
| 334 | const auto it = std::ranges::find(slice_offsets, diff); | 332 | const auto it = std::ranges::find(slice_offsets, diff); |
| 335 | if (it == slice_offsets.end()) { | 333 | if (it == slice_offsets.end()) { |
| 336 | return std::nullopt; | 334 | return std::nullopt; |
| 337 | } | 335 | } |
| 338 | const std::vector subresources = CalculateSliceSubresources(new_info); | 336 | const auto subresources = CalculateSliceSubresources(new_info); |
| 339 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | 337 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; |
| 340 | const ImageInfo& info = overlap.info; | 338 | const ImageInfo& info = overlap.info; |
| 341 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | 339 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { |
| @@ -612,7 +610,8 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { | |||
| 612 | } | 610 | } |
| 613 | return output_size; | 611 | return output_size; |
| 614 | } | 612 | } |
| 615 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * CONVERTED_BYTES_PER_BLOCK; | 613 | return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * |
| 614 | ConvertedBytesPerBlock(info.format); | ||
| 616 | } | 615 | } |
| 617 | 616 | ||
| 618 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { | 617 | u32 CalculateLayerStride(const ImageInfo& info) noexcept { |
| @@ -655,9 +654,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { | |||
| 655 | return sizes; | 654 | return sizes; |
| 656 | } | 655 | } |
| 657 | 656 | ||
| 658 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | 657 | boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) { |
| 659 | ASSERT(info.type == ImageType::e3D); | 658 | ASSERT(info.type == ImageType::e3D); |
| 660 | std::vector<u32> offsets; | 659 | boost::container::small_vector<u32, 16> offsets; |
| 661 | offsets.reserve(NumSlices(info)); | 660 | offsets.reserve(NumSlices(info)); |
| 662 | 661 | ||
| 663 | const LevelInfo level_info = MakeLevelInfo(info); | 662 | const LevelInfo level_info = MakeLevelInfo(info); |
| @@ -679,9 +678,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | |||
| 679 | return offsets; | 678 | return offsets; |
| 680 | } | 679 | } |
| 681 | 680 | ||
| 682 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | 681 | boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 682 | const ImageInfo& info) { | ||
| 683 | ASSERT(info.type == ImageType::e3D); | 683 | ASSERT(info.type == ImageType::e3D); |
| 684 | std::vector<SubresourceBase> subresources; | 684 | boost::container::small_vector<SubresourceBase, 16> subresources; |
| 685 | subresources.reserve(NumSlices(info)); | 685 | subresources.reserve(NumSlices(info)); |
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | 686 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 687 | const s32 depth = AdjustMipSize(info.size.depth, level); | 687 | const s32 depth = AdjustMipSize(info.size.depth, level); |
| @@ -723,8 +723,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | |||
| 723 | } | 723 | } |
| 724 | } | 724 | } |
| 725 | 725 | ||
| 726 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | 726 | boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst, |
| 727 | SubresourceBase base, u32 up_scale, u32 down_shift) { | 727 | const ImageInfo& src, |
| 728 | SubresourceBase base, | ||
| 729 | u32 up_scale, u32 down_shift) { | ||
| 728 | ASSERT(dst.resources.levels >= src.resources.levels); | 730 | ASSERT(dst.resources.levels >= src.resources.levels); |
| 729 | 731 | ||
| 730 | const bool is_dst_3d = dst.type == ImageType::e3D; | 732 | const bool is_dst_3d = dst.type == ImageType::e3D; |
| @@ -733,7 +735,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 733 | ASSERT(src.resources.levels == 1); | 735 | ASSERT(src.resources.levels == 1); |
| 734 | } | 736 | } |
| 735 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; | 737 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; |
| 736 | std::vector<ImageCopy> copies; | 738 | boost::container::small_vector<ImageCopy, 16> copies; |
| 737 | copies.reserve(src.resources.levels); | 739 | copies.reserve(src.resources.levels); |
| 738 | for (s32 level = 0; level < src.resources.levels; ++level) { | 740 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| 739 | ImageCopy& copy = copies.emplace_back(); | 741 | ImageCopy& copy = copies.emplace_back(); |
| @@ -770,9 +772,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 770 | return copies; | 772 | return copies; |
| 771 | } | 773 | } |
| 772 | 774 | ||
| 773 | std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, | 775 | boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src, |
| 774 | u32 down_shift) { | 776 | u32 up_scale, |
| 775 | std::vector<ImageCopy> copies; | 777 | u32 down_shift) { |
| 778 | boost::container::small_vector<ImageCopy, 16> copies; | ||
| 776 | copies.reserve(src.resources.levels); | 779 | copies.reserve(src.resources.levels); |
| 777 | const bool is_3d = src.type == ImageType::e3D; | 780 | const bool is_3d = src.type == ImageType::e3D; |
| 778 | for (s32 level = 0; level < src.resources.levels; ++level) { | 781 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| @@ -824,9 +827,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config | |||
| 824 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); | 827 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); |
| 825 | } | 828 | } |
| 826 | 829 | ||
| 827 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 830 | boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |
| 828 | const ImageInfo& info, std::span<const u8> input, | 831 | GPUVAddr gpu_addr, |
| 829 | std::span<u8> output) { | 832 | const ImageInfo& info, |
| 833 | std::span<const u8> input, | ||
| 834 | std::span<u8> output) { | ||
| 830 | const size_t guest_size_bytes = input.size_bytes(); | 835 | const size_t guest_size_bytes = input.size_bytes(); |
| 831 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | 836 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); |
| 832 | const Extent3D size = info.size; | 837 | const Extent3D size = info.size; |
| @@ -861,7 +866,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP | |||
| 861 | info.tile_width_spacing); | 866 | info.tile_width_spacing); |
| 862 | size_t guest_offset = 0; | 867 | size_t guest_offset = 0; |
| 863 | u32 host_offset = 0; | 868 | u32 host_offset = 0; |
| 864 | std::vector<BufferImageCopy> copies(num_levels); | 869 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 865 | 870 | ||
| 866 | for (s32 level = 0; level < num_levels; ++level) { | 871 | for (s32 level = 0; level < num_levels; ++level) { |
| 867 | const Extent3D level_size = AdjustMipSize(size, level); | 872 | const Extent3D level_size = AdjustMipSize(size, level); |
| @@ -939,7 +944,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 939 | tile_size.height, output.subspan(output_offset)); | 944 | tile_size.height, output.subspan(output_offset)); |
| 940 | 945 | ||
| 941 | output_offset += copy.image_extent.width * copy.image_extent.height * | 946 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 942 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 947 | copy.image_subresource.num_layers * |
| 948 | BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 943 | } else if (astc) { | 949 | } else if (astc) { |
| 944 | // BC1 uses 0.5 bytes per texel | 950 | // BC1 uses 0.5 bytes per texel |
| 945 | // BC3 uses 1 byte per texel | 951 | // BC3 uses 1 byte per texel |
| @@ -950,7 +956,8 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 950 | 956 | ||
| 951 | const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; | 957 | const u32 plane_dim = copy.image_extent.width * copy.image_extent.height; |
| 952 | const u32 level_size = plane_dim * copy.image_extent.depth * | 958 | const u32 level_size = plane_dim * copy.image_extent.depth * |
| 953 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 959 | copy.image_subresource.num_layers * |
| 960 | BytesPerBlock(PixelFormat::A8B8G8R8_UNORM); | ||
| 954 | decode_scratch.resize_destructive(level_size); | 961 | decode_scratch.resize_destructive(level_size); |
| 955 | 962 | ||
| 956 | Tegra::Texture::ASTC::Decompress( | 963 | Tegra::Texture::ASTC::Decompress( |
| @@ -970,15 +977,20 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 970 | bpp_div; | 977 | bpp_div; |
| 971 | output_offset += static_cast<u32>(copy.buffer_size); | 978 | output_offset += static_cast<u32>(copy.buffer_size); |
| 972 | } else { | 979 | } else { |
| 973 | DecompressBC4(input_offset, copy.image_extent, output.subspan(output_offset)); | 980 | const Extent3D image_extent{ |
| 974 | 981 | .width = copy.image_extent.width, | |
| 982 | .height = copy.image_extent.height * copy.image_subresource.num_layers, | ||
| 983 | .depth = copy.image_extent.depth, | ||
| 984 | }; | ||
| 985 | DecompressBCn(input_offset, output.subspan(output_offset), image_extent, info.format); | ||
| 975 | output_offset += copy.image_extent.width * copy.image_extent.height * | 986 | output_offset += copy.image_extent.width * copy.image_extent.height * |
| 976 | copy.image_subresource.num_layers * CONVERTED_BYTES_PER_BLOCK; | 987 | copy.image_subresource.num_layers * |
| 988 | ConvertedBytesPerBlock(info.format); | ||
| 977 | } | 989 | } |
| 978 | } | 990 | } |
| 979 | } | 991 | } |
| 980 | 992 | ||
| 981 | std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | 993 | boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) { |
| 982 | const Extent3D size = info.size; | 994 | const Extent3D size = info.size; |
| 983 | const u32 bytes_per_block = BytesPerBlock(info.format); | 995 | const u32 bytes_per_block = BytesPerBlock(info.format); |
| 984 | if (info.type == ImageType::Linear) { | 996 | if (info.type == ImageType::Linear) { |
| @@ -1006,7 +1018,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | |||
| 1006 | 1018 | ||
| 1007 | u32 host_offset = 0; | 1019 | u32 host_offset = 0; |
| 1008 | 1020 | ||
| 1009 | std::vector<BufferImageCopy> copies(num_levels); | 1021 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 1010 | for (s32 level = 0; level < num_levels; ++level) { | 1022 | for (s32 level = 0; level < num_levels; ++level) { |
| 1011 | const Extent3D level_size = AdjustMipSize(size, level); | 1023 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1012 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | 1024 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); |
| @@ -1042,10 +1054,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | |||
| 1042 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | 1054 | return AdjustMipBlockSize(num_tiles, level_info.block, level); |
| 1043 | } | 1055 | } |
| 1044 | 1056 | ||
| 1045 | std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | 1057 | boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { |
| 1046 | const Extent2D tile_size = DefaultBlockSize(info.format); | 1058 | const Extent2D tile_size = DefaultBlockSize(info.format); |
| 1047 | if (info.type == ImageType::Linear) { | 1059 | if (info.type == ImageType::Linear) { |
| 1048 | return std::vector{SwizzleParameters{ | 1060 | return {SwizzleParameters{ |
| 1049 | .num_tiles = AdjustTileSize(info.size, tile_size), | 1061 | .num_tiles = AdjustTileSize(info.size, tile_size), |
| 1050 | .block = {}, | 1062 | .block = {}, |
| 1051 | .buffer_offset = 0, | 1063 | .buffer_offset = 0, |
| @@ -1057,7 +1069,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | |||
| 1057 | const s32 num_levels = info.resources.levels; | 1069 | const s32 num_levels = info.resources.levels; |
| 1058 | 1070 | ||
| 1059 | u32 guest_offset = 0; | 1071 | u32 guest_offset = 0; |
| 1060 | std::vector<SwizzleParameters> params(num_levels); | 1072 | boost::container::small_vector<SwizzleParameters, 16> params(num_levels); |
| 1061 | for (s32 level = 0; level < num_levels; ++level) { | 1073 | for (s32 level = 0; level < num_levels; ++level) { |
| 1062 | const Extent3D level_size = AdjustMipSize(size, level); | 1074 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1063 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | 1075 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <boost/container/small_vector.hpp> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "common/scratch_buffer.h" | 11 | #include "common/scratch_buffer.h" |
| @@ -40,9 +41,10 @@ struct OverlapResult { | |||
| 40 | 41 | ||
| 41 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; | 42 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; |
| 42 | 43 | ||
| 43 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | 44 | [[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info); |
| 44 | 45 | ||
| 45 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | 46 | [[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 47 | const ImageInfo& info); | ||
| 46 | 48 | ||
| 47 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); | 49 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); |
| 48 | 50 | ||
| @@ -51,21 +53,18 @@ struct OverlapResult { | |||
| 51 | 53 | ||
| 52 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; | 54 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; |
| 53 | 55 | ||
| 54 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | 56 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies( |
| 55 | const ImageInfo& src, | 57 | const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, |
| 56 | SubresourceBase base, u32 up_scale = 1, | 58 | u32 down_shift = 0); |
| 57 | u32 down_shift = 0); | ||
| 58 | 59 | ||
| 59 | [[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, | 60 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies( |
| 60 | u32 up_scale = 1, | 61 | const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); |
| 61 | u32 down_shift = 0); | ||
| 62 | 62 | ||
| 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 64 | 64 | ||
| 65 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 65 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage( |
| 66 | GPUVAddr gpu_addr, const ImageInfo& info, | 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 67 | std::span<const u8> input, | 67 | std::span<const u8> input, std::span<u8> output); |
| 68 | std::span<u8> output); | ||
| 69 | 68 | ||
| 70 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 69 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| 71 | const ImageBase& image, std::span<u8> output); | 70 | const ImageBase& image, std::span<u8> output); |
| @@ -73,13 +72,15 @@ struct OverlapResult { | |||
| 73 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 72 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 74 | std::span<BufferImageCopy> copies); | 73 | std::span<BufferImageCopy> copies); |
| 75 | 74 | ||
| 76 | [[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); | 75 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies( |
| 76 | const ImageInfo& info); | ||
| 77 | 77 | ||
| 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); | 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); |
| 79 | 79 | ||
| 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); | 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); |
| 81 | 81 | ||
| 82 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | 82 | [[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles( |
| 83 | const ImageInfo& info); | ||
| 83 | 84 | ||
| 84 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 85 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 85 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, | 86 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, |
diff --git a/src/video_core/textures/bcn.cpp b/src/video_core/textures/bcn.cpp index 671212a49..16ddbe320 100644 --- a/src/video_core/textures/bcn.cpp +++ b/src/video_core/textures/bcn.cpp | |||
| @@ -3,7 +3,6 @@ | |||
| 3 | 3 | ||
| 4 | #include <stb_dxt.h> | 4 | #include <stb_dxt.h> |
| 5 | #include <string.h> | 5 | #include <string.h> |
| 6 | |||
| 7 | #include "common/alignment.h" | 6 | #include "common/alignment.h" |
| 8 | #include "video_core/textures/bcn.h" | 7 | #include "video_core/textures/bcn.h" |
| 9 | #include "video_core/textures/workers.h" | 8 | #include "video_core/textures/workers.h" |
diff --git a/src/video_core/textures/bcn.h b/src/video_core/textures/bcn.h index 6464af885..d5d2a16c9 100644 --- a/src/video_core/textures/bcn.h +++ b/src/video_core/textures/bcn.h | |||
| @@ -4,14 +4,13 @@ | |||
| 4 | #pragma once | 4 | #pragma once |
| 5 | 5 | ||
| 6 | #include <span> | 6 | #include <span> |
| 7 | #include <stdint.h> | 7 | |
| 8 | #include "common/common_types.h" | ||
| 8 | 9 | ||
| 9 | namespace Tegra::Texture::BCN { | 10 | namespace Tegra::Texture::BCN { |
| 10 | 11 | ||
| 11 | void CompressBC1(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 12 | void CompressBC1(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output); |
| 12 | std::span<uint8_t> output); | ||
| 13 | 13 | ||
| 14 | void CompressBC3(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, | 14 | void CompressBC3(std::span<const u8> data, u32 width, u32 height, u32 depth, std::span<u8> output); |
| 15 | std::span<uint8_t> output); | ||
| 16 | 15 | ||
| 17 | } // namespace Tegra::Texture::BCN | 16 | } // namespace Tegra::Texture::BCN |
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 155599316..1f353d2df 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 15 | 15 | ||
| 16 | std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | 16 | std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( |
| 17 | const TransformFeedbackState& state) { | 17 | const TransformFeedbackState& state) { |
| 18 | static constexpr std::array VECTORS{ | 18 | static constexpr std::array VECTORS{ |
| 19 | 28U, // gl_Position | 19 | 28U, // gl_Position |
| @@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | |||
| 62 | 216U, // gl_TexCoord[6] | 62 | 216U, // gl_TexCoord[6] |
| 63 | 220U, // gl_TexCoord[7] | 63 | 220U, // gl_TexCoord[7] |
| 64 | }; | 64 | }; |
| 65 | std::vector<Shader::TransformFeedbackVarying> xfb(256); | 65 | std::array<Shader::TransformFeedbackVarying, 256> xfb{}; |
| 66 | u32 count{0}; | ||
| 66 | for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { | 67 | for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { |
| 67 | const auto& locations = state.varyings[buffer]; | 68 | const auto& locations = state.varyings[buffer]; |
| 68 | const auto& layout = state.layouts[buffer]; | 69 | const auto& layout = state.layouts[buffer]; |
| @@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | |||
| 103 | } | 104 | } |
| 104 | } | 105 | } |
| 105 | xfb[attribute] = varying; | 106 | xfb[attribute] = varying; |
| 107 | count = std::max(count, attribute); | ||
| 106 | highest = std::max(highest, (base_offset + varying.components) * 4); | 108 | highest = std::max(highest, (base_offset + varying.components) * 4); |
| 107 | } | 109 | } |
| 108 | UNIMPLEMENTED_IF(highest != layout.stride); | 110 | UNIMPLEMENTED_IF(highest != layout.stride); |
| 109 | } | 111 | } |
| 110 | return xfb; | 112 | return {xfb, count + 1}; |
| 111 | } | 113 | } |
| 112 | 114 | ||
| 113 | } // namespace VideoCommon | 115 | } // namespace VideoCommon |
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index d13eb16c3..401b1352a 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h | |||
| @@ -24,7 +24,7 @@ struct TransformFeedbackState { | |||
| 24 | varyings; | 24 | varyings; |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | 27 | std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( |
| 28 | const TransformFeedbackState& state); | 28 | const TransformFeedbackState& state); |
| 29 | 29 | ||
| 30 | } // namespace VideoCommon | 30 | } // namespace VideoCommon |
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.cpp b/src/video_core/vulkan_common/vulkan_debug_callback.cpp index 9de484c29..67e8065a4 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.cpp +++ b/src/video_core/vulkan_common/vulkan_debug_callback.cpp | |||
| @@ -7,10 +7,10 @@ | |||
| 7 | 7 | ||
| 8 | namespace Vulkan { | 8 | namespace Vulkan { |
| 9 | namespace { | 9 | namespace { |
| 10 | VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | 10 | VkBool32 DebugUtilCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, |
| 11 | VkDebugUtilsMessageTypeFlagsEXT type, | 11 | VkDebugUtilsMessageTypeFlagsEXT type, |
| 12 | const VkDebugUtilsMessengerCallbackDataEXT* data, | 12 | const VkDebugUtilsMessengerCallbackDataEXT* data, |
| 13 | [[maybe_unused]] void* user_data) { | 13 | [[maybe_unused]] void* user_data) { |
| 14 | // Skip logging known false-positive validation errors | 14 | // Skip logging known false-positive validation errors |
| 15 | switch (static_cast<u32>(data->messageIdNumber)) { | 15 | switch (static_cast<u32>(data->messageIdNumber)) { |
| 16 | #ifdef ANDROID | 16 | #ifdef ANDROID |
| @@ -62,9 +62,26 @@ VkBool32 Callback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, | |||
| 62 | } | 62 | } |
| 63 | return VK_FALSE; | 63 | return VK_FALSE; |
| 64 | } | 64 | } |
| 65 | |||
| 66 | VkBool32 DebugReportCallback(VkDebugReportFlagsEXT flags, VkDebugReportObjectTypeEXT objectType, | ||
| 67 | uint64_t object, size_t location, int32_t messageCode, | ||
| 68 | const char* pLayerPrefix, const char* pMessage, void* pUserData) { | ||
| 69 | const VkDebugReportFlagBitsEXT severity = static_cast<VkDebugReportFlagBitsEXT>(flags); | ||
| 70 | const std::string_view message{pMessage}; | ||
| 71 | if (severity & VK_DEBUG_REPORT_ERROR_BIT_EXT) { | ||
| 72 | LOG_CRITICAL(Render_Vulkan, "{}", message); | ||
| 73 | } else if (severity & VK_DEBUG_REPORT_WARNING_BIT_EXT) { | ||
| 74 | LOG_WARNING(Render_Vulkan, "{}", message); | ||
| 75 | } else if (severity & VK_DEBUG_REPORT_INFORMATION_BIT_EXT) { | ||
| 76 | LOG_INFO(Render_Vulkan, "{}", message); | ||
| 77 | } else if (severity & VK_DEBUG_REPORT_DEBUG_BIT_EXT) { | ||
| 78 | LOG_DEBUG(Render_Vulkan, "{}", message); | ||
| 79 | } | ||
| 80 | return VK_FALSE; | ||
| 81 | } | ||
| 65 | } // Anonymous namespace | 82 | } // Anonymous namespace |
| 66 | 83 | ||
| 67 | vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { | 84 | vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance) { |
| 68 | return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ | 85 | return instance.CreateDebugUtilsMessenger(VkDebugUtilsMessengerCreateInfoEXT{ |
| 69 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, | 86 | .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, |
| 70 | .pNext = nullptr, | 87 | .pNext = nullptr, |
| @@ -76,7 +93,18 @@ vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance) { | |||
| 76 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | | 93 | .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | |
| 77 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | | 94 | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | |
| 78 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, | 95 | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, |
| 79 | .pfnUserCallback = Callback, | 96 | .pfnUserCallback = DebugUtilCallback, |
| 97 | .pUserData = nullptr, | ||
| 98 | }); | ||
| 99 | } | ||
| 100 | |||
| 101 | vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance) { | ||
| 102 | return instance.CreateDebugReportCallback({ | ||
| 103 | .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, | ||
| 104 | .pNext = nullptr, | ||
| 105 | .flags = VK_DEBUG_REPORT_DEBUG_BIT_EXT | VK_DEBUG_REPORT_INFORMATION_BIT_EXT | | ||
| 106 | VK_DEBUG_REPORT_ERROR_BIT_EXT | VK_DEBUG_REPORT_WARNING_BIT_EXT, | ||
| 107 | .pfnCallback = DebugReportCallback, | ||
| 80 | .pUserData = nullptr, | 108 | .pUserData = nullptr, |
| 81 | }); | 109 | }); |
| 82 | } | 110 | } |
diff --git a/src/video_core/vulkan_common/vulkan_debug_callback.h b/src/video_core/vulkan_common/vulkan_debug_callback.h index 71b1f69ec..a8af7b406 100644 --- a/src/video_core/vulkan_common/vulkan_debug_callback.h +++ b/src/video_core/vulkan_common/vulkan_debug_callback.h | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | namespace Vulkan { | 8 | namespace Vulkan { |
| 9 | 9 | ||
| 10 | vk::DebugUtilsMessenger CreateDebugCallback(const vk::Instance& instance); | 10 | vk::DebugUtilsMessenger CreateDebugUtilsCallback(const vk::Instance& instance); |
| 11 | |||
| 12 | vk::DebugReportCallback CreateDebugReportCallback(const vk::Instance& instance); | ||
| 11 | 13 | ||
| 12 | } // namespace Vulkan | 14 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index dcedf4425..421e71e5a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -22,6 +22,8 @@ | |||
| 22 | #include <adrenotools/bcenabler.h> | 22 | #include <adrenotools/bcenabler.h> |
| 23 | #endif | 23 | #endif |
| 24 | 24 | ||
| 25 | #include <vk_mem_alloc.h> | ||
| 26 | |||
| 25 | namespace Vulkan { | 27 | namespace Vulkan { |
| 26 | using namespace Common::Literals; | 28 | using namespace Common::Literals; |
| 27 | namespace { | 29 | namespace { |
| @@ -316,6 +318,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | |||
| 316 | std::vector<const char*> ExtensionListForVulkan( | 318 | std::vector<const char*> ExtensionListForVulkan( |
| 317 | const std::set<std::string, std::less<>>& extensions) { | 319 | const std::set<std::string, std::less<>>& extensions) { |
| 318 | std::vector<const char*> output; | 320 | std::vector<const char*> output; |
| 321 | output.reserve(extensions.size()); | ||
| 319 | for (const auto& extension : extensions) { | 322 | for (const auto& extension : extensions) { |
| 320 | output.push_back(extension.c_str()); | 323 | output.push_back(extension.c_str()); |
| 321 | } | 324 | } |
| @@ -346,7 +349,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 346 | const bool is_s8gen2 = device_id == 0x43050a01; | 349 | const bool is_s8gen2 = device_id == 0x43050a01; |
| 347 | const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; | 350 | const bool is_arm = driver_id == VK_DRIVER_ID_ARM_PROPRIETARY; |
| 348 | 351 | ||
| 349 | if ((is_mvk || is_qualcomm || is_turnip) && !is_suitable) { | 352 | if ((is_mvk || is_qualcomm || is_turnip || is_arm) && !is_suitable) { |
| 350 | LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); | 353 | LOG_WARNING(Render_Vulkan, "Unsuitable driver, continuing anyway"); |
| 351 | } else if (!is_suitable) { | 354 | } else if (!is_suitable) { |
| 352 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); | 355 | throw vk::Exception(VK_ERROR_INCOMPATIBLE_DRIVER); |
| @@ -525,6 +528,14 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 525 | } | 528 | } |
| 526 | 529 | ||
| 527 | sets_per_pool = 64; | 530 | sets_per_pool = 64; |
| 531 | if (extensions.extended_dynamic_state3 && is_amd_driver && | ||
| 532 | properties.properties.driverVersion >= VK_MAKE_API_VERSION(0, 2, 0, 270)) { | ||
| 533 | LOG_WARNING(Render_Vulkan, | ||
| 534 | "AMD drivers after 23.5.2 have broken extendedDynamicState3ColorBlendEquation"); | ||
| 535 | features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false; | ||
| 536 | features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false; | ||
| 537 | dynamic_state3_blending = false; | ||
| 538 | } | ||
| 528 | if (is_amd_driver) { | 539 | if (is_amd_driver) { |
| 529 | // AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2. | 540 | // AMD drivers need a higher amount of Sets per Pool in certain circumstances like in XC2. |
| 530 | sets_per_pool = 96; | 541 | sets_per_pool = 96; |
| @@ -562,6 +573,9 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 562 | LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); | 573 | LOG_WARNING(Render_Vulkan, "Intel proprietary drivers do not support MSAA image blits"); |
| 563 | cant_blit_msaa = true; | 574 | cant_blit_msaa = true; |
| 564 | } | 575 | } |
| 576 | has_broken_compute = | ||
| 577 | CheckBrokenCompute(properties.driver.driverID, properties.properties.driverVersion) && | ||
| 578 | !Settings::values.enable_compute_pipelines.GetValue(); | ||
| 565 | if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { | 579 | if (is_intel_anv || (is_qualcomm && !is_s8gen2)) { |
| 566 | LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); | 580 | LOG_WARNING(Render_Vulkan, "Driver does not support native BGR format"); |
| 567 | must_emulate_bgr565 = true; | 581 | must_emulate_bgr565 = true; |
| @@ -592,9 +606,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
| 592 | 606 | ||
| 593 | graphics_queue = logical.GetQueue(graphics_family); | 607 | graphics_queue = logical.GetQueue(graphics_family); |
| 594 | present_queue = logical.GetQueue(present_family); | 608 | present_queue = logical.GetQueue(present_family); |
| 609 | |||
| 610 | VmaVulkanFunctions functions{}; | ||
| 611 | functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr; | ||
| 612 | functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr; | ||
| 613 | |||
| 614 | const VmaAllocatorCreateInfo allocator_info = { | ||
| 615 | .flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT, | ||
| 616 | .physicalDevice = physical, | ||
| 617 | .device = *logical, | ||
| 618 | .preferredLargeHeapBlockSize = 0, | ||
| 619 | .pAllocationCallbacks = nullptr, | ||
| 620 | .pDeviceMemoryCallbacks = nullptr, | ||
| 621 | .pHeapSizeLimit = nullptr, | ||
| 622 | .pVulkanFunctions = &functions, | ||
| 623 | .instance = instance, | ||
| 624 | .vulkanApiVersion = VK_API_VERSION_1_1, | ||
| 625 | .pTypeExternalMemoryHandleTypes = nullptr, | ||
| 626 | }; | ||
| 627 | |||
| 628 | vk::Check(vmaCreateAllocator(&allocator_info, &allocator)); | ||
| 595 | } | 629 | } |
| 596 | 630 | ||
| 597 | Device::~Device() = default; | 631 | Device::~Device() { |
| 632 | vmaDestroyAllocator(allocator); | ||
| 633 | } | ||
| 598 | 634 | ||
| 599 | VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, | 635 | VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage, |
| 600 | FormatType format_type) const { | 636 | FormatType format_type) const { |
| @@ -877,6 +913,10 @@ bool Device::GetSuitability(bool requires_swapchain) { | |||
| 877 | properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; | 913 | properties.driver.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; |
| 878 | SetNext(next, properties.driver); | 914 | SetNext(next, properties.driver); |
| 879 | 915 | ||
| 916 | // Retrieve subgroup properties. | ||
| 917 | properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; | ||
| 918 | SetNext(next, properties.subgroup_properties); | ||
| 919 | |||
| 880 | // Retrieve relevant extension properties. | 920 | // Retrieve relevant extension properties. |
| 881 | if (extensions.shader_float_controls) { | 921 | if (extensions.shader_float_controls) { |
| 882 | properties.float_controls.sType = | 922 | properties.float_controls.sType = |
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 8c7e44fcb..1f17265d5 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h | |||
| @@ -10,9 +10,12 @@ | |||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| 13 | #include "common/logging/log.h" | ||
| 13 | #include "common/settings.h" | 14 | #include "common/settings.h" |
| 14 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 15 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 15 | 16 | ||
| 17 | VK_DEFINE_HANDLE(VmaAllocator) | ||
| 18 | |||
| 16 | // Define all features which may be used by the implementation here. | 19 | // Define all features which may be used by the implementation here. |
| 17 | // Vulkan version in the macro describes the minimum version required for feature availability. | 20 | // Vulkan version in the macro describes the minimum version required for feature availability. |
| 18 | // If the Vulkan version is lower than the required version, the named extension is required. | 21 | // If the Vulkan version is lower than the required version, the named extension is required. |
| @@ -198,6 +201,11 @@ public: | |||
| 198 | return dld; | 201 | return dld; |
| 199 | } | 202 | } |
| 200 | 203 | ||
| 204 | /// Returns the VMA allocator. | ||
| 205 | VmaAllocator GetAllocator() const { | ||
| 206 | return allocator; | ||
| 207 | } | ||
| 208 | |||
| 201 | /// Returns the logical device. | 209 | /// Returns the logical device. |
| 202 | const vk::Device& GetLogical() const { | 210 | const vk::Device& GetLogical() const { |
| 203 | return logical; | 211 | return logical; |
| @@ -285,6 +293,11 @@ public: | |||
| 285 | return features.features.textureCompressionASTC_LDR; | 293 | return features.features.textureCompressionASTC_LDR; |
| 286 | } | 294 | } |
| 287 | 295 | ||
| 296 | /// Returns true if BCn is natively supported. | ||
| 297 | bool IsOptimalBcnSupported() const { | ||
| 298 | return features.features.textureCompressionBC; | ||
| 299 | } | ||
| 300 | |||
| 288 | /// Returns true if descriptor aliasing is natively supported. | 301 | /// Returns true if descriptor aliasing is natively supported. |
| 289 | bool IsDescriptorAliasingSupported() const { | 302 | bool IsDescriptorAliasingSupported() const { |
| 290 | return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; | 303 | return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY; |
| @@ -315,6 +328,11 @@ public: | |||
| 315 | return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; | 328 | return properties.subgroup_size_control.requiredSubgroupSizeStages & stage; |
| 316 | } | 329 | } |
| 317 | 330 | ||
| 331 | /// Returns true if the device supports the provided subgroup feature. | ||
| 332 | bool IsSubgroupFeatureSupported(VkSubgroupFeatureFlagBits feature) const { | ||
| 333 | return properties.subgroup_properties.supportedOperations & feature; | ||
| 334 | } | ||
| 335 | |||
| 318 | /// Returns the maximum number of push descriptors. | 336 | /// Returns the maximum number of push descriptors. |
| 319 | u32 MaxPushDescriptors() const { | 337 | u32 MaxPushDescriptors() const { |
| 320 | return properties.push_descriptor.maxPushDescriptors; | 338 | return properties.push_descriptor.maxPushDescriptors; |
| @@ -380,6 +398,11 @@ public: | |||
| 380 | return extensions.swapchain_mutable_format; | 398 | return extensions.swapchain_mutable_format; |
| 381 | } | 399 | } |
| 382 | 400 | ||
| 401 | /// Returns true if VK_KHR_shader_float_controls is enabled. | ||
| 402 | bool IsKhrShaderFloatControlsSupported() const { | ||
| 403 | return extensions.shader_float_controls; | ||
| 404 | } | ||
| 405 | |||
| 383 | /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. | 406 | /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. |
| 384 | bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { | 407 | bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { |
| 385 | return extensions.workgroup_memory_explicit_layout; | 408 | return extensions.workgroup_memory_explicit_layout; |
| @@ -405,6 +428,11 @@ public: | |||
| 405 | return extensions.sampler_filter_minmax; | 428 | return extensions.sampler_filter_minmax; |
| 406 | } | 429 | } |
| 407 | 430 | ||
| 431 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | ||
| 432 | bool IsExtShaderStencilExportSupported() const { | ||
| 433 | return extensions.shader_stencil_export; | ||
| 434 | } | ||
| 435 | |||
| 408 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. | 436 | /// Returns true if the device supports VK_EXT_depth_range_unrestricted. |
| 409 | bool IsExtDepthRangeUnrestrictedSupported() const { | 437 | bool IsExtDepthRangeUnrestrictedSupported() const { |
| 410 | return extensions.depth_range_unrestricted; | 438 | return extensions.depth_range_unrestricted; |
| @@ -474,9 +502,9 @@ public: | |||
| 474 | return extensions.vertex_input_dynamic_state; | 502 | return extensions.vertex_input_dynamic_state; |
| 475 | } | 503 | } |
| 476 | 504 | ||
| 477 | /// Returns true if the device supports VK_EXT_shader_stencil_export. | 505 | /// Returns true if the device supports VK_EXT_shader_demote_to_helper_invocation |
| 478 | bool IsExtShaderStencilExportSupported() const { | 506 | bool IsExtShaderDemoteToHelperInvocationSupported() const { |
| 479 | return extensions.shader_stencil_export; | 507 | return extensions.shader_demote_to_helper_invocation; |
| 480 | } | 508 | } |
| 481 | 509 | ||
| 482 | /// Returns true if the device supports VK_EXT_conservative_rasterization. | 510 | /// Returns true if the device supports VK_EXT_conservative_rasterization. |
| @@ -510,12 +538,17 @@ public: | |||
| 510 | if (extensions.spirv_1_4) { | 538 | if (extensions.spirv_1_4) { |
| 511 | return 0x00010400U; | 539 | return 0x00010400U; |
| 512 | } | 540 | } |
| 513 | return 0x00010000U; | 541 | return 0x00010300U; |
| 514 | } | 542 | } |
| 515 | 543 | ||
| 516 | /// Returns true when a known debugging tool is attached. | 544 | /// Returns true when a known debugging tool is attached. |
| 517 | bool HasDebuggingToolAttached() const { | 545 | bool HasDebuggingToolAttached() const { |
| 518 | return has_renderdoc || has_nsight_graphics || Settings::values.renderer_debug.GetValue(); | 546 | return has_renderdoc || has_nsight_graphics; |
| 547 | } | ||
| 548 | |||
| 549 | /// @returns True if compute pipelines can cause crashing. | ||
| 550 | bool HasBrokenCompute() const { | ||
| 551 | return has_broken_compute; | ||
| 519 | } | 552 | } |
| 520 | 553 | ||
| 521 | /// Returns true when the device does not properly support cube compatibility. | 554 | /// Returns true when the device does not properly support cube compatibility. |
| @@ -575,10 +608,30 @@ public: | |||
| 575 | return properties.properties.limits.maxVertexInputBindings; | 608 | return properties.properties.limits.maxVertexInputBindings; |
| 576 | } | 609 | } |
| 577 | 610 | ||
| 611 | u32 GetMaxViewports() const { | ||
| 612 | return properties.properties.limits.maxViewports; | ||
| 613 | } | ||
| 614 | |||
| 578 | bool SupportsConditionalBarriers() const { | 615 | bool SupportsConditionalBarriers() const { |
| 579 | return supports_conditional_barriers; | 616 | return supports_conditional_barriers; |
| 580 | } | 617 | } |
| 581 | 618 | ||
| 619 | [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, | ||
| 620 | u32 driver_version) { | ||
| 621 | if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | ||
| 622 | const u32 major = VK_API_VERSION_MAJOR(driver_version); | ||
| 623 | const u32 minor = VK_API_VERSION_MINOR(driver_version); | ||
| 624 | const u32 patch = VK_API_VERSION_PATCH(driver_version); | ||
| 625 | if (major == 0 && minor == 405 && patch < 286) { | ||
| 626 | LOG_WARNING( | ||
| 627 | Render_Vulkan, | ||
| 628 | "Intel proprietary drivers 0.405.0 until 0.405.286 have broken compute"); | ||
| 629 | return true; | ||
| 630 | } | ||
| 631 | } | ||
| 632 | return false; | ||
| 633 | } | ||
| 634 | |||
| 582 | private: | 635 | private: |
| 583 | /// Checks if the physical device is suitable and configures the object state | 636 | /// Checks if the physical device is suitable and configures the object state |
| 584 | /// with all necessary info about its properties. | 637 | /// with all necessary info about its properties. |
| @@ -608,6 +661,7 @@ private: | |||
| 608 | 661 | ||
| 609 | private: | 662 | private: |
| 610 | VkInstance instance; ///< Vulkan instance. | 663 | VkInstance instance; ///< Vulkan instance. |
| 664 | VmaAllocator allocator; ///< VMA allocator. | ||
| 611 | vk::DeviceDispatch dld; ///< Device function pointers. | 665 | vk::DeviceDispatch dld; ///< Device function pointers. |
| 612 | vk::PhysicalDevice physical; ///< Physical device. | 666 | vk::PhysicalDevice physical; ///< Physical device. |
| 613 | vk::Device logical; ///< Logical device. | 667 | vk::Device logical; ///< Logical device. |
| @@ -650,6 +704,7 @@ private: | |||
| 650 | 704 | ||
| 651 | struct Properties { | 705 | struct Properties { |
| 652 | VkPhysicalDeviceDriverProperties driver{}; | 706 | VkPhysicalDeviceDriverProperties driver{}; |
| 707 | VkPhysicalDeviceSubgroupProperties subgroup_properties{}; | ||
| 653 | VkPhysicalDeviceFloatControlsProperties float_controls{}; | 708 | VkPhysicalDeviceFloatControlsProperties float_controls{}; |
| 654 | VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; | 709 | VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor{}; |
| 655 | VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; | 710 | VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control{}; |
| @@ -672,6 +727,7 @@ private: | |||
| 672 | bool is_integrated{}; ///< Is GPU an iGPU. | 727 | bool is_integrated{}; ///< Is GPU an iGPU. |
| 673 | bool is_virtual{}; ///< Is GPU a virtual GPU. | 728 | bool is_virtual{}; ///< Is GPU a virtual GPU. |
| 674 | bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. | 729 | bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. |
| 730 | bool has_broken_compute{}; ///< Compute shaders can cause crashes | ||
| 675 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit | 731 | bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit |
| 676 | bool has_renderdoc{}; ///< Has RenderDoc attached | 732 | bool has_renderdoc{}; ///< Has RenderDoc attached |
| 677 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached | 733 | bool has_nsight_graphics{}; ///< Has Nsight Graphics attached |
diff --git a/src/video_core/vulkan_common/vulkan_instance.cpp b/src/video_core/vulkan_common/vulkan_instance.cpp index b6d83e446..7624a9b32 100644 --- a/src/video_core/vulkan_common/vulkan_instance.cpp +++ b/src/video_core/vulkan_common/vulkan_instance.cpp | |||
| @@ -31,10 +31,34 @@ | |||
| 31 | 31 | ||
| 32 | namespace Vulkan { | 32 | namespace Vulkan { |
| 33 | namespace { | 33 | namespace { |
| 34 | |||
| 35 | [[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, | ||
| 36 | std::span<const char* const> extensions) { | ||
| 37 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 38 | if (!properties) { | ||
| 39 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 40 | return false; | ||
| 41 | } | ||
| 42 | for (const char* extension : extensions) { | ||
| 43 | const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { | ||
| 44 | return std::strcmp(extension, prop.extensionName) == 0; | ||
| 45 | }); | ||
| 46 | if (it == properties->end()) { | ||
| 47 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 48 | return false; | ||
| 49 | } | ||
| 50 | } | ||
| 51 | return true; | ||
| 52 | } | ||
| 53 | |||
| 34 | [[nodiscard]] std::vector<const char*> RequiredExtensions( | 54 | [[nodiscard]] std::vector<const char*> RequiredExtensions( |
| 35 | Core::Frontend::WindowSystemType window_type, bool enable_validation) { | 55 | const vk::InstanceDispatch& dld, Core::Frontend::WindowSystemType window_type, |
| 56 | bool enable_validation) { | ||
| 36 | std::vector<const char*> extensions; | 57 | std::vector<const char*> extensions; |
| 37 | extensions.reserve(6); | 58 | extensions.reserve(6); |
| 59 | #ifdef __APPLE__ | ||
| 60 | extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); | ||
| 61 | #endif | ||
| 38 | switch (window_type) { | 62 | switch (window_type) { |
| 39 | case Core::Frontend::WindowSystemType::Headless: | 63 | case Core::Frontend::WindowSystemType::Headless: |
| 40 | break; | 64 | break; |
| @@ -66,35 +90,14 @@ namespace { | |||
| 66 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); | 90 | extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); |
| 67 | } | 91 | } |
| 68 | if (enable_validation) { | 92 | if (enable_validation) { |
| 69 | extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); | 93 | const bool debug_utils = |
| 94 | AreExtensionsSupported(dld, std::array{VK_EXT_DEBUG_UTILS_EXTENSION_NAME}); | ||
| 95 | extensions.push_back(debug_utils ? VK_EXT_DEBUG_UTILS_EXTENSION_NAME | ||
| 96 | : VK_EXT_DEBUG_REPORT_EXTENSION_NAME); | ||
| 70 | } | 97 | } |
| 71 | extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); | ||
| 72 | |||
| 73 | #ifdef __APPLE__ | ||
| 74 | extensions.push_back(VK_KHR_PORTABILITY_ENUMERATION_EXTENSION_NAME); | ||
| 75 | #endif | ||
| 76 | return extensions; | 98 | return extensions; |
| 77 | } | 99 | } |
| 78 | 100 | ||
| 79 | [[nodiscard]] bool AreExtensionsSupported(const vk::InstanceDispatch& dld, | ||
| 80 | std::span<const char* const> extensions) { | ||
| 81 | const std::optional properties = vk::EnumerateInstanceExtensionProperties(dld); | ||
| 82 | if (!properties) { | ||
| 83 | LOG_ERROR(Render_Vulkan, "Failed to query extension properties"); | ||
| 84 | return false; | ||
| 85 | } | ||
| 86 | for (const char* extension : extensions) { | ||
| 87 | const auto it = std::ranges::find_if(*properties, [extension](const auto& prop) { | ||
| 88 | return std::strcmp(extension, prop.extensionName) == 0; | ||
| 89 | }); | ||
| 90 | if (it == properties->end()) { | ||
| 91 | LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension); | ||
| 92 | return false; | ||
| 93 | } | ||
| 94 | } | ||
| 95 | return true; | ||
| 96 | } | ||
| 97 | |||
| 98 | [[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { | 101 | [[nodiscard]] std::vector<const char*> Layers(bool enable_validation) { |
| 99 | std::vector<const char*> layers; | 102 | std::vector<const char*> layers; |
| 100 | if (enable_validation) { | 103 | if (enable_validation) { |
| @@ -138,7 +141,8 @@ vk::Instance CreateInstance(const Common::DynamicLibrary& library, vk::InstanceD | |||
| 138 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); | 141 | LOG_ERROR(Render_Vulkan, "Failed to load Vulkan function pointers"); |
| 139 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); | 142 | throw vk::Exception(VK_ERROR_INITIALIZATION_FAILED); |
| 140 | } | 143 | } |
| 141 | const std::vector<const char*> extensions = RequiredExtensions(window_type, enable_validation); | 144 | const std::vector<const char*> extensions = |
| 145 | RequiredExtensions(dld, window_type, enable_validation); | ||
| 142 | if (!AreExtensionsSupported(dld, extensions)) { | 146 | if (!AreExtensionsSupported(dld, extensions)) { |
| 143 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); | 147 | throw vk::Exception(VK_ERROR_EXTENSION_NOT_PRESENT); |
| 144 | } | 148 | } |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index e28a556f8..a2ef0efa4 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp | |||
| @@ -6,8 +6,6 @@ | |||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <vector> | 7 | #include <vector> |
| 8 | 8 | ||
| 9 | #include <glad/glad.h> | ||
| 10 | |||
| 11 | #include "common/alignment.h" | 9 | #include "common/alignment.h" |
| 12 | #include "common/assert.h" | 10 | #include "common/assert.h" |
| 13 | #include "common/common_types.h" | 11 | #include "common/common_types.h" |
| @@ -17,6 +15,8 @@ | |||
| 17 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" | 15 | #include "video_core/vulkan_common/vulkan_memory_allocator.h" |
| 18 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 16 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 19 | 17 | ||
| 18 | #include <vk_mem_alloc.h> | ||
| 19 | |||
| 20 | namespace Vulkan { | 20 | namespace Vulkan { |
| 21 | namespace { | 21 | namespace { |
| 22 | struct Range { | 22 | struct Range { |
| @@ -49,22 +49,45 @@ struct Range { | |||
| 49 | case MemoryUsage::Download: | 49 | case MemoryUsage::Download: |
| 50 | return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | | 50 | return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | |
| 51 | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; | 51 | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; |
| 52 | case MemoryUsage::Stream: | ||
| 53 | return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | | ||
| 54 | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 52 | } | 55 | } |
| 53 | ASSERT_MSG(false, "Invalid memory usage={}", usage); | 56 | ASSERT_MSG(false, "Invalid memory usage={}", usage); |
| 54 | return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | 57 | return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; |
| 55 | } | 58 | } |
| 56 | 59 | ||
| 57 | constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{ | 60 | [[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferedVmaFlags(MemoryUsage usage) { |
| 58 | .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, | 61 | return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
| 59 | .pNext = nullptr, | 62 | : VkMemoryPropertyFlagBits{}; |
| 60 | #ifdef _WIN32 | 63 | } |
| 61 | .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT, | 64 | |
| 62 | #elif __unix__ | 65 | [[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { |
| 63 | .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, | 66 | switch (usage) { |
| 64 | #else | 67 | case MemoryUsage::Upload: |
| 65 | .handleTypes = 0, | 68 | case MemoryUsage::Stream: |
| 66 | #endif | 69 | return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; |
| 67 | }; | 70 | case MemoryUsage::Download: |
| 71 | return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; | ||
| 72 | case MemoryUsage::DeviceLocal: | ||
| 73 | return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | | ||
| 74 | VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; | ||
| 75 | } | ||
| 76 | return {}; | ||
| 77 | } | ||
| 78 | |||
| 79 | [[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { | ||
| 80 | switch (usage) { | ||
| 81 | case MemoryUsage::DeviceLocal: | ||
| 82 | case MemoryUsage::Stream: | ||
| 83 | return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; | ||
| 84 | case MemoryUsage::Upload: | ||
| 85 | case MemoryUsage::Download: | ||
| 86 | return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; | ||
| 87 | } | ||
| 88 | return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; | ||
| 89 | } | ||
| 90 | |||
| 68 | } // Anonymous namespace | 91 | } // Anonymous namespace |
| 69 | 92 | ||
| 70 | class MemoryAllocation { | 93 | class MemoryAllocation { |
| @@ -74,14 +97,6 @@ public: | |||
| 74 | : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, | 97 | : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, |
| 75 | property_flags{properties}, shifted_memory_type{1U << type} {} | 98 | property_flags{properties}, shifted_memory_type{1U << type} {} |
| 76 | 99 | ||
| 77 | #if defined(_WIN32) || defined(__unix__) | ||
| 78 | ~MemoryAllocation() { | ||
| 79 | if (owning_opengl_handle != 0) { | ||
| 80 | glDeleteMemoryObjectsEXT(1, &owning_opengl_handle); | ||
| 81 | } | ||
| 82 | } | ||
| 83 | #endif | ||
| 84 | |||
| 85 | MemoryAllocation& operator=(const MemoryAllocation&) = delete; | 100 | MemoryAllocation& operator=(const MemoryAllocation&) = delete; |
| 86 | MemoryAllocation(const MemoryAllocation&) = delete; | 101 | MemoryAllocation(const MemoryAllocation&) = delete; |
| 87 | 102 | ||
| @@ -120,31 +135,6 @@ public: | |||
| 120 | return memory_mapped_span; | 135 | return memory_mapped_span; |
| 121 | } | 136 | } |
| 122 | 137 | ||
| 123 | #ifdef _WIN32 | ||
| 124 | [[nodiscard]] u32 ExportOpenGLHandle() { | ||
| 125 | if (!owning_opengl_handle) { | ||
| 126 | glCreateMemoryObjectsEXT(1, &owning_opengl_handle); | ||
| 127 | glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size, | ||
| 128 | GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, | ||
| 129 | memory.GetMemoryWin32HandleKHR()); | ||
| 130 | } | ||
| 131 | return owning_opengl_handle; | ||
| 132 | } | ||
| 133 | #elif __unix__ | ||
| 134 | [[nodiscard]] u32 ExportOpenGLHandle() { | ||
| 135 | if (!owning_opengl_handle) { | ||
| 136 | glCreateMemoryObjectsEXT(1, &owning_opengl_handle); | ||
| 137 | glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT, | ||
| 138 | memory.GetMemoryFdKHR()); | ||
| 139 | } | ||
| 140 | return owning_opengl_handle; | ||
| 141 | } | ||
| 142 | #else | ||
| 143 | [[nodiscard]] u32 ExportOpenGLHandle() { | ||
| 144 | return 0; | ||
| 145 | } | ||
| 146 | #endif | ||
| 147 | |||
| 148 | /// Returns whether this allocation is compatible with the arguments. | 138 | /// Returns whether this allocation is compatible with the arguments. |
| 149 | [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { | 139 | [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { |
| 150 | return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0; | 140 | return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0; |
| @@ -182,9 +172,6 @@ private: | |||
| 182 | const u32 shifted_memory_type; ///< Shifted Vulkan memory type. | 172 | const u32 shifted_memory_type; ///< Shifted Vulkan memory type. |
| 183 | std::vector<Range> commits; ///< All commit ranges done from this allocation. | 173 | std::vector<Range> commits; ///< All commit ranges done from this allocation. |
| 184 | std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. | 174 | std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before. |
| 185 | #if defined(_WIN32) || defined(__unix__) | ||
| 186 | u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle. | ||
| 187 | #endif | ||
| 188 | }; | 175 | }; |
| 189 | 176 | ||
| 190 | MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, | 177 | MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, |
| @@ -216,24 +203,70 @@ std::span<u8> MemoryCommit::Map() { | |||
| 216 | return span; | 203 | return span; |
| 217 | } | 204 | } |
| 218 | 205 | ||
| 219 | u32 MemoryCommit::ExportOpenGLHandle() const { | ||
| 220 | return allocation->ExportOpenGLHandle(); | ||
| 221 | } | ||
| 222 | |||
| 223 | void MemoryCommit::Release() { | 206 | void MemoryCommit::Release() { |
| 224 | if (allocation) { | 207 | if (allocation) { |
| 225 | allocation->Free(begin); | 208 | allocation->Free(begin); |
| 226 | } | 209 | } |
| 227 | } | 210 | } |
| 228 | 211 | ||
| 229 | MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_) | 212 | MemoryAllocator::MemoryAllocator(const Device& device_) |
| 230 | : device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, | 213 | : device{device_}, allocator{device.GetAllocator()}, |
| 231 | export_allocations{export_allocations_}, | 214 | properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, |
| 232 | buffer_image_granularity{ | 215 | buffer_image_granularity{ |
| 233 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} | 216 | device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} |
| 234 | 217 | ||
| 235 | MemoryAllocator::~MemoryAllocator() = default; | 218 | MemoryAllocator::~MemoryAllocator() = default; |
| 236 | 219 | ||
| 220 | vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const { | ||
| 221 | const VmaAllocationCreateInfo alloc_ci = { | ||
| 222 | .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, | ||
| 223 | .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, | ||
| 224 | .requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, | ||
| 225 | .preferredFlags = 0, | ||
| 226 | .memoryTypeBits = 0, | ||
| 227 | .pool = VK_NULL_HANDLE, | ||
| 228 | .pUserData = nullptr, | ||
| 229 | .priority = 0.f, | ||
| 230 | }; | ||
| 231 | |||
| 232 | VkImage handle{}; | ||
| 233 | VmaAllocation allocation{}; | ||
| 234 | |||
| 235 | vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr)); | ||
| 236 | |||
| 237 | return vk::Image(handle, *device.GetLogical(), allocator, allocation, | ||
| 238 | device.GetDispatchLoader()); | ||
| 239 | } | ||
| 240 | |||
| 241 | vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const { | ||
| 242 | const VmaAllocationCreateInfo alloc_ci = { | ||
| 243 | .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | | ||
| 244 | MemoryUsageVmaFlags(usage), | ||
| 245 | .usage = MemoryUsageVma(usage), | ||
| 246 | .requiredFlags = 0, | ||
| 247 | .preferredFlags = MemoryUsagePreferedVmaFlags(usage), | ||
| 248 | .memoryTypeBits = 0, | ||
| 249 | .pool = VK_NULL_HANDLE, | ||
| 250 | .pUserData = nullptr, | ||
| 251 | .priority = 0.f, | ||
| 252 | }; | ||
| 253 | |||
| 254 | VkBuffer handle{}; | ||
| 255 | VmaAllocationInfo alloc_info{}; | ||
| 256 | VmaAllocation allocation{}; | ||
| 257 | VkMemoryPropertyFlags property_flags{}; | ||
| 258 | |||
| 259 | vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); | ||
| 260 | vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); | ||
| 261 | |||
| 262 | u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData); | ||
| 263 | const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{}; | ||
| 264 | const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; | ||
| 265 | |||
| 266 | return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent, | ||
| 267 | device.GetDispatchLoader()); | ||
| 268 | } | ||
| 269 | |||
| 237 | MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { | 270 | MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { |
| 238 | // Find the fastest memory flags we can afford with the current requirements | 271 | // Find the fastest memory flags we can afford with the current requirements |
| 239 | const u32 type_mask = requirements.memoryTypeBits; | 272 | const u32 type_mask = requirements.memoryTypeBits; |
| @@ -253,25 +286,11 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M | |||
| 253 | return TryCommit(requirements, flags).value(); | 286 | return TryCommit(requirements, flags).value(); |
| 254 | } | 287 | } |
| 255 | 288 | ||
| 256 | MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) { | ||
| 257 | auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage); | ||
| 258 | buffer.BindMemory(commit.Memory(), commit.Offset()); | ||
| 259 | return commit; | ||
| 260 | } | ||
| 261 | |||
| 262 | MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) { | ||
| 263 | VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image); | ||
| 264 | requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity); | ||
| 265 | auto commit = Commit(requirements, usage); | ||
| 266 | image.BindMemory(commit.Memory(), commit.Offset()); | ||
| 267 | return commit; | ||
| 268 | } | ||
| 269 | |||
| 270 | bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { | 289 | bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { |
| 271 | const u32 type = FindType(flags, type_mask).value(); | 290 | const u32 type = FindType(flags, type_mask).value(); |
| 272 | vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ | 291 | vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ |
| 273 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, | 292 | .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
| 274 | .pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr, | 293 | .pNext = nullptr, |
| 275 | .allocationSize = size, | 294 | .allocationSize = size, |
| 276 | .memoryTypeIndex = type, | 295 | .memoryTypeIndex = type, |
| 277 | }); | 296 | }); |
| @@ -342,16 +361,4 @@ std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty | |||
| 342 | return std::nullopt; | 361 | return std::nullopt; |
| 343 | } | 362 | } |
| 344 | 363 | ||
| 345 | bool IsHostVisible(MemoryUsage usage) noexcept { | ||
| 346 | switch (usage) { | ||
| 347 | case MemoryUsage::DeviceLocal: | ||
| 348 | return false; | ||
| 349 | case MemoryUsage::Upload: | ||
| 350 | case MemoryUsage::Download: | ||
| 351 | return true; | ||
| 352 | } | ||
| 353 | ASSERT_MSG(false, "Invalid memory usage={}", usage); | ||
| 354 | return false; | ||
| 355 | } | ||
| 356 | |||
| 357 | } // namespace Vulkan | 364 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index a5bff03fe..f449bc8d0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h | |||
| @@ -9,6 +9,8 @@ | |||
| 9 | #include "common/common_types.h" | 9 | #include "common/common_types.h" |
| 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 10 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 11 | 11 | ||
| 12 | VK_DEFINE_HANDLE(VmaAllocator) | ||
| 13 | |||
| 12 | namespace Vulkan { | 14 | namespace Vulkan { |
| 13 | 15 | ||
| 14 | class Device; | 16 | class Device; |
| @@ -17,9 +19,11 @@ class MemoryAllocation; | |||
| 17 | 19 | ||
| 18 | /// Hints and requirements for the backing memory type of a commit | 20 | /// Hints and requirements for the backing memory type of a commit |
| 19 | enum class MemoryUsage { | 21 | enum class MemoryUsage { |
| 20 | DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU | 22 | DeviceLocal, ///< Requests device local host visible buffer, falling back to device local |
| 23 | ///< memory. | ||
| 21 | Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads | 24 | Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads |
| 22 | Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks | 25 | Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks |
| 26 | Stream, ///< Requests device local host visible buffer, falling back host memory. | ||
| 23 | }; | 27 | }; |
| 24 | 28 | ||
| 25 | /// Ownership handle of a memory commitment. | 29 | /// Ownership handle of a memory commitment. |
| @@ -41,9 +45,6 @@ public: | |||
| 41 | /// It will map the backing allocation if it hasn't been mapped before. | 45 | /// It will map the backing allocation if it hasn't been mapped before. |
| 42 | std::span<u8> Map(); | 46 | std::span<u8> Map(); |
| 43 | 47 | ||
| 44 | /// Returns an non-owning OpenGL handle, creating one if it doesn't exist. | ||
| 45 | u32 ExportOpenGLHandle() const; | ||
| 46 | |||
| 47 | /// Returns the Vulkan memory handler. | 48 | /// Returns the Vulkan memory handler. |
| 48 | VkDeviceMemory Memory() const { | 49 | VkDeviceMemory Memory() const { |
| 49 | return memory; | 50 | return memory; |
| @@ -74,16 +75,19 @@ public: | |||
| 74 | * Construct memory allocator | 75 | * Construct memory allocator |
| 75 | * | 76 | * |
| 76 | * @param device_ Device to allocate from | 77 | * @param device_ Device to allocate from |
| 77 | * @param export_allocations_ True when allocations have to be exported | ||
| 78 | * | 78 | * |
| 79 | * @throw vk::Exception on failure | 79 | * @throw vk::Exception on failure |
| 80 | */ | 80 | */ |
| 81 | explicit MemoryAllocator(const Device& device_, bool export_allocations_); | 81 | explicit MemoryAllocator(const Device& device_); |
| 82 | ~MemoryAllocator(); | 82 | ~MemoryAllocator(); |
| 83 | 83 | ||
| 84 | MemoryAllocator& operator=(const MemoryAllocator&) = delete; | 84 | MemoryAllocator& operator=(const MemoryAllocator&) = delete; |
| 85 | MemoryAllocator(const MemoryAllocator&) = delete; | 85 | MemoryAllocator(const MemoryAllocator&) = delete; |
| 86 | 86 | ||
| 87 | vk::Image CreateImage(const VkImageCreateInfo& ci) const; | ||
| 88 | |||
| 89 | vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const; | ||
| 90 | |||
| 87 | /** | 91 | /** |
| 88 | * Commits a memory with the specified requirements. | 92 | * Commits a memory with the specified requirements. |
| 89 | * | 93 | * |
| @@ -97,9 +101,6 @@ public: | |||
| 97 | /// Commits memory required by the buffer and binds it. | 101 | /// Commits memory required by the buffer and binds it. |
| 98 | MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage); | 102 | MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage); |
| 99 | 103 | ||
| 100 | /// Commits memory required by the image and binds it. | ||
| 101 | MemoryCommit Commit(const vk::Image& image, MemoryUsage usage); | ||
| 102 | |||
| 103 | private: | 104 | private: |
| 104 | /// Tries to allocate a chunk of memory. | 105 | /// Tries to allocate a chunk of memory. |
| 105 | bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); | 106 | bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); |
| @@ -117,15 +118,12 @@ private: | |||
| 117 | /// Returns index to the fastest memory type compatible with the passed requirements. | 118 | /// Returns index to the fastest memory type compatible with the passed requirements. |
| 118 | std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; | 119 | std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const; |
| 119 | 120 | ||
| 120 | const Device& device; ///< Device handle. | 121 | const Device& device; ///< Device handle. |
| 121 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. | 122 | VmaAllocator allocator; ///< Vma allocator. |
| 122 | const bool export_allocations; ///< True when memory allocations have to be exported. | 123 | const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties. |
| 123 | std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. | 124 | std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations. |
| 124 | VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers | 125 | VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers |
| 125 | // and optimal images | 126 | // and optimal images |
| 126 | }; | 127 | }; |
| 127 | 128 | ||
| 128 | /// Returns true when a memory usage is guaranteed to be host visible. | ||
| 129 | bool IsHostVisible(MemoryUsage usage) noexcept; | ||
| 130 | |||
| 131 | } // namespace Vulkan | 129 | } // namespace Vulkan |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index 336f53700..2fa29793a 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp | |||
| @@ -12,6 +12,8 @@ | |||
| 12 | 12 | ||
| 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" | 13 | #include "video_core/vulkan_common/vulkan_wrapper.h" |
| 14 | 14 | ||
| 15 | #include <vk_mem_alloc.h> | ||
| 16 | |||
| 15 | namespace Vulkan::vk { | 17 | namespace Vulkan::vk { |
| 16 | 18 | ||
| 17 | namespace { | 19 | namespace { |
| @@ -257,7 +259,9 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept { | |||
| 257 | // These functions may fail to load depending on the enabled extensions. | 259 | // These functions may fail to load depending on the enabled extensions. |
| 258 | // Don't return a failure on these. | 260 | // Don't return a failure on these. |
| 259 | X(vkCreateDebugUtilsMessengerEXT); | 261 | X(vkCreateDebugUtilsMessengerEXT); |
| 262 | X(vkCreateDebugReportCallbackEXT); | ||
| 260 | X(vkDestroyDebugUtilsMessengerEXT); | 263 | X(vkDestroyDebugUtilsMessengerEXT); |
| 264 | X(vkDestroyDebugReportCallbackEXT); | ||
| 261 | X(vkDestroySurfaceKHR); | 265 | X(vkDestroySurfaceKHR); |
| 262 | X(vkGetPhysicalDeviceFeatures2); | 266 | X(vkGetPhysicalDeviceFeatures2); |
| 263 | X(vkGetPhysicalDeviceProperties2); | 267 | X(vkGetPhysicalDeviceProperties2); |
| @@ -479,6 +483,11 @@ void Destroy(VkInstance instance, VkDebugUtilsMessengerEXT handle, | |||
| 479 | dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); | 483 | dld.vkDestroyDebugUtilsMessengerEXT(instance, handle, nullptr); |
| 480 | } | 484 | } |
| 481 | 485 | ||
| 486 | void Destroy(VkInstance instance, VkDebugReportCallbackEXT handle, | ||
| 487 | const InstanceDispatch& dld) noexcept { | ||
| 488 | dld.vkDestroyDebugReportCallbackEXT(instance, handle, nullptr); | ||
| 489 | } | ||
| 490 | |||
| 482 | void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { | 491 | void Destroy(VkInstance instance, VkSurfaceKHR handle, const InstanceDispatch& dld) noexcept { |
| 483 | dld.vkDestroySurfaceKHR(instance, handle, nullptr); | 492 | dld.vkDestroySurfaceKHR(instance, handle, nullptr); |
| 484 | } | 493 | } |
| @@ -547,24 +556,47 @@ DebugUtilsMessenger Instance::CreateDebugUtilsMessenger( | |||
| 547 | return DebugUtilsMessenger(object, handle, *dld); | 556 | return DebugUtilsMessenger(object, handle, *dld); |
| 548 | } | 557 | } |
| 549 | 558 | ||
| 550 | void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | 559 | DebugReportCallback Instance::CreateDebugReportCallback( |
| 551 | Check(dld->vkBindBufferMemory(owner, handle, memory, offset)); | 560 | const VkDebugReportCallbackCreateInfoEXT& create_info) const { |
| 561 | VkDebugReportCallbackEXT object; | ||
| 562 | Check(dld->vkCreateDebugReportCallbackEXT(handle, &create_info, nullptr, &object)); | ||
| 563 | return DebugReportCallback(object, handle, *dld); | ||
| 552 | } | 564 | } |
| 553 | 565 | ||
| 554 | void Buffer::SetObjectNameEXT(const char* name) const { | 566 | void Image::SetObjectNameEXT(const char* name) const { |
| 555 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); | 567 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); |
| 556 | } | 568 | } |
| 557 | 569 | ||
| 558 | void BufferView::SetObjectNameEXT(const char* name) const { | 570 | void Image::Release() const noexcept { |
| 559 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); | 571 | if (handle) { |
| 572 | vmaDestroyImage(allocator, handle, allocation); | ||
| 573 | } | ||
| 560 | } | 574 | } |
| 561 | 575 | ||
| 562 | void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const { | 576 | void Buffer::Flush() const { |
| 563 | Check(dld->vkBindImageMemory(owner, handle, memory, offset)); | 577 | if (!is_coherent) { |
| 578 | vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE); | ||
| 579 | } | ||
| 564 | } | 580 | } |
| 565 | 581 | ||
| 566 | void Image::SetObjectNameEXT(const char* name) const { | 582 | void Buffer::Invalidate() const { |
| 567 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name); | 583 | if (!is_coherent) { |
| 584 | vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE); | ||
| 585 | } | ||
| 586 | } | ||
| 587 | |||
| 588 | void Buffer::SetObjectNameEXT(const char* name) const { | ||
| 589 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name); | ||
| 590 | } | ||
| 591 | |||
| 592 | void Buffer::Release() const noexcept { | ||
| 593 | if (handle) { | ||
| 594 | vmaDestroyBuffer(allocator, handle, allocation); | ||
| 595 | } | ||
| 596 | } | ||
| 597 | |||
| 598 | void BufferView::SetObjectNameEXT(const char* name) const { | ||
| 599 | SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name); | ||
| 568 | } | 600 | } |
| 569 | 601 | ||
| 570 | void ImageView::SetObjectNameEXT(const char* name) const { | 602 | void ImageView::SetObjectNameEXT(const char* name) const { |
| @@ -701,24 +733,12 @@ Queue Device::GetQueue(u32 family_index) const noexcept { | |||
| 701 | return Queue(queue, *dld); | 733 | return Queue(queue, *dld); |
| 702 | } | 734 | } |
| 703 | 735 | ||
| 704 | Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const { | ||
| 705 | VkBuffer object; | ||
| 706 | Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object)); | ||
| 707 | return Buffer(object, handle, *dld); | ||
| 708 | } | ||
| 709 | |||
| 710 | BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { | 736 | BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const { |
| 711 | VkBufferView object; | 737 | VkBufferView object; |
| 712 | Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); | 738 | Check(dld->vkCreateBufferView(handle, &ci, nullptr, &object)); |
| 713 | return BufferView(object, handle, *dld); | 739 | return BufferView(object, handle, *dld); |
| 714 | } | 740 | } |
| 715 | 741 | ||
| 716 | Image Device::CreateImage(const VkImageCreateInfo& ci) const { | ||
| 717 | VkImage object; | ||
| 718 | Check(dld->vkCreateImage(handle, &ci, nullptr, &object)); | ||
| 719 | return Image(object, handle, *dld); | ||
| 720 | } | ||
| 721 | |||
| 722 | ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { | 742 | ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const { |
| 723 | VkImageView object; | 743 | VkImageView object; |
| 724 | Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); | 744 | Check(dld->vkCreateImageView(handle, &ci, nullptr, &object)); |
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 4ff328a21..b5e70fcd4 100644 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h | |||
| @@ -32,6 +32,9 @@ | |||
| 32 | #pragma warning(disable : 26812) // Disable prefer enum class over enum | 32 | #pragma warning(disable : 26812) // Disable prefer enum class over enum |
| 33 | #endif | 33 | #endif |
| 34 | 34 | ||
| 35 | VK_DEFINE_HANDLE(VmaAllocator) | ||
| 36 | VK_DEFINE_HANDLE(VmaAllocation) | ||
| 37 | |||
| 35 | namespace Vulkan::vk { | 38 | namespace Vulkan::vk { |
| 36 | 39 | ||
| 37 | /** | 40 | /** |
| @@ -161,8 +164,10 @@ struct InstanceDispatch { | |||
| 161 | PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{}; | 164 | PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties{}; |
| 162 | 165 | ||
| 163 | PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{}; | 166 | PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT{}; |
| 167 | PFN_vkCreateDebugReportCallbackEXT vkCreateDebugReportCallbackEXT{}; | ||
| 164 | PFN_vkCreateDevice vkCreateDevice{}; | 168 | PFN_vkCreateDevice vkCreateDevice{}; |
| 165 | PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{}; | 169 | PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessengerEXT{}; |
| 170 | PFN_vkDestroyDebugReportCallbackEXT vkDestroyDebugReportCallbackEXT{}; | ||
| 166 | PFN_vkDestroyDevice vkDestroyDevice{}; | 171 | PFN_vkDestroyDevice vkDestroyDevice{}; |
| 167 | PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{}; | 172 | PFN_vkDestroySurfaceKHR vkDestroySurfaceKHR{}; |
| 168 | PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{}; | 173 | PFN_vkEnumerateDeviceExtensionProperties vkEnumerateDeviceExtensionProperties{}; |
| @@ -363,6 +368,7 @@ void Destroy(VkDevice, VkSwapchainKHR, const DeviceDispatch&) noexcept; | |||
| 363 | void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; | 368 | void Destroy(VkDevice, VkSemaphore, const DeviceDispatch&) noexcept; |
| 364 | void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; | 369 | void Destroy(VkDevice, VkShaderModule, const DeviceDispatch&) noexcept; |
| 365 | void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; | 370 | void Destroy(VkInstance, VkDebugUtilsMessengerEXT, const InstanceDispatch&) noexcept; |
| 371 | void Destroy(VkInstance, VkDebugReportCallbackEXT, const InstanceDispatch&) noexcept; | ||
| 366 | void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; | 372 | void Destroy(VkInstance, VkSurfaceKHR, const InstanceDispatch&) noexcept; |
| 367 | 373 | ||
| 368 | VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept; | 374 | VkResult Free(VkDevice, VkDescriptorPool, Span<VkDescriptorSet>, const DeviceDispatch&) noexcept; |
| @@ -578,6 +584,7 @@ private: | |||
| 578 | }; | 584 | }; |
| 579 | 585 | ||
| 580 | using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; | 586 | using DebugUtilsMessenger = Handle<VkDebugUtilsMessengerEXT, VkInstance, InstanceDispatch>; |
| 587 | using DebugReportCallback = Handle<VkDebugReportCallbackEXT, VkInstance, InstanceDispatch>; | ||
| 581 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; | 588 | using DescriptorSetLayout = Handle<VkDescriptorSetLayout, VkDevice, DeviceDispatch>; |
| 582 | using DescriptorUpdateTemplate = Handle<VkDescriptorUpdateTemplate, VkDevice, DeviceDispatch>; | 589 | using DescriptorUpdateTemplate = Handle<VkDescriptorUpdateTemplate, VkDevice, DeviceDispatch>; |
| 583 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; | 590 | using Pipeline = Handle<VkPipeline, VkDevice, DeviceDispatch>; |
| @@ -610,12 +617,149 @@ public: | |||
| 610 | DebugUtilsMessenger CreateDebugUtilsMessenger( | 617 | DebugUtilsMessenger CreateDebugUtilsMessenger( |
| 611 | const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; | 618 | const VkDebugUtilsMessengerCreateInfoEXT& create_info) const; |
| 612 | 619 | ||
| 620 | /// Creates a debug report callback. | ||
| 621 | /// @throw Exception on creation failure. | ||
| 622 | DebugReportCallback CreateDebugReportCallback( | ||
| 623 | const VkDebugReportCallbackCreateInfoEXT& create_info) const; | ||
| 624 | |||
| 613 | /// Returns dispatch table. | 625 | /// Returns dispatch table. |
| 614 | const InstanceDispatch& Dispatch() const noexcept { | 626 | const InstanceDispatch& Dispatch() const noexcept { |
| 615 | return *dld; | 627 | return *dld; |
| 616 | } | 628 | } |
| 617 | }; | 629 | }; |
| 618 | 630 | ||
| 631 | class Image { | ||
| 632 | public: | ||
| 633 | explicit Image(VkImage handle_, VkDevice owner_, VmaAllocator allocator_, | ||
| 634 | VmaAllocation allocation_, const DeviceDispatch& dld_) noexcept | ||
| 635 | : handle{handle_}, owner{owner_}, allocator{allocator_}, | ||
| 636 | allocation{allocation_}, dld{&dld_} {} | ||
| 637 | Image() = default; | ||
| 638 | |||
| 639 | Image(const Image&) = delete; | ||
| 640 | Image& operator=(const Image&) = delete; | ||
| 641 | |||
| 642 | Image(Image&& rhs) noexcept | ||
| 643 | : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator}, | ||
| 644 | allocation{rhs.allocation}, dld{rhs.dld} {} | ||
| 645 | |||
| 646 | Image& operator=(Image&& rhs) noexcept { | ||
| 647 | Release(); | ||
| 648 | handle = std::exchange(rhs.handle, nullptr); | ||
| 649 | owner = rhs.owner; | ||
| 650 | allocator = rhs.allocator; | ||
| 651 | allocation = rhs.allocation; | ||
| 652 | dld = rhs.dld; | ||
| 653 | return *this; | ||
| 654 | } | ||
| 655 | |||
| 656 | ~Image() noexcept { | ||
| 657 | Release(); | ||
| 658 | } | ||
| 659 | |||
| 660 | VkImage operator*() const noexcept { | ||
| 661 | return handle; | ||
| 662 | } | ||
| 663 | |||
| 664 | void reset() noexcept { | ||
| 665 | Release(); | ||
| 666 | handle = nullptr; | ||
| 667 | } | ||
| 668 | |||
| 669 | explicit operator bool() const noexcept { | ||
| 670 | return handle != nullptr; | ||
| 671 | } | ||
| 672 | |||
| 673 | void SetObjectNameEXT(const char* name) const; | ||
| 674 | |||
| 675 | private: | ||
| 676 | void Release() const noexcept; | ||
| 677 | |||
| 678 | VkImage handle = nullptr; | ||
| 679 | VkDevice owner = nullptr; | ||
| 680 | VmaAllocator allocator = nullptr; | ||
| 681 | VmaAllocation allocation = nullptr; | ||
| 682 | const DeviceDispatch* dld = nullptr; | ||
| 683 | }; | ||
| 684 | |||
| 685 | class Buffer { | ||
| 686 | public: | ||
| 687 | explicit Buffer(VkBuffer handle_, VkDevice owner_, VmaAllocator allocator_, | ||
| 688 | VmaAllocation allocation_, std::span<u8> mapped_, bool is_coherent_, | ||
| 689 | const DeviceDispatch& dld_) noexcept | ||
| 690 | : handle{handle_}, owner{owner_}, allocator{allocator_}, | ||
| 691 | allocation{allocation_}, mapped{mapped_}, is_coherent{is_coherent_}, dld{&dld_} {} | ||
| 692 | Buffer() = default; | ||
| 693 | |||
| 694 | Buffer(const Buffer&) = delete; | ||
| 695 | Buffer& operator=(const Buffer&) = delete; | ||
| 696 | |||
| 697 | Buffer(Buffer&& rhs) noexcept | ||
| 698 | : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator}, | ||
| 699 | allocation{rhs.allocation}, mapped{rhs.mapped}, | ||
| 700 | is_coherent{rhs.is_coherent}, dld{rhs.dld} {} | ||
| 701 | |||
| 702 | Buffer& operator=(Buffer&& rhs) noexcept { | ||
| 703 | Release(); | ||
| 704 | handle = std::exchange(rhs.handle, nullptr); | ||
| 705 | owner = rhs.owner; | ||
| 706 | allocator = rhs.allocator; | ||
| 707 | allocation = rhs.allocation; | ||
| 708 | mapped = rhs.mapped; | ||
| 709 | is_coherent = rhs.is_coherent; | ||
| 710 | dld = rhs.dld; | ||
| 711 | return *this; | ||
| 712 | } | ||
| 713 | |||
| 714 | ~Buffer() noexcept { | ||
| 715 | Release(); | ||
| 716 | } | ||
| 717 | |||
| 718 | VkBuffer operator*() const noexcept { | ||
| 719 | return handle; | ||
| 720 | } | ||
| 721 | |||
| 722 | void reset() noexcept { | ||
| 723 | Release(); | ||
| 724 | handle = nullptr; | ||
| 725 | } | ||
| 726 | |||
| 727 | explicit operator bool() const noexcept { | ||
| 728 | return handle != nullptr; | ||
| 729 | } | ||
| 730 | |||
| 731 | /// Returns the host mapped memory, an empty span otherwise. | ||
| 732 | std::span<u8> Mapped() noexcept { | ||
| 733 | return mapped; | ||
| 734 | } | ||
| 735 | |||
| 736 | std::span<const u8> Mapped() const noexcept { | ||
| 737 | return mapped; | ||
| 738 | } | ||
| 739 | |||
| 740 | /// Returns true if the buffer is mapped to the host. | ||
| 741 | bool IsHostVisible() const noexcept { | ||
| 742 | return !mapped.empty(); | ||
| 743 | } | ||
| 744 | |||
| 745 | void Flush() const; | ||
| 746 | |||
| 747 | void Invalidate() const; | ||
| 748 | |||
| 749 | void SetObjectNameEXT(const char* name) const; | ||
| 750 | |||
| 751 | private: | ||
| 752 | void Release() const noexcept; | ||
| 753 | |||
| 754 | VkBuffer handle = nullptr; | ||
| 755 | VkDevice owner = nullptr; | ||
| 756 | VmaAllocator allocator = nullptr; | ||
| 757 | VmaAllocation allocation = nullptr; | ||
| 758 | std::span<u8> mapped = {}; | ||
| 759 | bool is_coherent = false; | ||
| 760 | const DeviceDispatch* dld = nullptr; | ||
| 761 | }; | ||
| 762 | |||
| 619 | class Queue { | 763 | class Queue { |
| 620 | public: | 764 | public: |
| 621 | /// Construct an empty queue handle. | 765 | /// Construct an empty queue handle. |
| @@ -639,17 +783,6 @@ private: | |||
| 639 | const DeviceDispatch* dld = nullptr; | 783 | const DeviceDispatch* dld = nullptr; |
| 640 | }; | 784 | }; |
| 641 | 785 | ||
| 642 | class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> { | ||
| 643 | using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle; | ||
| 644 | |||
| 645 | public: | ||
| 646 | /// Attaches a memory allocation. | ||
| 647 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; | ||
| 648 | |||
| 649 | /// Set object name. | ||
| 650 | void SetObjectNameEXT(const char* name) const; | ||
| 651 | }; | ||
| 652 | |||
| 653 | class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { | 786 | class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> { |
| 654 | using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; | 787 | using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle; |
| 655 | 788 | ||
| @@ -658,17 +791,6 @@ public: | |||
| 658 | void SetObjectNameEXT(const char* name) const; | 791 | void SetObjectNameEXT(const char* name) const; |
| 659 | }; | 792 | }; |
| 660 | 793 | ||
| 661 | class Image : public Handle<VkImage, VkDevice, DeviceDispatch> { | ||
| 662 | using Handle<VkImage, VkDevice, DeviceDispatch>::Handle; | ||
| 663 | |||
| 664 | public: | ||
| 665 | /// Attaches a memory allocation. | ||
| 666 | void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const; | ||
| 667 | |||
| 668 | /// Set object name. | ||
| 669 | void SetObjectNameEXT(const char* name) const; | ||
| 670 | }; | ||
| 671 | |||
| 672 | class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { | 794 | class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> { |
| 673 | using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; | 795 | using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle; |
| 674 | 796 | ||
| @@ -840,12 +962,8 @@ public: | |||
| 840 | 962 | ||
| 841 | Queue GetQueue(u32 family_index) const noexcept; | 963 | Queue GetQueue(u32 family_index) const noexcept; |
| 842 | 964 | ||
| 843 | Buffer CreateBuffer(const VkBufferCreateInfo& ci) const; | ||
| 844 | |||
| 845 | BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; | 965 | BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const; |
| 846 | 966 | ||
| 847 | Image CreateImage(const VkImageCreateInfo& ci) const; | ||
| 848 | |||
| 849 | ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; | 967 | ImageView CreateImageView(const VkImageViewCreateInfo& ci) const; |
| 850 | 968 | ||
| 851 | Semaphore CreateSemaphore() const; | 969 | Semaphore CreateSemaphore() const; |