Diffstat (limited to 'src/video_core')
21 files changed, 137 insertions, 107 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 45977d578..58a45ab67 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h | |||
| @@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||
| 207 | if (has_new_downloads) { | 207 | if (has_new_downloads) { |
| 208 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); | 208 | memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); |
| 209 | } | 209 | } |
| 210 | tmp_buffer.resize(amount); | 210 | tmp_buffer.resize_destructive(amount); |
| 211 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | 211 | cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); |
| 212 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); | 212 | cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); |
| 213 | return true; | 213 | return true; |
| @@ -1279,7 +1279,7 @@ template <class P> | |||
| 1279 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, | 1279 | typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, |
| 1280 | u32 wanted_size) { | 1280 | u32 wanted_size) { |
| 1281 | static constexpr int STREAM_LEAP_THRESHOLD = 16; | 1281 | static constexpr int STREAM_LEAP_THRESHOLD = 16; |
| 1282 | std::vector<BufferId> overlap_ids; | 1282 | boost::container::small_vector<BufferId, 16> overlap_ids; |
| 1283 | VAddr begin = cpu_addr; | 1283 | VAddr begin = cpu_addr; |
| 1284 | VAddr end = cpu_addr + wanted_size; | 1284 | VAddr end = cpu_addr + wanted_size; |
| 1285 | int stream_score = 0; | 1285 | int stream_score = 0; |
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 63a120f7a..fe6068cfe 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h | |||
| @@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf | |||
| 229 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; | 229 | using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; |
| 230 | 230 | ||
| 231 | struct OverlapResult { | 231 | struct OverlapResult { |
| 232 | std::vector<BufferId> ids; | 232 | boost::container::small_vector<BufferId, 16> ids; |
| 233 | VAddr begin; | 233 | VAddr begin; |
| 234 | VAddr end; | 234 | VAddr end; |
| 235 | bool has_stream_leap = false; | 235 | bool has_stream_leap = false; |
| @@ -582,7 +582,7 @@ private: | |||
| 582 | BufferId inline_buffer_id; | 582 | BufferId inline_buffer_id; |
| 583 | 583 | ||
| 584 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; | 584 | std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; |
| 585 | std::vector<u8> tmp_buffer; | 585 | Common::ScratchBuffer<u8> tmp_buffer; |
| 586 | }; | 586 | }; |
| 587 | 587 | ||
| 588 | } // namespace VideoCommon | 588 | } // namespace VideoCommon |
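The buffer_cache hunks above replace a per-call std::vector&lt;u8&gt; with one Common::ScratchBuffer&lt;u8&gt; member that is reused across DMA copies via resize_destructive(). A minimal standalone sketch of that pattern, assuming ScratchBuffer behaves like yuzu's common/scratch_buffer.h (resize_destructive() only reallocates once the request outgrows the current capacity and neither preserves nor value-initializes contents, unlike std::vector::resize()); ScratchBytes below is a simplified stand-in, not the real class:

    #include <cstddef>
    #include <cstring>
    #include <memory>

    // Hypothetical, simplified stand-in for Common::ScratchBuffer<u8>.
    class ScratchBytes {
    public:
        void resize_destructive(std::size_t new_size) {
            if (new_size > capacity) {
                // Grow without copying old contents and without zero-filling.
                buffer = std::make_unique_for_overwrite<unsigned char[]>(new_size);
                capacity = new_size;
            }
            size = new_size;
        }
        unsigned char* data() { return buffer.get(); }
        std::size_t size_bytes() const { return size; }

    private:
        std::unique_ptr<unsigned char[]> buffer;
        std::size_t capacity = 0;
        std::size_t size = 0;
    };

    // Mirrors the shape of BufferCache<P>::DMACopy: the member scratch buffer is
    // reused, so steady-state copies do no allocation and no redundant zeroing.
    void CopyThroughScratch(ScratchBytes& tmp, const unsigned char* src,
                            unsigned char* dst, std::size_t amount) {
        tmp.resize_destructive(amount);
        std::memcpy(tmp.data(), src, amount);
        std::memcpy(dst, tmp.data(), amount);
    }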
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index 83112dfce..7d660af47 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h | |||
| @@ -63,7 +63,6 @@ struct ChCommand { | |||
| 63 | }; | 63 | }; |
| 64 | 64 | ||
| 65 | using ChCommandHeaderList = std::vector<ChCommandHeader>; | 65 | using ChCommandHeaderList = std::vector<ChCommandHeader>; |
| 66 | using ChCommandList = std::vector<ChCommand>; | ||
| 67 | 66 | ||
| 68 | struct ThiRegisters { | 67 | struct ThiRegisters { |
| 69 | u32_le increment_syncpt{}; | 68 | u32_le increment_syncpt{}; |
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 1cdb690ed..8a2784cdc 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include <boost/container/small_vector.hpp> | ||
| 9 | #include <queue> | 10 | #include <queue> |
| 10 | 11 | ||
| 11 | #include "common/bit_field.h" | 12 | #include "common/bit_field.h" |
| @@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub | |||
| 102 | struct CommandList final { | 103 | struct CommandList final { |
| 103 | CommandList() = default; | 104 | CommandList() = default; |
| 104 | explicit CommandList(std::size_t size) : command_lists(size) {} | 105 | explicit CommandList(std::size_t size) : command_lists(size) {} |
| 105 | explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) | 106 | explicit CommandList( |
| 107 | boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_) | ||
| 106 | : prefetch_command_list{std::move(prefetch_command_list_)} {} | 108 | : prefetch_command_list{std::move(prefetch_command_list_)} {} |
| 107 | 109 | ||
| 108 | std::vector<CommandListHeader> command_lists; | 110 | boost::container::small_vector<CommandListHeader, 512> command_lists; |
| 109 | std::vector<CommandHeader> prefetch_command_list; | 111 | boost::container::small_vector<CommandHeader, 512> prefetch_command_list; |
| 110 | }; | 112 | }; |
| 111 | 113 | ||
| 112 | /** | 114 | /** |
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index ebe5536de..bc1eb41e7 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp | |||
| @@ -108,9 +108,11 @@ void MaxwellDMA::Launch() { | |||
| 108 | if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { | 108 | if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { |
| 109 | ASSERT(regs.remap_const.component_size_minus_one == 3); | 109 | ASSERT(regs.remap_const.component_size_minus_one == 3); |
| 110 | accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); | 110 | accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); |
| 111 | std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); | 111 | read_buffer.resize_destructive(regs.line_length_in * sizeof(u32)); |
| 112 | std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in); | ||
| 113 | std::ranges::fill(span, regs.remap_consta_value); | ||
| 112 | memory_manager.WriteBlockUnsafe(regs.offset_out, | 114 | memory_manager.WriteBlockUnsafe(regs.offset_out, |
| 113 | reinterpret_cast<u8*>(tmp_buffer.data()), | 115 | reinterpret_cast<u8*>(read_buffer.data()), |
| 114 | regs.line_length_in * sizeof(u32)); | 116 | regs.line_length_in * sizeof(u32)); |
| 115 | } else { | 117 | } else { |
| 116 | memory_manager.FlushCaching(); | 118 | memory_manager.FlushCaching(); |
| @@ -126,32 +128,32 @@ void MaxwellDMA::Launch() { | |||
| 126 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 128 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| 127 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); | 129 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); |
| 128 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 130 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 129 | std::vector<u8> tmp_buffer(16); | 131 | read_buffer.resize_destructive(16); |
| 130 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 132 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 131 | memory_manager.ReadBlockUnsafe( | 133 | memory_manager.ReadBlockUnsafe( |
| 132 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), | 134 | convert_linear_2_blocklinear_addr(regs.offset_in + offset), |
| 133 | tmp_buffer.data(), tmp_buffer.size()); | 135 | read_buffer.data(), read_buffer.size()); |
| 134 | memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), | 136 | memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), |
| 135 | tmp_buffer.size()); | 137 | read_buffer.size()); |
| 136 | } | 138 | } |
| 137 | } else if (is_src_pitch && !is_dst_pitch) { | 139 | } else if (is_src_pitch && !is_dst_pitch) { |
| 138 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); | 140 | UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); |
| 139 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); | 141 | UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); |
| 140 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); | 142 | UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); |
| 141 | std::vector<u8> tmp_buffer(16); | 143 | read_buffer.resize_destructive(16); |
| 142 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { | 144 | for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { |
| 143 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), | 145 | memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), |
| 144 | tmp_buffer.size()); | 146 | read_buffer.size()); |
| 145 | memory_manager.WriteBlockCached( | 147 | memory_manager.WriteBlockCached( |
| 146 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), | 148 | convert_linear_2_blocklinear_addr(regs.offset_out + offset), |
| 147 | tmp_buffer.data(), tmp_buffer.size()); | 149 | read_buffer.data(), read_buffer.size()); |
| 148 | } | 150 | } |
| 149 | } else { | 151 | } else { |
| 150 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | 152 | if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { |
| 151 | std::vector<u8> tmp_buffer(regs.line_length_in); | 153 | read_buffer.resize_destructive(regs.line_length_in); |
| 152 | memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), | 154 | memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), |
| 153 | regs.line_length_in); | 155 | regs.line_length_in); |
| 154 | memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), | 156 | memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), |
| 155 | regs.line_length_in); | 157 | regs.line_length_in); |
| 156 | } | 158 | } |
| 157 | } | 159 | } |
| @@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 171 | src_operand.address = regs.offset_in; | 173 | src_operand.address = regs.offset_in; |
| 172 | 174 | ||
| 173 | DMA::BufferOperand dst_operand; | 175 | DMA::BufferOperand dst_operand; |
| 174 | dst_operand.pitch = regs.pitch_out; | 176 | u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out)); |
| 177 | dst_operand.pitch = abs_pitch_out; | ||
| 175 | dst_operand.width = regs.line_length_in; | 178 | dst_operand.width = regs.line_length_in; |
| 176 | dst_operand.height = regs.line_count; | 179 | dst_operand.height = regs.line_count; |
| 177 | dst_operand.address = regs.offset_out; | 180 | dst_operand.address = regs.offset_out; |
| @@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 218 | const size_t src_size = | 221 | const size_t src_size = |
| 219 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | 222 | CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |
| 220 | 223 | ||
| 221 | const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | 224 | const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count; |
| 222 | read_buffer.resize_destructive(src_size); | 225 | read_buffer.resize_destructive(src_size); |
| 223 | write_buffer.resize_destructive(dst_size); | 226 | write_buffer.resize_destructive(dst_size); |
| 224 | 227 | ||
| @@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | |||
| 227 | 230 | ||
| 228 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | 231 | UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |
| 229 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | 232 | src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |
| 230 | regs.pitch_out); | 233 | abs_pitch_out); |
| 231 | 234 | ||
| 232 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | 235 | memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |
| 233 | } | 236 | } |
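In the maxwell_dma hunks above, the remap-constant path reuses the engine's byte scratch buffer and fills it through a std::span&lt;u32&gt; view instead of constructing a fresh std::vector&lt;u32&gt; every launch, and the pitch-linear path takes std::abs of regs.pitch_out so a negative (bottom-up) pitch still produces a valid destination size. A reduced sketch of the fill pattern, with a plain std::vector standing in for the reused Common::ScratchBuffer and illustrative names throughout:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <span>
    #include <vector>

    // Fill line_length 32-bit words with a constant inside a reusable byte
    // buffer; the caller then writes scratch.data()/scratch.size() out in one
    // block. The reinterpret_cast mirrors what the engine does with read_buffer.
    void FillRemapConstant(std::vector<std::uint8_t>& scratch,
                           std::uint32_t constant, std::size_t line_length) {
        scratch.resize(line_length * sizeof(std::uint32_t));
        std::span<std::uint32_t> words(
            reinterpret_cast<std::uint32_t*>(scratch.data()), line_length);
        std::ranges::fill(words, constant);
        // memory_manager.WriteBlockUnsafe(offset_out, scratch.data(), scratch.size());
    }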
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index 6ce179167..ce827eb6c 100644 --- a/src/video_core/host1x/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp | |||
| @@ -4,6 +4,7 @@ | |||
| 4 | #include <array> | 4 | #include <array> |
| 5 | #include <bit> | 5 | #include <bit> |
| 6 | 6 | ||
| 7 | #include "common/scratch_buffer.h" | ||
| 7 | #include "common/settings.h" | 8 | #include "common/settings.h" |
| 8 | #include "video_core/host1x/codecs/h264.h" | 9 | #include "video_core/host1x/codecs/h264.h" |
| 9 | #include "video_core/host1x/host1x.h" | 10 | #include "video_core/host1x/host1x.h" |
| @@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) { | |||
| 188 | } | 189 | } |
| 189 | 190 | ||
| 190 | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { | 191 | void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { |
| 191 | std::vector<u8> scan(count); | 192 | static Common::ScratchBuffer<u8> scan{}; |
| 193 | scan.resize_destructive(count); | ||
| 192 | if (count == 16) { | 194 | if (count == 16) { |
| 193 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); | 195 | std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); |
| 194 | } else { | 196 | } else { |
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index b2f7e160a..45141e488 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp | |||
| @@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, | |||
| 587 | 587 | ||
| 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, | 588 | void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, |
| 589 | VideoCommon::CacheType which) { | 589 | VideoCommon::CacheType which) { |
| 590 | std::vector<u8> tmp_buffer(size); | 590 | tmp_buffer.resize_destructive(size); |
| 591 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); | 591 | ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); |
| 592 | 592 | ||
| 593 | // The output block must be flushed in case it has data modified from the GPU. | 593 | // The output block must be flushed in case it has data modified from the GPU. |
| @@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons | |||
| 670 | return result; | 670 | return result; |
| 671 | } | 671 | } |
| 672 | 672 | ||
| 673 | std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | 673 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> |
| 674 | GPUVAddr gpu_addr, std::size_t size) const { | 674 | MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const { |
| 675 | std::vector<std::pair<GPUVAddr, std::size_t>> result{}; | 675 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{}; |
| 676 | GetSubmappedRangeImpl<true>(gpu_addr, size, result); | 676 | GetSubmappedRangeImpl<true>(gpu_addr, size, result); |
| 677 | return result; | 677 | return result; |
| 678 | } | 678 | } |
| @@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( | |||
| 680 | template <bool is_gpu_address> | 680 | template <bool is_gpu_address> |
| 681 | void MemoryManager::GetSubmappedRangeImpl( | 681 | void MemoryManager::GetSubmappedRangeImpl( |
| 682 | GPUVAddr gpu_addr, std::size_t size, | 682 | GPUVAddr gpu_addr, std::size_t size, |
| 683 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | 683 | boost::container::small_vector< |
| 684 | result) const { | 684 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result) |
| 685 | const { | ||
| 685 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> | 686 | std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> |
| 686 | last_segment{}; | 687 | last_segment{}; |
| 687 | std::optional<VAddr> old_page_addr{}; | 688 | std::optional<VAddr> old_page_addr{}; |
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 794535122..4202c26ff 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h | |||
| @@ -8,10 +8,12 @@ | |||
| 8 | #include <mutex> | 8 | #include <mutex> |
| 9 | #include <optional> | 9 | #include <optional> |
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | #include <boost/container/small_vector.hpp> | ||
| 11 | 12 | ||
| 12 | #include "common/common_types.h" | 13 | #include "common/common_types.h" |
| 13 | #include "common/multi_level_page_table.h" | 14 | #include "common/multi_level_page_table.h" |
| 14 | #include "common/range_map.h" | 15 | #include "common/range_map.h" |
| 16 | #include "common/scratch_buffer.h" | ||
| 15 | #include "common/virtual_buffer.h" | 17 | #include "common/virtual_buffer.h" |
| 16 | #include "video_core/cache_types.h" | 18 | #include "video_core/cache_types.h" |
| 17 | #include "video_core/pte_kind.h" | 19 | #include "video_core/pte_kind.h" |
| @@ -107,8 +109,8 @@ public: | |||
| 107 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty | 109 | * if the region is continuous, a single pair will be returned. If it's unmapped, an empty |
| 108 | * vector will be returned; | 110 | * vector will be returned; |
| 109 | */ | 111 | */ |
| 110 | std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, | 112 | boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange( |
| 111 | std::size_t size) const; | 113 | GPUVAddr gpu_addr, std::size_t size) const; |
| 112 | 114 | ||
| 113 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, | 115 | GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, |
| 114 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); | 116 | PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); |
| @@ -165,7 +167,8 @@ private: | |||
| 165 | template <bool is_gpu_address> | 167 | template <bool is_gpu_address> |
| 166 | void GetSubmappedRangeImpl( | 168 | void GetSubmappedRangeImpl( |
| 167 | GPUVAddr gpu_addr, std::size_t size, | 169 | GPUVAddr gpu_addr, std::size_t size, |
| 168 | std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& | 170 | boost::container::small_vector< |
| 171 | std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& | ||
| 169 | result) const; | 172 | result) const; |
| 170 | 173 | ||
| 171 | Core::System& system; | 174 | Core::System& system; |
| @@ -215,8 +218,8 @@ private: | |||
| 215 | Common::VirtualBuffer<u32> big_page_table_cpu; | 218 | Common::VirtualBuffer<u32> big_page_table_cpu; |
| 216 | 219 | ||
| 217 | std::vector<u64> big_page_continuous; | 220 | std::vector<u64> big_page_continuous; |
| 218 | std::vector<std::pair<VAddr, std::size_t>> page_stash{}; | 221 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{}; |
| 219 | std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; | 222 | boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{}; |
| 220 | 223 | ||
| 221 | mutable std::mutex guard; | 224 | mutable std::mutex guard; |
| 222 | 225 | ||
| @@ -226,6 +229,8 @@ private: | |||
| 226 | std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; | 229 | std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; |
| 227 | 230 | ||
| 228 | static std::atomic<size_t> unique_identifier_generator; | 231 | static std::atomic<size_t> unique_identifier_generator; |
| 232 | |||
| 233 | Common::ScratchBuffer<u8> tmp_buffer; | ||
| 229 | }; | 234 | }; |
| 230 | 235 | ||
| 231 | } // namespace Tegra | 236 | } // namespace Tegra |
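Most of the remaining hunks swap std::vector for boost::container::small_vector&lt;T, N&gt;, which stores up to N elements inside the object itself and only heap-allocates past that, so the typically small result sets here (submapped ranges, page stashes, copy lists) avoid allocation entirely. A short self-contained illustration of the inline-capacity behaviour; the element type and the count of 32 simply echo the MemoryManager hunks above:

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <boost/container/small_vector.hpp>

    int main() {
        // Up to 32 pairs live in the object's internal storage; element 33 would
        // trigger the first heap allocation.
        boost::container::small_vector<std::pair<std::uint64_t, std::size_t>, 32> ranges;
        for (std::uint64_t i = 0; i < 4; ++i) {
            ranges.emplace_back(i << 16, std::size_t{0x1000});
        }
        std::cout << "entries: " << ranges.size()
                  << ", capacity without allocating: " << ranges.capacity() << '\n';
        return 0;
    }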
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3f077311e..0329ed820 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp | |||
| @@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key, | |||
| 85 | case Shader::Stage::VertexB: | 85 | case Shader::Stage::VertexB: |
| 86 | case Shader::Stage::Geometry: | 86 | case Shader::Stage::Geometry: |
| 87 | if (!use_assembly_shaders && key.xfb_enabled != 0) { | 87 | if (!use_assembly_shaders && key.xfb_enabled != 0) { |
| 88 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); | 88 | auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); |
| 89 | info.xfb_varyings = varyings; | ||
| 90 | info.xfb_count = count; | ||
| 89 | } | 91 | } |
| 90 | break; | 92 | break; |
| 91 | case Shader::Stage::TessellationEval: | 93 | case Shader::Stage::TessellationEval: |
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index e30fcb1ed..f47301ad5 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp | |||
| @@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer, | |||
| 361 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | 361 | .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, |
| 362 | }; | 362 | }; |
| 363 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up | 363 | // Measuring a popular game, this number never exceeds the specified size once data is warmed up |
| 364 | boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); | 364 | boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size()); |
| 365 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); | 365 | std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); |
| 366 | scheduler.RequestOutsideRenderPassOperationContext(); | 366 | scheduler.RequestOutsideRenderPassOperationContext(); |
| 367 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { | 367 | scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { |
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index a2cfb2105..9f316113c 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | |||
| @@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program | |||
| 167 | info.fixed_state_point_size = point_size; | 167 | info.fixed_state_point_size = point_size; |
| 168 | } | 168 | } |
| 169 | if (key.state.xfb_enabled) { | 169 | if (key.state.xfb_enabled) { |
| 170 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | 170 | auto [varyings, count] = |
| 171 | VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 172 | info.xfb_varyings = varyings; | ||
| 173 | info.xfb_count = count; | ||
| 171 | } | 174 | } |
| 172 | info.convert_depth_mode = gl_ndc; | 175 | info.convert_depth_mode = gl_ndc; |
| 173 | } | 176 | } |
| @@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program | |||
| 214 | info.fixed_state_point_size = point_size; | 217 | info.fixed_state_point_size = point_size; |
| 215 | } | 218 | } |
| 216 | if (key.state.xfb_enabled != 0) { | 219 | if (key.state.xfb_enabled != 0) { |
| 217 | info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | 220 | auto [varyings, count] = |
| 221 | VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); | ||
| 222 | info.xfb_varyings = varyings; | ||
| 223 | info.xfb_count = count; | ||
| 218 | } | 224 | } |
| 219 | info.convert_depth_mode = gl_ndc; | 225 | info.convert_depth_mode = gl_ndc; |
| 220 | break; | 226 | break; |
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index f025f618b..f3cef09dd 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp | |||
| @@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 330 | }; | 330 | }; |
| 331 | } | 331 | } |
| 332 | 332 | ||
| 333 | [[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( | 333 | [[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16> |
| 334 | std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { | 334 | TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { |
| 335 | std::vector<VkBufferCopy> result(copies.size()); | 335 | boost::container::small_vector<VkBufferCopy, 16> result(copies.size()); |
| 336 | std::ranges::transform( | 336 | std::ranges::transform( |
| 337 | copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { | 337 | copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { |
| 338 | return VkBufferCopy{ | 338 | return VkBufferCopy{ |
| @@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 344 | return result; | 344 | return result; |
| 345 | } | 345 | } |
| 346 | 346 | ||
| 347 | [[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( | 347 | [[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies( |
| 348 | std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { | 348 | std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { |
| 349 | struct Maker { | 349 | struct Maker { |
| 350 | VkBufferImageCopy operator()(const BufferImageCopy& copy) const { | 350 | VkBufferImageCopy operator()(const BufferImageCopy& copy) const { |
| @@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) { | |||
| 377 | VkImageAspectFlags aspect_mask; | 377 | VkImageAspectFlags aspect_mask; |
| 378 | }; | 378 | }; |
| 379 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { | 379 | if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { |
| 380 | std::vector<VkBufferImageCopy> result(copies.size() * 2); | 380 | boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2); |
| 381 | std::ranges::transform(copies, result.begin(), | 381 | std::ranges::transform(copies, result.begin(), |
| 382 | Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); | 382 | Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); |
| 383 | std::ranges::transform(copies, result.begin() + copies.size(), | 383 | std::ranges::transform(copies, result.begin() + copies.size(), |
| 384 | Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); | 384 | Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); |
| 385 | return result; | 385 | return result; |
| 386 | } else { | 386 | } else { |
| 387 | std::vector<VkBufferImageCopy> result(copies.size()); | 387 | boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size()); |
| 388 | std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); | 388 | std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); |
| 389 | return result; | 389 | return result; |
| 390 | } | 390 | } |
| @@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() { | |||
| 867 | 867 | ||
| 868 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | 868 | void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, |
| 869 | std::span<const VideoCommon::ImageCopy> copies) { | 869 | std::span<const VideoCommon::ImageCopy> copies) { |
| 870 | std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); | 870 | boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size()); |
| 871 | std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); | 871 | boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size()); |
| 872 | const VkImageAspectFlags src_aspect_mask = src.AspectMask(); | 872 | const VkImageAspectFlags src_aspect_mask = src.AspectMask(); |
| 873 | const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); | 873 | const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); |
| 874 | 874 | ||
| @@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im | |||
| 1157 | 1157 | ||
| 1158 | void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | 1158 | void TextureCacheRuntime::CopyImage(Image& dst, Image& src, |
| 1159 | std::span<const VideoCommon::ImageCopy> copies) { | 1159 | std::span<const VideoCommon::ImageCopy> copies) { |
| 1160 | std::vector<VkImageCopy> vk_copies(copies.size()); | 1160 | boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size()); |
| 1161 | const VkImageAspectFlags aspect_mask = dst.AspectMask(); | 1161 | const VkImageAspectFlags aspect_mask = dst.AspectMask(); |
| 1162 | ASSERT(aspect_mask == src.AspectMask()); | 1162 | ASSERT(aspect_mask == src.AspectMask()); |
| 1163 | 1163 | ||
| @@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, | |||
| 1332 | ScaleDown(true); | 1332 | ScaleDown(true); |
| 1333 | } | 1333 | } |
| 1334 | scheduler->RequestOutsideRenderPassOperationContext(); | 1334 | scheduler->RequestOutsideRenderPassOperationContext(); |
| 1335 | std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); | 1335 | auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); |
| 1336 | const VkBuffer src_buffer = buffer; | 1336 | const VkBuffer src_buffer = buffer; |
| 1337 | const VkImage vk_image = *original_image; | 1337 | const VkImage vk_image = *original_image; |
| 1338 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; | 1338 | const VkImageAspectFlags vk_aspect_mask = aspect_mask; |
| @@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS | |||
| 1367 | if (is_rescaled) { | 1367 | if (is_rescaled) { |
| 1368 | ScaleDown(); | 1368 | ScaleDown(); |
| 1369 | } | 1369 | } |
| 1370 | boost::container::small_vector<VkBuffer, 1> buffers_vector{}; | 1370 | boost::container::small_vector<VkBuffer, 8> buffers_vector{}; |
| 1371 | boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; | 1371 | boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8> |
| 1372 | vk_copies; | ||
| 1372 | for (size_t index = 0; index < buffers_span.size(); index++) { | 1373 | for (size_t index = 0; index < buffers_span.size(); index++) { |
| 1373 | buffers_vector.emplace_back(buffers_span[index]); | 1374 | buffers_vector.emplace_back(buffers_span[index]); |
| 1374 | vk_copies.emplace_back( | 1375 | vk_copies.emplace_back( |
| @@ -1858,7 +1859,7 @@ Framebuffer::~Framebuffer() = default; | |||
| 1858 | void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, | 1859 | void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, |
| 1859 | std::span<ImageView*, NUM_RT> color_buffers, | 1860 | std::span<ImageView*, NUM_RT> color_buffers, |
| 1860 | ImageView* depth_buffer, bool is_rescaled) { | 1861 | ImageView* depth_buffer, bool is_rescaled) { |
| 1861 | std::vector<VkImageView> attachments; | 1862 | boost::container::small_vector<VkImageView, NUM_RT + 1> attachments; |
| 1862 | RenderPassKey renderpass_key{}; | 1863 | RenderPassKey renderpass_key{}; |
| 1863 | s32 num_layers = 1; | 1864 | s32 num_layers = 1; |
| 1864 | 1865 | ||
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp index c5213875b..4db948b6d 100644 --- a/src/video_core/shader_cache.cpp +++ b/src/video_core/shader_cache.cpp | |||
| @@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() { | |||
| 151 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), | 151 | marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), |
| 152 | marked_for_removal.end()); | 152 | marked_for_removal.end()); |
| 153 | 153 | ||
| 154 | std::vector<ShaderInfo*> removed_shaders; | 154 | boost::container::small_vector<ShaderInfo*, 16> removed_shaders; |
| 155 | removed_shaders.reserve(marked_for_removal.size()); | ||
| 156 | 155 | ||
| 157 | std::scoped_lock lock{lookup_mutex}; | 156 | std::scoped_lock lock{lookup_mutex}; |
| 158 | |||
| 159 | for (Entry* const entry : marked_for_removal) { | 157 | for (Entry* const entry : marked_for_removal) { |
| 160 | removed_shaders.push_back(entry->data); | 158 | removed_shaders.push_back(entry->data); |
| 161 | 159 | ||
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 1b8a17ee8..55d49d017 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <array> | 6 | #include <array> |
| 7 | #include <optional> | 7 | #include <optional> |
| 8 | #include <vector> | 8 | #include <vector> |
| 9 | #include <boost/container/small_vector.hpp> | ||
| 9 | 10 | ||
| 10 | #include "common/common_funcs.h" | 11 | #include "common/common_funcs.h" |
| 11 | #include "common/common_types.h" | 12 | #include "common/common_types.h" |
| @@ -108,8 +109,8 @@ struct ImageBase { | |||
| 108 | std::vector<ImageViewInfo> image_view_infos; | 109 | std::vector<ImageViewInfo> image_view_infos; |
| 109 | std::vector<ImageViewId> image_view_ids; | 110 | std::vector<ImageViewId> image_view_ids; |
| 110 | 111 | ||
| 111 | std::vector<u32> slice_offsets; | 112 | boost::container::small_vector<u32, 16> slice_offsets; |
| 112 | std::vector<SubresourceBase> slice_subresources; | 113 | boost::container::small_vector<SubresourceBase, 16> slice_subresources; |
| 113 | 114 | ||
| 114 | std::vector<AliasedImage> aliased_images; | 115 | std::vector<AliasedImage> aliased_images; |
| 115 | std::vector<ImageId> overlapping_images; | 116 | std::vector<ImageId> overlapping_images; |
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d58bb69ff..d3f03a995 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h | |||
| @@ -526,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { | |||
| 526 | 526 | ||
| 527 | template <class P> | 527 | template <class P> |
| 528 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { | 528 | void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { |
| 529 | std::vector<ImageId> images; | 529 | boost::container::small_vector<ImageId, 16> images; |
| 530 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { | 530 | ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { |
| 531 | if (!image.IsSafeDownload()) { | 531 | if (!image.IsSafeDownload()) { |
| 532 | return; | 532 | return; |
| @@ -579,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V | |||
| 579 | 579 | ||
| 580 | template <class P> | 580 | template <class P> |
| 581 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | 581 | void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { |
| 582 | std::vector<ImageId> deleted_images; | 582 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 583 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 583 | ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 584 | for (const ImageId id : deleted_images) { | 584 | for (const ImageId id : deleted_images) { |
| 585 | Image& image = slot_images[id]; | 585 | Image& image = slot_images[id]; |
| @@ -593,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { | |||
| 593 | 593 | ||
| 594 | template <class P> | 594 | template <class P> |
| 595 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { | 595 | void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { |
| 596 | std::vector<ImageId> deleted_images; | 596 | boost::container::small_vector<ImageId, 16> deleted_images; |
| 597 | ForEachImageInRegionGPU(as_id, gpu_addr, size, | 597 | ForEachImageInRegionGPU(as_id, gpu_addr, size, |
| 598 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); | 598 | [&](ImageId id, Image&) { deleted_images.push_back(id); }); |
| 599 | for (const ImageId id : deleted_images) { | 599 | for (const ImageId id : deleted_images) { |
| @@ -1101,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, | |||
| 1101 | const bool native_bgr = runtime.HasNativeBgr(); | 1101 | const bool native_bgr = runtime.HasNativeBgr(); |
| 1102 | const bool flexible_formats = True(options & RelaxedOptions::Format); | 1102 | const bool flexible_formats = True(options & RelaxedOptions::Format); |
| 1103 | ImageId image_id{}; | 1103 | ImageId image_id{}; |
| 1104 | boost::container::small_vector<ImageId, 1> image_ids; | 1104 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1105 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1105 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1106 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1106 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1107 | return false; | 1107 | return false; |
| @@ -1622,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) | |||
| 1622 | } | 1622 | } |
| 1623 | } | 1623 | } |
| 1624 | ImageId image_id{}; | 1624 | ImageId image_id{}; |
| 1625 | boost::container::small_vector<ImageId, 1> image_ids; | 1625 | boost::container::small_vector<ImageId, 8> image_ids; |
| 1626 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | 1626 | const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { |
| 1627 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { | 1627 | if (True(existing_image.flags & ImageFlagBits::Remapped)) { |
| 1628 | return false; | 1628 | return false; |
| @@ -1942,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | |||
| 1942 | image.map_view_id = map_id; | 1942 | image.map_view_id = map_id; |
| 1943 | return; | 1943 | return; |
| 1944 | } | 1944 | } |
| 1945 | std::vector<ImageViewId> sparse_maps{}; | 1945 | boost::container::small_vector<ImageViewId, 16> sparse_maps; |
| 1946 | ForEachSparseSegment( | 1946 | ForEachSparseSegment( |
| 1947 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { | 1947 | image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { |
| 1948 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); | 1948 | auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); |
| @@ -2217,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept { | |||
| 2217 | 2217 | ||
| 2218 | template <class P> | 2218 | template <class P> |
| 2219 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { | 2219 | void TextureCache<P>::SynchronizeAliases(ImageId image_id) { |
| 2220 | boost::container::small_vector<const AliasedImage*, 1> aliased_images; | 2220 | boost::container::small_vector<const AliasedImage*, 8> aliased_images; |
| 2221 | Image& image = slot_images[image_id]; | 2221 | Image& image = slot_images[image_id]; |
| 2222 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); | 2222 | bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); |
| 2223 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); | 2223 | bool any_modified = True(image.flags & ImageFlagBits::GpuModified); |
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 44232b961..e9ec91265 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h | |||
| @@ -56,7 +56,7 @@ struct ImageViewInOut { | |||
| 56 | struct AsyncDecodeContext { | 56 | struct AsyncDecodeContext { |
| 57 | ImageId image_id; | 57 | ImageId image_id; |
| 58 | Common::ScratchBuffer<u8> decoded_data; | 58 | Common::ScratchBuffer<u8> decoded_data; |
| 59 | std::vector<BufferImageCopy> copies; | 59 | boost::container::small_vector<BufferImageCopy, 16> copies; |
| 60 | std::mutex mutex; | 60 | std::mutex mutex; |
| 61 | std::atomic_bool complete; | 61 | std::atomic_bool complete; |
| 62 | }; | 62 | }; |
| @@ -429,7 +429,7 @@ private: | |||
| 429 | 429 | ||
| 430 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; | 430 | std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; |
| 431 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | 431 | std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; |
| 432 | std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; | 432 | std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; |
| 433 | 433 | ||
| 434 | VAddr virtual_invalid_space{}; | 434 | VAddr virtual_invalid_space{}; |
| 435 | 435 | ||
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 95a5b47d8..f781cb7a0 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp | |||
| @@ -329,13 +329,13 @@ template <u32 GOB_EXTENT> | |||
| 329 | 329 | ||
| 330 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( | 330 | [[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( |
| 331 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { | 331 | const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { |
| 332 | const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); | 332 | const auto slice_offsets = CalculateSliceOffsets(new_info); |
| 333 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); | 333 | const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); |
| 334 | const auto it = std::ranges::find(slice_offsets, diff); | 334 | const auto it = std::ranges::find(slice_offsets, diff); |
| 335 | if (it == slice_offsets.end()) { | 335 | if (it == slice_offsets.end()) { |
| 336 | return std::nullopt; | 336 | return std::nullopt; |
| 337 | } | 337 | } |
| 338 | const std::vector subresources = CalculateSliceSubresources(new_info); | 338 | const auto subresources = CalculateSliceSubresources(new_info); |
| 339 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; | 339 | const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; |
| 340 | const ImageInfo& info = overlap.info; | 340 | const ImageInfo& info = overlap.info; |
| 341 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { | 341 | if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { |
| @@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { | |||
| 655 | return sizes; | 655 | return sizes; |
| 656 | } | 656 | } |
| 657 | 657 | ||
| 658 | std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | 658 | boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) { |
| 659 | ASSERT(info.type == ImageType::e3D); | 659 | ASSERT(info.type == ImageType::e3D); |
| 660 | std::vector<u32> offsets; | 660 | boost::container::small_vector<u32, 16> offsets; |
| 661 | offsets.reserve(NumSlices(info)); | 661 | offsets.reserve(NumSlices(info)); |
| 662 | 662 | ||
| 663 | const LevelInfo level_info = MakeLevelInfo(info); | 663 | const LevelInfo level_info = MakeLevelInfo(info); |
| @@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { | |||
| 679 | return offsets; | 679 | return offsets; |
| 680 | } | 680 | } |
| 681 | 681 | ||
| 682 | std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { | 682 | boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 683 | const ImageInfo& info) { | ||
| 683 | ASSERT(info.type == ImageType::e3D); | 684 | ASSERT(info.type == ImageType::e3D); |
| 684 | std::vector<SubresourceBase> subresources; | 685 | boost::container::small_vector<SubresourceBase, 16> subresources; |
| 685 | subresources.reserve(NumSlices(info)); | 686 | subresources.reserve(NumSlices(info)); |
| 686 | for (s32 level = 0; level < info.resources.levels; ++level) { | 687 | for (s32 level = 0; level < info.resources.levels; ++level) { |
| 687 | const s32 depth = AdjustMipSize(info.size.depth, level); | 688 | const s32 depth = AdjustMipSize(info.size.depth, level); |
| @@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | |||
| 723 | } | 724 | } |
| 724 | } | 725 | } |
| 725 | 726 | ||
| 726 | std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | 727 | boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst, |
| 727 | SubresourceBase base, u32 up_scale, u32 down_shift) { | 728 | const ImageInfo& src, |
| 729 | SubresourceBase base, | ||
| 730 | u32 up_scale, u32 down_shift) { | ||
| 728 | ASSERT(dst.resources.levels >= src.resources.levels); | 731 | ASSERT(dst.resources.levels >= src.resources.levels); |
| 729 | 732 | ||
| 730 | const bool is_dst_3d = dst.type == ImageType::e3D; | 733 | const bool is_dst_3d = dst.type == ImageType::e3D; |
| @@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 733 | ASSERT(src.resources.levels == 1); | 736 | ASSERT(src.resources.levels == 1); |
| 734 | } | 737 | } |
| 735 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; | 738 | const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; |
| 736 | std::vector<ImageCopy> copies; | 739 | boost::container::small_vector<ImageCopy, 16> copies; |
| 737 | copies.reserve(src.resources.levels); | 740 | copies.reserve(src.resources.levels); |
| 738 | for (s32 level = 0; level < src.resources.levels; ++level) { | 741 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| 739 | ImageCopy& copy = copies.emplace_back(); | 742 | ImageCopy& copy = copies.emplace_back(); |
| @@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | |||
| 770 | return copies; | 773 | return copies; |
| 771 | } | 774 | } |
| 772 | 775 | ||
| 773 | std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, | 776 | boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src, |
| 774 | u32 down_shift) { | 777 | u32 up_scale, |
| 775 | std::vector<ImageCopy> copies; | 778 | u32 down_shift) { |
| 779 | boost::container::small_vector<ImageCopy, 16> copies; | ||
| 776 | copies.reserve(src.resources.levels); | 780 | copies.reserve(src.resources.levels); |
| 777 | const bool is_3d = src.type == ImageType::e3D; | 781 | const bool is_3d = src.type == ImageType::e3D; |
| 778 | for (s32 level = 0; level < src.resources.levels; ++level) { | 782 | for (s32 level = 0; level < src.resources.levels; ++level) { |
| @@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config | |||
| 824 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); | 828 | return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); |
| 825 | } | 829 | } |
| 826 | 830 | ||
| 827 | std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 831 | boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory, |
| 828 | const ImageInfo& info, std::span<const u8> input, | 832 | GPUVAddr gpu_addr, |
| 829 | std::span<u8> output) { | 833 | const ImageInfo& info, |
| 834 | std::span<const u8> input, | ||
| 835 | std::span<u8> output) { | ||
| 830 | const size_t guest_size_bytes = input.size_bytes(); | 836 | const size_t guest_size_bytes = input.size_bytes(); |
| 831 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); | 837 | const u32 bpp_log2 = BytesPerBlockLog2(info.format); |
| 832 | const Extent3D size = info.size; | 838 | const Extent3D size = info.size; |
| @@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP | |||
| 861 | info.tile_width_spacing); | 867 | info.tile_width_spacing); |
| 862 | size_t guest_offset = 0; | 868 | size_t guest_offset = 0; |
| 863 | u32 host_offset = 0; | 869 | u32 host_offset = 0; |
| 864 | std::vector<BufferImageCopy> copies(num_levels); | 870 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 865 | 871 | ||
| 866 | for (s32 level = 0; level < num_levels; ++level) { | 872 | for (s32 level = 0; level < num_levels; ++level) { |
| 867 | const Extent3D level_size = AdjustMipSize(size, level); | 873 | const Extent3D level_size = AdjustMipSize(size, level); |
| @@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8 | |||
| 978 | } | 984 | } |
| 979 | } | 985 | } |
| 980 | 986 | ||
| 981 | std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | 987 | boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) { |
| 982 | const Extent3D size = info.size; | 988 | const Extent3D size = info.size; |
| 983 | const u32 bytes_per_block = BytesPerBlock(info.format); | 989 | const u32 bytes_per_block = BytesPerBlock(info.format); |
| 984 | if (info.type == ImageType::Linear) { | 990 | if (info.type == ImageType::Linear) { |
| @@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { | |||
| 1006 | 1012 | ||
| 1007 | u32 host_offset = 0; | 1013 | u32 host_offset = 0; |
| 1008 | 1014 | ||
| 1009 | std::vector<BufferImageCopy> copies(num_levels); | 1015 | boost::container::small_vector<BufferImageCopy, 16> copies(num_levels); |
| 1010 | for (s32 level = 0; level < num_levels; ++level) { | 1016 | for (s32 level = 0; level < num_levels; ++level) { |
| 1011 | const Extent3D level_size = AdjustMipSize(size, level); | 1017 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1012 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); | 1018 | const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); |
| @@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) { | |||
| 1042 | return AdjustMipBlockSize(num_tiles, level_info.block, level); | 1048 | return AdjustMipBlockSize(num_tiles, level_info.block, level); |
| 1043 | } | 1049 | } |
| 1044 | 1050 | ||
| 1045 | std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | 1051 | boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) { |
| 1046 | const Extent2D tile_size = DefaultBlockSize(info.format); | 1052 | const Extent2D tile_size = DefaultBlockSize(info.format); |
| 1047 | if (info.type == ImageType::Linear) { | 1053 | if (info.type == ImageType::Linear) { |
| 1048 | return std::vector{SwizzleParameters{ | 1054 | return {SwizzleParameters{ |
| 1049 | .num_tiles = AdjustTileSize(info.size, tile_size), | 1055 | .num_tiles = AdjustTileSize(info.size, tile_size), |
| 1050 | .block = {}, | 1056 | .block = {}, |
| 1051 | .buffer_offset = 0, | 1057 | .buffer_offset = 0, |
| @@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { | |||
| 1057 | const s32 num_levels = info.resources.levels; | 1063 | const s32 num_levels = info.resources.levels; |
| 1058 | 1064 | ||
| 1059 | u32 guest_offset = 0; | 1065 | u32 guest_offset = 0; |
| 1060 | std::vector<SwizzleParameters> params(num_levels); | 1066 | boost::container::small_vector<SwizzleParameters, 16> params(num_levels); |
| 1061 | for (s32 level = 0; level < num_levels; ++level) { | 1067 | for (s32 level = 0; level < num_levels; ++level) { |
| 1062 | const Extent3D level_size = AdjustMipSize(size, level); | 1068 | const Extent3D level_size = AdjustMipSize(size, level); |
| 1063 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); | 1069 | const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); |
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 84aa6880d..ab45a43c4 100644 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | 5 | ||
| 6 | #include <optional> | 6 | #include <optional> |
| 7 | #include <span> | 7 | #include <span> |
| 8 | #include <boost/container/small_vector.hpp> | ||
| 8 | 9 | ||
| 9 | #include "common/common_types.h" | 10 | #include "common/common_types.h" |
| 10 | #include "common/scratch_buffer.h" | 11 | #include "common/scratch_buffer.h" |
| @@ -40,9 +41,10 @@ struct OverlapResult { | |||
| 40 | 41 | ||
| 41 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; | 42 | [[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; |
| 42 | 43 | ||
| 43 | [[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); | 44 | [[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info); |
| 44 | 45 | ||
| 45 | [[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); | 46 | [[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources( |
| 47 | const ImageInfo& info); | ||
| 46 | 48 | ||
| 47 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); | 49 | [[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); |
| 48 | 50 | ||
| @@ -51,21 +53,18 @@ struct OverlapResult { | |||
| 51 | 53 | ||
| 52 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; | 54 | [[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; |
| 53 | 55 | ||
| 54 | [[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, | 56 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies( |
| 55 | const ImageInfo& src, | 57 | const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1, |
| 56 | SubresourceBase base, u32 up_scale = 1, | 58 | u32 down_shift = 0); |
| 57 | u32 down_shift = 0); | ||
| 58 | 59 | ||
| 59 | [[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, | 60 | [[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies( |
| 60 | u32 up_scale = 1, | 61 | const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0); |
| 61 | u32 down_shift = 0); | ||
| 62 | 62 | ||
| 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | 63 | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); |
| 64 | 64 | ||
| 65 | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | 65 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage( |
| 66 | GPUVAddr gpu_addr, const ImageInfo& info, | 66 | Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 67 | std::span<const u8> input, | 67 | std::span<const u8> input, std::span<u8> output); |
| 68 | std::span<u8> output); | ||
| 69 | 68 | ||
| 70 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, | 69 | [[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, |
| 71 | const ImageBase& image, std::span<u8> output); | 70 | const ImageBase& image, std::span<u8> output); |
| @@ -73,13 +72,15 @@ struct OverlapResult { | |||
| 73 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, | 72 | void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, |
| 74 | std::span<BufferImageCopy> copies); | 73 | std::span<BufferImageCopy> copies); |
| 75 | 74 | ||
| 76 | [[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); | 75 | [[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies( |
| 76 | const ImageInfo& info); | ||
| 77 | 77 | ||
| 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); | 78 | [[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); |
| 79 | 79 | ||
| 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); | 80 | [[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); |
| 81 | 81 | ||
| 82 | [[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); | 82 | [[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles( |
| 83 | const ImageInfo& info); | ||
| 83 | 84 | ||
| 84 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, | 85 | void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, |
| 85 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, | 86 | std::span<const BufferImageCopy> copies, std::span<const u8> memory, |
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp index 155599316..1f353d2df 100644 --- a/src/video_core/transform_feedback.cpp +++ b/src/video_core/transform_feedback.cpp | |||
| @@ -13,7 +13,7 @@ | |||
| 13 | 13 | ||
| 14 | namespace VideoCommon { | 14 | namespace VideoCommon { |
| 15 | 15 | ||
| 16 | std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | 16 | std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( |
| 17 | const TransformFeedbackState& state) { | 17 | const TransformFeedbackState& state) { |
| 18 | static constexpr std::array VECTORS{ | 18 | static constexpr std::array VECTORS{ |
| 19 | 28U, // gl_Position | 19 | 28U, // gl_Position |
| @@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | |||
| 62 | 216U, // gl_TexCoord[6] | 62 | 216U, // gl_TexCoord[6] |
| 63 | 220U, // gl_TexCoord[7] | 63 | 220U, // gl_TexCoord[7] |
| 64 | }; | 64 | }; |
| 65 | std::vector<Shader::TransformFeedbackVarying> xfb(256); | 65 | std::array<Shader::TransformFeedbackVarying, 256> xfb{}; |
| 66 | u32 count{0}; | ||
| 66 | for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { | 67 | for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { |
| 67 | const auto& locations = state.varyings[buffer]; | 68 | const auto& locations = state.varyings[buffer]; |
| 68 | const auto& layout = state.layouts[buffer]; | 69 | const auto& layout = state.layouts[buffer]; |
| @@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | |||
| 103 | } | 104 | } |
| 104 | } | 105 | } |
| 105 | xfb[attribute] = varying; | 106 | xfb[attribute] = varying; |
| 107 | count = std::max(count, attribute); | ||
| 106 | highest = std::max(highest, (base_offset + varying.components) * 4); | 108 | highest = std::max(highest, (base_offset + varying.components) * 4); |
| 107 | } | 109 | } |
| 108 | UNIMPLEMENTED_IF(highest != layout.stride); | 110 | UNIMPLEMENTED_IF(highest != layout.stride); |
| 109 | } | 111 | } |
| 110 | return xfb; | 112 | return {xfb, count + 1}; |
| 111 | } | 113 | } |
| 112 | 114 | ||
| 113 | } // namespace VideoCommon | 115 | } // namespace VideoCommon |
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h index d13eb16c3..401b1352a 100644 --- a/src/video_core/transform_feedback.h +++ b/src/video_core/transform_feedback.h | |||
| @@ -24,7 +24,7 @@ struct TransformFeedbackState { | |||
| 24 | varyings; | 24 | varyings; |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| 27 | std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( | 27 | std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings( |
| 28 | const TransformFeedbackState& state); | 28 | const TransformFeedbackState& state); |
| 29 | 29 | ||
| 30 | } // namespace VideoCommon | 30 | } // namespace VideoCommon |
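MakeTransformFeedbackVaryings now returns a fixed 256-entry std::array plus a count of the used slots instead of a heap-allocated std::vector, and both pipeline caches unpack the result with structured bindings. A reduced sketch of that producer/consumer shape; Varying stands in for Shader::TransformFeedbackVarying and the attribute indices are illustrative:

    #include <algorithm>
    #include <array>
    #include <cstdint>
    #include <utility>

    struct Varying {
        std::uint32_t components{};
    };

    // Return the full table plus how many leading slots are meaningful, so the
    // caller can keep an array and a count instead of owning a std::vector.
    std::pair<std::array<Varying, 256>, std::uint32_t> MakeVaryings() {
        std::array<Varying, 256> xfb{};
        std::uint32_t count = 0;
        for (std::uint32_t attribute : {28u, 29u, 30u, 31u}) {  // e.g. gl_Position
            xfb[attribute].components = 1;
            count = std::max(count, attribute);
        }
        return {xfb, count + 1};
    }

    int main() {
        const auto [varyings, count] = MakeVaryings();
        // A pipeline cache would then copy these into its runtime info:
        //   info.xfb_varyings = varyings;  info.xfb_count = count;
        return count == 32 ? 0 : 1;  // count is 32 for this toy input
    }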
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index fa9cde75b..b11abe311 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp | |||
| @@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical, | |||
| 316 | std::vector<const char*> ExtensionListForVulkan( | 316 | std::vector<const char*> ExtensionListForVulkan( |
| 317 | const std::set<std::string, std::less<>>& extensions) { | 317 | const std::set<std::string, std::less<>>& extensions) { |
| 318 | std::vector<const char*> output; | 318 | std::vector<const char*> output; |
| 319 | output.reserve(extensions.size()); | ||
| 319 | for (const auto& extension : extensions) { | 320 | for (const auto& extension : extensions) { |
| 320 | output.push_back(extension.c_str()); | 321 | output.push_back(extension.c_str()); |
| 321 | } | 322 | } |