summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Kelebek12023-05-23 14:45:54 +0100
committerGravatar Kelebek12023-06-22 08:05:10 +0100
commit5da70f719703084482933e103e561cc98163f370 (patch)
tree1926842ed2b90bf92b89cec6a314bb28c7287fe9 /src
parentMerge pull request #10086 from Morph1984/coretiming-ng-1 (diff)
downloadyuzu-5da70f719703084482933e103e561cc98163f370.tar.gz
yuzu-5da70f719703084482933e103e561cc98163f370.tar.xz
yuzu-5da70f719703084482933e103e561cc98163f370.zip
Remove memory allocations in some hot paths
Diffstat (limited to 'src')
-rw-r--r--src/audio_core/device/audio_buffers.h8
-rw-r--r--src/audio_core/device/device_session.cpp12
-rw-r--r--src/audio_core/device/device_session.h7
-rw-r--r--src/audio_core/in/audio_in_system.cpp5
-rw-r--r--src/audio_core/out/audio_out_system.cpp4
-rw-r--r--src/audio_core/renderer/command/data_source/decode.cpp23
-rw-r--r--src/audio_core/renderer/command/effect/compressor.cpp8
-rw-r--r--src/audio_core/renderer/command/effect/delay.cpp14
-rw-r--r--src/audio_core/renderer/command/effect/i3dl2_reverb.cpp4
-rw-r--r--src/audio_core/renderer/command/effect/light_limiter.cpp12
-rw-r--r--src/audio_core/renderer/command/effect/reverb.cpp12
-rw-r--r--src/audio_core/renderer/command/sink/circular_buffer.cpp4
-rw-r--r--src/audio_core/renderer/command/sink/device.cpp5
-rw-r--r--src/audio_core/renderer/mix/mix_context.cpp6
-rw-r--r--src/audio_core/renderer/nodes/node_states.cpp4
-rw-r--r--src/audio_core/renderer/nodes/node_states.h2
-rw-r--r--src/audio_core/renderer/system.cpp1
-rw-r--r--src/audio_core/sink/null_sink.h2
-rw-r--r--src/audio_core/sink/sink_stream.cpp15
-rw-r--r--src/audio_core/sink/sink_stream.h5
-rw-r--r--src/common/ring_buffer.h3
-rw-r--r--src/common/scratch_buffer.h9
-rw-r--r--src/core/hle/kernel/k_synchronization_object.cpp3
-rw-r--r--src/core/hle/kernel/k_thread.cpp8
-rw-r--r--src/core/hle/kernel/k_thread.h3
-rw-r--r--src/core/hle/kernel/svc/svc_ipc.cpp7
-rw-r--r--src/core/hle/kernel/svc/svc_synchronization.cpp10
-rw-r--r--src/core/hle/kernel/svc/svc_thread.cpp2
-rw-r--r--src/core/hle/service/audio/audin_u.cpp16
-rw-r--r--src/core/hle/service/audio/audout_u.cpp15
-rw-r--r--src/core/hle/service/audio/audren_u.cpp22
-rw-r--r--src/core/hle/service/audio/audren_u.h1
-rw-r--r--src/core/hle/service/audio/hwopus.cpp9
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdevice.h6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvdisp_disp0.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp31
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h30
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp19
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl.h21
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp32
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h38
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp59
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_gpu.h36
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp15
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h12
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h10
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.cpp6
-rw-r--r--src/core/hle/service/nvdrv/devices/nvhost_vic.h8
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.cpp20
-rw-r--r--src/core/hle/service/nvdrv/devices/nvmap.h20
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.cpp8
-rw-r--r--src/core/hle/service/nvdrv/nvdrv.h8
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.cpp24
-rw-r--r--src/core/hle/service/nvdrv/nvdrv_interface.h3
-rw-r--r--src/core/hle/service/nvnflinger/parcel.h7
-rw-r--r--src/shader_recompiler/backend/glsl/glsl_emit_context.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/emit_spirv.cpp2
-rw-r--r--src/shader_recompiler/backend/spirv/spirv_emit_context.cpp2
-rw-r--r--src/shader_recompiler/runtime_info.h3
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h4
-rw-r--r--src/video_core/buffer_cache/buffer_cache_base.h4
-rw-r--r--src/video_core/cdma_pusher.h1
-rw-r--r--src/video_core/dma_pusher.h8
-rw-r--r--src/video_core/engines/maxwell_dma.cpp35
-rw-r--r--src/video_core/host1x/codecs/h264.cpp4
-rw-r--r--src/video_core/memory_manager.cpp13
-rw-r--r--src/video_core/memory_manager.h15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp10
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp27
-rw-r--r--src/video_core/shader_cache.cpp4
-rw-r--r--src/video_core/texture_cache/image_base.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h14
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h4
-rw-r--r--src/video_core/texture_cache/util.cpp48
-rw-r--r--src/video_core/texture_cache/util.h31
-rw-r--r--src/video_core/transform_feedback.cpp8
-rw-r--r--src/video_core/transform_feedback.h2
-rw-r--r--src/video_core/vulkan_common/vulkan_device.cpp1
84 files changed, 503 insertions, 460 deletions
diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h
index 15082f6c6..5d8ed0ef7 100644
--- a/src/audio_core/device/audio_buffers.h
+++ b/src/audio_core/device/audio_buffers.h
@@ -7,6 +7,7 @@
7#include <mutex> 7#include <mutex>
8#include <span> 8#include <span>
9#include <vector> 9#include <vector>
10#include <boost/container/static_vector.hpp>
10 11
11#include "audio_buffer.h" 12#include "audio_buffer.h"
12#include "audio_core/device/device_session.h" 13#include "audio_core/device/device_session.h"
@@ -48,7 +49,7 @@ public:
48 * 49 *
49 * @param out_buffers - The buffers which were registered. 50 * @param out_buffers - The buffers which were registered.
50 */ 51 */
51 void RegisterBuffers(std::vector<AudioBuffer>& out_buffers) { 52 void RegisterBuffers(boost::container::static_vector<AudioBuffer, N>& out_buffers) {
52 std::scoped_lock l{lock}; 53 std::scoped_lock l{lock};
53 const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit), 54 const s32 to_register{std::min(std::min(appended_count, BufferAppendLimit),
54 BufferAppendLimit - registered_count)}; 55 BufferAppendLimit - registered_count)};
@@ -162,7 +163,8 @@ public:
162 * @param max_buffers - Maximum number of buffers to released. 163 * @param max_buffers - Maximum number of buffers to released.
163 * @return The number of buffers released. 164 * @return The number of buffers released.
164 */ 165 */
165 u32 GetRegisteredAppendedBuffers(std::vector<AudioBuffer>& buffers_flushed, u32 max_buffers) { 166 u32 GetRegisteredAppendedBuffers(
167 boost::container::static_vector<AudioBuffer, N>& buffers_flushed, u32 max_buffers) {
166 std::scoped_lock l{lock}; 168 std::scoped_lock l{lock};
167 if (registered_count + appended_count == 0) { 169 if (registered_count + appended_count == 0) {
168 return 0; 170 return 0;
@@ -270,7 +272,7 @@ public:
270 */ 272 */
271 bool FlushBuffers(u32& buffers_released) { 273 bool FlushBuffers(u32& buffers_released) {
272 std::scoped_lock l{lock}; 274 std::scoped_lock l{lock};
273 std::vector<AudioBuffer> buffers_flushed{}; 275 boost::container::static_vector<AudioBuffer, N> buffers_flushed{};
274 276
275 buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit); 277 buffers_released = GetRegisteredAppendedBuffers(buffers_flushed, append_limit);
276 278
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index b5c0ef0e6..86811fcb8 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -79,7 +79,7 @@ void DeviceSession::ClearBuffers() {
79 } 79 }
80} 80}
81 81
82void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const { 82void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) {
83 for (const auto& buffer : buffers) { 83 for (const auto& buffer : buffers) {
84 Sink::SinkBuffer new_buffer{ 84 Sink::SinkBuffer new_buffer{
85 .frames = buffer.size / (channel_count * sizeof(s16)), 85 .frames = buffer.size / (channel_count * sizeof(s16)),
@@ -88,13 +88,13 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) const {
88 .consumed = false, 88 .consumed = false,
89 }; 89 };
90 90
91 tmp_samples.resize_destructive(buffer.size / sizeof(s16));
91 if (type == Sink::StreamType::In) { 92 if (type == Sink::StreamType::In) {
92 std::vector<s16> samples{}; 93 stream->AppendBuffer(new_buffer, tmp_samples);
93 stream->AppendBuffer(new_buffer, samples);
94 } else { 94 } else {
95 std::vector<s16> samples(buffer.size / sizeof(s16)); 95 system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(),
96 system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, samples.data(), buffer.size); 96 buffer.size);
97 stream->AppendBuffer(new_buffer, samples); 97 stream->AppendBuffer(new_buffer, tmp_samples);
98 } 98 }
99 } 99 }
100} 100}
diff --git a/src/audio_core/device/device_session.h b/src/audio_core/device/device_session.h
index 75f766c68..7d52f362d 100644
--- a/src/audio_core/device/device_session.h
+++ b/src/audio_core/device/device_session.h
@@ -10,6 +10,7 @@
10 10
11#include "audio_core/common/common.h" 11#include "audio_core/common/common.h"
12#include "audio_core/sink/sink.h" 12#include "audio_core/sink/sink.h"
13#include "common/scratch_buffer.h"
13#include "core/hle/service/audio/errors.h" 14#include "core/hle/service/audio/errors.h"
14 15
15namespace Core { 16namespace Core {
@@ -62,7 +63,7 @@ public:
62 * 63 *
63 * @param buffers - The buffers to play. 64 * @param buffers - The buffers to play.
64 */ 65 */
65 void AppendBuffers(std::span<const AudioBuffer> buffers) const; 66 void AppendBuffers(std::span<const AudioBuffer> buffers);
66 67
67 /** 68 /**
68 * (Audio In only) Pop samples from the backend, and write them back to this buffer's address. 69 * (Audio In only) Pop samples from the backend, and write them back to this buffer's address.
@@ -146,8 +147,8 @@ private:
146 std::shared_ptr<Core::Timing::EventType> thread_event; 147 std::shared_ptr<Core::Timing::EventType> thread_event;
147 /// Is this session initialised? 148 /// Is this session initialised?
148 bool initialized{}; 149 bool initialized{};
149 /// Buffer queue 150 /// Temporary sample buffer
150 std::vector<AudioBuffer> buffer_queue{}; 151 Common::ScratchBuffer<s16> tmp_samples{};
151}; 152};
152 153
153} // namespace AudioCore 154} // namespace AudioCore
diff --git a/src/audio_core/in/audio_in_system.cpp b/src/audio_core/in/audio_in_system.cpp
index e23e51758..579129121 100644
--- a/src/audio_core/in/audio_in_system.cpp
+++ b/src/audio_core/in/audio_in_system.cpp
@@ -2,6 +2,7 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include <mutex> 4#include <mutex>
5
5#include "audio_core/audio_event.h" 6#include "audio_core/audio_event.h"
6#include "audio_core/audio_manager.h" 7#include "audio_core/audio_manager.h"
7#include "audio_core/in/audio_in_system.h" 8#include "audio_core/in/audio_in_system.h"
@@ -89,7 +90,7 @@ Result System::Start() {
89 session->Start(); 90 session->Start();
90 state = State::Started; 91 state = State::Started;
91 92
92 std::vector<AudioBuffer> buffers_to_flush{}; 93 boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
93 buffers.RegisterBuffers(buffers_to_flush); 94 buffers.RegisterBuffers(buffers_to_flush);
94 session->AppendBuffers(buffers_to_flush); 95 session->AppendBuffers(buffers_to_flush);
95 session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); 96 session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@@ -134,7 +135,7 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) {
134 135
135void System::RegisterBuffers() { 136void System::RegisterBuffers() {
136 if (state == State::Started) { 137 if (state == State::Started) {
137 std::vector<AudioBuffer> registered_buffers{}; 138 boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
138 buffers.RegisterBuffers(registered_buffers); 139 buffers.RegisterBuffers(registered_buffers);
139 session->AppendBuffers(registered_buffers); 140 session->AppendBuffers(registered_buffers);
140 } 141 }
diff --git a/src/audio_core/out/audio_out_system.cpp b/src/audio_core/out/audio_out_system.cpp
index bd13f7219..0adf64bd3 100644
--- a/src/audio_core/out/audio_out_system.cpp
+++ b/src/audio_core/out/audio_out_system.cpp
@@ -89,7 +89,7 @@ Result System::Start() {
89 session->Start(); 89 session->Start();
90 state = State::Started; 90 state = State::Started;
91 91
92 std::vector<AudioBuffer> buffers_to_flush{}; 92 boost::container::static_vector<AudioBuffer, BufferCount> buffers_to_flush{};
93 buffers.RegisterBuffers(buffers_to_flush); 93 buffers.RegisterBuffers(buffers_to_flush);
94 session->AppendBuffers(buffers_to_flush); 94 session->AppendBuffers(buffers_to_flush);
95 session->SetRingSize(static_cast<u32>(buffers_to_flush.size())); 95 session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
@@ -134,7 +134,7 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 tag) {
134 134
135void System::RegisterBuffers() { 135void System::RegisterBuffers() {
136 if (state == State::Started) { 136 if (state == State::Started) {
137 std::vector<AudioBuffer> registered_buffers{}; 137 boost::container::static_vector<AudioBuffer, BufferCount> registered_buffers{};
138 buffers.RegisterBuffers(registered_buffers); 138 buffers.RegisterBuffers(registered_buffers);
139 session->AppendBuffers(registered_buffers); 139 session->AppendBuffers(registered_buffers);
140 } 140 }
diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp
index ff5d31bd6..f45933203 100644
--- a/src/audio_core/renderer/command/data_source/decode.cpp
+++ b/src/audio_core/renderer/command/data_source/decode.cpp
@@ -8,6 +8,7 @@
8#include "audio_core/renderer/command/resample/resample.h" 8#include "audio_core/renderer/command/resample/resample.h"
9#include "common/fixed_point.h" 9#include "common/fixed_point.h"
10#include "common/logging/log.h" 10#include "common/logging/log.h"
11#include "common/scratch_buffer.h"
11#include "core/memory.h" 12#include "core/memory.h"
12 13
13namespace AudioCore::AudioRenderer { 14namespace AudioCore::AudioRenderer {
@@ -27,6 +28,7 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4};
27template <typename T> 28template <typename T>
28static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, 29static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
29 const DecodeArg& req) { 30 const DecodeArg& req) {
31 std::array<T, TempBufferSize> tmp_samples{};
30 constexpr s32 min{std::numeric_limits<s16>::min()}; 32 constexpr s32 min{std::numeric_limits<s16>::min()};
31 constexpr s32 max{std::numeric_limits<s16>::max()}; 33 constexpr s32 max{std::numeric_limits<s16>::max()};
32 34
@@ -49,18 +51,17 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
49 const u64 size{channel_count * samples_to_decode}; 51 const u64 size{channel_count * samples_to_decode};
50 const u64 size_bytes{size * sizeof(T)}; 52 const u64 size_bytes{size * sizeof(T)};
51 53
52 std::vector<T> samples(size); 54 memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes);
53 memory.ReadBlockUnsafe(source, samples.data(), size_bytes);
54 55
55 if constexpr (std::is_floating_point_v<T>) { 56 if constexpr (std::is_floating_point_v<T>) {
56 for (u32 i = 0; i < samples_to_decode; i++) { 57 for (u32 i = 0; i < samples_to_decode; i++) {
57 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * 58 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
58 std::numeric_limits<s16>::max())}; 59 std::numeric_limits<s16>::max())};
59 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); 60 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
60 } 61 }
61 } else { 62 } else {
62 for (u32 i = 0; i < samples_to_decode; i++) { 63 for (u32 i = 0; i < samples_to_decode; i++) {
63 out_buffer[i] = samples[i * channel_count + req.target_channel]; 64 out_buffer[i] = tmp_samples[i * channel_count + req.target_channel];
64 } 65 }
65 } 66 }
66 } break; 67 } break;
@@ -73,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
73 } 74 }
74 75
75 const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; 76 const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))};
76 std::vector<T> samples(samples_to_decode); 77 memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T));
77 memory.ReadBlockUnsafe(source, samples.data(), samples_to_decode * sizeof(T));
78 78
79 if constexpr (std::is_floating_point_v<T>) { 79 if constexpr (std::is_floating_point_v<T>) {
80 for (u32 i = 0; i < samples_to_decode; i++) { 80 for (u32 i = 0; i < samples_to_decode; i++) {
81 auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * 81 auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
82 std::numeric_limits<s16>::max())}; 82 std::numeric_limits<s16>::max())};
83 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); 83 out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
84 } 84 }
85 } else { 85 } else {
86 std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); 86 std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16));
87 } 87 }
88 break; 88 break;
89 } 89 }
@@ -101,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
101 */ 101 */
102static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, 102static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
103 const DecodeArg& req) { 103 const DecodeArg& req) {
104 std::array<u8, TempBufferSize> wavebuffer{};
104 constexpr u32 SamplesPerFrame{14}; 105 constexpr u32 SamplesPerFrame{14};
105 constexpr u32 NibblesPerFrame{16}; 106 constexpr u32 NibblesPerFrame{16};
106 107
@@ -138,9 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
138 } 139 }
139 140
140 const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; 141 const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)};
141 std::vector<u8> wavebuffer(size); 142 memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size);
142 memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(),
143 wavebuffer.size());
144 143
145 auto context{req.adpcm_context}; 144 auto context{req.adpcm_context};
146 auto header{context->header}; 145 auto header{context->header};
@@ -258,7 +257,7 @@ void DecodeFromWaveBuffers(Core::Memory::Memory& memory, const DecodeFromWaveBuf
258 u32 offset{voice_state.offset}; 257 u32 offset{voice_state.offset};
259 258
260 auto output_buffer{args.output}; 259 auto output_buffer{args.output};
261 std::vector<s16> temp_buffer(TempBufferSize, 0); 260 std::array<s16, TempBufferSize> temp_buffer{};
262 261
263 while (remaining_sample_count > 0) { 262 while (remaining_sample_count > 0) {
264 const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)}; 263 const auto samples_to_write{std::min(remaining_sample_count, max_remaining_sample_count)};
diff --git a/src/audio_core/renderer/command/effect/compressor.cpp b/src/audio_core/renderer/command/effect/compressor.cpp
index 7229618e8..ee9b68d5b 100644
--- a/src/audio_core/renderer/command/effect/compressor.cpp
+++ b/src/audio_core/renderer/command/effect/compressor.cpp
@@ -44,8 +44,8 @@ static void InitializeCompressorEffect(const CompressorInfo::ParameterVersion2&
44 44
45static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params, 45static void ApplyCompressorEffect(const CompressorInfo::ParameterVersion2& params,
46 CompressorInfo::State& state, bool enabled, 46 CompressorInfo::State& state, bool enabled,
47 std::vector<std::span<const s32>> input_buffers, 47 std::span<std::span<const s32>> input_buffers,
48 std::vector<std::span<s32>> output_buffers, u32 sample_count) { 48 std::span<std::span<s32>> output_buffers, u32 sample_count) {
49 if (enabled) { 49 if (enabled) {
50 auto state_00{state.unk_00}; 50 auto state_00{state.unk_00};
51 auto state_04{state.unk_04}; 51 auto state_04{state.unk_04};
@@ -124,8 +124,8 @@ void CompressorCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
124} 124}
125 125
126void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) { 126void CompressorCommand::Process(const ADSP::CommandListProcessor& processor) {
127 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 127 std::array<std::span<const s32>, MaxChannels> input_buffers{};
128 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 128 std::array<std::span<s32>, MaxChannels> output_buffers{};
129 129
130 for (s16 i = 0; i < parameter.channel_count; i++) { 130 for (s16 i = 0; i < parameter.channel_count; i++) {
131 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 131 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/delay.cpp b/src/audio_core/renderer/command/effect/delay.cpp
index a4e408d40..e536cbb1e 100644
--- a/src/audio_core/renderer/command/effect/delay.cpp
+++ b/src/audio_core/renderer/command/effect/delay.cpp
@@ -51,7 +51,7 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
51 state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor(); 51 state.delay_lines[channel].sample_count_max = sample_count_max.to_int_floor();
52 state.delay_lines[channel].sample_count = sample_count.to_int_floor(); 52 state.delay_lines[channel].sample_count = sample_count.to_int_floor();
53 state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0); 53 state.delay_lines[channel].buffer.resize(state.delay_lines[channel].sample_count, 0);
54 if (state.delay_lines[channel].buffer.size() == 0) { 54 if (state.delay_lines[channel].sample_count == 0) {
55 state.delay_lines[channel].buffer.push_back(0); 55 state.delay_lines[channel].buffer.push_back(0);
56 } 56 }
57 state.delay_lines[channel].buffer_pos = 0; 57 state.delay_lines[channel].buffer_pos = 0;
@@ -74,8 +74,8 @@ static void InitializeDelayEffect(const DelayInfo::ParameterVersion1& params,
74 */ 74 */
75template <size_t NumChannels> 75template <size_t NumChannels>
76static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, 76static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
77 std::vector<std::span<const s32>>& inputs, 77 std::span<std::span<const s32>> inputs, std::span<std::span<s32>> outputs,
78 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 78 const u32 sample_count) {
79 for (u32 sample_index = 0; sample_index < sample_count; sample_index++) { 79 for (u32 sample_index = 0; sample_index < sample_count; sample_index++) {
80 std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{}; 80 std::array<Common::FixedPoint<50, 14>, NumChannels> input_samples{};
81 for (u32 channel = 0; channel < NumChannels; channel++) { 81 for (u32 channel = 0; channel < NumChannels; channel++) {
@@ -153,8 +153,8 @@ static void ApplyDelay(const DelayInfo::ParameterVersion1& params, DelayInfo::St
153 * @param sample_count - Number of samples to process. 153 * @param sample_count - Number of samples to process.
154 */ 154 */
155static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state, 155static void ApplyDelayEffect(const DelayInfo::ParameterVersion1& params, DelayInfo::State& state,
156 const bool enabled, std::vector<std::span<const s32>>& inputs, 156 const bool enabled, std::span<std::span<const s32>> inputs,
157 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 157 std::span<std::span<s32>> outputs, const u32 sample_count) {
158 158
159 if (!IsChannelCountValid(params.channel_count)) { 159 if (!IsChannelCountValid(params.channel_count)) {
160 LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count); 160 LOG_ERROR(Service_Audio, "Invalid delay channels {}", params.channel_count);
@@ -208,8 +208,8 @@ void DelayCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proce
208} 208}
209 209
210void DelayCommand::Process(const ADSP::CommandListProcessor& processor) { 210void DelayCommand::Process(const ADSP::CommandListProcessor& processor) {
211 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 211 std::array<std::span<const s32>, MaxChannels> input_buffers{};
212 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 212 std::array<std::span<s32>, MaxChannels> output_buffers{};
213 213
214 for (s16 i = 0; i < parameter.channel_count; i++) { 214 for (s16 i = 0; i < parameter.channel_count; i++) {
215 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 215 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
index 27d8b9844..d2bfb67cc 100644
--- a/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
+++ b/src/audio_core/renderer/command/effect/i3dl2_reverb.cpp
@@ -408,8 +408,8 @@ void I3dl2ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor&
408} 408}
409 409
410void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { 410void I3dl2ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
411 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 411 std::array<std::span<const s32>, MaxChannels> input_buffers{};
412 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 412 std::array<std::span<s32>, MaxChannels> output_buffers{};
413 413
414 for (u32 i = 0; i < parameter.channel_count; i++) { 414 for (u32 i = 0; i < parameter.channel_count; i++) {
415 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 415 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/light_limiter.cpp b/src/audio_core/renderer/command/effect/light_limiter.cpp
index e8fb0e2fc..4161a9821 100644
--- a/src/audio_core/renderer/command/effect/light_limiter.cpp
+++ b/src/audio_core/renderer/command/effect/light_limiter.cpp
@@ -47,8 +47,8 @@ static void InitializeLightLimiterEffect(const LightLimiterInfo::ParameterVersio
47 */ 47 */
48static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params, 48static void ApplyLightLimiterEffect(const LightLimiterInfo::ParameterVersion2& params,
49 LightLimiterInfo::State& state, const bool enabled, 49 LightLimiterInfo::State& state, const bool enabled,
50 std::vector<std::span<const s32>>& inputs, 50 std::span<std::span<const s32>> inputs,
51 std::vector<std::span<s32>>& outputs, const u32 sample_count, 51 std::span<std::span<s32>> outputs, const u32 sample_count,
52 LightLimiterInfo::StatisticsInternal* statistics) { 52 LightLimiterInfo::StatisticsInternal* statistics) {
53 constexpr s64 min{std::numeric_limits<s32>::min()}; 53 constexpr s64 min{std::numeric_limits<s32>::min()};
54 constexpr s64 max{std::numeric_limits<s32>::max()}; 54 constexpr s64 max{std::numeric_limits<s32>::max()};
@@ -147,8 +147,8 @@ void LightLimiterVersion1Command::Dump([[maybe_unused]] const ADSP::CommandListP
147} 147}
148 148
149void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) { 149void LightLimiterVersion1Command::Process(const ADSP::CommandListProcessor& processor) {
150 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 150 std::array<std::span<const s32>, MaxChannels> input_buffers{};
151 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 151 std::array<std::span<s32>, MaxChannels> output_buffers{};
152 152
153 for (u32 i = 0; i < parameter.channel_count; i++) { 153 for (u32 i = 0; i < parameter.channel_count; i++) {
154 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 154 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
@@ -190,8 +190,8 @@ void LightLimiterVersion2Command::Dump([[maybe_unused]] const ADSP::CommandListP
190} 190}
191 191
192void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) { 192void LightLimiterVersion2Command::Process(const ADSP::CommandListProcessor& processor) {
193 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 193 std::array<std::span<const s32>, MaxChannels> input_buffers{};
194 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 194 std::array<std::span<s32>, MaxChannels> output_buffers{};
195 195
196 for (u32 i = 0; i < parameter.channel_count; i++) { 196 for (u32 i = 0; i < parameter.channel_count; i++) {
197 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 197 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp
index 8b9b65214..fc2f15a5e 100644
--- a/src/audio_core/renderer/command/effect/reverb.cpp
+++ b/src/audio_core/renderer/command/effect/reverb.cpp
@@ -250,8 +250,8 @@ static Common::FixedPoint<50, 14> Axfx2AllPassTick(ReverbInfo::ReverbDelayLine&
250 */ 250 */
251template <size_t NumChannels> 251template <size_t NumChannels>
252static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, 252static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
253 std::vector<std::span<const s32>>& inputs, 253 std::span<std::span<const s32>> inputs,
254 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 254 std::span<std::span<s32>> outputs, const u32 sample_count) {
255 static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{ 255 static constexpr std::array<u8, ReverbInfo::MaxDelayTaps> OutTapIndexes1Ch{
256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 256 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 }; 257 };
@@ -369,8 +369,8 @@ static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, Rever
369 * @param sample_count - Number of samples to process. 369 * @param sample_count - Number of samples to process.
370 */ 370 */
371static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state, 371static void ApplyReverbEffect(const ReverbInfo::ParameterVersion2& params, ReverbInfo::State& state,
372 const bool enabled, std::vector<std::span<const s32>>& inputs, 372 const bool enabled, std::span<std::span<const s32>> inputs,
373 std::vector<std::span<s32>>& outputs, const u32 sample_count) { 373 std::span<std::span<s32>> outputs, const u32 sample_count) {
374 if (enabled) { 374 if (enabled) {
375 switch (params.channel_count) { 375 switch (params.channel_count) {
376 case 0: 376 case 0:
@@ -412,8 +412,8 @@ void ReverbCommand::Dump([[maybe_unused]] const ADSP::CommandListProcessor& proc
412} 412}
413 413
414void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) { 414void ReverbCommand::Process(const ADSP::CommandListProcessor& processor) {
415 std::vector<std::span<const s32>> input_buffers(parameter.channel_count); 415 std::array<std::span<const s32>, MaxChannels> input_buffers{};
416 std::vector<std::span<s32>> output_buffers(parameter.channel_count); 416 std::array<std::span<s32>, MaxChannels> output_buffers{};
417 417
418 for (u32 i = 0; i < parameter.channel_count; i++) { 418 for (u32 i = 0; i < parameter.channel_count; i++) {
419 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count, 419 input_buffers[i] = processor.mix_buffers.subspan(inputs[i] * processor.sample_count,
diff --git a/src/audio_core/renderer/command/sink/circular_buffer.cpp b/src/audio_core/renderer/command/sink/circular_buffer.cpp
index ded5afc94..e2ce59792 100644
--- a/src/audio_core/renderer/command/sink/circular_buffer.cpp
+++ b/src/audio_core/renderer/command/sink/circular_buffer.cpp
@@ -24,7 +24,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
24 constexpr s32 min{std::numeric_limits<s16>::min()}; 24 constexpr s32 min{std::numeric_limits<s16>::min()};
25 constexpr s32 max{std::numeric_limits<s16>::max()}; 25 constexpr s32 max{std::numeric_limits<s16>::max()};
26 26
27 std::vector<s16> output(processor.sample_count); 27 std::array<s16, TargetSampleCount * MaxChannels> output{};
28 for (u32 channel = 0; channel < input_count; channel++) { 28 for (u32 channel = 0; channel < input_count; channel++) {
29 auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count, 29 auto input{processor.mix_buffers.subspan(inputs[channel] * processor.sample_count,
30 processor.sample_count)}; 30 processor.sample_count)};
@@ -33,7 +33,7 @@ void CircularBufferSinkCommand::Process(const ADSP::CommandListProcessor& proces
33 } 33 }
34 34
35 processor.memory->WriteBlockUnsafe(address + pos, output.data(), 35 processor.memory->WriteBlockUnsafe(address + pos, output.data(),
36 output.size() * sizeof(s16)); 36 processor.sample_count * sizeof(s16));
37 pos += static_cast<u32>(processor.sample_count * sizeof(s16)); 37 pos += static_cast<u32>(processor.sample_count * sizeof(s16));
38 if (pos >= size) { 38 if (pos >= size) {
39 pos = 0; 39 pos = 0;
diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp
index e88372a75..5f74dd7ad 100644
--- a/src/audio_core/renderer/command/sink/device.cpp
+++ b/src/audio_core/renderer/command/sink/device.cpp
@@ -33,8 +33,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
33 .consumed{false}, 33 .consumed{false},
34 }; 34 };
35 35
36 std::vector<s16> samples(out_buffer.frames * input_count); 36 std::array<s16, TargetSampleCount * MaxChannels> samples{};
37
38 for (u32 channel = 0; channel < input_count; channel++) { 37 for (u32 channel = 0; channel < input_count; channel++) {
39 const auto offset{inputs[channel] * out_buffer.frames}; 38 const auto offset{inputs[channel] * out_buffer.frames};
40 39
@@ -45,7 +44,7 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
45 } 44 }
46 45
47 out_buffer.tag = reinterpret_cast<u64>(samples.data()); 46 out_buffer.tag = reinterpret_cast<u64>(samples.data());
48 stream->AppendBuffer(out_buffer, samples); 47 stream->AppendBuffer(out_buffer, {samples.data(), out_buffer.frames * input_count});
49 48
50 if (stream->IsPaused()) { 49 if (stream->IsPaused()) {
51 stream->Start(); 50 stream->Start();
diff --git a/src/audio_core/renderer/mix/mix_context.cpp b/src/audio_core/renderer/mix/mix_context.cpp
index 35b748ede..3a18ae7c2 100644
--- a/src/audio_core/renderer/mix/mix_context.cpp
+++ b/src/audio_core/renderer/mix/mix_context.cpp
@@ -125,10 +125,10 @@ bool MixContext::TSortInfo(const SplitterContext& splitter_context) {
125 return false; 125 return false;
126 } 126 }
127 127
128 std::vector<s32> sorted_results{node_states.GetSortedResuls()}; 128 auto sorted_results{node_states.GetSortedResuls()};
129 const auto result_size{std::min(count, static_cast<s32>(sorted_results.size()))}; 129 const auto result_size{std::min(count, static_cast<s32>(sorted_results.second))};
130 for (s32 i = 0; i < result_size; i++) { 130 for (s32 i = 0; i < result_size; i++) {
131 sorted_mix_infos[i] = &mix_infos[sorted_results[i]]; 131 sorted_mix_infos[i] = &mix_infos[sorted_results.first[i]];
132 } 132 }
133 133
134 CalcMixBufferOffset(); 134 CalcMixBufferOffset();
diff --git a/src/audio_core/renderer/nodes/node_states.cpp b/src/audio_core/renderer/nodes/node_states.cpp
index 1821a51e6..b7a44a54c 100644
--- a/src/audio_core/renderer/nodes/node_states.cpp
+++ b/src/audio_core/renderer/nodes/node_states.cpp
@@ -134,8 +134,8 @@ u32 NodeStates::GetNodeCount() const {
134 return node_count; 134 return node_count;
135} 135}
136 136
137std::vector<s32> NodeStates::GetSortedResuls() const { 137std::pair<std::span<u32>::reverse_iterator, size_t> NodeStates::GetSortedResuls() const {
138 return {results.rbegin(), results.rbegin() + result_pos}; 138 return {results.rbegin(), result_pos};
139} 139}
140 140
141} // namespace AudioCore::AudioRenderer 141} // namespace AudioCore::AudioRenderer
diff --git a/src/audio_core/renderer/nodes/node_states.h b/src/audio_core/renderer/nodes/node_states.h
index 94b1d1254..e768cd4b5 100644
--- a/src/audio_core/renderer/nodes/node_states.h
+++ b/src/audio_core/renderer/nodes/node_states.h
@@ -175,7 +175,7 @@ public:
175 * 175 *
176 * @return Vector of nodes in reverse order. 176 * @return Vector of nodes in reverse order.
177 */ 177 */
178 std::vector<s32> GetSortedResuls() const; 178 std::pair<std::span<u32>::reverse_iterator, size_t> GetSortedResuls() const;
179 179
180private: 180private:
181 /// Number of nodes in the graph 181 /// Number of nodes in the graph
diff --git a/src/audio_core/renderer/system.cpp b/src/audio_core/renderer/system.cpp
index 53b258c4f..a23627472 100644
--- a/src/audio_core/renderer/system.cpp
+++ b/src/audio_core/renderer/system.cpp
@@ -444,6 +444,7 @@ Result System::Update(std::span<const u8> input, std::span<u8> performance, std:
444 std::scoped_lock l{lock}; 444 std::scoped_lock l{lock};
445 445
446 const auto start_time{core.CoreTiming().GetClockTicks()}; 446 const auto start_time{core.CoreTiming().GetClockTicks()};
447 std::memset(output.data(), 0, output.size());
447 448
448 InfoUpdater info_updater(input, output, process_handle, behavior); 449 InfoUpdater info_updater(input, output, process_handle, behavior);
449 450
diff --git a/src/audio_core/sink/null_sink.h b/src/audio_core/sink/null_sink.h
index 1215d3cd2..b6b43c93e 100644
--- a/src/audio_core/sink/null_sink.h
+++ b/src/audio_core/sink/null_sink.h
@@ -20,7 +20,7 @@ public:
20 explicit NullSinkStreamImpl(Core::System& system_, StreamType type_) 20 explicit NullSinkStreamImpl(Core::System& system_, StreamType type_)
21 : SinkStream{system_, type_} {} 21 : SinkStream{system_, type_} {}
22 ~NullSinkStreamImpl() override {} 22 ~NullSinkStreamImpl() override {}
23 void AppendBuffer(SinkBuffer&, std::vector<s16>&) override {} 23 void AppendBuffer(SinkBuffer&, std::span<s16>) override {}
24 std::vector<s16> ReleaseBuffer(u64) override { 24 std::vector<s16> ReleaseBuffer(u64) override {
25 return {}; 25 return {};
26 } 26 }
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp
index 9a718a9cc..404dcd0e9 100644
--- a/src/audio_core/sink/sink_stream.cpp
+++ b/src/audio_core/sink/sink_stream.cpp
@@ -18,7 +18,7 @@
18 18
19namespace AudioCore::Sink { 19namespace AudioCore::Sink {
20 20
21void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) { 21void SinkStream::AppendBuffer(SinkBuffer& buffer, std::span<s16> samples) {
22 if (type == StreamType::In) { 22 if (type == StreamType::In) {
23 queue.enqueue(buffer); 23 queue.enqueue(buffer);
24 queued_buffers++; 24 queued_buffers++;
@@ -66,15 +66,16 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
66 static_cast<s16>(std::clamp(right_sample, min, max)); 66 static_cast<s16>(std::clamp(right_sample, min, max));
67 } 67 }
68 68
69 samples.resize(samples.size() / system_channels * device_channels); 69 samples = samples.subspan(0, samples.size() / system_channels * device_channels);
70 70
71 } else if (system_channels == 2 && device_channels == 6) { 71 } else if (system_channels == 2 && device_channels == 6) {
72 // We need moar samples! Not all games will provide 6 channel audio. 72 // We need moar samples! Not all games will provide 6 channel audio.
73 // TODO: Implement some upmixing here. Currently just passthrough, with other 73 // TODO: Implement some upmixing here. Currently just passthrough, with other
74 // channels left as silence. 74 // channels left as silence.
75 std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0); 75 auto new_size = samples.size() / system_channels * device_channels;
76 tmp_samples.resize_destructive(new_size);
76 77
77 for (u32 read_index = 0, write_index = 0; read_index < samples.size(); 78 for (u32 read_index = 0, write_index = 0; read_index < new_size;
78 read_index += system_channels, write_index += device_channels) { 79 read_index += system_channels, write_index += device_channels) {
79 const auto left_sample{static_cast<s16>(std::clamp( 80 const auto left_sample{static_cast<s16>(std::clamp(
80 static_cast<s32>( 81 static_cast<s32>(
@@ -82,7 +83,7 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
82 volume), 83 volume),
83 min, max))}; 84 min, max))};
84 85
85 new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample; 86 tmp_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
86 87
87 const auto right_sample{static_cast<s16>(std::clamp( 88 const auto right_sample{static_cast<s16>(std::clamp(
88 static_cast<s32>( 89 static_cast<s32>(
@@ -90,9 +91,9 @@ void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
90 volume), 91 volume),
91 min, max))}; 92 min, max))};
92 93
93 new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample; 94 tmp_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample;
94 } 95 }
95 samples = std::move(new_samples); 96 samples = std::span<s16>(tmp_samples);
96 97
97 } else if (volume != 1.0f) { 98 } else if (volume != 1.0f) {
98 for (u32 i = 0; i < samples.size(); i++) { 99 for (u32 i = 0; i < samples.size(); i++) {
diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h
index 41cbadc9c..98d72ace1 100644
--- a/src/audio_core/sink/sink_stream.h
+++ b/src/audio_core/sink/sink_stream.h
@@ -16,6 +16,7 @@
16#include "common/polyfill_thread.h" 16#include "common/polyfill_thread.h"
17#include "common/reader_writer_queue.h" 17#include "common/reader_writer_queue.h"
18#include "common/ring_buffer.h" 18#include "common/ring_buffer.h"
19#include "common/scratch_buffer.h"
19#include "common/thread.h" 20#include "common/thread.h"
20 21
21namespace Core { 22namespace Core {
@@ -170,7 +171,7 @@ public:
170 * @param buffer - Audio buffer information to be queued. 171 * @param buffer - Audio buffer information to be queued.
171 * @param samples - The s16 samples to be queue for playback. 172 * @param samples - The s16 samples to be queue for playback.
172 */ 173 */
173 virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples); 174 virtual void AppendBuffer(SinkBuffer& buffer, std::span<s16> samples);
174 175
175 /** 176 /**
176 * Release a buffer. Audio In only, will fill a buffer with recorded samples. 177 * Release a buffer. Audio In only, will fill a buffer with recorded samples.
@@ -255,6 +256,8 @@ private:
255 /// Signalled when ring buffer entries are consumed 256 /// Signalled when ring buffer entries are consumed
256 std::condition_variable_any release_cv; 257 std::condition_variable_any release_cv;
257 std::mutex release_mutex; 258 std::mutex release_mutex;
259 /// Temporary buffer for appending samples when upmixing
260 Common::ScratchBuffer<s16> tmp_samples{};
258}; 261};
259 262
260using SinkStreamPtr = std::unique_ptr<SinkStream>; 263using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/common/ring_buffer.h b/src/common/ring_buffer.h
index 4c328ab44..416680d44 100644
--- a/src/common/ring_buffer.h
+++ b/src/common/ring_buffer.h
@@ -9,6 +9,7 @@
9#include <cstddef> 9#include <cstddef>
10#include <cstring> 10#include <cstring>
11#include <new> 11#include <new>
12#include <span>
12#include <type_traits> 13#include <type_traits>
13#include <vector> 14#include <vector>
14 15
@@ -53,7 +54,7 @@ public:
53 return push_count; 54 return push_count;
54 } 55 }
55 56
56 std::size_t Push(const std::vector<T>& input) { 57 std::size_t Push(const std::span<T> input) {
57 return Push(input.data(), input.size()); 58 return Push(input.data(), input.size());
58 } 59 }
59 60
diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h
index a69a5a7af..6fe907953 100644
--- a/src/common/scratch_buffer.h
+++ b/src/common/scratch_buffer.h
@@ -3,6 +3,9 @@
3 3
4#pragma once 4#pragma once
5 5
6#include <iterator>
7
8#include "common/concepts.h"
6#include "common/make_unique_for_overwrite.h" 9#include "common/make_unique_for_overwrite.h"
7 10
8namespace Common { 11namespace Common {
@@ -16,6 +19,12 @@ namespace Common {
16template <typename T> 19template <typename T>
17class ScratchBuffer { 20class ScratchBuffer {
18public: 21public:
22 using iterator = T*;
23 using const_iterator = const T*;
24 using value_type = T;
25 using element_type = T;
26 using iterator_category = std::contiguous_iterator_tag;
27
19 ScratchBuffer() = default; 28 ScratchBuffer() = default;
20 29
21 explicit ScratchBuffer(size_t initial_capacity) 30 explicit ScratchBuffer(size_t initial_capacity)
diff --git a/src/core/hle/kernel/k_synchronization_object.cpp b/src/core/hle/kernel/k_synchronization_object.cpp
index b7da3eee7..3e5b735b1 100644
--- a/src/core/hle/kernel/k_synchronization_object.cpp
+++ b/src/core/hle/kernel/k_synchronization_object.cpp
@@ -3,6 +3,7 @@
3 3
4#include "common/assert.h" 4#include "common/assert.h"
5#include "common/common_types.h" 5#include "common/common_types.h"
6#include "common/scratch_buffer.h"
6#include "core/hle/kernel/k_scheduler.h" 7#include "core/hle/kernel/k_scheduler.h"
7#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h" 8#include "core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h"
8#include "core/hle/kernel/k_synchronization_object.h" 9#include "core/hle/kernel/k_synchronization_object.h"
@@ -75,7 +76,7 @@ Result KSynchronizationObject::Wait(KernelCore& kernel, s32* out_index,
75 KSynchronizationObject** objects, const s32 num_objects, 76 KSynchronizationObject** objects, const s32 num_objects,
76 s64 timeout) { 77 s64 timeout) {
77 // Allocate space on stack for thread nodes. 78 // Allocate space on stack for thread nodes.
78 std::vector<ThreadListNode> thread_nodes(num_objects); 79 std::array<ThreadListNode, Svc::ArgumentHandleCountMax> thread_nodes;
79 80
80 // Prepare for wait. 81 // Prepare for wait.
81 KThread* thread = GetCurrentThreadPointer(kernel); 82 KThread* thread = GetCurrentThreadPointer(kernel);
diff --git a/src/core/hle/kernel/k_thread.cpp b/src/core/hle/kernel/k_thread.cpp
index 908811e2c..adb6ec581 100644
--- a/src/core/hle/kernel/k_thread.cpp
+++ b/src/core/hle/kernel/k_thread.cpp
@@ -909,7 +909,7 @@ Result KThread::SetActivity(Svc::ThreadActivity activity) {
909 R_SUCCEED(); 909 R_SUCCEED();
910} 910}
911 911
912Result KThread::GetThreadContext3(std::vector<u8>& out) { 912Result KThread::GetThreadContext3(Common::ScratchBuffer<u8>& out) {
913 // Lock ourselves. 913 // Lock ourselves.
914 KScopedLightLock lk{m_activity_pause_lock}; 914 KScopedLightLock lk{m_activity_pause_lock};
915 915
@@ -927,15 +927,13 @@ Result KThread::GetThreadContext3(std::vector<u8>& out) {
927 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 927 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
928 auto context = GetContext64(); 928 auto context = GetContext64();
929 context.pstate &= 0xFF0FFE20; 929 context.pstate &= 0xFF0FFE20;
930 930 out.resize_destructive(sizeof(context));
931 out.resize(sizeof(context));
932 std::memcpy(out.data(), std::addressof(context), sizeof(context)); 931 std::memcpy(out.data(), std::addressof(context), sizeof(context));
933 } else { 932 } else {
934 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits. 933 // Mask away mode bits, interrupt bits, IL bit, and other reserved bits.
935 auto context = GetContext32(); 934 auto context = GetContext32();
936 context.cpsr &= 0xFF0FFE20; 935 context.cpsr &= 0xFF0FFE20;
937 936 out.resize_destructive(sizeof(context));
938 out.resize(sizeof(context));
939 std::memcpy(out.data(), std::addressof(context), sizeof(context)); 937 std::memcpy(out.data(), std::addressof(context), sizeof(context));
940 } 938 }
941 } 939 }
diff --git a/src/core/hle/kernel/k_thread.h b/src/core/hle/kernel/k_thread.h
index 37fe5db77..dd662b3f8 100644
--- a/src/core/hle/kernel/k_thread.h
+++ b/src/core/hle/kernel/k_thread.h
@@ -15,6 +15,7 @@
15#include "common/intrusive_list.h" 15#include "common/intrusive_list.h"
16 16
17#include "common/intrusive_red_black_tree.h" 17#include "common/intrusive_red_black_tree.h"
18#include "common/scratch_buffer.h"
18#include "common/spin_lock.h" 19#include "common/spin_lock.h"
19#include "core/arm/arm_interface.h" 20#include "core/arm/arm_interface.h"
20#include "core/hle/kernel/k_affinity_mask.h" 21#include "core/hle/kernel/k_affinity_mask.h"
@@ -567,7 +568,7 @@ public:
567 568
568 void RemoveWaiter(KThread* thread); 569 void RemoveWaiter(KThread* thread);
569 570
570 Result GetThreadContext3(std::vector<u8>& out); 571 Result GetThreadContext3(Common::ScratchBuffer<u8>& out);
571 572
572 KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) { 573 KThread* RemoveUserWaiterByKey(bool* out_has_waiters, KProcessAddress key) {
573 return this->RemoveWaiterByKey(out_has_waiters, key, false); 574 return this->RemoveWaiterByKey(out_has_waiters, key, false);
diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp
index ea03068aa..60247df2e 100644
--- a/src/core/hle/kernel/svc/svc_ipc.cpp
+++ b/src/core/hle/kernel/svc/svc_ipc.cpp
@@ -2,6 +2,7 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/scope_exit.h" 4#include "common/scope_exit.h"
5#include "common/scratch_buffer.h"
5#include "core/core.h" 6#include "core/core.h"
6#include "core/hle/kernel/k_client_session.h" 7#include "core/hle/kernel/k_client_session.h"
7#include "core/hle/kernel/k_process.h" 8#include "core/hle/kernel/k_process.h"
@@ -45,11 +46,11 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
45 handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)), 46 handles_addr, static_cast<u64>(sizeof(Handle) * num_handles)),
46 ResultInvalidPointer); 47 ResultInvalidPointer);
47 48
48 std::vector<Handle> handles(num_handles); 49 std::array<Handle, Svc::ArgumentHandleCountMax> handles;
49 GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles); 50 GetCurrentMemory(kernel).ReadBlock(handles_addr, handles.data(), sizeof(Handle) * num_handles);
50 51
51 // Convert handle list to object table. 52 // Convert handle list to object table.
52 std::vector<KSynchronizationObject*> objs(num_handles); 53 std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
53 R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(), 54 R_UNLESS(handle_table.GetMultipleObjects<KSynchronizationObject>(objs.data(), handles.data(),
54 num_handles), 55 num_handles),
55 ResultInvalidHandle); 56 ResultInvalidHandle);
@@ -80,7 +81,7 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad
80 // Wait for an object. 81 // Wait for an object.
81 s32 index; 82 s32 index;
82 Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(), 83 Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(),
83 static_cast<s32>(objs.size()), timeout_ns); 84 num_handles, timeout_ns);
84 if (result == ResultTimedOut) { 85 if (result == ResultTimedOut) {
85 R_RETURN(result); 86 R_RETURN(result);
86 } 87 }
diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp
index 04d65f0bd..53df5bcd8 100644
--- a/src/core/hle/kernel/svc/svc_synchronization.cpp
+++ b/src/core/hle/kernel/svc/svc_synchronization.cpp
@@ -2,6 +2,7 @@
2// SPDX-License-Identifier: GPL-2.0-or-later 2// SPDX-License-Identifier: GPL-2.0-or-later
3 3
4#include "common/scope_exit.h" 4#include "common/scope_exit.h"
5#include "common/scratch_buffer.h"
5#include "core/core.h" 6#include "core/core.h"
6#include "core/hle/kernel/k_process.h" 7#include "core/hle/kernel/k_process.h"
7#include "core/hle/kernel/k_readable_event.h" 8#include "core/hle/kernel/k_readable_event.h"
@@ -54,7 +55,7 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
54 // Get the synchronization context. 55 // Get the synchronization context.
55 auto& kernel = system.Kernel(); 56 auto& kernel = system.Kernel();
56 auto& handle_table = GetCurrentProcess(kernel).GetHandleTable(); 57 auto& handle_table = GetCurrentProcess(kernel).GetHandleTable();
57 std::vector<KSynchronizationObject*> objs(num_handles); 58 std::array<KSynchronizationObject*, Svc::ArgumentHandleCountMax> objs;
58 59
59 // Copy user handles. 60 // Copy user handles.
60 if (num_handles > 0) { 61 if (num_handles > 0) {
@@ -72,8 +73,8 @@ static Result WaitSynchronization(Core::System& system, int32_t* out_index, cons
72 }); 73 });
73 74
74 // Wait on the objects. 75 // Wait on the objects.
75 Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), 76 Result res =
76 static_cast<s32>(objs.size()), timeout_ns); 77 KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns);
77 78
78 R_SUCCEED_IF(res == ResultSessionClosed); 79 R_SUCCEED_IF(res == ResultSessionClosed);
79 R_RETURN(res); 80 R_RETURN(res);
@@ -87,8 +88,7 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha
87 88
88 // Ensure number of handles is valid. 89 // Ensure number of handles is valid.
89 R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange); 90 R_UNLESS(0 <= num_handles && num_handles <= Svc::ArgumentHandleCountMax, ResultOutOfRange);
90 91 std::array<Handle, Svc::ArgumentHandleCountMax> handles;
91 std::vector<Handle> handles(num_handles);
92 if (num_handles > 0) { 92 if (num_handles > 0) {
93 GetCurrentMemory(system.Kernel()) 93 GetCurrentMemory(system.Kernel())
94 .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle)); 94 .ReadBlock(user_handles, handles.data(), num_handles * sizeof(Handle));
diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp
index 37b54079c..36b94e6bf 100644
--- a/src/core/hle/kernel/svc/svc_thread.cpp
+++ b/src/core/hle/kernel/svc/svc_thread.cpp
@@ -174,7 +174,7 @@ Result GetThreadContext3(Core::System& system, u64 out_context, Handle thread_ha
174 } 174 }
175 175
176 // Get the thread context. 176 // Get the thread context.
177 std::vector<u8> context; 177 static thread_local Common::ScratchBuffer<u8> context;
178 R_TRY(thread->GetThreadContext3(context)); 178 R_TRY(thread->GetThreadContext3(context));
179 179
180 // Copy the thread context to user space. 180 // Copy the thread context to user space.
diff --git a/src/core/hle/service/audio/audin_u.cpp b/src/core/hle/service/audio/audin_u.cpp
index f0640c64f..c8d574993 100644
--- a/src/core/hle/service/audio/audin_u.cpp
+++ b/src/core/hle/service/audio/audin_u.cpp
@@ -5,6 +5,7 @@
5#include "audio_core/renderer/audio_device.h" 5#include "audio_core/renderer/audio_device.h"
6#include "common/common_funcs.h" 6#include "common/common_funcs.h"
7#include "common/logging/log.h" 7#include "common/logging/log.h"
8#include "common/settings.h"
8#include "common/string_util.h" 9#include "common/string_util.h"
9#include "core/core.h" 10#include "core/core.h"
10#include "core/hle/kernel/k_event.h" 11#include "core/hle/kernel/k_event.h"
@@ -123,19 +124,13 @@ private:
123 124
124 void GetReleasedAudioInBuffer(HLERequestContext& ctx) { 125 void GetReleasedAudioInBuffer(HLERequestContext& ctx) {
125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); 126 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
126 std::vector<u64> released_buffers(write_buffer_size); 127 tmp_buffer.resize_destructive(write_buffer_size);
128 tmp_buffer[0] = 0;
127 129
128 const auto count = impl->GetReleasedBuffers(released_buffers); 130 const auto count = impl->GetReleasedBuffers(tmp_buffer);
129 131
130 [[maybe_unused]] std::string tags{}; 132 ctx.WriteBuffer(tmp_buffer);
131 for (u32 i = 0; i < count; i++) {
132 tags += fmt::format("{:08X}, ", released_buffers[i]);
133 }
134 [[maybe_unused]] auto sessionid{impl->GetSystem().GetSessionId()};
135 LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
136 tags);
137 133
138 ctx.WriteBuffer(released_buffers);
139 IPC::ResponseBuilder rb{ctx, 3}; 134 IPC::ResponseBuilder rb{ctx, 3};
140 rb.Push(ResultSuccess); 135 rb.Push(ResultSuccess);
141 rb.Push(count); 136 rb.Push(count);
@@ -200,6 +195,7 @@ private:
200 KernelHelpers::ServiceContext service_context; 195 KernelHelpers::ServiceContext service_context;
201 Kernel::KEvent* event; 196 Kernel::KEvent* event;
202 std::shared_ptr<AudioCore::AudioIn::In> impl; 197 std::shared_ptr<AudioCore::AudioIn::In> impl;
198 Common::ScratchBuffer<u64> tmp_buffer;
203}; 199};
204 200
205AudInU::AudInU(Core::System& system_) 201AudInU::AudInU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audout_u.cpp b/src/core/hle/service/audio/audout_u.cpp
index 3e62fa4fc..032c8c11f 100644
--- a/src/core/hle/service/audio/audout_u.cpp
+++ b/src/core/hle/service/audio/audout_u.cpp
@@ -123,19 +123,13 @@ private:
123 123
124 void GetReleasedAudioOutBuffers(HLERequestContext& ctx) { 124 void GetReleasedAudioOutBuffers(HLERequestContext& ctx) {
125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>(); 125 const auto write_buffer_size = ctx.GetWriteBufferNumElements<u64>();
126 std::vector<u64> released_buffers(write_buffer_size); 126 tmp_buffer.resize_destructive(write_buffer_size);
127 tmp_buffer[0] = 0;
127 128
128 const auto count = impl->GetReleasedBuffers(released_buffers); 129 const auto count = impl->GetReleasedBuffers(tmp_buffer);
129 130
130 [[maybe_unused]] std::string tags{}; 131 ctx.WriteBuffer(tmp_buffer);
131 for (u32 i = 0; i < count; i++) {
132 tags += fmt::format("{:08X}, ", released_buffers[i]);
133 }
134 [[maybe_unused]] const auto sessionid{impl->GetSystem().GetSessionId()};
135 LOG_TRACE(Service_Audio, "called. Session {} released {} buffers: {}", sessionid, count,
136 tags);
137 132
138 ctx.WriteBuffer(released_buffers);
139 IPC::ResponseBuilder rb{ctx, 3}; 133 IPC::ResponseBuilder rb{ctx, 3};
140 rb.Push(ResultSuccess); 134 rb.Push(ResultSuccess);
141 rb.Push(count); 135 rb.Push(count);
@@ -211,6 +205,7 @@ private:
211 KernelHelpers::ServiceContext service_context; 205 KernelHelpers::ServiceContext service_context;
212 Kernel::KEvent* event; 206 Kernel::KEvent* event;
213 std::shared_ptr<AudioCore::AudioOut::Out> impl; 207 std::shared_ptr<AudioCore::AudioOut::Out> impl;
208 Common::ScratchBuffer<u64> tmp_buffer;
214}; 209};
215 210
216AudOutU::AudOutU(Core::System& system_) 211AudOutU::AudOutU(Core::System& system_)
diff --git a/src/core/hle/service/audio/audren_u.cpp b/src/core/hle/service/audio/audren_u.cpp
index 7086d4750..12845c23a 100644
--- a/src/core/hle/service/audio/audren_u.cpp
+++ b/src/core/hle/service/audio/audren_u.cpp
@@ -116,28 +116,26 @@ private:
116 // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for 116 // These buffers are written manually to avoid an issue with WriteBuffer throwing errors for
117 // checking size 0. Performance size is 0 for most games. 117 // checking size 0. Performance size is 0 for most games.
118 118
119 std::vector<u8> output{};
120 std::vector<u8> performance{};
121 auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0}; 119 auto is_buffer_b{ctx.BufferDescriptorB()[0].Size() != 0};
122 if (is_buffer_b) { 120 if (is_buffer_b) {
123 const auto buffersB{ctx.BufferDescriptorB()}; 121 const auto buffersB{ctx.BufferDescriptorB()};
124 output.resize(buffersB[0].Size(), 0); 122 tmp_output.resize_destructive(buffersB[0].Size());
125 performance.resize(buffersB[1].Size(), 0); 123 tmp_performance.resize_destructive(buffersB[1].Size());
126 } else { 124 } else {
127 const auto buffersC{ctx.BufferDescriptorC()}; 125 const auto buffersC{ctx.BufferDescriptorC()};
128 output.resize(buffersC[0].Size(), 0); 126 tmp_output.resize_destructive(buffersC[0].Size());
129 performance.resize(buffersC[1].Size(), 0); 127 tmp_performance.resize_destructive(buffersC[1].Size());
130 } 128 }
131 129
132 auto result = impl->RequestUpdate(input, performance, output); 130 auto result = impl->RequestUpdate(input, tmp_performance, tmp_output);
133 131
134 if (result.IsSuccess()) { 132 if (result.IsSuccess()) {
135 if (is_buffer_b) { 133 if (is_buffer_b) {
136 ctx.WriteBufferB(output.data(), output.size(), 0); 134 ctx.WriteBufferB(tmp_output.data(), tmp_output.size(), 0);
137 ctx.WriteBufferB(performance.data(), performance.size(), 1); 135 ctx.WriteBufferB(tmp_performance.data(), tmp_performance.size(), 1);
138 } else { 136 } else {
139 ctx.WriteBufferC(output.data(), output.size(), 0); 137 ctx.WriteBufferC(tmp_output.data(), tmp_output.size(), 0);
140 ctx.WriteBufferC(performance.data(), performance.size(), 1); 138 ctx.WriteBufferC(tmp_performance.data(), tmp_performance.size(), 1);
141 } 139 }
142 } else { 140 } else {
143 LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description); 141 LOG_ERROR(Service_Audio, "RequestUpdate failed error 0x{:02X}!", result.description);
@@ -235,6 +233,8 @@ private:
235 Kernel::KEvent* rendered_event; 233 Kernel::KEvent* rendered_event;
236 Manager& manager; 234 Manager& manager;
237 std::unique_ptr<Renderer> impl; 235 std::unique_ptr<Renderer> impl;
236 Common::ScratchBuffer<u8> tmp_output;
237 Common::ScratchBuffer<u8> tmp_performance;
238}; 238};
239 239
240class IAudioDevice final : public ServiceFramework<IAudioDevice> { 240class IAudioDevice final : public ServiceFramework<IAudioDevice> {
diff --git a/src/core/hle/service/audio/audren_u.h b/src/core/hle/service/audio/audren_u.h
index 24ce37e87..d8e9c8719 100644
--- a/src/core/hle/service/audio/audren_u.h
+++ b/src/core/hle/service/audio/audren_u.h
@@ -4,6 +4,7 @@
4#pragma once 4#pragma once
5 5
6#include "audio_core/audio_render_manager.h" 6#include "audio_core/audio_render_manager.h"
7#include "common/scratch_buffer.h"
7#include "core/hle/service/kernel_helpers.h" 8#include "core/hle/service/kernel_helpers.h"
8#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
9 10
diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp
index 451ac224a..c835f6cb7 100644
--- a/src/core/hle/service/audio/hwopus.cpp
+++ b/src/core/hle/service/audio/hwopus.cpp
@@ -68,13 +68,13 @@ private:
68 ExtraBehavior extra_behavior) { 68 ExtraBehavior extra_behavior) {
69 u32 consumed = 0; 69 u32 consumed = 0;
70 u32 sample_count = 0; 70 u32 sample_count = 0;
71 std::vector<opus_int16> samples(ctx.GetWriteBufferNumElements<opus_int16>()); 71 tmp_samples.resize_destructive(ctx.GetWriteBufferNumElements<opus_int16>());
72 72
73 if (extra_behavior == ExtraBehavior::ResetContext) { 73 if (extra_behavior == ExtraBehavior::ResetContext) {
74 ResetDecoderContext(); 74 ResetDecoderContext();
75 } 75 }
76 76
77 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), samples, performance)) { 77 if (!DecodeOpusData(consumed, sample_count, ctx.ReadBuffer(), tmp_samples, performance)) {
78 LOG_ERROR(Audio, "Failed to decode opus data"); 78 LOG_ERROR(Audio, "Failed to decode opus data");
79 IPC::ResponseBuilder rb{ctx, 2}; 79 IPC::ResponseBuilder rb{ctx, 2};
80 // TODO(ogniK): Use correct error code 80 // TODO(ogniK): Use correct error code
@@ -90,11 +90,11 @@ private:
90 if (performance) { 90 if (performance) {
91 rb.Push<u64>(*performance); 91 rb.Push<u64>(*performance);
92 } 92 }
93 ctx.WriteBuffer(samples); 93 ctx.WriteBuffer(tmp_samples);
94 } 94 }
95 95
96 bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input, 96 bool DecodeOpusData(u32& consumed, u32& sample_count, std::span<const u8> input,
97 std::vector<opus_int16>& output, u64* out_performance_time) const { 97 std::span<opus_int16> output, u64* out_performance_time) const {
98 const auto start_time = std::chrono::steady_clock::now(); 98 const auto start_time = std::chrono::steady_clock::now();
99 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); 99 const std::size_t raw_output_sz = output.size() * sizeof(opus_int16);
100 if (sizeof(OpusPacketHeader) > input.size()) { 100 if (sizeof(OpusPacketHeader) > input.size()) {
@@ -154,6 +154,7 @@ private:
154 OpusDecoderPtr decoder; 154 OpusDecoderPtr decoder;
155 u32 sample_rate; 155 u32 sample_rate;
156 u32 channel_count; 156 u32 channel_count;
157 Common::ScratchBuffer<opus_int16> tmp_samples;
157}; 158};
158 159
159class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> { 160class IHardwareOpusDecoderManager final : public ServiceFramework<IHardwareOpusDecoderManager> {
diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h
index ab1f30f9e..a04538d5d 100644
--- a/src/core/hle/service/nvdrv/devices/nvdevice.h
+++ b/src/core/hle/service/nvdrv/devices/nvdevice.h
@@ -34,7 +34,7 @@ public:
34 * @returns The result code of the ioctl. 34 * @returns The result code of the ioctl.
35 */ 35 */
36 virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 36 virtual NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
37 std::vector<u8>& output) = 0; 37 std::span<u8> output) = 0;
38 38
39 /** 39 /**
40 * Handles an ioctl2 request. 40 * Handles an ioctl2 request.
@@ -45,7 +45,7 @@ public:
45 * @returns The result code of the ioctl. 45 * @returns The result code of the ioctl.
46 */ 46 */
47 virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 47 virtual NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
48 std::span<const u8> inline_input, std::vector<u8>& output) = 0; 48 std::span<const u8> inline_input, std::span<u8> output) = 0;
49 49
50 /** 50 /**
51 * Handles an ioctl3 request. 51 * Handles an ioctl3 request.
@@ -56,7 +56,7 @@ public:
56 * @returns The result code of the ioctl. 56 * @returns The result code of the ioctl.
57 */ 57 */
58 virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 58 virtual NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
59 std::vector<u8>& output, std::vector<u8>& inline_output) = 0; 59 std::span<u8> output, std::span<u8> inline_output) = 0;
60 60
61 /** 61 /**
62 * Called once a device is opened 62 * Called once a device is opened
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index 0fe242e9d..05a43d8dc 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -18,19 +18,19 @@ nvdisp_disp0::nvdisp_disp0(Core::System& system_, NvCore::Container& core)
18nvdisp_disp0::~nvdisp_disp0() = default; 18nvdisp_disp0::~nvdisp_disp0() = default;
19 19
20NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 20NvResult nvdisp_disp0::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
21 std::vector<u8>& output) { 21 std::span<u8> output) {
22 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 22 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
23 return NvResult::NotImplemented; 23 return NvResult::NotImplemented;
24} 24}
25 25
26NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 26NvResult nvdisp_disp0::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
27 std::span<const u8> inline_input, std::vector<u8>& output) { 27 std::span<const u8> inline_input, std::span<u8> output) {
28 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 28 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
29 return NvResult::NotImplemented; 29 return NvResult::NotImplemented;
30} 30}
31 31
32NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 32NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
33 std::vector<u8>& output, std::vector<u8>& inline_output) { 33 std::span<u8> output, std::span<u8> inline_output) {
34 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 34 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
35 return NvResult::NotImplemented; 35 return NvResult::NotImplemented;
36} 36}
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
index bcd0e3ed5..daee05fe8 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h
@@ -26,11 +26,11 @@ public:
26 ~nvdisp_disp0() override; 26 ~nvdisp_disp0() override;
27 27
28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
29 std::vector<u8>& output) override; 29 std::span<u8> output) override;
30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
31 std::span<const u8> inline_input, std::vector<u8>& output) override; 31 std::span<const u8> inline_input, std::span<u8> output) override;
32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
33 std::vector<u8>& inline_output) override; 33 std::span<u8> inline_output) override;
34 34
35 void OnOpen(DeviceFD fd) override; 35 void OnOpen(DeviceFD fd) override;
36 void OnClose(DeviceFD fd) override; 36 void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
index 681bd0867..07e570a9f 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -28,7 +28,7 @@ nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Con
28nvhost_as_gpu::~nvhost_as_gpu() = default; 28nvhost_as_gpu::~nvhost_as_gpu() = default;
29 29
30NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 30NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
31 std::vector<u8>& output) { 31 std::span<u8> output) {
32 switch (command.group) { 32 switch (command.group) {
33 case 'A': 33 case 'A':
34 switch (command.cmd) { 34 switch (command.cmd) {
@@ -61,13 +61,13 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> i
61} 61}
62 62
63NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 63NvResult nvhost_as_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
64 std::span<const u8> inline_input, std::vector<u8>& output) { 64 std::span<const u8> inline_input, std::span<u8> output) {
65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
66 return NvResult::NotImplemented; 66 return NvResult::NotImplemented;
67} 67}
68 68
69NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 69NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
70 std::vector<u8>& output, std::vector<u8>& inline_output) { 70 std::span<u8> output, std::span<u8> inline_output) {
71 switch (command.group) { 71 switch (command.group) {
72 case 'A': 72 case 'A':
73 switch (command.cmd) { 73 switch (command.cmd) {
@@ -87,7 +87,7 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> i
87void nvhost_as_gpu::OnOpen(DeviceFD fd) {} 87void nvhost_as_gpu::OnOpen(DeviceFD fd) {}
88void nvhost_as_gpu::OnClose(DeviceFD fd) {} 88void nvhost_as_gpu::OnClose(DeviceFD fd) {}
89 89
90NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& output) { 90NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::span<u8> output) {
91 IoctlAllocAsEx params{}; 91 IoctlAllocAsEx params{};
92 std::memcpy(&params, input.data(), input.size()); 92 std::memcpy(&params, input.data(), input.size());
93 93
@@ -141,7 +141,7 @@ NvResult nvhost_as_gpu::AllocAsEx(std::span<const u8> input, std::vector<u8>& ou
141 return NvResult::Success; 141 return NvResult::Success;
142} 142}
143 143
144NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::vector<u8>& output) { 144NvResult nvhost_as_gpu::AllocateSpace(std::span<const u8> input, std::span<u8> output) {
145 IoctlAllocSpace params{}; 145 IoctlAllocSpace params{};
146 std::memcpy(&params, input.data(), input.size()); 146 std::memcpy(&params, input.data(), input.size());
147 147
@@ -220,7 +220,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
220 mapping_map.erase(offset); 220 mapping_map.erase(offset);
221} 221}
222 222
223NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& output) { 223NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::span<u8> output) {
224 IoctlFreeSpace params{}; 224 IoctlFreeSpace params{};
225 std::memcpy(&params, input.data(), input.size()); 225 std::memcpy(&params, input.data(), input.size());
226 226
@@ -266,15 +266,14 @@ NvResult nvhost_as_gpu::FreeSpace(std::span<const u8> input, std::vector<u8>& ou
266 return NvResult::Success; 266 return NvResult::Success;
267} 267}
268 268
269NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output) { 269NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::span<u8> output) {
270 const auto num_entries = input.size() / sizeof(IoctlRemapEntry); 270 const auto num_entries = input.size() / sizeof(IoctlRemapEntry);
271 271
272 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries); 272 LOG_DEBUG(Service_NVDRV, "called, num_entries=0x{:X}", num_entries);
273 273
274 std::vector<IoctlRemapEntry> entries(num_entries);
275 std::memcpy(entries.data(), input.data(), input.size());
276
277 std::scoped_lock lock(mutex); 274 std::scoped_lock lock(mutex);
275 entries.resize_destructive(num_entries);
276 std::memcpy(entries.data(), input.data(), input.size());
278 277
279 if (!vm.initialised) { 278 if (!vm.initialised) {
280 return NvResult::BadValue; 279 return NvResult::BadValue;
@@ -320,7 +319,7 @@ NvResult nvhost_as_gpu::Remap(std::span<const u8> input, std::vector<u8>& output
320 return NvResult::Success; 319 return NvResult::Success;
321} 320}
322 321
323NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>& output) { 322NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::span<u8> output) {
324 IoctlMapBufferEx params{}; 323 IoctlMapBufferEx params{};
325 std::memcpy(&params, input.data(), input.size()); 324 std::memcpy(&params, input.data(), input.size());
326 325
@@ -424,7 +423,7 @@ NvResult nvhost_as_gpu::MapBufferEx(std::span<const u8> input, std::vector<u8>&
424 return NvResult::Success; 423 return NvResult::Success;
425} 424}
426 425
427NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { 426NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
428 IoctlUnmapBuffer params{}; 427 IoctlUnmapBuffer params{};
429 std::memcpy(&params, input.data(), input.size()); 428 std::memcpy(&params, input.data(), input.size());
430 429
@@ -463,7 +462,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(std::span<const u8> input, std::vector<u8>&
463 return NvResult::Success; 462 return NvResult::Success;
464} 463}
465 464
466NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::vector<u8>& output) { 465NvResult nvhost_as_gpu::BindChannel(std::span<const u8> input, std::span<u8> output) {
467 IoctlBindChannel params{}; 466 IoctlBindChannel params{};
468 std::memcpy(&params, input.data(), input.size()); 467 std::memcpy(&params, input.data(), input.size());
469 LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd); 468 LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
@@ -492,7 +491,7 @@ void nvhost_as_gpu::GetVARegionsImpl(IoctlGetVaRegions& params) {
492 }; 491 };
493} 492}
494 493
495NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output) { 494NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output) {
496 IoctlGetVaRegions params{}; 495 IoctlGetVaRegions params{};
497 std::memcpy(&params, input.data(), input.size()); 496 std::memcpy(&params, input.data(), input.size());
498 497
@@ -511,8 +510,8 @@ NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>&
511 return NvResult::Success; 510 return NvResult::Success;
512} 511}
513 512
514NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::vector<u8>& output, 513NvResult nvhost_as_gpu::GetVARegions(std::span<const u8> input, std::span<u8> output,
515 std::vector<u8>& inline_output) { 514 std::span<u8> inline_output) {
516 IoctlGetVaRegions params{}; 515 IoctlGetVaRegions params{};
517 std::memcpy(&params, input.data(), input.size()); 516 std::memcpy(&params, input.data(), input.size());
518 517
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
index 1aba8d579..2af3e1260 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -15,6 +15,7 @@
15#include "common/address_space.h" 15#include "common/address_space.h"
16#include "common/common_funcs.h" 16#include "common/common_funcs.h"
17#include "common/common_types.h" 17#include "common/common_types.h"
18#include "common/scratch_buffer.h"
18#include "common/swap.h" 19#include "common/swap.h"
19#include "core/hle/service/nvdrv/core/nvmap.h" 20#include "core/hle/service/nvdrv/core/nvmap.h"
20#include "core/hle/service/nvdrv/devices/nvdevice.h" 21#include "core/hle/service/nvdrv/devices/nvdevice.h"
@@ -48,11 +49,11 @@ public:
48 ~nvhost_as_gpu() override; 49 ~nvhost_as_gpu() override;
49 50
50 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 51 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
51 std::vector<u8>& output) override; 52 std::span<u8> output) override;
52 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 53 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
53 std::span<const u8> inline_input, std::vector<u8>& output) override; 54 std::span<const u8> inline_input, std::span<u8> output) override;
54 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 55 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
55 std::vector<u8>& inline_output) override; 56 std::span<u8> inline_output) override;
56 57
57 void OnOpen(DeviceFD fd) override; 58 void OnOpen(DeviceFD fd) override;
58 void OnClose(DeviceFD fd) override; 59 void OnClose(DeviceFD fd) override;
@@ -138,18 +139,18 @@ private:
138 static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2, 139 static_assert(sizeof(IoctlGetVaRegions) == 16 + sizeof(VaRegion) * 2,
139 "IoctlGetVaRegions is incorrect size"); 140 "IoctlGetVaRegions is incorrect size");
140 141
141 NvResult AllocAsEx(std::span<const u8> input, std::vector<u8>& output); 142 NvResult AllocAsEx(std::span<const u8> input, std::span<u8> output);
142 NvResult AllocateSpace(std::span<const u8> input, std::vector<u8>& output); 143 NvResult AllocateSpace(std::span<const u8> input, std::span<u8> output);
143 NvResult Remap(std::span<const u8> input, std::vector<u8>& output); 144 NvResult Remap(std::span<const u8> input, std::span<u8> output);
144 NvResult MapBufferEx(std::span<const u8> input, std::vector<u8>& output); 145 NvResult MapBufferEx(std::span<const u8> input, std::span<u8> output);
145 NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); 146 NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
146 NvResult FreeSpace(std::span<const u8> input, std::vector<u8>& output); 147 NvResult FreeSpace(std::span<const u8> input, std::span<u8> output);
147 NvResult BindChannel(std::span<const u8> input, std::vector<u8>& output); 148 NvResult BindChannel(std::span<const u8> input, std::span<u8> output);
148 149
149 void GetVARegionsImpl(IoctlGetVaRegions& params); 150 void GetVARegionsImpl(IoctlGetVaRegions& params);
150 NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output); 151 NvResult GetVARegions(std::span<const u8> input, std::span<u8> output);
151 NvResult GetVARegions(std::span<const u8> input, std::vector<u8>& output, 152 NvResult GetVARegions(std::span<const u8> input, std::span<u8> output,
152 std::vector<u8>& inline_output); 153 std::span<u8> inline_output);
153 154
154 void FreeMappingLocked(u64 offset); 155 void FreeMappingLocked(u64 offset);
155 156
@@ -212,6 +213,7 @@ private:
212 bool initialised{}; 213 bool initialised{};
213 } vm; 214 } vm;
214 std::shared_ptr<Tegra::MemoryManager> gmmu; 215 std::shared_ptr<Tegra::MemoryManager> gmmu;
216 Common::ScratchBuffer<IoctlRemapEntry> entries;
215 217
216 // s32 channel{}; 218 // s32 channel{};
217 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE}; 219 // u32 big_page_size{VM::DEFAULT_BIG_PAGE_SIZE};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index e12025560..4d55554b4 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -35,7 +35,7 @@ nvhost_ctrl::~nvhost_ctrl() {
35} 35}
36 36
37NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 37NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
38 std::vector<u8>& output) { 38 std::span<u8> output) {
39 switch (command.group) { 39 switch (command.group) {
40 case 0x0: 40 case 0x0:
41 switch (command.cmd) { 41 switch (command.cmd) {
@@ -64,13 +64,13 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inp
64} 64}
65 65
66NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 66NvResult nvhost_ctrl::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
67 std::span<const u8> inline_input, std::vector<u8>& output) { 67 std::span<const u8> inline_input, std::span<u8> output) {
68 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 68 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
69 return NvResult::NotImplemented; 69 return NvResult::NotImplemented;
70} 70}
71 71
72NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 72NvResult nvhost_ctrl::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
73 std::vector<u8>& output, std::vector<u8>& inline_outpu) { 73 std::span<u8> output, std::span<u8> inline_outpu) {
74 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 74 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
75 return NvResult::NotImplemented; 75 return NvResult::NotImplemented;
76} 76}
@@ -79,7 +79,7 @@ void nvhost_ctrl::OnOpen(DeviceFD fd) {}
79 79
80void nvhost_ctrl::OnClose(DeviceFD fd) {} 80void nvhost_ctrl::OnClose(DeviceFD fd) {}
81 81
82NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output) { 82NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output) {
83 IocGetConfigParams params{}; 83 IocGetConfigParams params{};
84 std::memcpy(&params, input.data(), sizeof(params)); 84 std::memcpy(&params, input.data(), sizeof(params));
85 LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(), 85 LOG_TRACE(Service_NVDRV, "called, setting={}!{}", params.domain_str.data(),
@@ -87,7 +87,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(std::span<const u8> input, std::vector<u8
87 return NvResult::ConfigVarNotFound; // Returns error on production mode 87 return NvResult::ConfigVarNotFound; // Returns error on production mode
88} 88}
89 89
90NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, 90NvResult nvhost_ctrl::IocCtrlEventWait(std::span<const u8> input, std::span<u8> output,
91 bool is_allocation) { 91 bool is_allocation) {
92 IocCtrlEventWaitParams params{}; 92 IocCtrlEventWaitParams params{};
93 std::memcpy(&params, input.data(), sizeof(params)); 93 std::memcpy(&params, input.data(), sizeof(params));
@@ -231,7 +231,7 @@ NvResult nvhost_ctrl::FreeEvent(u32 slot) {
231 return NvResult::Success; 231 return NvResult::Success;
232} 232}
233 233
234NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output) { 234NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output) {
235 IocCtrlEventRegisterParams params{}; 235 IocCtrlEventRegisterParams params{};
236 std::memcpy(&params, input.data(), sizeof(params)); 236 std::memcpy(&params, input.data(), sizeof(params));
237 const u32 event_id = params.user_event_id; 237 const u32 event_id = params.user_event_id;
@@ -252,7 +252,7 @@ NvResult nvhost_ctrl::IocCtrlEventRegister(std::span<const u8> input, std::vecto
252 return NvResult::Success; 252 return NvResult::Success;
253} 253}
254 254
255NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output) { 255NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output) {
256 IocCtrlEventUnregisterParams params{}; 256 IocCtrlEventUnregisterParams params{};
257 std::memcpy(&params, input.data(), sizeof(params)); 257 std::memcpy(&params, input.data(), sizeof(params));
258 const u32 event_id = params.user_event_id & 0x00FF; 258 const u32 event_id = params.user_event_id & 0x00FF;
@@ -262,8 +262,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregister(std::span<const u8> input, std::vec
262 return FreeEvent(event_id); 262 return FreeEvent(event_id);
263} 263}
264 264
265NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, 265NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output) {
266 std::vector<u8>& output) {
267 IocCtrlEventUnregisterBatchParams params{}; 266 IocCtrlEventUnregisterBatchParams params{};
268 std::memcpy(&params, input.data(), sizeof(params)); 267 std::memcpy(&params, input.data(), sizeof(params));
269 u64 event_mask = params.user_events; 268 u64 event_mask = params.user_events;
@@ -281,7 +280,7 @@ NvResult nvhost_ctrl::IocCtrlEventUnregisterBatch(std::span<const u8> input,
281 return NvResult::Success; 280 return NvResult::Success;
282} 281}
283 282
284NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output) { 283NvResult nvhost_ctrl::IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output) {
285 IocCtrlEventClearParams params{}; 284 IocCtrlEventClearParams params{};
286 std::memcpy(&params, input.data(), sizeof(params)); 285 std::memcpy(&params, input.data(), sizeof(params));
287 286
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
index dd2e7888a..2efed4862 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h
@@ -26,11 +26,11 @@ public:
26 ~nvhost_ctrl() override; 26 ~nvhost_ctrl() override;
27 27
28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 28 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
29 std::vector<u8>& output) override; 29 std::span<u8> output) override;
30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 30 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
31 std::span<const u8> inline_input, std::vector<u8>& output) override; 31 std::span<const u8> inline_input, std::span<u8> output) override;
32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 32 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
33 std::vector<u8>& inline_output) override; 33 std::span<u8> inline_output) override;
34 34
35 void OnOpen(DeviceFD fd) override; 35 void OnOpen(DeviceFD fd) override;
36 void OnClose(DeviceFD fd) override; 36 void OnClose(DeviceFD fd) override;
@@ -186,13 +186,12 @@ private:
186 static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8, 186 static_assert(sizeof(IocCtrlEventUnregisterBatchParams) == 8,
187 "IocCtrlEventKill is incorrect size"); 187 "IocCtrlEventKill is incorrect size");
188 188
189 NvResult NvOsGetConfigU32(std::span<const u8> input, std::vector<u8>& output); 189 NvResult NvOsGetConfigU32(std::span<const u8> input, std::span<u8> output);
190 NvResult IocCtrlEventWait(std::span<const u8> input, std::vector<u8>& output, 190 NvResult IocCtrlEventWait(std::span<const u8> input, std::span<u8> output, bool is_allocation);
191 bool is_allocation); 191 NvResult IocCtrlEventRegister(std::span<const u8> input, std::span<u8> output);
192 NvResult IocCtrlEventRegister(std::span<const u8> input, std::vector<u8>& output); 192 NvResult IocCtrlEventUnregister(std::span<const u8> input, std::span<u8> output);
193 NvResult IocCtrlEventUnregister(std::span<const u8> input, std::vector<u8>& output); 193 NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::span<u8> output);
194 NvResult IocCtrlEventUnregisterBatch(std::span<const u8> input, std::vector<u8>& output); 194 NvResult IocCtrlClearEventWait(std::span<const u8> input, std::span<u8> output);
195 NvResult IocCtrlClearEventWait(std::span<const u8> input, std::vector<u8>& output);
196 195
197 NvResult FreeEvent(u32 slot); 196 NvResult FreeEvent(u32 slot);
198 197
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
index be3c083db..6081d92e9 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp
@@ -22,7 +22,7 @@ nvhost_ctrl_gpu::~nvhost_ctrl_gpu() {
22} 22}
23 23
24NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 24NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
25 std::vector<u8>& output) { 25 std::span<u8> output) {
26 switch (command.group) { 26 switch (command.group) {
27 case 'G': 27 case 'G':
28 switch (command.cmd) { 28 switch (command.cmd) {
@@ -54,13 +54,13 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8>
54} 54}
55 55
56NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 56NvResult nvhost_ctrl_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
57 std::span<const u8> inline_input, std::vector<u8>& output) { 57 std::span<const u8> inline_input, std::span<u8> output) {
58 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 58 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
59 return NvResult::NotImplemented; 59 return NvResult::NotImplemented;
60} 60}
61 61
62NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 62NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
63 std::vector<u8>& output, std::vector<u8>& inline_output) { 63 std::span<u8> output, std::span<u8> inline_output) {
64 switch (command.group) { 64 switch (command.group) {
65 case 'G': 65 case 'G':
66 switch (command.cmd) { 66 switch (command.cmd) {
@@ -82,7 +82,7 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8>
82void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {} 82void nvhost_ctrl_gpu::OnOpen(DeviceFD fd) {}
83void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {} 83void nvhost_ctrl_gpu::OnClose(DeviceFD fd) {}
84 84
85NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output) { 85NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output) {
86 LOG_DEBUG(Service_NVDRV, "called"); 86 LOG_DEBUG(Service_NVDRV, "called");
87 IoctlCharacteristics params{}; 87 IoctlCharacteristics params{};
88 std::memcpy(&params, input.data(), input.size()); 88 std::memcpy(&params, input.data(), input.size());
@@ -127,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
127 return NvResult::Success; 127 return NvResult::Success;
128} 128}
129 129
130NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, 130NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::span<u8> output,
131 std::vector<u8>& inline_output) { 131 std::span<u8> inline_output) {
132 LOG_DEBUG(Service_NVDRV, "called"); 132 LOG_DEBUG(Service_NVDRV, "called");
133 IoctlCharacteristics params{}; 133 IoctlCharacteristics params{};
134 std::memcpy(&params, input.data(), input.size()); 134 std::memcpy(&params, input.data(), input.size());
@@ -175,7 +175,7 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics(std::span<const u8> input, std::vec
175 return NvResult::Success; 175 return NvResult::Success;
176} 176}
177 177
178NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output) { 178NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output) {
179 IoctlGpuGetTpcMasksArgs params{}; 179 IoctlGpuGetTpcMasksArgs params{};
180 std::memcpy(&params, input.data(), input.size()); 180 std::memcpy(&params, input.data(), input.size());
181 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); 181 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@@ -186,8 +186,8 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
186 return NvResult::Success; 186 return NvResult::Success;
187} 187}
188 188
189NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, 189NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::span<u8> output,
190 std::vector<u8>& inline_output) { 190 std::span<u8> inline_output) {
191 IoctlGpuGetTpcMasksArgs params{}; 191 IoctlGpuGetTpcMasksArgs params{};
192 std::memcpy(&params, input.data(), input.size()); 192 std::memcpy(&params, input.data(), input.size());
193 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); 193 LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size);
@@ -199,7 +199,7 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks(std::span<const u8> input, std::vector<u8>
199 return NvResult::Success; 199 return NvResult::Success;
200} 200}
201 201
202NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output) { 202NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::span<u8> output) {
203 LOG_DEBUG(Service_NVDRV, "called"); 203 LOG_DEBUG(Service_NVDRV, "called");
204 204
205 IoctlActiveSlotMask params{}; 205 IoctlActiveSlotMask params{};
@@ -212,7 +212,7 @@ NvResult nvhost_ctrl_gpu::GetActiveSlotMask(std::span<const u8> input, std::vect
212 return NvResult::Success; 212 return NvResult::Success;
213} 213}
214 214
215NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output) { 215NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output) {
216 LOG_DEBUG(Service_NVDRV, "called"); 216 LOG_DEBUG(Service_NVDRV, "called");
217 217
218 IoctlZcullGetCtxSize params{}; 218 IoctlZcullGetCtxSize params{};
@@ -224,7 +224,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetCtxSize(std::span<const u8> input, std::vector
224 return NvResult::Success; 224 return NvResult::Success;
225} 225}
226 226
227NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output) { 227NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::span<u8> output) {
228 LOG_DEBUG(Service_NVDRV, "called"); 228 LOG_DEBUG(Service_NVDRV, "called");
229 229
230 IoctlNvgpuGpuZcullGetInfoArgs params{}; 230 IoctlNvgpuGpuZcullGetInfoArgs params{};
@@ -247,7 +247,7 @@ NvResult nvhost_ctrl_gpu::ZCullGetInfo(std::span<const u8> input, std::vector<u8
247 return NvResult::Success; 247 return NvResult::Success;
248} 248}
249 249
250NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>& output) { 250NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::span<u8> output) {
251 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 251 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
252 252
253 IoctlZbcSetTable params{}; 253 IoctlZbcSetTable params{};
@@ -263,7 +263,7 @@ NvResult nvhost_ctrl_gpu::ZBCSetTable(std::span<const u8> input, std::vector<u8>
263 return NvResult::Success; 263 return NvResult::Success;
264} 264}
265 265
266NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output) { 266NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::span<u8> output) {
267 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 267 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
268 268
269 IoctlZbcQueryTable params{}; 269 IoctlZbcQueryTable params{};
@@ -273,7 +273,7 @@ NvResult nvhost_ctrl_gpu::ZBCQueryTable(std::span<const u8> input, std::vector<u
273 return NvResult::Success; 273 return NvResult::Success;
274} 274}
275 275
276NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& output) { 276NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::span<u8> output) {
277 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 277 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
278 278
279 IoctlFlushL2 params{}; 279 IoctlFlushL2 params{};
@@ -283,7 +283,7 @@ NvResult nvhost_ctrl_gpu::FlushL2(std::span<const u8> input, std::vector<u8>& ou
283 return NvResult::Success; 283 return NvResult::Success;
284} 284}
285 285
286NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::vector<u8>& output) { 286NvResult nvhost_ctrl_gpu::GetGpuTime(std::span<const u8> input, std::span<u8> output) {
287 LOG_DEBUG(Service_NVDRV, "called"); 287 LOG_DEBUG(Service_NVDRV, "called");
288 288
289 IoctlGetGpuTime params{}; 289 IoctlGetGpuTime params{};
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
index b9333d9d3..97995551c 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h
@@ -22,11 +22,11 @@ public:
22 ~nvhost_ctrl_gpu() override; 22 ~nvhost_ctrl_gpu() override;
23 23
24 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 24 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
25 std::vector<u8>& output) override; 25 std::span<u8> output) override;
26 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 26 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
27 std::span<const u8> inline_input, std::vector<u8>& output) override; 27 std::span<const u8> inline_input, std::span<u8> output) override;
28 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 28 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
29 std::vector<u8>& inline_output) override; 29 std::span<u8> inline_output) override;
30 30
31 void OnOpen(DeviceFD fd) override; 31 void OnOpen(DeviceFD fd) override;
32 void OnClose(DeviceFD fd) override; 32 void OnClose(DeviceFD fd) override;
@@ -151,21 +151,21 @@ private:
151 }; 151 };
152 static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); 152 static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size");
153 153
154 NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output); 154 NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output);
155 NvResult GetCharacteristics(std::span<const u8> input, std::vector<u8>& output, 155 NvResult GetCharacteristics(std::span<const u8> input, std::span<u8> output,
156 std::vector<u8>& inline_output); 156 std::span<u8> inline_output);
157 157
158 NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output); 158 NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output);
159 NvResult GetTPCMasks(std::span<const u8> input, std::vector<u8>& output, 159 NvResult GetTPCMasks(std::span<const u8> input, std::span<u8> output,
160 std::vector<u8>& inline_output); 160 std::span<u8> inline_output);
161 161
162 NvResult GetActiveSlotMask(std::span<const u8> input, std::vector<u8>& output); 162 NvResult GetActiveSlotMask(std::span<const u8> input, std::span<u8> output);
163 NvResult ZCullGetCtxSize(std::span<const u8> input, std::vector<u8>& output); 163 NvResult ZCullGetCtxSize(std::span<const u8> input, std::span<u8> output);
164 NvResult ZCullGetInfo(std::span<const u8> input, std::vector<u8>& output); 164 NvResult ZCullGetInfo(std::span<const u8> input, std::span<u8> output);
165 NvResult ZBCSetTable(std::span<const u8> input, std::vector<u8>& output); 165 NvResult ZBCSetTable(std::span<const u8> input, std::span<u8> output);
166 NvResult ZBCQueryTable(std::span<const u8> input, std::vector<u8>& output); 166 NvResult ZBCQueryTable(std::span<const u8> input, std::span<u8> output);
167 NvResult FlushL2(std::span<const u8> input, std::vector<u8>& output); 167 NvResult FlushL2(std::span<const u8> input, std::span<u8> output);
168 NvResult GetGpuTime(std::span<const u8> input, std::vector<u8>& output); 168 NvResult GetGpuTime(std::span<const u8> input, std::span<u8> output);
169 169
170 EventInterface& events_interface; 170 EventInterface& events_interface;
171 171
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 453a965dc..46a25fcab 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -47,7 +47,7 @@ nvhost_gpu::~nvhost_gpu() {
47} 47}
48 48
49NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 49NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
50 std::vector<u8>& output) { 50 std::span<u8> output) {
51 switch (command.group) { 51 switch (command.group) {
52 case 0x0: 52 case 0x0:
53 switch (command.cmd) { 53 switch (command.cmd) {
@@ -99,7 +99,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
99}; 99};
100 100
101NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 101NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
102 std::span<const u8> inline_input, std::vector<u8>& output) { 102 std::span<const u8> inline_input, std::span<u8> output) {
103 switch (command.group) { 103 switch (command.group) {
104 case 'H': 104 case 'H':
105 switch (command.cmd) { 105 switch (command.cmd) {
@@ -113,7 +113,7 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> inpu
113} 113}
114 114
115NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 115NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
116 std::vector<u8>& output, std::vector<u8>& inline_output) { 116 std::span<u8> output, std::span<u8> inline_output) {
117 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 117 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
118 return NvResult::NotImplemented; 118 return NvResult::NotImplemented;
119} 119}
@@ -121,7 +121,7 @@ NvResult nvhost_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> inpu
121void nvhost_gpu::OnOpen(DeviceFD fd) {} 121void nvhost_gpu::OnOpen(DeviceFD fd) {}
122void nvhost_gpu::OnClose(DeviceFD fd) {} 122void nvhost_gpu::OnClose(DeviceFD fd) {}
123 123
124NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { 124NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
125 IoctlSetNvmapFD params{}; 125 IoctlSetNvmapFD params{};
126 std::memcpy(&params, input.data(), input.size()); 126 std::memcpy(&params, input.data(), input.size());
127 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); 127 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
@@ -130,7 +130,7 @@ NvResult nvhost_gpu::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& outp
130 return NvResult::Success; 130 return NvResult::Success;
131} 131}
132 132
133NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& output) { 133NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::span<u8> output) {
134 LOG_DEBUG(Service_NVDRV, "called"); 134 LOG_DEBUG(Service_NVDRV, "called");
135 135
136 IoctlClientData params{}; 136 IoctlClientData params{};
@@ -139,7 +139,7 @@ NvResult nvhost_gpu::SetClientData(std::span<const u8> input, std::vector<u8>& o
139 return NvResult::Success; 139 return NvResult::Success;
140} 140}
141 141
142NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& output) { 142NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::span<u8> output) {
143 LOG_DEBUG(Service_NVDRV, "called"); 143 LOG_DEBUG(Service_NVDRV, "called");
144 144
145 IoctlClientData params{}; 145 IoctlClientData params{};
@@ -149,7 +149,7 @@ NvResult nvhost_gpu::GetClientData(std::span<const u8> input, std::vector<u8>& o
149 return NvResult::Success; 149 return NvResult::Success;
150} 150}
151 151
152NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& output) { 152NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::span<u8> output) {
153 std::memcpy(&zcull_params, input.data(), input.size()); 153 std::memcpy(&zcull_params, input.data(), input.size());
154 LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va, 154 LOG_DEBUG(Service_NVDRV, "called, gpu_va={:X}, mode={:X}", zcull_params.gpu_va,
155 zcull_params.mode); 155 zcull_params.mode);
@@ -158,7 +158,7 @@ NvResult nvhost_gpu::ZCullBind(std::span<const u8> input, std::vector<u8>& outpu
158 return NvResult::Success; 158 return NvResult::Success;
159} 159}
160 160
161NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output) { 161NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::span<u8> output) {
162 IoctlSetErrorNotifier params{}; 162 IoctlSetErrorNotifier params{};
163 std::memcpy(&params, input.data(), input.size()); 163 std::memcpy(&params, input.data(), input.size());
164 LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset, 164 LOG_WARNING(Service_NVDRV, "(STUBBED) called, offset={:X}, size={:X}, mem={:X}", params.offset,
@@ -168,14 +168,14 @@ NvResult nvhost_gpu::SetErrorNotifier(std::span<const u8> input, std::vector<u8>
168 return NvResult::Success; 168 return NvResult::Success;
169} 169}
170 170
171NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::vector<u8>& output) { 171NvResult nvhost_gpu::SetChannelPriority(std::span<const u8> input, std::span<u8> output) {
172 std::memcpy(&channel_priority, input.data(), input.size()); 172 std::memcpy(&channel_priority, input.data(), input.size());
173 LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority); 173 LOG_DEBUG(Service_NVDRV, "(STUBBED) called, priority={:X}", channel_priority);
174 174
175 return NvResult::Success; 175 return NvResult::Success;
176} 176}
177 177
178NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output) { 178NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output) {
179 IoctlAllocGpfifoEx2 params{}; 179 IoctlAllocGpfifoEx2 params{};
180 std::memcpy(&params, input.data(), input.size()); 180 std::memcpy(&params, input.data(), input.size());
181 LOG_WARNING(Service_NVDRV, 181 LOG_WARNING(Service_NVDRV,
@@ -197,7 +197,7 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>&
197 return NvResult::Success; 197 return NvResult::Success;
198} 198}
199 199
200NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output) { 200NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::span<u8> output) {
201 IoctlAllocObjCtx params{}; 201 IoctlAllocObjCtx params{};
202 std::memcpy(&params, input.data(), input.size()); 202 std::memcpy(&params, input.data(), input.size());
203 LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num, 203 LOG_WARNING(Service_NVDRV, "(STUBBED) called, class_num={:X}, flags={:X}", params.class_num,
@@ -208,7 +208,8 @@ NvResult nvhost_gpu::AllocateObjectContext(std::span<const u8> input, std::vecto
208 return NvResult::Success; 208 return NvResult::Success;
209} 209}
210 210
211static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) { 211static boost::container::small_vector<Tegra::CommandHeader, 512> BuildWaitCommandList(
212 NvFence fence) {
212 return { 213 return {
213 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, 214 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
214 Tegra::SubmissionMode::Increasing), 215 Tegra::SubmissionMode::Increasing),
@@ -219,35 +220,35 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
219 }; 220 };
220} 221}
221 222
222static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence) { 223static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementCommandList(
223 std::vector<Tegra::CommandHeader> result{ 224 NvFence fence) {
225 boost::container::small_vector<Tegra::CommandHeader, 512> result{
224 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1, 226 Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointPayload, 1,
225 Tegra::SubmissionMode::Increasing), 227 Tegra::SubmissionMode::Increasing),
226 {}}; 228 {}};
227 229
228 for (u32 count = 0; count < 2; ++count) { 230 for (u32 count = 0; count < 2; ++count) {
229 result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1, 231 result.push_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::SyncpointOperation, 1,
230 Tegra::SubmissionMode::Increasing)); 232 Tegra::SubmissionMode::Increasing));
231 result.emplace_back( 233 result.push_back(
232 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id)); 234 BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
233 } 235 }
234 236
235 return result; 237 return result;
236} 238}
237 239
238static std::vector<Tegra::CommandHeader> BuildIncrementWithWfiCommandList(NvFence fence) { 240static boost::container::small_vector<Tegra::CommandHeader, 512> BuildIncrementWithWfiCommandList(
239 std::vector<Tegra::CommandHeader> result{ 241 NvFence fence) {
242 boost::container::small_vector<Tegra::CommandHeader, 512> result{
240 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1, 243 Tegra::BuildCommandHeader(Tegra::BufferMethods::WaitForIdle, 1,
241 Tegra::SubmissionMode::Increasing), 244 Tegra::SubmissionMode::Increasing),
242 {}}; 245 {}};
243 const std::vector<Tegra::CommandHeader> increment{BuildIncrementCommandList(fence)}; 246 auto increment_list{BuildIncrementCommandList(fence)};
244 247 result.insert(result.end(), increment_list.begin(), increment_list.end());
245 result.insert(result.end(), increment.begin(), increment.end());
246
247 return result; 248 return result;
248} 249}
249 250
250NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, 251NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
251 Tegra::CommandList&& entries) { 252 Tegra::CommandList&& entries) {
252 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address, 253 LOG_TRACE(Service_NVDRV, "called, gpfifo={:X}, num_entries={:X}, flags={:X}", params.address,
253 params.num_entries, params.flags.raw); 254 params.num_entries, params.flags.raw);
@@ -293,7 +294,7 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
293 return NvResult::Success; 294 return NvResult::Success;
294} 295}
295 296
296NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, 297NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
297 bool kickoff) { 298 bool kickoff) {
298 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 299 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
299 UNIMPLEMENTED(); 300 UNIMPLEMENTED();
@@ -315,7 +316,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>
315} 316}
316 317
317NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, 318NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
318 std::vector<u8>& output) { 319 std::span<u8> output) {
319 if (input.size() < sizeof(IoctlSubmitGpfifo)) { 320 if (input.size() < sizeof(IoctlSubmitGpfifo)) {
320 UNIMPLEMENTED(); 321 UNIMPLEMENTED();
321 return NvResult::InvalidSize; 322 return NvResult::InvalidSize;
@@ -327,7 +328,7 @@ NvResult nvhost_gpu::SubmitGPFIFOBase(std::span<const u8> input, std::span<const
327 return SubmitGPFIFOImpl(params, output, std::move(entries)); 328 return SubmitGPFIFOImpl(params, output, std::move(entries));
328} 329}
329 330
330NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { 331NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
331 IoctlGetWaitbase params{}; 332 IoctlGetWaitbase params{};
332 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); 333 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
333 LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown); 334 LOG_INFO(Service_NVDRV, "called, unknown=0x{:X}", params.unknown);
@@ -337,7 +338,7 @@ NvResult nvhost_gpu::GetWaitbase(std::span<const u8> input, std::vector<u8>& out
337 return NvResult::Success; 338 return NvResult::Success;
338} 339}
339 340
340NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output) { 341NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::span<u8> output) {
341 IoctlChannelSetTimeout params{}; 342 IoctlChannelSetTimeout params{};
342 std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout)); 343 std::memcpy(&params, input.data(), sizeof(IoctlChannelSetTimeout));
343 LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout); 344 LOG_INFO(Service_NVDRV, "called, timeout=0x{:X}", params.timeout);
@@ -345,7 +346,7 @@ NvResult nvhost_gpu::ChannelSetTimeout(std::span<const u8> input, std::vector<u8
345 return NvResult::Success; 346 return NvResult::Success;
346} 347}
347 348
348NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output) { 349NvResult nvhost_gpu::ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output) {
349 IoctlSetTimeslice params{}; 350 IoctlSetTimeslice params{};
350 std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice)); 351 std::memcpy(&params, input.data(), sizeof(IoctlSetTimeslice));
351 LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice); 352 LOG_INFO(Service_NVDRV, "called, timeslice=0x{:X}", params.timeslice);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 3ca58202d..529c20526 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -41,11 +41,11 @@ public:
41 ~nvhost_gpu() override; 41 ~nvhost_gpu() override;
42 42
43 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 43 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
44 std::vector<u8>& output) override; 44 std::span<u8> output) override;
45 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 45 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
46 std::span<const u8> inline_input, std::vector<u8>& output) override; 46 std::span<const u8> inline_input, std::span<u8> output) override;
47 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 47 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
48 std::vector<u8>& inline_output) override; 48 std::span<u8> inline_output) override;
49 49
50 void OnOpen(DeviceFD fd) override; 50 void OnOpen(DeviceFD fd) override;
51 void OnClose(DeviceFD fd) override; 51 void OnClose(DeviceFD fd) override;
@@ -186,23 +186,23 @@ private:
186 u32_le channel_priority{}; 186 u32_le channel_priority{};
187 u32_le channel_timeslice{}; 187 u32_le channel_timeslice{};
188 188
189 NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); 189 NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
190 NvResult SetClientData(std::span<const u8> input, std::vector<u8>& output); 190 NvResult SetClientData(std::span<const u8> input, std::span<u8> output);
191 NvResult GetClientData(std::span<const u8> input, std::vector<u8>& output); 191 NvResult GetClientData(std::span<const u8> input, std::span<u8> output);
192 NvResult ZCullBind(std::span<const u8> input, std::vector<u8>& output); 192 NvResult ZCullBind(std::span<const u8> input, std::span<u8> output);
193 NvResult SetErrorNotifier(std::span<const u8> input, std::vector<u8>& output); 193 NvResult SetErrorNotifier(std::span<const u8> input, std::span<u8> output);
194 NvResult SetChannelPriority(std::span<const u8> input, std::vector<u8>& output); 194 NvResult SetChannelPriority(std::span<const u8> input, std::span<u8> output);
195 NvResult AllocGPFIFOEx2(std::span<const u8> input, std::vector<u8>& output); 195 NvResult AllocGPFIFOEx2(std::span<const u8> input, std::span<u8> output);
196 NvResult AllocateObjectContext(std::span<const u8> input, std::vector<u8>& output); 196 NvResult AllocateObjectContext(std::span<const u8> input, std::span<u8> output);
197 NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>& output, 197 NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::span<u8> output,
198 Tegra::CommandList&& entries); 198 Tegra::CommandList&& entries);
199 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::vector<u8>& output, 199 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<u8> output,
200 bool kickoff = false); 200 bool kickoff = false);
201 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline, 201 NvResult SubmitGPFIFOBase(std::span<const u8> input, std::span<const u8> input_inline,
202 std::vector<u8>& output); 202 std::span<u8> output);
203 NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); 203 NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
204 NvResult ChannelSetTimeout(std::span<const u8> input, std::vector<u8>& output); 204 NvResult ChannelSetTimeout(std::span<const u8> input, std::span<u8> output);
205 NvResult ChannelSetTimeslice(std::span<const u8> input, std::vector<u8>& output); 205 NvResult ChannelSetTimeslice(std::span<const u8> input, std::span<u8> output);
206 206
207 EventInterface& events_interface; 207 EventInterface& events_interface;
208 NvCore::Container& core; 208 NvCore::Container& core;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index dc45169ad..a174442a6 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -16,7 +16,7 @@ nvhost_nvdec::nvhost_nvdec(Core::System& system_, NvCore::Container& core_)
16nvhost_nvdec::~nvhost_nvdec() = default; 16nvhost_nvdec::~nvhost_nvdec() = default;
17 17
18NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 18NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::vector<u8>& output) { 19 std::span<u8> output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 0x0: 21 case 0x0:
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -56,13 +56,13 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
56} 56}
57 57
58NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 58NvResult nvhost_nvdec::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
59 std::span<const u8> inline_input, std::vector<u8>& output) { 59 std::span<const u8> inline_input, std::span<u8> output) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 61 return NvResult::NotImplemented;
62} 62}
63 63
64NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 64NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
65 std::vector<u8>& output, std::vector<u8>& inline_output) { 65 std::span<u8> output, std::span<u8> inline_output) {
66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
67 return NvResult::NotImplemented; 67 return NvResult::NotImplemented;
68} 68}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
index 0d615bbcb..ad2233c49 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.h
@@ -14,11 +14,11 @@ public:
14 ~nvhost_nvdec() override; 14 ~nvhost_nvdec() override;
15 15
16 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 16 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
17 std::vector<u8>& output) override; 17 std::span<u8> output) override;
18 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 18 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::span<const u8> inline_input, std::vector<u8>& output) override; 19 std::span<const u8> inline_input, std::span<u8> output) override;
20 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 20 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
21 std::vector<u8>& inline_output) override; 21 std::span<u8> inline_output) override;
22 22
23 void OnOpen(DeviceFD fd) override; 23 void OnOpen(DeviceFD fd) override;
24 void OnClose(DeviceFD fd) override; 24 void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
index 1ab51f10b..61649aa4a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp
@@ -36,7 +36,7 @@ std::size_t SliceVectors(std::span<const u8> input, std::vector<T>& dst, std::si
36// Writes the data in src to an offset into the dst vector. The offset is specified in bytes 36// Writes the data in src to an offset into the dst vector. The offset is specified in bytes
37// Returns the number of bytes written into dst. 37// Returns the number of bytes written into dst.
38template <typename T> 38template <typename T>
39std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) { 39std::size_t WriteVectors(std::span<u8> dst, const std::vector<T>& src, std::size_t offset) {
40 if (src.empty()) { 40 if (src.empty()) {
41 return 0; 41 return 0;
42 } 42 }
@@ -72,8 +72,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(std::span<const u8> input) {
72 return NvResult::Success; 72 return NvResult::Success;
73} 73}
74 74
75NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, 75NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output) {
76 std::vector<u8>& output) {
77 IoctlSubmit params{}; 76 IoctlSubmit params{};
78 std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); 77 std::memcpy(&params, input.data(), sizeof(IoctlSubmit));
79 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); 78 LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
@@ -121,7 +120,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span<const u8> input,
121 return NvResult::Success; 120 return NvResult::Success;
122} 121}
123 122
124NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vector<u8>& output) { 123NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::span<u8> output) {
125 IoctlGetSyncpoint params{}; 124 IoctlGetSyncpoint params{};
126 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint)); 125 std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
127 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param); 126 LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
@@ -133,7 +132,7 @@ NvResult nvhost_nvdec_common::GetSyncpoint(std::span<const u8> input, std::vecto
133 return NvResult::Success; 132 return NvResult::Success;
134} 133}
135 134
136NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector<u8>& output) { 135NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::span<u8> output) {
137 IoctlGetWaitbase params{}; 136 IoctlGetWaitbase params{};
138 LOG_CRITICAL(Service_NVDRV, "called WAITBASE"); 137 LOG_CRITICAL(Service_NVDRV, "called WAITBASE");
139 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase)); 138 std::memcpy(&params, input.data(), sizeof(IoctlGetWaitbase));
@@ -142,7 +141,7 @@ NvResult nvhost_nvdec_common::GetWaitbase(std::span<const u8> input, std::vector
142 return NvResult::Success; 141 return NvResult::Success;
143} 142}
144 143
145NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u8>& output) { 144NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::span<u8> output) {
146 IoctlMapBuffer params{}; 145 IoctlMapBuffer params{};
147 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); 146 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
148 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); 147 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -159,7 +158,7 @@ NvResult nvhost_nvdec_common::MapBuffer(std::span<const u8> input, std::vector<u
159 return NvResult::Success; 158 return NvResult::Success;
160} 159}
161 160
162NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector<u8>& output) { 161NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::span<u8> output) {
163 IoctlMapBuffer params{}; 162 IoctlMapBuffer params{};
164 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer)); 163 std::memcpy(&params, input.data(), sizeof(IoctlMapBuffer));
165 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries); 164 std::vector<MapBufferEntry> cmd_buffer_handles(params.num_entries);
@@ -173,7 +172,7 @@ NvResult nvhost_nvdec_common::UnmapBuffer(std::span<const u8> input, std::vector
173 return NvResult::Success; 172 return NvResult::Success;
174} 173}
175 174
176NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output) { 175NvResult nvhost_nvdec_common::SetSubmitTimeout(std::span<const u8> input, std::span<u8> output) {
177 std::memcpy(&submit_timeout, input.data(), input.size()); 176 std::memcpy(&submit_timeout, input.data(), input.size());
178 LOG_WARNING(Service_NVDRV, "(STUBBED) called"); 177 LOG_WARNING(Service_NVDRV, "(STUBBED) called");
179 return NvResult::Success; 178 return NvResult::Success;
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
index 5af26a26f..9bb573bfe 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h
@@ -108,12 +108,12 @@ protected:
108 108
109 /// Ioctl command implementations 109 /// Ioctl command implementations
110 NvResult SetNVMAPfd(std::span<const u8> input); 110 NvResult SetNVMAPfd(std::span<const u8> input);
111 NvResult Submit(DeviceFD fd, std::span<const u8> input, std::vector<u8>& output); 111 NvResult Submit(DeviceFD fd, std::span<const u8> input, std::span<u8> output);
112 NvResult GetSyncpoint(std::span<const u8> input, std::vector<u8>& output); 112 NvResult GetSyncpoint(std::span<const u8> input, std::span<u8> output);
113 NvResult GetWaitbase(std::span<const u8> input, std::vector<u8>& output); 113 NvResult GetWaitbase(std::span<const u8> input, std::span<u8> output);
114 NvResult MapBuffer(std::span<const u8> input, std::vector<u8>& output); 114 NvResult MapBuffer(std::span<const u8> input, std::span<u8> output);
115 NvResult UnmapBuffer(std::span<const u8> input, std::vector<u8>& output); 115 NvResult UnmapBuffer(std::span<const u8> input, std::span<u8> output);
116 NvResult SetSubmitTimeout(std::span<const u8> input, std::vector<u8>& output); 116 NvResult SetSubmitTimeout(std::span<const u8> input, std::span<u8> output);
117 117
118 Kernel::KEvent* QueryEvent(u32 event_id) override; 118 Kernel::KEvent* QueryEvent(u32 event_id) override;
119 119
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
index 39f30e7c8..a05c8cdae 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp
@@ -13,7 +13,7 @@ nvhost_nvjpg::nvhost_nvjpg(Core::System& system_) : nvdevice{system_} {}
13nvhost_nvjpg::~nvhost_nvjpg() = default; 13nvhost_nvjpg::~nvhost_nvjpg() = default;
14 14
15NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 15NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
16 std::vector<u8>& output) { 16 std::span<u8> output) {
17 switch (command.group) { 17 switch (command.group) {
18 case 'H': 18 case 'H':
19 switch (command.cmd) { 19 switch (command.cmd) {
@@ -32,13 +32,13 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> in
32} 32}
33 33
34NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 34NvResult nvhost_nvjpg::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
35 std::span<const u8> inline_input, std::vector<u8>& output) { 35 std::span<const u8> inline_input, std::span<u8> output) {
36 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 36 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
37 return NvResult::NotImplemented; 37 return NvResult::NotImplemented;
38} 38}
39 39
40NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 40NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
41 std::vector<u8>& output, std::vector<u8>& inline_output) { 41 std::span<u8> output, std::span<u8> inline_output) {
42 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 42 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
43 return NvResult::NotImplemented; 43 return NvResult::NotImplemented;
44} 44}
@@ -46,7 +46,7 @@ NvResult nvhost_nvjpg::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in
46void nvhost_nvjpg::OnOpen(DeviceFD fd) {} 46void nvhost_nvjpg::OnOpen(DeviceFD fd) {}
47void nvhost_nvjpg::OnClose(DeviceFD fd) {} 47void nvhost_nvjpg::OnClose(DeviceFD fd) {}
48 48
49NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output) { 49NvResult nvhost_nvjpg::SetNVMAPfd(std::span<const u8> input, std::span<u8> output) {
50 IoctlSetNvmapFD params{}; 50 IoctlSetNvmapFD params{};
51 std::memcpy(&params, input.data(), input.size()); 51 std::memcpy(&params, input.data(), input.size());
52 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd); 52 LOG_DEBUG(Service_NVDRV, "called, fd={}", params.nvmap_fd);
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
index 41b57e872..5623e0d47 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.h
@@ -16,11 +16,11 @@ public:
16 ~nvhost_nvjpg() override; 16 ~nvhost_nvjpg() override;
17 17
18 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 18 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::vector<u8>& output) override; 19 std::span<u8> output) override;
20 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 20 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
21 std::span<const u8> inline_input, std::vector<u8>& output) override; 21 std::span<const u8> inline_input, std::span<u8> output) override;
22 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 22 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
23 std::vector<u8>& inline_output) override; 23 std::span<u8> inline_output) override;
24 24
25 void OnOpen(DeviceFD fd) override; 25 void OnOpen(DeviceFD fd) override;
26 void OnClose(DeviceFD fd) override; 26 void OnClose(DeviceFD fd) override;
@@ -33,7 +33,7 @@ private:
33 33
34 s32_le nvmap_fd{}; 34 s32_le nvmap_fd{};
35 35
36 NvResult SetNVMAPfd(std::span<const u8> input, std::vector<u8>& output); 36 NvResult SetNVMAPfd(std::span<const u8> input, std::span<u8> output);
37}; 37};
38 38
39} // namespace Service::Nvidia::Devices 39} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
index b0ea402a7..c0b8684c3 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp
@@ -16,7 +16,7 @@ nvhost_vic::nvhost_vic(Core::System& system_, NvCore::Container& core_)
16nvhost_vic::~nvhost_vic() = default; 16nvhost_vic::~nvhost_vic() = default;
17 17
18NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 18NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
19 std::vector<u8>& output) { 19 std::span<u8> output) {
20 switch (command.group) { 20 switch (command.group) {
21 case 0x0: 21 case 0x0:
22 switch (command.cmd) { 22 switch (command.cmd) {
@@ -56,13 +56,13 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> inpu
56} 56}
57 57
58NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 58NvResult nvhost_vic::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
59 std::span<const u8> inline_input, std::vector<u8>& output) { 59 std::span<const u8> inline_input, std::span<u8> output) {
60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 60 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
61 return NvResult::NotImplemented; 61 return NvResult::NotImplemented;
62} 62}
63 63
64NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 64NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
65 std::vector<u8>& output, std::vector<u8>& inline_output) { 65 std::span<u8> output, std::span<u8> inline_output) {
66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 66 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
67 return NvResult::NotImplemented; 67 return NvResult::NotImplemented;
68} 68}
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.h b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
index b5e350a83..cadbcb0a5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_vic.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.h
@@ -13,11 +13,11 @@ public:
13 ~nvhost_vic(); 13 ~nvhost_vic();
14 14
15 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 15 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
16 std::vector<u8>& output) override; 16 std::span<u8> output) override;
17 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 17 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
18 std::span<const u8> inline_input, std::vector<u8>& output) override; 18 std::span<const u8> inline_input, std::span<u8> output) override;
19 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 19 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
20 std::vector<u8>& inline_output) override; 20 std::span<u8> inline_output) override;
21 21
22 void OnOpen(DeviceFD fd) override; 22 void OnOpen(DeviceFD fd) override;
23 void OnClose(DeviceFD fd) override; 23 void OnClose(DeviceFD fd) override;
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp
index 07417f045..e7f7e273b 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -26,7 +26,7 @@ nvmap::nvmap(Core::System& system_, NvCore::Container& container_)
26nvmap::~nvmap() = default; 26nvmap::~nvmap() = default;
27 27
28NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 28NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
29 std::vector<u8>& output) { 29 std::span<u8> output) {
30 switch (command.group) { 30 switch (command.group) {
31 case 0x1: 31 case 0x1:
32 switch (command.cmd) { 32 switch (command.cmd) {
@@ -55,13 +55,13 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
55} 55}
56 56
57NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 57NvResult nvmap::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
58 std::span<const u8> inline_input, std::vector<u8>& output) { 58 std::span<const u8> inline_input, std::span<u8> output) {
59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 59 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
60 return NvResult::NotImplemented; 60 return NvResult::NotImplemented;
61} 61}
62 62
63NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 63NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
64 std::vector<u8>& output, std::vector<u8>& inline_output) { 64 std::span<u8> inline_output) {
65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw); 65 UNIMPLEMENTED_MSG("Unimplemented ioctl={:08X}", command.raw);
66 return NvResult::NotImplemented; 66 return NvResult::NotImplemented;
67} 67}
@@ -69,7 +69,7 @@ NvResult nvmap::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input,
69void nvmap::OnOpen(DeviceFD fd) {} 69void nvmap::OnOpen(DeviceFD fd) {}
70void nvmap::OnClose(DeviceFD fd) {} 70void nvmap::OnClose(DeviceFD fd) {}
71 71
72NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) { 72NvResult nvmap::IocCreate(std::span<const u8> input, std::span<u8> output) {
73 IocCreateParams params; 73 IocCreateParams params;
74 std::memcpy(&params, input.data(), sizeof(params)); 74 std::memcpy(&params, input.data(), sizeof(params));
75 LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size); 75 LOG_DEBUG(Service_NVDRV, "called, size=0x{:08X}", params.size);
@@ -89,7 +89,7 @@ NvResult nvmap::IocCreate(std::span<const u8> input, std::vector<u8>& output) {
89 return NvResult::Success; 89 return NvResult::Success;
90} 90}
91 91
92NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) { 92NvResult nvmap::IocAlloc(std::span<const u8> input, std::span<u8> output) {
93 IocAllocParams params; 93 IocAllocParams params;
94 std::memcpy(&params, input.data(), sizeof(params)); 94 std::memcpy(&params, input.data(), sizeof(params));
95 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address); 95 LOG_DEBUG(Service_NVDRV, "called, addr={:X}", params.address);
@@ -137,7 +137,7 @@ NvResult nvmap::IocAlloc(std::span<const u8> input, std::vector<u8>& output) {
137 return result; 137 return result;
138} 138}
139 139
140NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) { 140NvResult nvmap::IocGetId(std::span<const u8> input, std::span<u8> output) {
141 IocGetIdParams params; 141 IocGetIdParams params;
142 std::memcpy(&params, input.data(), sizeof(params)); 142 std::memcpy(&params, input.data(), sizeof(params));
143 143
@@ -161,7 +161,7 @@ NvResult nvmap::IocGetId(std::span<const u8> input, std::vector<u8>& output) {
161 return NvResult::Success; 161 return NvResult::Success;
162} 162}
163 163
164NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) { 164NvResult nvmap::IocFromId(std::span<const u8> input, std::span<u8> output) {
165 IocFromIdParams params; 165 IocFromIdParams params;
166 std::memcpy(&params, input.data(), sizeof(params)); 166 std::memcpy(&params, input.data(), sizeof(params));
167 167
@@ -192,7 +192,7 @@ NvResult nvmap::IocFromId(std::span<const u8> input, std::vector<u8>& output) {
192 return NvResult::Success; 192 return NvResult::Success;
193} 193}
194 194
195NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) { 195NvResult nvmap::IocParam(std::span<const u8> input, std::span<u8> output) {
196 enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 }; 196 enum class ParamTypes { Size = 1, Alignment = 2, Base = 3, Heap = 4, Kind = 5, Compr = 6 };
197 197
198 IocParamParams params; 198 IocParamParams params;
@@ -241,7 +241,7 @@ NvResult nvmap::IocParam(std::span<const u8> input, std::vector<u8>& output) {
241 return NvResult::Success; 241 return NvResult::Success;
242} 242}
243 243
244NvResult nvmap::IocFree(std::span<const u8> input, std::vector<u8>& output) { 244NvResult nvmap::IocFree(std::span<const u8> input, std::span<u8> output) {
245 IocFreeParams params; 245 IocFreeParams params;
246 std::memcpy(&params, input.data(), sizeof(params)); 246 std::memcpy(&params, input.data(), sizeof(params));
247 247
diff --git a/src/core/hle/service/nvdrv/devices/nvmap.h b/src/core/hle/service/nvdrv/devices/nvmap.h
index 82bd3b118..40c65b430 100644
--- a/src/core/hle/service/nvdrv/devices/nvmap.h
+++ b/src/core/hle/service/nvdrv/devices/nvmap.h
@@ -27,11 +27,11 @@ public:
27 nvmap& operator=(const nvmap&) = delete; 27 nvmap& operator=(const nvmap&) = delete;
28 28
29 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 29 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
30 std::vector<u8>& output) override; 30 std::span<u8> output) override;
31 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 31 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
32 std::span<const u8> inline_input, std::vector<u8>& output) override; 32 std::span<const u8> inline_input, std::span<u8> output) override;
33 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 33 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
34 std::vector<u8>& inline_output) override; 34 std::span<u8> inline_output) override;
35 35
36 void OnOpen(DeviceFD fd) override; 36 void OnOpen(DeviceFD fd) override;
37 void OnClose(DeviceFD fd) override; 37 void OnClose(DeviceFD fd) override;
@@ -106,12 +106,12 @@ private:
106 }; 106 };
107 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size"); 107 static_assert(sizeof(IocGetIdParams) == 8, "IocGetIdParams has wrong size");
108 108
109 NvResult IocCreate(std::span<const u8> input, std::vector<u8>& output); 109 NvResult IocCreate(std::span<const u8> input, std::span<u8> output);
110 NvResult IocAlloc(std::span<const u8> input, std::vector<u8>& output); 110 NvResult IocAlloc(std::span<const u8> input, std::span<u8> output);
111 NvResult IocGetId(std::span<const u8> input, std::vector<u8>& output); 111 NvResult IocGetId(std::span<const u8> input, std::span<u8> output);
112 NvResult IocFromId(std::span<const u8> input, std::vector<u8>& output); 112 NvResult IocFromId(std::span<const u8> input, std::span<u8> output);
113 NvResult IocParam(std::span<const u8> input, std::vector<u8>& output); 113 NvResult IocParam(std::span<const u8> input, std::span<u8> output);
114 NvResult IocFree(std::span<const u8> input, std::vector<u8>& output); 114 NvResult IocFree(std::span<const u8> input, std::span<u8> output);
115 115
116 NvCore::Container& container; 116 NvCore::Container& container;
117 NvCore::NvMap& file; 117 NvCore::NvMap& file;
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 3d774eec4..9e46ee8dd 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -130,7 +130,7 @@ DeviceFD Module::Open(const std::string& device_name) {
130} 130}
131 131
132NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, 132NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
133 std::vector<u8>& output) { 133 std::span<u8> output) {
134 if (fd < 0) { 134 if (fd < 0) {
135 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 135 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
136 return NvResult::InvalidState; 136 return NvResult::InvalidState;
@@ -147,7 +147,7 @@ NvResult Module::Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input,
147} 147}
148 148
149NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 149NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
150 std::span<const u8> inline_input, std::vector<u8>& output) { 150 std::span<const u8> inline_input, std::span<u8> output) {
151 if (fd < 0) { 151 if (fd < 0) {
152 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 152 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
153 return NvResult::InvalidState; 153 return NvResult::InvalidState;
@@ -163,8 +163,8 @@ NvResult Module::Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
163 return itr->second->Ioctl2(fd, command, input, inline_input, output); 163 return itr->second->Ioctl2(fd, command, input, inline_input, output);
164} 164}
165 165
166NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, 166NvResult Module::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
167 std::vector<u8>& output, std::vector<u8>& inline_output) { 167 std::span<u8> inline_output) {
168 if (fd < 0) { 168 if (fd < 0) {
169 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd); 169 LOG_ERROR(Service_NVDRV, "Invalid DeviceFD={}!", fd);
170 return NvResult::InvalidState; 170 return NvResult::InvalidState;
diff --git a/src/core/hle/service/nvdrv/nvdrv.h b/src/core/hle/service/nvdrv/nvdrv.h
index 668be742b..d8622b3ca 100644
--- a/src/core/hle/service/nvdrv/nvdrv.h
+++ b/src/core/hle/service/nvdrv/nvdrv.h
@@ -80,13 +80,13 @@ public:
80 DeviceFD Open(const std::string& device_name); 80 DeviceFD Open(const std::string& device_name);
81 81
82 /// Sends an ioctl command to the specified file descriptor. 82 /// Sends an ioctl command to the specified file descriptor.
83 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output); 83 NvResult Ioctl1(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output);
84 84
85 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input, 85 NvResult Ioctl2(DeviceFD fd, Ioctl command, std::span<const u8> input,
86 std::span<const u8> inline_input, std::vector<u8>& output); 86 std::span<const u8> inline_input, std::span<u8> output);
87 87
88 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::vector<u8>& output, 88 NvResult Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> input, std::span<u8> output,
89 std::vector<u8>& inline_output); 89 std::span<u8> inline_output);
90 90
91 /// Closes a device file descriptor and returns operation success. 91 /// Closes a device file descriptor and returns operation success.
92 NvResult Close(DeviceFD fd); 92 NvResult Close(DeviceFD fd);
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.cpp b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
index d010a1e03..348207e25 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.cpp
@@ -63,12 +63,12 @@ void NVDRV::Ioctl1(HLERequestContext& ctx) {
63 } 63 }
64 64
65 // Check device 65 // Check device
66 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 66 tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
67 const auto input_buffer = ctx.ReadBuffer(0); 67 const auto input_buffer = ctx.ReadBuffer(0);
68 68
69 const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, output_buffer); 69 const auto nv_result = nvdrv->Ioctl1(fd, command, input_buffer, tmp_output);
70 if (command.is_out != 0) { 70 if (command.is_out != 0) {
71 ctx.WriteBuffer(output_buffer); 71 ctx.WriteBuffer(tmp_output);
72 } 72 }
73 73
74 IPC::ResponseBuilder rb{ctx, 3}; 74 IPC::ResponseBuilder rb{ctx, 3};
@@ -90,12 +90,12 @@ void NVDRV::Ioctl2(HLERequestContext& ctx) {
90 90
91 const auto input_buffer = ctx.ReadBuffer(0); 91 const auto input_buffer = ctx.ReadBuffer(0);
92 const auto input_inlined_buffer = ctx.ReadBuffer(1); 92 const auto input_inlined_buffer = ctx.ReadBuffer(1);
93 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 93 tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
94 94
95 const auto nv_result = 95 const auto nv_result =
96 nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, output_buffer); 96 nvdrv->Ioctl2(fd, command, input_buffer, input_inlined_buffer, tmp_output);
97 if (command.is_out != 0) { 97 if (command.is_out != 0) {
98 ctx.WriteBuffer(output_buffer); 98 ctx.WriteBuffer(tmp_output);
99 } 99 }
100 100
101 IPC::ResponseBuilder rb{ctx, 3}; 101 IPC::ResponseBuilder rb{ctx, 3};
@@ -116,14 +116,12 @@ void NVDRV::Ioctl3(HLERequestContext& ctx) {
116 } 116 }
117 117
118 const auto input_buffer = ctx.ReadBuffer(0); 118 const auto input_buffer = ctx.ReadBuffer(0);
119 std::vector<u8> output_buffer(ctx.GetWriteBufferSize(0)); 119 tmp_output.resize_destructive(ctx.GetWriteBufferSize(0));
120 std::vector<u8> output_buffer_inline(ctx.GetWriteBufferSize(1)); 120 tmp_output_inline.resize_destructive(ctx.GetWriteBufferSize(1));
121 121 const auto nv_result = nvdrv->Ioctl3(fd, command, input_buffer, tmp_output, tmp_output_inline);
122 const auto nv_result =
123 nvdrv->Ioctl3(fd, command, input_buffer, output_buffer, output_buffer_inline);
124 if (command.is_out != 0) { 122 if (command.is_out != 0) {
125 ctx.WriteBuffer(output_buffer, 0); 123 ctx.WriteBuffer(tmp_output, 0);
126 ctx.WriteBuffer(output_buffer_inline, 1); 124 ctx.WriteBuffer(tmp_output_inline, 1);
127 } 125 }
128 126
129 IPC::ResponseBuilder rb{ctx, 3}; 127 IPC::ResponseBuilder rb{ctx, 3};
diff --git a/src/core/hle/service/nvdrv/nvdrv_interface.h b/src/core/hle/service/nvdrv/nvdrv_interface.h
index 881ea1a6b..4b593ff90 100644
--- a/src/core/hle/service/nvdrv/nvdrv_interface.h
+++ b/src/core/hle/service/nvdrv/nvdrv_interface.h
@@ -4,6 +4,7 @@
4#pragma once 4#pragma once
5 5
6#include <memory> 6#include <memory>
7#include "common/scratch_buffer.h"
7#include "core/hle/service/nvdrv/nvdrv.h" 8#include "core/hle/service/nvdrv/nvdrv.h"
8#include "core/hle/service/service.h" 9#include "core/hle/service/service.h"
9 10
@@ -33,6 +34,8 @@ private:
33 34
34 u64 pid{}; 35 u64 pid{};
35 bool is_initialized{}; 36 bool is_initialized{};
37 Common::ScratchBuffer<u8> tmp_output;
38 Common::ScratchBuffer<u8> tmp_output_inline;
36}; 39};
37 40
38} // namespace Service::Nvidia 41} // namespace Service::Nvidia
diff --git a/src/core/hle/service/nvnflinger/parcel.h b/src/core/hle/service/nvnflinger/parcel.h
index fb56d75d7..23ba315a0 100644
--- a/src/core/hle/service/nvnflinger/parcel.h
+++ b/src/core/hle/service/nvnflinger/parcel.h
@@ -6,6 +6,7 @@
6#include <memory> 6#include <memory>
7#include <span> 7#include <span>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9 10
10#include "common/alignment.h" 11#include "common/alignment.h"
11#include "common/assert.h" 12#include "common/assert.h"
@@ -167,7 +168,7 @@ public:
167private: 168private:
168 template <typename T> 169 template <typename T>
169 requires(std::is_trivially_copyable_v<T>) 170 requires(std::is_trivially_copyable_v<T>)
170 void WriteImpl(const T& val, std::vector<u8>& buffer) { 171 void WriteImpl(const T& val, boost::container::small_vector<u8, 0x200>& buffer) {
171 const size_t aligned_size = Common::AlignUp(sizeof(T), 4); 172 const size_t aligned_size = Common::AlignUp(sizeof(T), 4);
172 const size_t old_size = buffer.size(); 173 const size_t old_size = buffer.size();
173 buffer.resize(old_size + aligned_size); 174 buffer.resize(old_size + aligned_size);
@@ -176,8 +177,8 @@ private:
176 } 177 }
177 178
178private: 179private:
179 std::vector<u8> m_data_buffer; 180 boost::container::small_vector<u8, 0x200> m_data_buffer;
180 std::vector<u8> m_object_buffer; 181 boost::container::small_vector<u8, 0x200> m_object_buffer;
181}; 182};
182 183
183} // namespace Service::android 184} // namespace Service::android
diff --git a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
index c3c2281bb..9ff4028c2 100644
--- a/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
+++ b/src/shader_recompiler/backend/glsl/glsl_emit_context.cpp
@@ -479,7 +479,7 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
479 const u32 remainder{4 - element}; 479 const u32 remainder{4 - element};
480 const TransformFeedbackVarying* xfb_varying{}; 480 const TransformFeedbackVarying* xfb_varying{};
481 const size_t xfb_varying_index{base_index + element}; 481 const size_t xfb_varying_index{base_index + element};
482 if (xfb_varying_index < runtime_info.xfb_varyings.size()) { 482 if (xfb_varying_index < runtime_info.xfb_count) {
483 xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index]; 483 xfb_varying = &runtime_info.xfb_varyings[xfb_varying_index];
484 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; 484 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
485 } 485 }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 0f86a8004..34592a01f 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -387,7 +387,7 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
387} 387}
388 388
389void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) { 389void SetupTransformFeedbackCapabilities(EmitContext& ctx, Id main_func) {
390 if (ctx.runtime_info.xfb_varyings.empty()) { 390 if (ctx.runtime_info.xfb_count == 0) {
391 return; 391 return;
392 } 392 }
393 ctx.AddCapability(spv::Capability::TransformFeedback); 393 ctx.AddCapability(spv::Capability::TransformFeedback);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index fd15f47ea..bec5db173 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -160,7 +160,7 @@ void DefineGenericOutput(EmitContext& ctx, size_t index, std::optional<u32> invo
160 const u32 remainder{4 - element}; 160 const u32 remainder{4 - element};
161 const TransformFeedbackVarying* xfb_varying{}; 161 const TransformFeedbackVarying* xfb_varying{};
162 const size_t xfb_varying_index{base_attr_index + element}; 162 const size_t xfb_varying_index{base_attr_index + element};
163 if (xfb_varying_index < ctx.runtime_info.xfb_varyings.size()) { 163 if (xfb_varying_index < ctx.runtime_info.xfb_count) {
164 xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index]; 164 xfb_varying = &ctx.runtime_info.xfb_varyings[xfb_varying_index];
165 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr; 165 xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
166 } 166 }
diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h
index 3b63c249f..619c0b138 100644
--- a/src/shader_recompiler/runtime_info.h
+++ b/src/shader_recompiler/runtime_info.h
@@ -84,7 +84,8 @@ struct RuntimeInfo {
84 bool glasm_use_storage_buffers{}; 84 bool glasm_use_storage_buffers{};
85 85
86 /// Transform feedback state for each varying 86 /// Transform feedback state for each varying
87 std::vector<TransformFeedbackVarying> xfb_varyings; 87 std::array<TransformFeedbackVarying, 256> xfb_varyings{};
88 u32 xfb_count{0};
88}; 89};
89 90
90} // namespace Shader 91} // namespace Shader
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 45977d578..58a45ab67 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -207,7 +207,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
207 if (has_new_downloads) { 207 if (has_new_downloads) {
208 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); 208 memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
209 } 209 }
210 tmp_buffer.resize(amount); 210 tmp_buffer.resize_destructive(amount);
211 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); 211 cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
212 cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); 212 cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
213 return true; 213 return true;
@@ -1279,7 +1279,7 @@ template <class P>
1279typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr, 1279typename BufferCache<P>::OverlapResult BufferCache<P>::ResolveOverlaps(VAddr cpu_addr,
1280 u32 wanted_size) { 1280 u32 wanted_size) {
1281 static constexpr int STREAM_LEAP_THRESHOLD = 16; 1281 static constexpr int STREAM_LEAP_THRESHOLD = 16;
1282 std::vector<BufferId> overlap_ids; 1282 boost::container::small_vector<BufferId, 16> overlap_ids;
1283 VAddr begin = cpu_addr; 1283 VAddr begin = cpu_addr;
1284 VAddr end = cpu_addr + wanted_size; 1284 VAddr end = cpu_addr + wanted_size;
1285 int stream_score = 0; 1285 int stream_score = 0;
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 63a120f7a..fe6068cfe 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -229,7 +229,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInf
229 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>; 229 using OverlapCounter = boost::icl::split_interval_map<VAddr, int>;
230 230
231 struct OverlapResult { 231 struct OverlapResult {
232 std::vector<BufferId> ids; 232 boost::container::small_vector<BufferId, 16> ids;
233 VAddr begin; 233 VAddr begin;
234 VAddr end; 234 VAddr end;
235 bool has_stream_leap = false; 235 bool has_stream_leap = false;
@@ -582,7 +582,7 @@ private:
582 BufferId inline_buffer_id; 582 BufferId inline_buffer_id;
583 583
584 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table; 584 std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
585 std::vector<u8> tmp_buffer; 585 Common::ScratchBuffer<u8> tmp_buffer;
586}; 586};
587 587
588} // namespace VideoCommon 588} // namespace VideoCommon
diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h
index 83112dfce..7d660af47 100644
--- a/src/video_core/cdma_pusher.h
+++ b/src/video_core/cdma_pusher.h
@@ -63,7 +63,6 @@ struct ChCommand {
63}; 63};
64 64
65using ChCommandHeaderList = std::vector<ChCommandHeader>; 65using ChCommandHeaderList = std::vector<ChCommandHeader>;
66using ChCommandList = std::vector<ChCommand>;
67 66
68struct ThiRegisters { 67struct ThiRegisters {
69 u32_le increment_syncpt{}; 68 u32_le increment_syncpt{};
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 1cdb690ed..8a2784cdc 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <span> 7#include <span>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9#include <queue> 10#include <queue>
10 11
11#include "common/bit_field.h" 12#include "common/bit_field.h"
@@ -102,11 +103,12 @@ inline CommandHeader BuildCommandHeader(BufferMethods method, u32 arg_count, Sub
102struct CommandList final { 103struct CommandList final {
103 CommandList() = default; 104 CommandList() = default;
104 explicit CommandList(std::size_t size) : command_lists(size) {} 105 explicit CommandList(std::size_t size) : command_lists(size) {}
105 explicit CommandList(std::vector<CommandHeader>&& prefetch_command_list_) 106 explicit CommandList(
107 boost::container::small_vector<CommandHeader, 512>&& prefetch_command_list_)
106 : prefetch_command_list{std::move(prefetch_command_list_)} {} 108 : prefetch_command_list{std::move(prefetch_command_list_)} {}
107 109
108 std::vector<CommandListHeader> command_lists; 110 boost::container::small_vector<CommandListHeader, 512> command_lists;
109 std::vector<CommandHeader> prefetch_command_list; 111 boost::container::small_vector<CommandHeader, 512> prefetch_command_list;
110}; 112};
111 113
112/** 114/**
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index ebe5536de..bc1eb41e7 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -108,9 +108,11 @@ void MaxwellDMA::Launch() {
108 if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) { 108 if (regs.launch_dma.remap_enable != 0 && is_const_a_dst) {
109 ASSERT(regs.remap_const.component_size_minus_one == 3); 109 ASSERT(regs.remap_const.component_size_minus_one == 3);
110 accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value); 110 accelerate.BufferClear(regs.offset_out, regs.line_length_in, regs.remap_consta_value);
111 std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); 111 read_buffer.resize_destructive(regs.line_length_in * sizeof(u32));
112 std::span<u32> span(reinterpret_cast<u32*>(read_buffer.data()), regs.line_length_in);
113 std::ranges::fill(span, regs.remap_consta_value);
112 memory_manager.WriteBlockUnsafe(regs.offset_out, 114 memory_manager.WriteBlockUnsafe(regs.offset_out,
113 reinterpret_cast<u8*>(tmp_buffer.data()), 115 reinterpret_cast<u8*>(read_buffer.data()),
114 regs.line_length_in * sizeof(u32)); 116 regs.line_length_in * sizeof(u32));
115 } else { 117 } else {
116 memory_manager.FlushCaching(); 118 memory_manager.FlushCaching();
@@ -126,32 +128,32 @@ void MaxwellDMA::Launch() {
126 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 128 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
127 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); 129 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
128 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 130 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
129 std::vector<u8> tmp_buffer(16); 131 read_buffer.resize_destructive(16);
130 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 132 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
131 memory_manager.ReadBlockUnsafe( 133 memory_manager.ReadBlockUnsafe(
132 convert_linear_2_blocklinear_addr(regs.offset_in + offset), 134 convert_linear_2_blocklinear_addr(regs.offset_in + offset),
133 tmp_buffer.data(), tmp_buffer.size()); 135 read_buffer.data(), read_buffer.size());
134 memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(), 136 memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
135 tmp_buffer.size()); 137 read_buffer.size());
136 } 138 }
137 } else if (is_src_pitch && !is_dst_pitch) { 139 } else if (is_src_pitch && !is_dst_pitch) {
138 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); 140 UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
139 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0); 141 UNIMPLEMENTED_IF(regs.offset_in % 16 != 0);
140 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); 142 UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
141 std::vector<u8> tmp_buffer(16); 143 read_buffer.resize_destructive(16);
142 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { 144 for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
143 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, tmp_buffer.data(), 145 memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(),
144 tmp_buffer.size()); 146 read_buffer.size());
145 memory_manager.WriteBlockCached( 147 memory_manager.WriteBlockCached(
146 convert_linear_2_blocklinear_addr(regs.offset_out + offset), 148 convert_linear_2_blocklinear_addr(regs.offset_out + offset),
147 tmp_buffer.data(), tmp_buffer.size()); 149 read_buffer.data(), read_buffer.size());
148 } 150 }
149 } else { 151 } else {
150 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { 152 if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
151 std::vector<u8> tmp_buffer(regs.line_length_in); 153 read_buffer.resize_destructive(regs.line_length_in);
152 memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), 154 memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(),
153 regs.line_length_in); 155 regs.line_length_in);
154 memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(), 156 memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
155 regs.line_length_in); 157 regs.line_length_in);
156 } 158 }
157 } 159 }
@@ -171,7 +173,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
171 src_operand.address = regs.offset_in; 173 src_operand.address = regs.offset_in;
172 174
173 DMA::BufferOperand dst_operand; 175 DMA::BufferOperand dst_operand;
174 dst_operand.pitch = regs.pitch_out; 176 u32 abs_pitch_out = std::abs(static_cast<s32>(regs.pitch_out));
177 dst_operand.pitch = abs_pitch_out;
175 dst_operand.width = regs.line_length_in; 178 dst_operand.width = regs.line_length_in;
176 dst_operand.height = regs.line_count; 179 dst_operand.height = regs.line_count;
177 dst_operand.address = regs.offset_out; 180 dst_operand.address = regs.offset_out;
@@ -218,7 +221,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
218 const size_t src_size = 221 const size_t src_size =
219 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); 222 CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
220 223
221 const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; 224 const size_t dst_size = static_cast<size_t>(abs_pitch_out) * regs.line_count;
222 read_buffer.resize_destructive(src_size); 225 read_buffer.resize_destructive(src_size);
223 write_buffer.resize_destructive(dst_size); 226 write_buffer.resize_destructive(dst_size);
224 227
@@ -227,7 +230,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
227 230
228 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, 231 UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
229 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, 232 src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
230 regs.pitch_out); 233 abs_pitch_out);
231 234
232 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); 235 memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
233} 236}
diff --git a/src/video_core/host1x/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp
index 6ce179167..ce827eb6c 100644
--- a/src/video_core/host1x/codecs/h264.cpp
+++ b/src/video_core/host1x/codecs/h264.cpp
@@ -4,6 +4,7 @@
4#include <array> 4#include <array>
5#include <bit> 5#include <bit>
6 6
7#include "common/scratch_buffer.h"
7#include "common/settings.h" 8#include "common/settings.h"
8#include "video_core/host1x/codecs/h264.h" 9#include "video_core/host1x/codecs/h264.h"
9#include "video_core/host1x/host1x.h" 10#include "video_core/host1x/host1x.h"
@@ -188,7 +189,8 @@ void H264BitWriter::WriteBit(bool state) {
188} 189}
189 190
190void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) { 191void H264BitWriter::WriteScalingList(std::span<const u8> list, s32 start, s32 count) {
191 std::vector<u8> scan(count); 192 static Common::ScratchBuffer<u8> scan{};
193 scan.resize_destructive(count);
192 if (count == 16) { 194 if (count == 16) {
193 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size()); 195 std::memcpy(scan.data(), zig_zag_scan.data(), scan.size());
194 } else { 196 } else {
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index b2f7e160a..45141e488 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -587,7 +587,7 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
587 587
588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, 588void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
589 VideoCommon::CacheType which) { 589 VideoCommon::CacheType which) {
590 std::vector<u8> tmp_buffer(size); 590 tmp_buffer.resize_destructive(size);
591 ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); 591 ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
592 592
593 // The output block must be flushed in case it has data modified from the GPU. 593 // The output block must be flushed in case it has data modified from the GPU.
@@ -670,9 +670,9 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
670 return result; 670 return result;
671} 671}
672 672
673std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange( 673boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32>
674 GPUVAddr gpu_addr, std::size_t size) const { 674MemoryManager::GetSubmappedRange(GPUVAddr gpu_addr, std::size_t size) const {
675 std::vector<std::pair<GPUVAddr, std::size_t>> result{}; 675 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> result{};
676 GetSubmappedRangeImpl<true>(gpu_addr, size, result); 676 GetSubmappedRangeImpl<true>(gpu_addr, size, result);
677 return result; 677 return result;
678} 678}
@@ -680,8 +680,9 @@ std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
680template <bool is_gpu_address> 680template <bool is_gpu_address>
681void MemoryManager::GetSubmappedRangeImpl( 681void MemoryManager::GetSubmappedRangeImpl(
682 GPUVAddr gpu_addr, std::size_t size, 682 GPUVAddr gpu_addr, std::size_t size,
683 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& 683 boost::container::small_vector<
684 result) const { 684 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>& result)
685 const {
685 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>> 686 std::optional<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>
686 last_segment{}; 687 last_segment{};
687 std::optional<VAddr> old_page_addr{}; 688 std::optional<VAddr> old_page_addr{};
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 794535122..4202c26ff 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -8,10 +8,12 @@
8#include <mutex> 8#include <mutex>
9#include <optional> 9#include <optional>
10#include <vector> 10#include <vector>
11#include <boost/container/small_vector.hpp>
11 12
12#include "common/common_types.h" 13#include "common/common_types.h"
13#include "common/multi_level_page_table.h" 14#include "common/multi_level_page_table.h"
14#include "common/range_map.h" 15#include "common/range_map.h"
16#include "common/scratch_buffer.h"
15#include "common/virtual_buffer.h" 17#include "common/virtual_buffer.h"
16#include "video_core/cache_types.h" 18#include "video_core/cache_types.h"
17#include "video_core/pte_kind.h" 19#include "video_core/pte_kind.h"
@@ -107,8 +109,8 @@ public:
107 * if the region is continuous, a single pair will be returned. If it's unmapped, an empty 109 * if the region is continuous, a single pair will be returned. If it's unmapped, an empty
108 * vector will be returned; 110 * vector will be returned;
109 */ 111 */
110 std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr, 112 boost::container::small_vector<std::pair<GPUVAddr, std::size_t>, 32> GetSubmappedRange(
111 std::size_t size) const; 113 GPUVAddr gpu_addr, std::size_t size) const;
112 114
113 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, 115 GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
114 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true); 116 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
@@ -165,7 +167,8 @@ private:
165 template <bool is_gpu_address> 167 template <bool is_gpu_address>
166 void GetSubmappedRangeImpl( 168 void GetSubmappedRangeImpl(
167 GPUVAddr gpu_addr, std::size_t size, 169 GPUVAddr gpu_addr, std::size_t size,
168 std::vector<std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>>& 170 boost::container::small_vector<
171 std::pair<std::conditional_t<is_gpu_address, GPUVAddr, VAddr>, std::size_t>, 32>&
169 result) const; 172 result) const;
170 173
171 Core::System& system; 174 Core::System& system;
@@ -215,8 +218,8 @@ private:
215 Common::VirtualBuffer<u32> big_page_table_cpu; 218 Common::VirtualBuffer<u32> big_page_table_cpu;
216 219
217 std::vector<u64> big_page_continuous; 220 std::vector<u64> big_page_continuous;
218 std::vector<std::pair<VAddr, std::size_t>> page_stash{}; 221 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash{};
219 std::vector<std::pair<VAddr, std::size_t>> page_stash2{}; 222 boost::container::small_vector<std::pair<VAddr, std::size_t>, 32> page_stash2{};
220 223
221 mutable std::mutex guard; 224 mutable std::mutex guard;
222 225
@@ -226,6 +229,8 @@ private:
226 std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator; 229 std::unique_ptr<VideoCommon::InvalidationAccumulator> accumulator;
227 230
228 static std::atomic<size_t> unique_identifier_generator; 231 static std::atomic<size_t> unique_identifier_generator;
232
233 Common::ScratchBuffer<u8> tmp_buffer;
229}; 234};
230 235
231} // namespace Tegra 236} // namespace Tegra
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3f077311e..0329ed820 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -85,7 +85,9 @@ Shader::RuntimeInfo MakeRuntimeInfo(const GraphicsPipelineKey& key,
85 case Shader::Stage::VertexB: 85 case Shader::Stage::VertexB:
86 case Shader::Stage::Geometry: 86 case Shader::Stage::Geometry:
87 if (!use_assembly_shaders && key.xfb_enabled != 0) { 87 if (!use_assembly_shaders && key.xfb_enabled != 0) {
88 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state); 88 auto [varyings, count] = VideoCommon::MakeTransformFeedbackVaryings(key.xfb_state);
89 info.xfb_varyings = varyings;
90 info.xfb_count = count;
89 } 91 }
90 break; 92 break;
91 case Shader::Stage::TessellationEval: 93 case Shader::Stage::TessellationEval:
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index e30fcb1ed..f47301ad5 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -361,7 +361,7 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
361 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, 361 .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
362 }; 362 };
363 // Measuring a popular game, this number never exceeds the specified size once data is warmed up 363 // Measuring a popular game, this number never exceeds the specified size once data is warmed up
364 boost::container::small_vector<VkBufferCopy, 3> vk_copies(copies.size()); 364 boost::container::small_vector<VkBufferCopy, 8> vk_copies(copies.size());
365 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy); 365 std::ranges::transform(copies, vk_copies.begin(), MakeBufferCopy);
366 scheduler.RequestOutsideRenderPassOperationContext(); 366 scheduler.RequestOutsideRenderPassOperationContext();
367 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) { 367 scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index a2cfb2105..9f316113c 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -167,7 +167,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
167 info.fixed_state_point_size = point_size; 167 info.fixed_state_point_size = point_size;
168 } 168 }
169 if (key.state.xfb_enabled) { 169 if (key.state.xfb_enabled) {
170 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); 170 auto [varyings, count] =
171 VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
172 info.xfb_varyings = varyings;
173 info.xfb_count = count;
171 } 174 }
172 info.convert_depth_mode = gl_ndc; 175 info.convert_depth_mode = gl_ndc;
173 } 176 }
@@ -214,7 +217,10 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program
214 info.fixed_state_point_size = point_size; 217 info.fixed_state_point_size = point_size;
215 } 218 }
216 if (key.state.xfb_enabled != 0) { 219 if (key.state.xfb_enabled != 0) {
217 info.xfb_varyings = VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state); 220 auto [varyings, count] =
221 VideoCommon::MakeTransformFeedbackVaryings(key.state.xfb_state);
222 info.xfb_varyings = varyings;
223 info.xfb_count = count;
218 } 224 }
219 info.convert_depth_mode = gl_ndc; 225 info.convert_depth_mode = gl_ndc;
220 break; 226 break;
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index f025f618b..f3cef09dd 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -330,9 +330,9 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
330 }; 330 };
331} 331}
332 332
333[[maybe_unused]] [[nodiscard]] std::vector<VkBufferCopy> TransformBufferCopies( 333[[maybe_unused]] [[nodiscard]] boost::container::small_vector<VkBufferCopy, 16>
334 std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) { 334TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t buffer_offset) {
335 std::vector<VkBufferCopy> result(copies.size()); 335 boost::container::small_vector<VkBufferCopy, 16> result(copies.size());
336 std::ranges::transform( 336 std::ranges::transform(
337 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) { 337 copies, result.begin(), [buffer_offset](const VideoCommon::BufferCopy& copy) {
338 return VkBufferCopy{ 338 return VkBufferCopy{
@@ -344,7 +344,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
344 return result; 344 return result;
345} 345}
346 346
347[[nodiscard]] std::vector<VkBufferImageCopy> TransformBufferImageCopies( 347[[nodiscard]] boost::container::small_vector<VkBufferImageCopy, 16> TransformBufferImageCopies(
348 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) { 348 std::span<const BufferImageCopy> copies, size_t buffer_offset, VkImageAspectFlags aspect_mask) {
349 struct Maker { 349 struct Maker {
350 VkBufferImageCopy operator()(const BufferImageCopy& copy) const { 350 VkBufferImageCopy operator()(const BufferImageCopy& copy) const {
@@ -377,14 +377,14 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
377 VkImageAspectFlags aspect_mask; 377 VkImageAspectFlags aspect_mask;
378 }; 378 };
379 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { 379 if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
380 std::vector<VkBufferImageCopy> result(copies.size() * 2); 380 boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size() * 2);
381 std::ranges::transform(copies, result.begin(), 381 std::ranges::transform(copies, result.begin(),
382 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT}); 382 Maker{buffer_offset, VK_IMAGE_ASPECT_DEPTH_BIT});
383 std::ranges::transform(copies, result.begin() + copies.size(), 383 std::ranges::transform(copies, result.begin() + copies.size(),
384 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT}); 384 Maker{buffer_offset, VK_IMAGE_ASPECT_STENCIL_BIT});
385 return result; 385 return result;
386 } else { 386 } else {
387 std::vector<VkBufferImageCopy> result(copies.size()); 387 boost::container::small_vector<VkBufferImageCopy, 16> result(copies.size());
388 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask}); 388 std::ranges::transform(copies, result.begin(), Maker{buffer_offset, aspect_mask});
389 return result; 389 return result;
390 } 390 }
@@ -867,8 +867,8 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
867 867
868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, 868void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
869 std::span<const VideoCommon::ImageCopy> copies) { 869 std::span<const VideoCommon::ImageCopy> copies) {
870 std::vector<VkBufferImageCopy> vk_in_copies(copies.size()); 870 boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
871 std::vector<VkBufferImageCopy> vk_out_copies(copies.size()); 871 boost::container::small_vector<VkBufferImageCopy, 16> vk_out_copies(copies.size());
872 const VkImageAspectFlags src_aspect_mask = src.AspectMask(); 872 const VkImageAspectFlags src_aspect_mask = src.AspectMask();
873 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); 873 const VkImageAspectFlags dst_aspect_mask = dst.AspectMask();
874 874
@@ -1157,7 +1157,7 @@ void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, Im
1157 1157
1158void TextureCacheRuntime::CopyImage(Image& dst, Image& src, 1158void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
1159 std::span<const VideoCommon::ImageCopy> copies) { 1159 std::span<const VideoCommon::ImageCopy> copies) {
1160 std::vector<VkImageCopy> vk_copies(copies.size()); 1160 boost::container::small_vector<VkImageCopy, 16> vk_copies(copies.size());
1161 const VkImageAspectFlags aspect_mask = dst.AspectMask(); 1161 const VkImageAspectFlags aspect_mask = dst.AspectMask();
1162 ASSERT(aspect_mask == src.AspectMask()); 1162 ASSERT(aspect_mask == src.AspectMask());
1163 1163
@@ -1332,7 +1332,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
1332 ScaleDown(true); 1332 ScaleDown(true);
1333 } 1333 }
1334 scheduler->RequestOutsideRenderPassOperationContext(); 1334 scheduler->RequestOutsideRenderPassOperationContext();
1335 std::vector vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); 1335 auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
1336 const VkBuffer src_buffer = buffer; 1336 const VkBuffer src_buffer = buffer;
1337 const VkImage vk_image = *original_image; 1337 const VkImage vk_image = *original_image;
1338 const VkImageAspectFlags vk_aspect_mask = aspect_mask; 1338 const VkImageAspectFlags vk_aspect_mask = aspect_mask;
@@ -1367,8 +1367,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceS
1367 if (is_rescaled) { 1367 if (is_rescaled) {
1368 ScaleDown(); 1368 ScaleDown();
1369 } 1369 }
1370 boost::container::small_vector<VkBuffer, 1> buffers_vector{}; 1370 boost::container::small_vector<VkBuffer, 8> buffers_vector{};
1371 boost::container::small_vector<std::vector<VkBufferImageCopy>, 1> vk_copies; 1371 boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
1372 vk_copies;
1372 for (size_t index = 0; index < buffers_span.size(); index++) { 1373 for (size_t index = 0; index < buffers_span.size(); index++) {
1373 buffers_vector.emplace_back(buffers_span[index]); 1374 buffers_vector.emplace_back(buffers_span[index]);
1374 vk_copies.emplace_back( 1375 vk_copies.emplace_back(
@@ -1858,7 +1859,7 @@ Framebuffer::~Framebuffer() = default;
1858void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, 1859void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
1859 std::span<ImageView*, NUM_RT> color_buffers, 1860 std::span<ImageView*, NUM_RT> color_buffers,
1860 ImageView* depth_buffer, bool is_rescaled) { 1861 ImageView* depth_buffer, bool is_rescaled) {
1861 std::vector<VkImageView> attachments; 1862 boost::container::small_vector<VkImageView, NUM_RT + 1> attachments;
1862 RenderPassKey renderpass_key{}; 1863 RenderPassKey renderpass_key{};
1863 s32 num_layers = 1; 1864 s32 num_layers = 1;
1864 1865
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index c5213875b..4db948b6d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -151,11 +151,9 @@ void ShaderCache::RemovePendingShaders() {
151 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), 151 marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
152 marked_for_removal.end()); 152 marked_for_removal.end());
153 153
154 std::vector<ShaderInfo*> removed_shaders; 154 boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
155 removed_shaders.reserve(marked_for_removal.size());
156 155
157 std::scoped_lock lock{lookup_mutex}; 156 std::scoped_lock lock{lookup_mutex};
158
159 for (Entry* const entry : marked_for_removal) { 157 for (Entry* const entry : marked_for_removal) {
160 removed_shaders.push_back(entry->data); 158 removed_shaders.push_back(entry->data);
161 159
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1b8a17ee8..55d49d017 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -6,6 +6,7 @@
6#include <array> 6#include <array>
7#include <optional> 7#include <optional>
8#include <vector> 8#include <vector>
9#include <boost/container/small_vector.hpp>
9 10
10#include "common/common_funcs.h" 11#include "common/common_funcs.h"
11#include "common/common_types.h" 12#include "common/common_types.h"
@@ -108,8 +109,8 @@ struct ImageBase {
108 std::vector<ImageViewInfo> image_view_infos; 109 std::vector<ImageViewInfo> image_view_infos;
109 std::vector<ImageViewId> image_view_ids; 110 std::vector<ImageViewId> image_view_ids;
110 111
111 std::vector<u32> slice_offsets; 112 boost::container::small_vector<u32, 16> slice_offsets;
112 std::vector<SubresourceBase> slice_subresources; 113 boost::container::small_vector<SubresourceBase, 16> slice_subresources;
113 114
114 std::vector<AliasedImage> aliased_images; 115 std::vector<AliasedImage> aliased_images;
115 std::vector<ImageId> overlapping_images; 116 std::vector<ImageId> overlapping_images;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d58bb69ff..d3f03a995 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -526,7 +526,7 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
526 526
527template <class P> 527template <class P>
528void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { 528void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
529 std::vector<ImageId> images; 529 boost::container::small_vector<ImageId, 16> images;
530 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) { 530 ForEachImageInRegion(cpu_addr, size, [&images](ImageId image_id, ImageBase& image) {
531 if (!image.IsSafeDownload()) { 531 if (!image.IsSafeDownload()) {
532 return; 532 return;
@@ -579,7 +579,7 @@ std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(V
579 579
580template <class P> 580template <class P>
581void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { 581void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
582 std::vector<ImageId> deleted_images; 582 boost::container::small_vector<ImageId, 16> deleted_images;
583 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); 583 ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); });
584 for (const ImageId id : deleted_images) { 584 for (const ImageId id : deleted_images) {
585 Image& image = slot_images[id]; 585 Image& image = slot_images[id];
@@ -593,7 +593,7 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
593 593
594template <class P> 594template <class P>
595void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) { 595void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
596 std::vector<ImageId> deleted_images; 596 boost::container::small_vector<ImageId, 16> deleted_images;
597 ForEachImageInRegionGPU(as_id, gpu_addr, size, 597 ForEachImageInRegionGPU(as_id, gpu_addr, size,
598 [&](ImageId id, Image&) { deleted_images.push_back(id); }); 598 [&](ImageId id, Image&) { deleted_images.push_back(id); });
599 for (const ImageId id : deleted_images) { 599 for (const ImageId id : deleted_images) {
@@ -1101,7 +1101,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
1101 const bool native_bgr = runtime.HasNativeBgr(); 1101 const bool native_bgr = runtime.HasNativeBgr();
1102 const bool flexible_formats = True(options & RelaxedOptions::Format); 1102 const bool flexible_formats = True(options & RelaxedOptions::Format);
1103 ImageId image_id{}; 1103 ImageId image_id{};
1104 boost::container::small_vector<ImageId, 1> image_ids; 1104 boost::container::small_vector<ImageId, 8> image_ids;
1105 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1105 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1106 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 1106 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1107 return false; 1107 return false;
@@ -1622,7 +1622,7 @@ ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr)
1622 } 1622 }
1623 } 1623 }
1624 ImageId image_id{}; 1624 ImageId image_id{};
1625 boost::container::small_vector<ImageId, 1> image_ids; 1625 boost::container::small_vector<ImageId, 8> image_ids;
1626 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { 1626 const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
1627 if (True(existing_image.flags & ImageFlagBits::Remapped)) { 1627 if (True(existing_image.flags & ImageFlagBits::Remapped)) {
1628 return false; 1628 return false;
@@ -1942,7 +1942,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
1942 image.map_view_id = map_id; 1942 image.map_view_id = map_id;
1943 return; 1943 return;
1944 } 1944 }
1945 std::vector<ImageViewId> sparse_maps{}; 1945 boost::container::small_vector<ImageViewId, 16> sparse_maps;
1946 ForEachSparseSegment( 1946 ForEachSparseSegment(
1947 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { 1947 image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
1948 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); 1948 auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
@@ -2217,7 +2217,7 @@ void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
2217 2217
2218template <class P> 2218template <class P>
2219void TextureCache<P>::SynchronizeAliases(ImageId image_id) { 2219void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
2220 boost::container::small_vector<const AliasedImage*, 1> aliased_images; 2220 boost::container::small_vector<const AliasedImage*, 8> aliased_images;
2221 Image& image = slot_images[image_id]; 2221 Image& image = slot_images[image_id];
2222 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled); 2222 bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
2223 bool any_modified = True(image.flags & ImageFlagBits::GpuModified); 2223 bool any_modified = True(image.flags & ImageFlagBits::GpuModified);
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 44232b961..e9ec91265 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -56,7 +56,7 @@ struct ImageViewInOut {
56struct AsyncDecodeContext { 56struct AsyncDecodeContext {
57 ImageId image_id; 57 ImageId image_id;
58 Common::ScratchBuffer<u8> decoded_data; 58 Common::ScratchBuffer<u8> decoded_data;
59 std::vector<BufferImageCopy> copies; 59 boost::container::small_vector<BufferImageCopy, 16> copies;
60 std::mutex mutex; 60 std::mutex mutex;
61 std::atomic_bool complete; 61 std::atomic_bool complete;
62}; 62};
@@ -429,7 +429,7 @@ private:
429 429
430 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; 430 std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
431 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; 431 std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
432 std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; 432 std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
433 433
434 VAddr virtual_invalid_space{}; 434 VAddr virtual_invalid_space{};
435 435
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp
index 95a5b47d8..f781cb7a0 100644
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@@ -329,13 +329,13 @@ template <u32 GOB_EXTENT>
329 329
330[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D( 330[[nodiscard]] std::optional<SubresourceExtent> ResolveOverlapRightAddress3D(
331 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) { 331 const ImageInfo& new_info, GPUVAddr gpu_addr, const ImageBase& overlap, bool strict_size) {
332 const std::vector<u32> slice_offsets = CalculateSliceOffsets(new_info); 332 const auto slice_offsets = CalculateSliceOffsets(new_info);
333 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr); 333 const u32 diff = static_cast<u32>(overlap.gpu_addr - gpu_addr);
334 const auto it = std::ranges::find(slice_offsets, diff); 334 const auto it = std::ranges::find(slice_offsets, diff);
335 if (it == slice_offsets.end()) { 335 if (it == slice_offsets.end()) {
336 return std::nullopt; 336 return std::nullopt;
337 } 337 }
338 const std::vector subresources = CalculateSliceSubresources(new_info); 338 const auto subresources = CalculateSliceSubresources(new_info);
339 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)]; 339 const SubresourceBase base = subresources[std::distance(slice_offsets.begin(), it)];
340 const ImageInfo& info = overlap.info; 340 const ImageInfo& info = overlap.info;
341 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) { 341 if (!IsBlockLinearSizeCompatible(new_info, info, base.level, 0, strict_size)) {
@@ -655,9 +655,9 @@ LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
655 return sizes; 655 return sizes;
656} 656}
657 657
658std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) { 658boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info) {
659 ASSERT(info.type == ImageType::e3D); 659 ASSERT(info.type == ImageType::e3D);
660 std::vector<u32> offsets; 660 boost::container::small_vector<u32, 16> offsets;
661 offsets.reserve(NumSlices(info)); 661 offsets.reserve(NumSlices(info));
662 662
663 const LevelInfo level_info = MakeLevelInfo(info); 663 const LevelInfo level_info = MakeLevelInfo(info);
@@ -679,9 +679,10 @@ std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
679 return offsets; 679 return offsets;
680} 680}
681 681
682std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info) { 682boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
683 const ImageInfo& info) {
683 ASSERT(info.type == ImageType::e3D); 684 ASSERT(info.type == ImageType::e3D);
684 std::vector<SubresourceBase> subresources; 685 boost::container::small_vector<SubresourceBase, 16> subresources;
685 subresources.reserve(NumSlices(info)); 686 subresources.reserve(NumSlices(info));
686 for (s32 level = 0; level < info.resources.levels; ++level) { 687 for (s32 level = 0; level < info.resources.levels; ++level) {
687 const s32 depth = AdjustMipSize(info.size.depth, level); 688 const s32 depth = AdjustMipSize(info.size.depth, level);
@@ -723,8 +724,10 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept {
723 } 724 }
724} 725}
725 726
726std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, 727boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(const ImageInfo& dst,
727 SubresourceBase base, u32 up_scale, u32 down_shift) { 728 const ImageInfo& src,
729 SubresourceBase base,
730 u32 up_scale, u32 down_shift) {
728 ASSERT(dst.resources.levels >= src.resources.levels); 731 ASSERT(dst.resources.levels >= src.resources.levels);
729 732
730 const bool is_dst_3d = dst.type == ImageType::e3D; 733 const bool is_dst_3d = dst.type == ImageType::e3D;
@@ -733,7 +736,7 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
733 ASSERT(src.resources.levels == 1); 736 ASSERT(src.resources.levels == 1);
734 } 737 }
735 const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D}; 738 const bool both_2d{src.type == ImageType::e2D && dst.type == ImageType::e2D};
736 std::vector<ImageCopy> copies; 739 boost::container::small_vector<ImageCopy, 16> copies;
737 copies.reserve(src.resources.levels); 740 copies.reserve(src.resources.levels);
738 for (s32 level = 0; level < src.resources.levels; ++level) { 741 for (s32 level = 0; level < src.resources.levels; ++level) {
739 ImageCopy& copy = copies.emplace_back(); 742 ImageCopy& copy = copies.emplace_back();
@@ -770,9 +773,10 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
770 return copies; 773 return copies;
771} 774}
772 775
773std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, 776boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(const ImageInfo& src,
774 u32 down_shift) { 777 u32 up_scale,
775 std::vector<ImageCopy> copies; 778 u32 down_shift) {
779 boost::container::small_vector<ImageCopy, 16> copies;
776 copies.reserve(src.resources.levels); 780 copies.reserve(src.resources.levels);
777 const bool is_3d = src.type == ImageType::e3D; 781 const bool is_3d = src.type == ImageType::e3D;
778 for (s32 level = 0; level < src.resources.levels; ++level) { 782 for (s32 level = 0; level < src.resources.levels; ++level) {
@@ -824,9 +828,11 @@ bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config
824 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); 828 return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
825} 829}
826 830
827std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 831boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
828 const ImageInfo& info, std::span<const u8> input, 832 GPUVAddr gpu_addr,
829 std::span<u8> output) { 833 const ImageInfo& info,
834 std::span<const u8> input,
835 std::span<u8> output) {
830 const size_t guest_size_bytes = input.size_bytes(); 836 const size_t guest_size_bytes = input.size_bytes();
831 const u32 bpp_log2 = BytesPerBlockLog2(info.format); 837 const u32 bpp_log2 = BytesPerBlockLog2(info.format);
832 const Extent3D size = info.size; 838 const Extent3D size = info.size;
@@ -861,7 +867,7 @@ std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GP
861 info.tile_width_spacing); 867 info.tile_width_spacing);
862 size_t guest_offset = 0; 868 size_t guest_offset = 0;
863 u32 host_offset = 0; 869 u32 host_offset = 0;
864 std::vector<BufferImageCopy> copies(num_levels); 870 boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
865 871
866 for (s32 level = 0; level < num_levels; ++level) { 872 for (s32 level = 0; level < num_levels; ++level) {
867 const Extent3D level_size = AdjustMipSize(size, level); 873 const Extent3D level_size = AdjustMipSize(size, level);
@@ -978,7 +984,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
978 } 984 }
979} 985}
980 986
981std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) { 987boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
982 const Extent3D size = info.size; 988 const Extent3D size = info.size;
983 const u32 bytes_per_block = BytesPerBlock(info.format); 989 const u32 bytes_per_block = BytesPerBlock(info.format);
984 if (info.type == ImageType::Linear) { 990 if (info.type == ImageType::Linear) {
@@ -1006,7 +1012,7 @@ std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
1006 1012
1007 u32 host_offset = 0; 1013 u32 host_offset = 0;
1008 1014
1009 std::vector<BufferImageCopy> copies(num_levels); 1015 boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
1010 for (s32 level = 0; level < num_levels; ++level) { 1016 for (s32 level = 0; level < num_levels; ++level) {
1011 const Extent3D level_size = AdjustMipSize(size, level); 1017 const Extent3D level_size = AdjustMipSize(size, level);
1012 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size); 1018 const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
@@ -1042,10 +1048,10 @@ Extent3D MipBlockSize(const ImageInfo& info, u32 level) {
1042 return AdjustMipBlockSize(num_tiles, level_info.block, level); 1048 return AdjustMipBlockSize(num_tiles, level_info.block, level);
1043} 1049}
1044 1050
1045std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) { 1051boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(const ImageInfo& info) {
1046 const Extent2D tile_size = DefaultBlockSize(info.format); 1052 const Extent2D tile_size = DefaultBlockSize(info.format);
1047 if (info.type == ImageType::Linear) { 1053 if (info.type == ImageType::Linear) {
1048 return std::vector{SwizzleParameters{ 1054 return {SwizzleParameters{
1049 .num_tiles = AdjustTileSize(info.size, tile_size), 1055 .num_tiles = AdjustTileSize(info.size, tile_size),
1050 .block = {}, 1056 .block = {},
1051 .buffer_offset = 0, 1057 .buffer_offset = 0,
@@ -1057,7 +1063,7 @@ std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info) {
1057 const s32 num_levels = info.resources.levels; 1063 const s32 num_levels = info.resources.levels;
1058 1064
1059 u32 guest_offset = 0; 1065 u32 guest_offset = 0;
1060 std::vector<SwizzleParameters> params(num_levels); 1066 boost::container::small_vector<SwizzleParameters, 16> params(num_levels);
1061 for (s32 level = 0; level < num_levels; ++level) { 1067 for (s32 level = 0; level < num_levels; ++level) {
1062 const Extent3D level_size = AdjustMipSize(size, level); 1068 const Extent3D level_size = AdjustMipSize(size, level);
1063 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size); 1069 const Extent3D num_tiles = AdjustTileSize(level_size, tile_size);
diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h
index 84aa6880d..ab45a43c4 100644
--- a/src/video_core/texture_cache/util.h
+++ b/src/video_core/texture_cache/util.h
@@ -5,6 +5,7 @@
5 5
6#include <optional> 6#include <optional>
7#include <span> 7#include <span>
8#include <boost/container/small_vector.hpp>
8 9
9#include "common/common_types.h" 10#include "common/common_types.h"
10#include "common/scratch_buffer.h" 11#include "common/scratch_buffer.h"
@@ -40,9 +41,10 @@ struct OverlapResult {
40 41
41[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; 42[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
42 43
43[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info); 44[[nodiscard]] boost::container::small_vector<u32, 16> CalculateSliceOffsets(const ImageInfo& info);
44 45
45[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info); 46[[nodiscard]] boost::container::small_vector<SubresourceBase, 16> CalculateSliceSubresources(
47 const ImageInfo& info);
46 48
47[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level); 49[[nodiscard]] u32 CalculateLevelStrideAlignment(const ImageInfo& info, u32 level);
48 50
@@ -51,21 +53,18 @@ struct OverlapResult {
51 53
52[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept; 54[[nodiscard]] ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept;
53 55
54[[nodiscard]] std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, 56[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeShrinkImageCopies(
55 const ImageInfo& src, 57 const ImageInfo& dst, const ImageInfo& src, SubresourceBase base, u32 up_scale = 1,
56 SubresourceBase base, u32 up_scale = 1, 58 u32 down_shift = 0);
57 u32 down_shift = 0);
58 59
59[[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, 60[[nodiscard]] boost::container::small_vector<ImageCopy, 16> MakeReinterpretImageCopies(
60 u32 up_scale = 1, 61 const ImageInfo& src, u32 up_scale = 1, u32 down_shift = 0);
61 u32 down_shift = 0);
62 62
63[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); 63[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
64 64
65[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, 65[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(
66 GPUVAddr gpu_addr, const ImageInfo& info, 66 Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
67 std::span<const u8> input, 67 std::span<const u8> input, std::span<u8> output);
68 std::span<u8> output);
69 68
70[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, 69[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
71 const ImageBase& image, std::span<u8> output); 70 const ImageBase& image, std::span<u8> output);
@@ -73,13 +72,15 @@ struct OverlapResult {
73void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, 72void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
74 std::span<BufferImageCopy> copies); 73 std::span<BufferImageCopy> copies);
75 74
76[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info); 75[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
76 const ImageInfo& info);
77 77
78[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level); 78[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
79 79
80[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level); 80[[nodiscard]] Extent3D MipBlockSize(const ImageInfo& info, u32 level);
81 81
82[[nodiscard]] std::vector<SwizzleParameters> FullUploadSwizzles(const ImageInfo& info); 82[[nodiscard]] boost::container::small_vector<SwizzleParameters, 16> FullUploadSwizzles(
83 const ImageInfo& info);
83 84
84void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, 85void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
85 std::span<const BufferImageCopy> copies, std::span<const u8> memory, 86 std::span<const BufferImageCopy> copies, std::span<const u8> memory,
diff --git a/src/video_core/transform_feedback.cpp b/src/video_core/transform_feedback.cpp
index 155599316..1f353d2df 100644
--- a/src/video_core/transform_feedback.cpp
+++ b/src/video_core/transform_feedback.cpp
@@ -13,7 +13,7 @@
13 13
14namespace VideoCommon { 14namespace VideoCommon {
15 15
16std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 16std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
17 const TransformFeedbackState& state) { 17 const TransformFeedbackState& state) {
18 static constexpr std::array VECTORS{ 18 static constexpr std::array VECTORS{
19 28U, // gl_Position 19 28U, // gl_Position
@@ -62,7 +62,8 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
62 216U, // gl_TexCoord[6] 62 216U, // gl_TexCoord[6]
63 220U, // gl_TexCoord[7] 63 220U, // gl_TexCoord[7]
64 }; 64 };
65 std::vector<Shader::TransformFeedbackVarying> xfb(256); 65 std::array<Shader::TransformFeedbackVarying, 256> xfb{};
66 u32 count{0};
66 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) { 67 for (size_t buffer = 0; buffer < state.layouts.size(); ++buffer) {
67 const auto& locations = state.varyings[buffer]; 68 const auto& locations = state.varyings[buffer];
68 const auto& layout = state.layouts[buffer]; 69 const auto& layout = state.layouts[buffer];
@@ -103,11 +104,12 @@ std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
103 } 104 }
104 } 105 }
105 xfb[attribute] = varying; 106 xfb[attribute] = varying;
107 count = std::max(count, attribute);
106 highest = std::max(highest, (base_offset + varying.components) * 4); 108 highest = std::max(highest, (base_offset + varying.components) * 4);
107 } 109 }
108 UNIMPLEMENTED_IF(highest != layout.stride); 110 UNIMPLEMENTED_IF(highest != layout.stride);
109 } 111 }
110 return xfb; 112 return {xfb, count + 1};
111} 113}
112 114
113} // namespace VideoCommon 115} // namespace VideoCommon
diff --git a/src/video_core/transform_feedback.h b/src/video_core/transform_feedback.h
index d13eb16c3..401b1352a 100644
--- a/src/video_core/transform_feedback.h
+++ b/src/video_core/transform_feedback.h
@@ -24,7 +24,7 @@ struct TransformFeedbackState {
24 varyings; 24 varyings;
25}; 25};
26 26
27std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings( 27std::pair<std::array<Shader::TransformFeedbackVarying, 256>, u32> MakeTransformFeedbackVaryings(
28 const TransformFeedbackState& state); 28 const TransformFeedbackState& state);
29 29
30} // namespace VideoCommon 30} // namespace VideoCommon
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index fa9cde75b..b11abe311 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -316,6 +316,7 @@ NvidiaArchitecture GetNvidiaArchitecture(vk::PhysicalDevice physical,
316std::vector<const char*> ExtensionListForVulkan( 316std::vector<const char*> ExtensionListForVulkan(
317 const std::set<std::string, std::less<>>& extensions) { 317 const std::set<std::string, std::less<>>& extensions) {
318 std::vector<const char*> output; 318 std::vector<const char*> output;
319 output.reserve(extensions.size());
319 for (const auto& extension : extensions) { 320 for (const auto& extension : extensions) {
320 output.push_back(extension.c_str()); 321 output.push_back(extension.c_str());
321 } 322 }